diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c index 2c0a40f01..fc94f1b88 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core.c @@ -27,65 +27,7 @@ FILE *dfile; FILE *testfile; #endif -// Square root of Hanning window in Q14. -#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON) -// Table is defined in an ARM assembly file. -extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END; -#else -static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { - 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, - 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, - 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, - 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, - 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, - 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, - 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, - 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 -}; -#endif - -#ifdef AECM_WITH_ABS_APPROX -//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation -static const uint16_t kAlpha1 = 32584; -//Q15 beta = 0.12967166976970 const Factor for magnitude approximation -static const uint16_t kBeta1 = 4249; -//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation -static const uint16_t kAlpha2 = 30879; -//Q15 beta = 0.33787806009150 const Factor for magnitude approximation -static const uint16_t kBeta2 = 11072; -//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation -static const uint16_t kAlpha3 = 26951; -//Q15 beta = 0.57762063060713 const Factor for magnitude approximation -static const uint16_t kBeta3 = 18927; -#endif - -// Initialization table for echo channel in 8 kHz -static const int16_t kChannelStored8kHz[PART_LEN1] = { - 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, - 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, - 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, - 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, - 1635, 1604, 1572, 1545, 1517, 1481, 1444, 1405, - 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247, - 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470, - 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, - 1676 -}; - -// Initialization table for echo channel in 16 kHz -static const int16_t kChannelStored16kHz[PART_LEN1] = { - 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, - 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, - 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, - 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, - 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102, - 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075, - 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288, - 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, - 3153 -}; - -static const int16_t kCosTable[] = { +const int16_t WebRtcAecm_kCosTable[] = { 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, 7595, 7540, 7483, 7424, 7362, @@ -128,7 +70,7 @@ static const int16_t kCosTable[] = { 8091, 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190 }; -static const int16_t kSinTable[] = { +const int16_t WebRtcAecm_kSinTable[] = { 0, 142, 285, 428, 571, 713, 856, 998, 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200, @@ -176,15 +118,31 @@ static const int16_t kSinTable[] = { -1140, -998, -856, -713, -571, -428, -285, -142 }; -static const int16_t kNoiseEstQDomain = 15; -static const int16_t kNoiseEstIncCount = 5; +// Initialization table for echo channel in 8 kHz +static const int16_t kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, + 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, + 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, + 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, + 1635, 1604, 1572, 1545, 1517, 1481, 1444, 1405, + 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247, + 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470, + 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, + 1676 +}; -static void ComfortNoise(AecmCore_t* aecm, - const uint16_t* dfa, - complex16_t* out, - const int16_t* lambda); - -static int16_t CalcSuppressionGain(AecmCore_t * const aecm); +// Initialization table for echo channel in 16 kHz +static const int16_t kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, + 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, + 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, + 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, + 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102, + 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075, + 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288, + 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, + 3153 +}; // Moves the pointer to the next entry and inserts |far_spectrum| and // corresponding Q-domain in its buffer. @@ -194,9 +152,9 @@ static int16_t CalcSuppressionGain(AecmCore_t * const aecm); // - far_spectrum : Pointer to the far end spectrum // - far_q : Q-domain of far end spectrum // -static void UpdateFarHistory(AecmCore_t* self, - uint16_t* far_spectrum, - int far_q) { +void WebRtcAecm_UpdateFarHistory(AecmCore_t* self, + uint16_t* far_spectrum, + int far_q) { // Get new buffer position self->far_history_pos++; if (self->far_history_pos >= MAX_DELAY) { @@ -227,7 +185,9 @@ static void UpdateFarHistory(AecmCore_t* self, // - far_spectrum : Pointer to the aligned far end spectrum // NULL - Error // -static const uint16_t* AlignedFarend(AecmCore_t* self, int* far_q, int delay) { +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore_t* self, + int* far_q, + int delay) { int buffer_position = 0; assert(self != NULL); buffer_position = self->far_history_pos - delay; @@ -351,85 +311,6 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const int16_t* echo_path) aecm->mseChannelCount = 0; } -static void WindowAndFFT(AecmCore_t* aecm, - int16_t* fft, - const int16_t* time_signal, - complex16_t* freq_signal, - int time_signal_scaling) { - int i = 0; - - // FFT of signal - for (i = 0; i < PART_LEN; i++) { - // Window time domain signal and insert into real part of - // transformation array |fft| - fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( - (time_signal[i] << time_signal_scaling), - WebRtcAecm_kSqrtHanning[i], - 14); - fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( - (time_signal[i + PART_LEN] << time_signal_scaling), - WebRtcAecm_kSqrtHanning[PART_LEN - i], - 14); - } - - // Do forward FFT, then take only the first PART_LEN complex samples, - // and change signs of the imaginary parts. - WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); - for (i = 0; i < PART_LEN; i++) { - freq_signal[i].imag = -freq_signal[i].imag; - } -} - -static void InverseFFTAndWindow(AecmCore_t* aecm, - int16_t* fft, - complex16_t* efw, - int16_t* output, - const int16_t* nearendClean) -{ - int i, j, outCFFT; - int32_t tmp32no1; - // Reuse |efw| for the inverse FFT output after transferring - // the contents to |fft|. - int16_t* ifft_out = (int16_t*)efw; - - // Synthesis - for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { - fft[j] = efw[i].real; - fft[j + 1] = -efw[i].imag; - } - fft[0] = efw[0].real; - fft[1] = -efw[0].imag; - - fft[PART_LEN2] = efw[PART_LEN].real; - fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; - - // Inverse FFT. Keep outCFFT to scale the samples in the next block. - outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); - for (i = 0; i < PART_LEN; i++) { - ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); - tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], - outCFFT - aecm->dfaCleanQDomain); - output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, - tmp32no1 + aecm->outBuf[i], WEBRTC_SPL_WORD16_MIN); - - tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i], - WebRtcAecm_kSqrtHanning[PART_LEN - i], 14); - tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, - outCFFT - aecm->dfaCleanQDomain); - aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN); - } - - // Copy the current block to the old position (aecm->outBuf is shifted elsewhere) - memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); - memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, sizeof(int16_t) * PART_LEN); - if (nearendClean != NULL) - { - memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(int16_t) * PART_LEN); - } -} - static void CalcLinearEnergiesC(AecmCore_t* aecm, const uint16_t* far_spectrum, int32_t* echo_est, @@ -509,6 +390,18 @@ static void WebRtcAecm_InitNeon(void) } #endif +// Initialize function pointers for MIPS platform. +#if defined(MIPS32_LE) +static void WebRtcAecm_InitMips(void) +{ +#if defined(MIPS_DSP_R1_LE) + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips; +#endif + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips; +} +#endif + // WebRtcAecm_InitCore(...) // // This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) @@ -646,6 +539,9 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) WebRtcAecm_InitNeon(); #endif +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif return 0; } @@ -1265,7 +1161,7 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, // level (Q14). // // -static int16_t CalcSuppressionGain(AecmCore_t * const aecm) +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm) { int32_t tmp32no1; @@ -1334,639 +1230,6 @@ static int16_t CalcSuppressionGain(AecmCore_t * const aecm) return aecm->supGain; } -// Transforms a time domain signal into the frequency domain, outputting the -// complex valued signal, absolute value and sum of absolute values. -// -// time_signal [in] Pointer to time domain signal -// freq_signal_real [out] Pointer to real part of frequency domain array -// freq_signal_imag [out] Pointer to imaginary part of frequency domain -// array -// freq_signal_abs [out] Pointer to absolute value of frequency domain -// array -// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in -// the frequency domain array -// return value The Q-domain of current frequency values -// -static int TimeToFrequencyDomain(AecmCore_t* aecm, - const int16_t* time_signal, - complex16_t* freq_signal, - uint16_t* freq_signal_abs, - uint32_t* freq_signal_sum_abs) -{ - int i = 0; - int time_signal_scaling = 0; - - int32_t tmp32no1 = 0; - int32_t tmp32no2 = 0; - - // In fft_buf, +16 for 32-byte alignment. - int16_t fft_buf[PART_LEN4 + 16]; - int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); - - int16_t tmp16no1; -#ifndef WEBRTC_ARCH_ARM_V7 - int16_t tmp16no2; -#endif -#ifdef AECM_WITH_ABS_APPROX - int16_t max_value = 0; - int16_t min_value = 0; - uint16_t alpha = 0; - uint16_t beta = 0; -#endif - -#ifdef AECM_DYNAMIC_Q - tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); - time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); -#endif - - WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); - - // Extract imaginary and real part, calculate the magnitude for all frequency bins - freq_signal[0].imag = 0; - freq_signal[PART_LEN].imag = 0; - freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16( - freq_signal[0].real); - freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( - freq_signal[PART_LEN].real); - (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + - (uint32_t)(freq_signal_abs[PART_LEN]); - - for (i = 1; i < PART_LEN; i++) - { - if (freq_signal[i].real == 0) - { - freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( - freq_signal[i].imag); - } - else if (freq_signal[i].imag == 0) - { - freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( - freq_signal[i].real); - } - else - { - // Approximation for magnitude of complex fft output - // magn = sqrt(real^2 + imag^2) - // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) - // - // The parameters alpha and beta are stored in Q15 - -#ifdef AECM_WITH_ABS_APPROX - tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); - tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); - - if(tmp16no1 > tmp16no2) - { - max_value = tmp16no1; - min_value = tmp16no2; - } else - { - max_value = tmp16no2; - min_value = tmp16no1; - } - - // Magnitude in Q(-6) - if ((max_value >> 2) > min_value) - { - alpha = kAlpha1; - beta = kBeta1; - } else if ((max_value >> 1) > min_value) - { - alpha = kAlpha2; - beta = kBeta2; - } else - { - alpha = kAlpha3; - beta = kBeta3; - } - tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value, - alpha, - 15); - tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value, - beta, - 15); - freq_signal_abs[i] = (uint16_t)tmp16no1 + - (uint16_t)tmp16no2; -#else -#ifdef WEBRTC_ARCH_ARM_V7 - __asm __volatile( - "smulbb %[tmp32no1], %[real], %[real]\n\t" - "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" - :[tmp32no1]"+r"(tmp32no1), - [tmp32no2]"=r"(tmp32no2) - :[real]"r"(freq_signal[i].real), - [imag]"r"(freq_signal[i].imag) - ); -#else - tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); - tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); - tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); - tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); - tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); -#endif // WEBRTC_ARCH_ARM_V7 - tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); - - freq_signal_abs[i] = (uint16_t)tmp32no1; -#endif // AECM_WITH_ABS_APPROX - } - (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; - } - - return time_signal_scaling; -} - -int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, - const int16_t * farend, - const int16_t * nearendNoisy, - const int16_t * nearendClean, - int16_t * output) -{ - int i; - - uint32_t xfaSum; - uint32_t dfaNoisySum; - uint32_t dfaCleanSum; - uint32_t echoEst32Gained; - uint32_t tmpU32; - - int32_t tmp32no1; - - uint16_t xfa[PART_LEN1]; - uint16_t dfaNoisy[PART_LEN1]; - uint16_t dfaClean[PART_LEN1]; - uint16_t* ptrDfaClean = dfaClean; - const uint16_t* far_spectrum_ptr = NULL; - - // 32 byte aligned buffers (with +8 or +16). - // TODO (kma): define fft with complex16_t. - int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. - int32_t echoEst32_buf[PART_LEN1 + 8]; - int32_t dfw_buf[PART_LEN2 + 8]; - int32_t efw_buf[PART_LEN2 + 8]; - - int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31); - int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); - complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31); - complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31); - - int16_t hnl[PART_LEN1]; - int16_t numPosCoef = 0; - int16_t nlpGain = ONE_Q14; - int delay; - int16_t tmp16no1; - int16_t tmp16no2; - int16_t mu; - int16_t supGain; - int16_t zeros32, zeros16; - int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; - int far_q; - int16_t resolutionDiff, qDomainDiff; - - const int kMinPrefBand = 4; - const int kMaxPrefBand = 24; - int32_t avgHnl32 = 0; - - // Determine startup state. There are three states: - // (0) the first CONV_LEN blocks - // (1) another CONV_LEN blocks - // (2) the rest - - if (aecm->startupState < 2) - { - aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); - } - // END: Determine startup state - - // Buffer near and far end signals - memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); - memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); - if (nearendClean != NULL) - { - memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(int16_t) * PART_LEN); - } - - // Transform far end signal from time domain to frequency domain. - far_q = TimeToFrequencyDomain(aecm, - aecm->xBuf, - dfw, - xfa, - &xfaSum); - - // Transform noisy near end signal from time domain to frequency domain. - zerosDBufNoisy = TimeToFrequencyDomain(aecm, - aecm->dBufNoisy, - dfw, - dfaNoisy, - &dfaNoisySum); - aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; - aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; - - - if (nearendClean == NULL) - { - ptrDfaClean = dfaNoisy; - aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; - aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; - dfaCleanSum = dfaNoisySum; - } else - { - // Transform clean near end signal from time domain to frequency domain. - zerosDBufClean = TimeToFrequencyDomain(aecm, - aecm->dBufClean, - dfw, - dfaClean, - &dfaCleanSum); - aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; - aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; - } - - // Get the delay - // Save far-end history and estimate delay - UpdateFarHistory(aecm, xfa, far_q); - if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, - far_q) == -1) { - return -1; - } - delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, - dfaNoisy, - PART_LEN1, - zerosDBufNoisy); - if (delay == -1) - { - return -1; - } - else if (delay == -2) - { - // If the delay is unknown, we assume zero. - // NOTE: this will have to be adjusted if we ever add lookahead. - delay = 0; - } - - if (aecm->fixedDelay >= 0) - { - // Use fixed delay - delay = aecm->fixedDelay; - } - - // Get aligned far end spectrum - far_spectrum_ptr = AlignedFarend(aecm, &far_q, delay); - zerosXBuf = (int16_t) far_q; - if (far_spectrum_ptr == NULL) - { - return -1; - } - - // Calculate log(energy) and update energy threshold levels - WebRtcAecm_CalcEnergies(aecm, - far_spectrum_ptr, - zerosXBuf, - dfaNoisySum, - echoEst32); - - // Calculate stepsize - mu = WebRtcAecm_CalcStepSize(aecm); - - // Update counters - aecm->totCount++; - - // This is the channel estimation algorithm. - // It is base on NLMS but has a variable step length, which was calculated above. - WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu, echoEst32); - supGain = CalcSuppressionGain(aecm); - - - // Calculate Wiener filter hnl[] - for (i = 0; i < PART_LEN1; i++) - { - // Far end signal through channel estimate in Q8 - // How much can we shift right to preserve resolution - tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; - aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8); - - zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; - zeros16 = WebRtcSpl_NormW16(supGain) + 1; - if (zeros32 + zeros16 > 16) - { - // Multiplication is safe - // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) - echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], - (uint16_t)supGain); - resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); - } else - { - tmp16no1 = 17 - zeros32 - zeros16; - resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); - if (zeros32 > tmp16no1) - { - echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], - (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain, - tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) - } else - { - // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) - echoEst32Gained = WEBRTC_SPL_UMUL_32_16( - (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1), - (uint16_t)supGain); - } - } - - zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); - if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld)) - & (aecm->nearFilt[i])) - { - tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16); - qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; - } else - { - tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], - aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld); - qDomainDiff = 0; - } - tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); - tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); - tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); - tmp16no2 += tmp16no1; - zeros16 = WebRtcSpl_NormW16(tmp16no2); - if ((tmp16no2) & (-qDomainDiff > zeros16)) - { - aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; - } else - { - aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff); - } - - // Wiener filter coefficients, resulting hnl in Q14 - if (echoEst32Gained == 0) - { - hnl[i] = ONE_Q14; - } else if (aecm->nearFilt[i] == 0) - { - hnl[i] = 0; - } else - { - // Multiply the suppression gain - // Rounding - echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); - tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]); - - // Current resolution is - // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32)) - // Make sure we are in Q14 - tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); - if (tmp32no1 > ONE_Q14) - { - hnl[i] = 0; - } else if (tmp32no1 < 0) - { - hnl[i] = ONE_Q14; - } else - { - // 1-echoEst/dfa - hnl[i] = ONE_Q14 - (int16_t)tmp32no1; - if (hnl[i] < 0) - { - hnl[i] = 0; - } - } - } - if (hnl[i]) - { - numPosCoef++; - } - } - // Only in wideband. Prevent the gain in upper band from being larger than - // in lower band. - if (aecm->mult == 2) - { - // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause - // speech distortion in double-talk. - for (i = 0; i < PART_LEN1; i++) - { - hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14); - } - - for (i = kMinPrefBand; i <= kMaxPrefBand; i++) - { - avgHnl32 += (int32_t)hnl[i]; - } - assert(kMaxPrefBand - kMinPrefBand + 1 > 0); - avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); - - for (i = kMaxPrefBand; i < PART_LEN1; i++) - { - if (hnl[i] > (int16_t)avgHnl32) - { - hnl[i] = (int16_t)avgHnl32; - } - } - } - - // Calculate NLP gain, result is in Q14 - if (aecm->nlpFlag) - { - for (i = 0; i < PART_LEN1; i++) - { - // Truncate values close to zero and one. - if (hnl[i] > NLP_COMP_HIGH) - { - hnl[i] = ONE_Q14; - } else if (hnl[i] < NLP_COMP_LOW) - { - hnl[i] = 0; - } - - // Remove outliers - if (numPosCoef < 3) - { - nlpGain = 0; - } else - { - nlpGain = ONE_Q14; - } - - // NLP - if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) - { - hnl[i] = ONE_Q14; - } else - { - hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14); - } - - // multiply with Wiener coefficients - efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, - hnl[i], 14)); - efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, - hnl[i], 14)); - } - } - else - { - // multiply with Wiener coefficients - for (i = 0; i < PART_LEN1; i++) - { - efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, - hnl[i], 14)); - efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, - hnl[i], 14)); - } - } - - if (aecm->cngMode == AecmTrue) - { - ComfortNoise(aecm, ptrDfaClean, efw, hnl); - } - - InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); - - return 0; -} - - -// Generate comfort noise and add to output signal. -// -// \param[in] aecm Handle of the AECM instance. -// \param[in] dfa Absolute value of the nearend signal (Q[aecm->dfaQDomain]). -// \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]). -// \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]). -// \param[in] lambda Suppression gain with which to scale the noise level (Q14). -// -static void ComfortNoise(AecmCore_t* aecm, - const uint16_t* dfa, - complex16_t* out, - const int16_t* lambda) -{ - int16_t i; - int16_t tmp16; - int32_t tmp32; - - int16_t randW16[PART_LEN]; - int16_t uReal[PART_LEN1]; - int16_t uImag[PART_LEN1]; - int32_t outLShift32; - int16_t noiseRShift16[PART_LEN1]; - - int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; - int16_t minTrackShift; - - assert(shiftFromNearToNoise >= 0); - assert(shiftFromNearToNoise < 16); - - if (aecm->noiseEstCtr < 100) - { - // Track the minimum more quickly initially. - aecm->noiseEstCtr++; - minTrackShift = 6; - } else - { - minTrackShift = 9; - } - - // Estimate noise power. - for (i = 0; i < PART_LEN1; i++) - { - - // Shift to the noise domain. - tmp32 = (int32_t)dfa[i]; - outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); - - if (outLShift32 < aecm->noiseEst[i]) - { - // Reset "too low" counter - aecm->noiseEstTooLowCtr[i] = 0; - // Track the minimum. - if (aecm->noiseEst[i] < (1 << minTrackShift)) - { - // For small values, decrease noiseEst[i] every - // |kNoiseEstIncCount| block. The regular approach below can not - // go further down due to truncation. - aecm->noiseEstTooHighCtr[i]++; - if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) - { - aecm->noiseEst[i]--; - aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter - } - } - else - { - aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) >> minTrackShift); - } - } else - { - // Reset "too high" counter - aecm->noiseEstTooHighCtr[i] = 0; - // Ramp slowly upwards until we hit the minimum again. - if ((aecm->noiseEst[i] >> 19) > 0) - { - // Avoid overflow. - // Multiplication with 2049 will cause wrap around. Scale - // down first and then multiply - aecm->noiseEst[i] >>= 11; - aecm->noiseEst[i] *= 2049; - } - else if ((aecm->noiseEst[i] >> 11) > 0) - { - // Large enough for relative increase - aecm->noiseEst[i] *= 2049; - aecm->noiseEst[i] >>= 11; - } - else - { - // Make incremental increases based on size every - // |kNoiseEstIncCount| block - aecm->noiseEstTooLowCtr[i]++; - if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) - { - aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; - aecm->noiseEstTooLowCtr[i] = 0; // Reset counter - } - } - } - } - - for (i = 0; i < PART_LEN1; i++) - { - tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise); - if (tmp32 > 32767) - { - tmp32 = 32767; - aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); - } - noiseRShift16[i] = (int16_t)tmp32; - - tmp16 = ONE_Q14 - lambda[i]; - noiseRShift16[i] - = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14); - } - - // Generate a uniform random array on [0 2^15-1]. - WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); - - // Generate noise according to estimated energy. - uReal[0] = 0; // Reject LF noise. - uImag[0] = 0; - for (i = 1; i < PART_LEN1; i++) - { - // Get a random index for the cos and sin tables over [0 359]. - tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15); - - // Tables are in Q13. - uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i], - kCosTable[tmp16], 13); - uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i], - kSinTable[tmp16], 13); - } - uImag[PART_LEN] = 0; - - for (i = 0; i < PART_LEN1; i++) - { - out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); - out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); - } -} - void WebRtcAecm_BufferFarFrame(AecmCore_t* const aecm, const int16_t* const farend, const int farLen) diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h index 64251d522..e56ede690 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.h +++ b/webrtc/modules/audio_processing/aecm/aecm_core.h @@ -272,6 +272,125 @@ void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, int16_t * const farend, const int farLen, const int knownDelay); + +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() +// +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore_t* self, + uint16_t* far_spectrum, + int far_q); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore_t* self, + int* far_q, + int delay); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t * echoEst); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore_t * const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t * const dfa, + const int16_t mu, + int32_t * echoEst); + +extern const int16_t WebRtcAecm_kCosTable[]; +extern const int16_t WebRtcAecm_kSinTable[]; + /////////////////////////////////////////////////////////////////////////////// // Some function pointers, for internal functions shared by ARM NEON and // generic C code. @@ -312,4 +431,20 @@ void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore_t* aecm, void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm); #endif +#if defined(MIPS32_LE) +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm); +#endif +#endif + #endif diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/webrtc/modules/audio_processing/aecm/aecm_core_c.c new file mode 100644 index 000000000..63d4ac902 --- /dev/null +++ b/webrtc/modules/audio_processing/aecm/aecm_core_c.c @@ -0,0 +1,792 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include +#include +#include + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "webrtc/modules/audio_processing/utility/ring_buffer.h" +#include "webrtc/system_wrappers/interface/compile_assert_c.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" +#include "webrtc/typedefs.h" + +// Square root of Hanning window in Q14. +#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON) +// Table is defined in an ARM assembly file. +extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END; +#else +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; +#endif + +#ifdef AECM_WITH_ABS_APPROX +//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation +static const uint16_t kAlpha1 = 32584; +//Q15 beta = 0.12967166976970 const Factor for magnitude approximation +static const uint16_t kBeta1 = 4249; +//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation +static const uint16_t kAlpha2 = 30879; +//Q15 beta = 0.33787806009150 const Factor for magnitude approximation +static const uint16_t kBeta2 = 11072; +//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation +static const uint16_t kAlpha3 = 26951; +//Q15 beta = 0.57762063060713 const Factor for magnitude approximation +static const uint16_t kBeta3 = 18927; +#endif + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static void ComfortNoise(AecmCore_t* aecm, + const uint16_t* dfa, + complex16_t* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore_t* aecm, + int16_t* fft, + const int16_t* time_signal, + complex16_t* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array |fft| + fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( + (time_signal[i] << time_signal_scaling), + WebRtcAecm_kSqrtHanning[i], + 14); + fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( + (time_signal[i + PART_LEN] << time_signal_scaling), + WebRtcAecm_kSqrtHanning[PART_LEN - i], + 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore_t* aecm, + int16_t* fft, + complex16_t* efw, + int16_t* output, + const int16_t* nearendClean) +{ + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse |efw| for the inverse FFT output after transferring + // the contents to |fft|. + int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. + outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i], + WebRtcAecm_kSqrtHanning[PART_LEN - i], + 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, + outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore_t* aecm, + const int16_t* time_signal, + complex16_t* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) +{ + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. + int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if(tmp16no1 > tmp16no2) + { + max_value = tmp16no1; + min_value = tmp16no2; + } else + { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value, alpha, 15); + tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value, beta, 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + :[tmp32no1]"+r"(tmp32no1), + [tmp32no2]"=r"(tmp32no2) + :[real]"r"(freq_signal[i].real), + [imag]"r"(freq_signal[i].imag) + ); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, + const int16_t * farend, + const int16_t * nearendNoisy, + const int16_t * nearendClean, + int16_t * output) +{ + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO (kma): define fft with complex16_t. + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); + complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31); + complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) + { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + + if (nearendClean == NULL) + { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else + { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, + xfa, + PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) + { + return -1; + } + else if (delay == -2) + { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) + { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + if (far_spectrum_ptr == NULL) + { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. + WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) + { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, + 50), 8); + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) + { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+ + // aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else + { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) + { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)WEBRTC_SPL_RSHIFT_W16( + supGain, + tmp16no1) + ); + } else + { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)WEBRTC_SPL_RSHIFT_W32( + aecm->echoFilt[i], + tmp16no1), + (uint16_t)supGain); + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld)) + & (aecm->nearFilt[i])) + { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16); + qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; + } else + { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], + aecm->dfaCleanQDomain - + aecm->dfaCleanQDomainOld); + qDomainDiff = 0; + } + tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) + { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else + { + aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff); + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) + { + hnl[i] = ONE_Q14; + } else if (aecm->nearFilt[i] == 0) + { + hnl[i] = 0; + } else + { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) + { + hnl[i] = 0; + } else if (tmp32no1 < 0) + { + hnl[i] = ONE_Q14; + } else + { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] < 0) + { + hnl[i] = 0; + } + } + } + if (hnl[i]) + { + numPosCoef++; + } + } + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) + { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. + for (i = 0; i < PART_LEN1; i++) + { + hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) + { + avgHnl32 += (int32_t)hnl[i]; + } + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) + { + if (hnl[i] > (int16_t)avgHnl32) + { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) + { + for (i = 0; i < PART_LEN1; i++) + { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) + { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) + { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) + { + nlpGain = 0; + } else + { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) + { + hnl[i] = ONE_Q14; + } else + { + hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + else + { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) + { + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) + { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + + +static void ComfortNoise(AecmCore_t* aecm, + const uint16_t* dfa, + complex16_t* out, + const int16_t* lambda) +{ + int16_t i; + int16_t tmp16; + int32_t tmp32; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + int16_t noiseRShift16[PART_LEN1]; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) + { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } else + { + minTrackShift = 9; + } + + // Estimate noise power. + for (i = 0; i < PART_LEN1; i++) + { + // Shift to the noise domain. + tmp32 = (int32_t)dfa[i]; + outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); + + if (outLShift32 < aecm->noiseEst[i]) + { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (aecm->noiseEst[i] < (1 << minTrackShift)) + { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i]--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } + else + { + aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) + >> minTrackShift); + } + } else + { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((aecm->noiseEst[i] >> 19) > 0) + { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } + else if ((aecm->noiseEst[i] >> 11) > 0) + { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } + else + { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) + { + tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise); + if (tmp32 > 32767) + { + tmp32 = 32767; + aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, + noiseRShift16[i], + 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) + { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15); + + // Tables are in Q13. + uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i], + WebRtcAecm_kCosTable[tmp16], + 13); + uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i], + WebRtcAecm_kSinTable[tmp16], + 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) + { + out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); + out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); + } +} + diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c new file mode 100644 index 000000000..6a231b384 --- /dev/null +++ b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c @@ -0,0 +1,1571 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include + +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" + +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static int16_t coefTable[] = { + 0, 4, 256, 260, 128, 132, 384, 388, + 64, 68, 320, 324, 192, 196, 448, 452, + 32, 36, 288, 292, 160, 164, 416, 420, + 96, 100, 352, 356, 224, 228, 480, 484, + 16, 20, 272, 276, 144, 148, 400, 404, + 80, 84, 336, 340, 208, 212, 464, 468, + 48, 52, 304, 308, 176, 180, 432, 436, + 112, 116, 368, 372, 240, 244, 496, 500, + 8, 12, 264, 268, 136, 140, 392, 396, + 72, 76, 328, 332, 200, 204, 456, 460, + 40, 44, 296, 300, 168, 172, 424, 428, + 104, 108, 360, 364, 232, 236, 488, 492, + 24, 28, 280, 284, 152, 156, 408, 412, + 88, 92, 344, 348, 216, 220, 472, 476, + 56, 60, 312, 316, 184, 188, 440, 444, + 120, 124, 376, 380, 248, 252, 504, 508 +}; + +static int16_t coefTable_ifft[] = { + 0, 512, 256, 508, 128, 252, 384, 380, + 64, 124, 320, 444, 192, 188, 448, 316, + 32, 60, 288, 476, 160, 220, 416, 348, + 96, 92, 352, 412, 224, 156, 480, 284, + 16, 28, 272, 492, 144, 236, 400, 364, + 80, 108, 336, 428, 208, 172, 464, 300, + 48, 44, 304, 460, 176, 204, 432, 332, + 112, 76, 368, 396, 240, 140, 496, 268, + 8, 12, 264, 500, 136, 244, 392, 372, + 72, 116, 328, 436, 200, 180, 456, 308, + 40, 52, 296, 468, 168, 212, 424, 340, + 104, 84, 360, 404, 232, 148, 488, 276, + 24, 20, 280, 484, 152, 228, 408, 356, + 88, 100, 344, 420, 216, 164, 472, 292, + 56, 36, 312, 452, 184, 196, 440, 324, + 120, 68, 376, 388, 248, 132, 504, 260 +}; + +static void ComfortNoise(AecmCore_t* aecm, + const uint16_t* dfa, + complex16_t* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore_t* aecm, + int16_t* fft, + const int16_t* time_signal, + complex16_t* freq_signal, + int time_signal_scaling) { + int i, j; + int32_t tmp1, tmp2, tmp3, tmp4; + int16_t* pfrfi; + complex16_t* pfreq_signal; + int16_t f_coef, s_coef; + int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; + int32_t hann, hann1, coefs; + + memset(fft, 0, sizeof(int16_t) * PART_LEN4); + + // FFT of signal + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[shift], %[time_signal_scaling], -14 \n\t" + "addiu %[i], $zero, 64 \n\t" + "addiu %[load_ptr], %[time_signal], 0 \n\t" + "addiu %[hann], %[hanning], 0 \n\t" + "addiu %[hann1], %[hanning], 128 \n\t" + "addiu %[coefs], %[coefTable], 0 \n\t" + "bltz %[shift], 2f \n\t" + " negu %[shift1], %[shift] \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: \n\t" + ".set pop \n\t" + : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann), + [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs), + [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef), + [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1), + [store_ptr2] "=&r" (store_ptr2) + : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable), + [time_signal_scaling] "r" (time_signal_scaling), + [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft) + : "memory", "hi", "lo" + ); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[j], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pfrfi]) \n\t" + "lh %[tmp2], 2(%[pfrfi]) \n\t" + "lh %[tmp3], 4(%[pfrfi]) \n\t" + "lh %[tmp4], 6(%[pfrfi]) \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 0(%[pfreq_signal]) \n\t" + "sh %[tmp2], 2(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 4(%[pfreq_signal]) \n\t" + "sh %[tmp4], 6(%[pfreq_signal]) \n\t" + "lh %[tmp1], 8(%[pfrfi]) \n\t" + "lh %[tmp2], 10(%[pfrfi]) \n\t" + "lh %[tmp3], 12(%[pfrfi]) \n\t" + "lh %[tmp4], 14(%[pfrfi]) \n\t" + "addiu %[j], %[j], -8 \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 8(%[pfreq_signal]) \n\t" + "sh %[tmp2], 10(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 12(%[pfreq_signal]) \n\t" + "sh %[tmp4], 14(%[pfreq_signal]) \n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t" + "bgtz %[j], 1b \n\t" + " addiu %[pfrfi], %[pfrfi], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal), + [tmp4] "=&r" (tmp4) + : + : "memory" + ); +} + +static void InverseFFTAndWindow(AecmCore_t* aecm, + int16_t* fft, + complex16_t* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + complex16_t* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; + int16_t* output1 = output; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 0(%[pefw]) \n\t" + "lh %[tmp_im], 2(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 4(%[pefw]) \n\t" + "lh %[tmp_im], 6(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 8(%[pefw]) \n\t" + "lh %[tmp_im], 10(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 12(%[pefw]) \n\t" + "lh %[tmp_im], 14(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t" + "addiu %[i], %[i], -4 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pefw], %[pefw], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im), + [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft), + [fft] "+r" (fft) + : + : "memory" + ); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [ppfft] "+r" (ppfft) + : + : "memory" + ); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "11: \n\t" + "lh %[tmp1], 0(%[pfft]) \n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t" + "addiu %[i], %[i], -2 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 2(%[pfft]) \n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[tmp1], %[tmp1], 8192 \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "addiu %[tmp3], %[tmp3], 8192 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 1f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "b 2f \n\t" + " srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "1: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "2: \n\t" + "lh %[tmp4], 0(%[paecm_buf]) \n\t" + "lh %[tmp2], 2(%[paecm_buf]) \n\t" + "addu %[tmp3], %[tmp3], %[tmp2] \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 3f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "3: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 4f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "4: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp1], 0(%[output1]) \n\t" + "sh %[tmp3], 2(%[pfft]) \n\t" + "sh %[tmp3], 2(%[output1]) \n\t" + "lh %[tmp1], 128(%[pfft]) \n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 130(%[pfft]) \n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 5f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "b 6f \n\t" + " srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "5: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "6: \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 7f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "7: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 8f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "8: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) \n\t" + "sh %[tmp3], 2(%[paecm_buf]) \n\t" + "addiu %[output1], %[output1], 4 \n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 \n\t" + "addiu %[pfft], %[pfft], 4 \n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t" + "bgtz %[i], 11b \n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i), + [pp_kSqrtHanning] "+r" (pp_kSqrtHanning), + [p_kSqrtHanning] "+r" (p_kSqrtHanning) + : [out_aecm] "r" (out_aecm), + [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning) + : "hi", "lo","memory" + ); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN; i+= 4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0), + [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0), + [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3), + [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1), + [adept1] "=&r" (adept1), [echo1] "=&r" (echo1), + [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1), + [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p), + [spectrum_p] "+r" (spectrum_p) + : + : "hi", "lo", "memory" + ); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[PART_LEN], + far_spectrum[PART_LEN]); + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = (uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7) + : + : "hi", "lo", "memory" + ); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, + aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), + [temp3] "+r" (temp3), [temp0] "+r" (temp0) + : + : "memory" + ); + } + + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + (int32_t)aecm->channelStored[i], 16); +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore_t* aecm, + const int16_t* time_signal, + complex16_t* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) +{ + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. + int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile ( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i+=4) + { + __asm __volatile ( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu %[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23), + [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", + "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile ( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r" (freqs) + : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11), + [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13), + [freqabsp] "r" (freqabsp) + : "memory" + ); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore_t* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); + complex16_t* dfw = (complex16_t*)(((uint32_t)dfw_buf + 31) & ~ 31); + complex16_t* efw = (complex16_t*)(((uint32_t)efw_buf + 31) & ~ 31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, + nearendNoisy, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) { + return -1; + } + else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. + WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32( + WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8); + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16( + (uint32_t)aecm->echoFilt[i], + (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain, tmp16no1)); + } else { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16( + (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], + tmp16no1), + (uint16_t)supGain); + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld)) + & (aecm->nearFilt[i])) { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16); + qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; + tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); + } else { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], + aecm->dfaCleanQDomain + - aecm->dfaCleanQDomainOld); + qDomainDiff = 0; + tmp16no2 = ptrDfaClean[i]; + } + + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else { + aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff); + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else if (aecm->nearFilt[i] == 0) { + hnl[i] = 0; + } else { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN + // - max(0, 17 - zeros16 - zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) { + hnl[i] = 0; + } else if (tmp32no1 < 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] <= 0) { + hnl[i] = 0; + } else { + numPosCoef++; + } + } + } + } + + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. + for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + for(i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#else + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#endif + } + } + } + else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], + 14)); + efw[i].imag = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], + 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. +static void ComfortNoise(AecmCore_t* aecm, + const uint16_t* dfa, + complex16_t* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined (MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i+=2) { + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), + [tnoise] "=&r" (tnoise) + : [tmp1] "r" (tmp1), [dfap] "r" (dfap), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), + [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) + : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + __asm __volatile ( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), + [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) + : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i+1] = WEBRTC_SPL_LSHIFT_W32(tmp321, shiftFromNearToNoise); + } + + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=r" (nrsh1), [nrsh2] "=r" (nrsh2) + : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), + [tmp321] "r" (tmp321) + : "memory", "hi", "lo" + ); + + __asm __volatile ( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), + [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) + : [c359] "r" (c359) + : "memory", "hi", "lo" + ); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 = WebRtcAecm_kSinTable[tmp161]; +#else + __asm __volatile ( + "sll %[tmp16], %[tmp16], 1 \n\t" + "sll %[tmp161], %[tmp161], 1 \n\t" + "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" + "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" + "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" + "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" + : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), + [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) + : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), + [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) + : "memory" + ); +#endif + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" + "negu %[tmp162], %[nrsh1] \n\t" + "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" + "negu %[tmp163], %[nrsh2] \n\t" + "sra %[tmp32], %[tmp32], 13 \n\t" + "mul %[tmp321], %[tmp321], %[tmp162] \n\t" + "sra %[tmp322], %[tmp322], 13 \n\t" + "mul %[tmp323], %[tmp323], %[tmp163] \n\t" + "sra %[tmp321], %[tmp321], 13 \n\t" + "sra %[tmp323], %[tmp323], 13 \n\t" + : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162), + [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) + : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) + : "hi", "lo" + ); + // Tables are in Q13. + uReal[i] = (int16_t)tmp32; + uImag[i] = (int16_t)tmp321; + uReal[i + 1] = (int16_t)tmp322; + uImag[i + 1] = (int16_t)tmp323; + } + + int32_t tt, sgn; + tt = out[0].real; + sgn = ((int)tt) >> 31; + out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[0].imag; + sgn = ((int)tt) >> 31; + out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + for (i = 1; i < PART_LEN; i++) { + tt = out[i].real + uReal[i]; + sgn = ((int)tt) >> 31; + out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[i].imag + uImag[i]; + sgn = ((int)tt) >> 31; + out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + } + tt = out[PART_LEN].real + uReal[PART_LEN]; + sgn = ((int)tt) >> 31; + out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[PART_LEN].imag; + sgn = ((int)tt) >> 31; + out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); +} + diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 4d9988aec..555bdc346 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -120,6 +120,15 @@ ['(target_arch=="arm" and armv7==1) or target_arch=="armv7"', { 'dependencies': ['audio_processing_neon',], }], + ['target_arch=="mipsel"', { + 'sources': [ + 'aecm/aecm_core_mips.c', + ], + }, { + 'sources': [ + 'aecm/aecm_core_c.c', + ], + }], ], # TODO(jschuh): Bug 1348: fix size_t to int truncations. 'msvs_disabled_warnings': [ 4267, ],