diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c
index 2c0a40f01..fc94f1b88 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -27,65 +27,7 @@ FILE *dfile;
 FILE *testfile;
 #endif
 
-// Square root of Hanning window in Q14.
-#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
-// Table is defined in an ARM assembly file.
-extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
-#else
-static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
-  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
-  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
-  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
-  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
-  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
-  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
-  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
-  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
-};
-#endif
-
-#ifdef AECM_WITH_ABS_APPROX
-//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
-static const uint16_t kAlpha1 = 32584;
-//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
-static const uint16_t kBeta1 = 4249;
-//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
-static const uint16_t kAlpha2 = 30879;
-//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
-static const uint16_t kBeta2 = 11072;
-//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
-static const uint16_t kAlpha3 = 26951;
-//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
-static const uint16_t kBeta3 = 18927;
-#endif
-
-// Initialization table for echo channel in 8 kHz
-static const int16_t kChannelStored8kHz[PART_LEN1] = {
-    2040,   1815,   1590,   1498,   1405,   1395,   1385,   1418,
-    1451,   1506,   1562,   1644,   1726,   1804,   1882,   1918,
-    1953,   1982,   2010,   2025,   2040,   2034,   2027,   2021,
-    2014,   1997,   1980,   1925,   1869,   1800,   1732,   1683,
-    1635,   1604,   1572,   1545,   1517,   1481,   1444,   1405,
-    1367,   1331,   1294,   1270,   1245,   1239,   1233,   1247,
-    1260,   1282,   1303,   1338,   1373,   1407,   1441,   1470,
-    1499,   1524,   1549,   1565,   1582,   1601,   1621,   1649,
-    1676
-};
-
-// Initialization table for echo channel in 16 kHz
-static const int16_t kChannelStored16kHz[PART_LEN1] = {
-    2040,   1590,   1405,   1385,   1451,   1562,   1726,   1882,
-    1953,   2010,   2040,   2027,   2014,   1980,   1869,   1732,
-    1635,   1572,   1517,   1444,   1367,   1294,   1245,   1233,
-    1260,   1303,   1373,   1441,   1499,   1549,   1582,   1621,
-    1676,   1741,   1802,   1861,   1921,   1983,   2040,   2102,
-    2170,   2265,   2375,   2515,   2651,   2781,   2922,   3075,
-    3253,   3471,   3738,   3976,   4151,   4258,   4308,   4288,
-    4270,   4253,   4237,   4179,   4086,   3947,   3757,   3484,
-    3153
-};
-
-static const int16_t kCosTable[] = {
+const int16_t WebRtcAecm_kCosTable[] = {
     8192,  8190,  8187,  8180,  8172,  8160,  8147,  8130,  8112,
     8091,  8067,  8041,  8012,  7982,  7948,  7912,  7874,  7834,
     7791,  7745,  7697,  7647,  7595,  7540,  7483,  7424,  7362,
@@ -128,7 +70,7 @@ static const int16_t kCosTable[] = {
     8091,  8112,  8130,  8147,  8160,  8172,  8180,  8187,  8190
 };
 
-static const int16_t kSinTable[] = {
+const int16_t WebRtcAecm_kSinTable[] = {
        0,    142,    285,    428,    571,    713,    856,    998,
     1140,   1281,   1422,   1563,   1703,   1842,   1981,   2120,
     2258,   2395,   2531,   2667,   2801,   2935,   3068,   3200,
@@ -176,15 +118,31 @@ static const int16_t kSinTable[] = {
    -1140,   -998,   -856,   -713,   -571,   -428,   -285,   -142
 };
 
-static const int16_t kNoiseEstQDomain = 15;
-static const int16_t kNoiseEstIncCount = 5;
+// Initialization table for echo channel in 8 kHz
+static const int16_t kChannelStored8kHz[PART_LEN1] = {
+    2040,   1815,   1590,   1498,   1405,   1395,   1385,   1418,
+    1451,   1506,   1562,   1644,   1726,   1804,   1882,   1918,
+    1953,   1982,   2010,   2025,   2040,   2034,   2027,   2021,
+    2014,   1997,   1980,   1925,   1869,   1800,   1732,   1683,
+    1635,   1604,   1572,   1545,   1517,   1481,   1444,   1405,
+    1367,   1331,   1294,   1270,   1245,   1239,   1233,   1247,
+    1260,   1282,   1303,   1338,   1373,   1407,   1441,   1470,
+    1499,   1524,   1549,   1565,   1582,   1601,   1621,   1649,
+    1676
+};
 
-static void ComfortNoise(AecmCore_t* aecm,
-                         const uint16_t* dfa,
-                         complex16_t* out,
-                         const int16_t* lambda);
-
-static int16_t CalcSuppressionGain(AecmCore_t * const aecm);
+// Initialization table for echo channel in 16 kHz
+static const int16_t kChannelStored16kHz[PART_LEN1] = {
+    2040,   1590,   1405,   1385,   1451,   1562,   1726,   1882,
+    1953,   2010,   2040,   2027,   2014,   1980,   1869,   1732,
+    1635,   1572,   1517,   1444,   1367,   1294,   1245,   1233,
+    1260,   1303,   1373,   1441,   1499,   1549,   1582,   1621,
+    1676,   1741,   1802,   1861,   1921,   1983,   2040,   2102,
+    2170,   2265,   2375,   2515,   2651,   2781,   2922,   3075,
+    3253,   3471,   3738,   3976,   4151,   4258,   4308,   4288,
+    4270,   4253,   4237,   4179,   4086,   3947,   3757,   3484,
+    3153
+};
 
 // Moves the pointer to the next entry and inserts |far_spectrum| and
 // corresponding Q-domain in its buffer.
@@ -194,9 +152,9 @@ static int16_t CalcSuppressionGain(AecmCore_t * const aecm);
 //      - far_spectrum  : Pointer to the far end spectrum
 //      - far_q         : Q-domain of far end spectrum
 //
-static void UpdateFarHistory(AecmCore_t* self,
-                             uint16_t* far_spectrum,
-                             int far_q) {
+void WebRtcAecm_UpdateFarHistory(AecmCore_t* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q) {
   // Get new buffer position
   self->far_history_pos++;
   if (self->far_history_pos >= MAX_DELAY) {
@@ -227,7 +185,9 @@ static void UpdateFarHistory(AecmCore_t* self,
 //      - far_spectrum      : Pointer to the aligned far end spectrum
 //                            NULL - Error
 //
-static const uint16_t* AlignedFarend(AecmCore_t* self, int* far_q, int delay) {
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore_t* self,
+                                         int* far_q,
+                                         int delay) {
   int buffer_position = 0;
   assert(self != NULL);
   buffer_position = self->far_history_pos - delay;
@@ -351,85 +311,6 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const int16_t* echo_path)
     aecm->mseChannelCount = 0;
 }
 
-static void WindowAndFFT(AecmCore_t* aecm,
-                          int16_t* fft,
-                          const int16_t* time_signal,
-                          complex16_t* freq_signal,
-                          int time_signal_scaling) {
-  int i = 0;
-
-  // FFT of signal
-  for (i = 0; i < PART_LEN; i++) {
-    // Window time domain signal and insert into real part of
-    // transformation array |fft|
-    fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
-        (time_signal[i] << time_signal_scaling),
-        WebRtcAecm_kSqrtHanning[i],
-        14);
-    fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
-        (time_signal[i + PART_LEN] << time_signal_scaling),
-        WebRtcAecm_kSqrtHanning[PART_LEN - i],
-        14);
-  }
-
-  // Do forward FFT, then take only the first PART_LEN complex samples,
-  // and change signs of the imaginary parts.
-  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
-  for (i = 0; i < PART_LEN; i++) {
-    freq_signal[i].imag = -freq_signal[i].imag;
-  }
-}
-
-static void InverseFFTAndWindow(AecmCore_t* aecm,
-                                int16_t* fft,
-                                complex16_t* efw,
-                                int16_t* output,
-                                const int16_t* nearendClean)
-{
-    int i, j, outCFFT;
-    int32_t tmp32no1;
-    // Reuse |efw| for the inverse FFT output after transferring
-    // the contents to |fft|.
-    int16_t* ifft_out = (int16_t*)efw;
-
-    // Synthesis
-    for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
-      fft[j] = efw[i].real;
-      fft[j + 1] = -efw[i].imag;
-    }
-    fft[0] = efw[0].real;
-    fft[1] = -efw[0].imag;
-
-    fft[PART_LEN2] = efw[PART_LEN].real;
-    fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
-
-    // Inverse FFT. Keep outCFFT to scale the samples in the next block.
-    outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
-    for (i = 0; i < PART_LEN; i++) {
-      ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
-          ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
-      tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
-                                      outCFFT - aecm->dfaCleanQDomain);
-      output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
-          tmp32no1 + aecm->outBuf[i], WEBRTC_SPL_WORD16_MIN);
-
-      tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i],
-          WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
-      tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
-          outCFFT - aecm->dfaCleanQDomain);
-      aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(
-          WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
-    }
-
-    // Copy the current block to the old position (aecm->outBuf is shifted elsewhere)
-    memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
-    memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, sizeof(int16_t) * PART_LEN);
-    if (nearendClean != NULL)
-    {
-        memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(int16_t) * PART_LEN);
-    }
-}
-
 static void CalcLinearEnergiesC(AecmCore_t* aecm,
                                 const uint16_t* far_spectrum,
                                 int32_t* echo_est,
@@ -509,6 +390,18 @@ static void WebRtcAecm_InitNeon(void)
 }
 #endif
 
+// Points the AECM function pointers at their MIPS (_mips) implementations.
+#if defined(MIPS32_LE)
+static void WebRtcAecm_InitMips(void)
+{
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips;
+#endif  // MIPS_DSP_R1_LE
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips;
+}
+#endif  // MIPS32_LE
+
 // WebRtcAecm_InitCore(...)
 //
 // This function initializes the AECM instant created with WebRtcAecm_CreateCore(...)
@@ -646,6 +539,9 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
     WebRtcAecm_InitNeon();
 #endif
 
+#if defined(MIPS32_LE)
+    WebRtcAecm_InitMips();
+#endif
     return 0;
 }
 
@@ -1265,7 +1161,7 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm,
 //                          level (Q14).
 //
 //
-static int16_t CalcSuppressionGain(AecmCore_t * const aecm)
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm)
 {
     int32_t tmp32no1;
 
@@ -1334,639 +1230,6 @@ static int16_t CalcSuppressionGain(AecmCore_t * const aecm)
     return aecm->supGain;
 }
 
-// Transforms a time domain signal into the frequency domain, outputting the
-// complex valued signal, absolute value and sum of absolute values.
-//
-// time_signal          [in]    Pointer to time domain signal
-// freq_signal_real     [out]   Pointer to real part of frequency domain array
-// freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
-//                              array
-// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
-//                              array
-// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
-//                              the frequency domain array
-// return value                 The Q-domain of current frequency values
-//
-static int TimeToFrequencyDomain(AecmCore_t* aecm,
-                                 const int16_t* time_signal,
-                                 complex16_t* freq_signal,
-                                 uint16_t* freq_signal_abs,
-                                 uint32_t* freq_signal_sum_abs)
-{
-    int i = 0;
-    int time_signal_scaling = 0;
-
-    int32_t tmp32no1 = 0;
-    int32_t tmp32no2 = 0;
-
-    // In fft_buf, +16 for 32-byte alignment.
-    int16_t fft_buf[PART_LEN4 + 16];
-    int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
-
-    int16_t tmp16no1;
-#ifndef WEBRTC_ARCH_ARM_V7
-    int16_t tmp16no2;
-#endif
-#ifdef AECM_WITH_ABS_APPROX
-    int16_t max_value = 0;
-    int16_t min_value = 0;
-    uint16_t alpha = 0;
-    uint16_t beta = 0;
-#endif
-
-#ifdef AECM_DYNAMIC_Q
-    tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
-    time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
-#endif
-
-    WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
-
-    // Extract imaginary and real part, calculate the magnitude for all frequency bins
-    freq_signal[0].imag = 0;
-    freq_signal[PART_LEN].imag = 0;
-    freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(
-        freq_signal[0].real);
-    freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
-        freq_signal[PART_LEN].real);
-    (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
-        (uint32_t)(freq_signal_abs[PART_LEN]);
-
-    for (i = 1; i < PART_LEN; i++)
-    {
-        if (freq_signal[i].real == 0)
-        {
-            freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
-                freq_signal[i].imag);
-        }
-        else if (freq_signal[i].imag == 0)
-        {
-            freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
-                freq_signal[i].real);
-        }
-        else
-        {
-            // Approximation for magnitude of complex fft output
-            // magn = sqrt(real^2 + imag^2)
-            // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
-            //
-            // The parameters alpha and beta are stored in Q15
-
-#ifdef AECM_WITH_ABS_APPROX
-            tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
-            tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
-
-            if(tmp16no1 > tmp16no2)
-            {
-                max_value = tmp16no1;
-                min_value = tmp16no2;
-            } else
-            {
-                max_value = tmp16no2;
-                min_value = tmp16no1;
-            }
-
-            // Magnitude in Q(-6)
-            if ((max_value >> 2) > min_value)
-            {
-                alpha = kAlpha1;
-                beta = kBeta1;
-            } else if ((max_value >> 1) > min_value)
-            {
-                alpha = kAlpha2;
-                beta = kBeta2;
-            } else
-            {
-                alpha = kAlpha3;
-                beta = kBeta3;
-            }
-            tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value,
-                                                                alpha,
-                                                                15);
-            tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value,
-                                                                beta,
-                                                                15);
-            freq_signal_abs[i] = (uint16_t)tmp16no1 +
-                (uint16_t)tmp16no2;
-#else
-#ifdef WEBRTC_ARCH_ARM_V7
-            __asm __volatile(
-              "smulbb %[tmp32no1], %[real], %[real]\n\t"
-              "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
-              :[tmp32no1]"+r"(tmp32no1),
-               [tmp32no2]"=r"(tmp32no2)
-              :[real]"r"(freq_signal[i].real),
-               [imag]"r"(freq_signal[i].imag)
-            );
-#else
-            tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
-            tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
-            tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
-            tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
-            tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
-#endif // WEBRTC_ARCH_ARM_V7
-            tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
-
-            freq_signal_abs[i] = (uint16_t)tmp32no1;
-#endif // AECM_WITH_ABS_APPROX
-        }
-        (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
-    }
-
-    return time_signal_scaling;
-}
-
-int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
-                            const int16_t * farend,
-                            const int16_t * nearendNoisy,
-                            const int16_t * nearendClean,
-                            int16_t * output)
-{
-    int i;
-
-    uint32_t xfaSum;
-    uint32_t dfaNoisySum;
-    uint32_t dfaCleanSum;
-    uint32_t echoEst32Gained;
-    uint32_t tmpU32;
-
-    int32_t tmp32no1;
-
-    uint16_t xfa[PART_LEN1];
-    uint16_t dfaNoisy[PART_LEN1];
-    uint16_t dfaClean[PART_LEN1];
-    uint16_t* ptrDfaClean = dfaClean;
-    const uint16_t* far_spectrum_ptr = NULL;
-
-    // 32 byte aligned buffers (with +8 or +16).
-    // TODO (kma): define fft with complex16_t.
-    int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
-    int32_t echoEst32_buf[PART_LEN1 + 8];
-    int32_t dfw_buf[PART_LEN2 + 8];
-    int32_t efw_buf[PART_LEN2 + 8];
-
-    int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
-    int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
-    complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31);
-    complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31);
-
-    int16_t hnl[PART_LEN1];
-    int16_t numPosCoef = 0;
-    int16_t nlpGain = ONE_Q14;
-    int delay;
-    int16_t tmp16no1;
-    int16_t tmp16no2;
-    int16_t mu;
-    int16_t supGain;
-    int16_t zeros32, zeros16;
-    int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
-    int far_q;
-    int16_t resolutionDiff, qDomainDiff;
-
-    const int kMinPrefBand = 4;
-    const int kMaxPrefBand = 24;
-    int32_t avgHnl32 = 0;
-
-    // Determine startup state. There are three states:
-    // (0) the first CONV_LEN blocks
-    // (1) another CONV_LEN blocks
-    // (2) the rest
-
-    if (aecm->startupState < 2)
-    {
-        aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
-    }
-    // END: Determine startup state
-
-    // Buffer near and far end signals
-    memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
-    memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
-    if (nearendClean != NULL)
-    {
-        memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(int16_t) * PART_LEN);
-    }
-
-    // Transform far end signal from time domain to frequency domain.
-    far_q = TimeToFrequencyDomain(aecm,
-                                  aecm->xBuf,
-                                  dfw,
-                                  xfa,
-                                  &xfaSum);
-
-    // Transform noisy near end signal from time domain to frequency domain.
-    zerosDBufNoisy = TimeToFrequencyDomain(aecm,
-                                           aecm->dBufNoisy,
-                                           dfw,
-                                           dfaNoisy,
-                                           &dfaNoisySum);
-    aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
-    aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
-
-
-    if (nearendClean == NULL)
-    {
-        ptrDfaClean = dfaNoisy;
-        aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
-        aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
-        dfaCleanSum = dfaNoisySum;
-    } else
-    {
-        // Transform clean near end signal from time domain to frequency domain.
-        zerosDBufClean = TimeToFrequencyDomain(aecm,
-                                               aecm->dBufClean,
-                                               dfw,
-                                               dfaClean,
-                                               &dfaCleanSum);
-        aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
-        aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
-    }
-
-    // Get the delay
-    // Save far-end history and estimate delay
-    UpdateFarHistory(aecm, xfa, far_q);
-    if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
-                                 far_q) == -1) {
-      return -1;
-    }
-    delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
-                                            dfaNoisy,
-                                            PART_LEN1,
-                                            zerosDBufNoisy);
-    if (delay == -1)
-    {
-        return -1;
-    }
-    else if (delay == -2)
-    {
-        // If the delay is unknown, we assume zero.
-        // NOTE: this will have to be adjusted if we ever add lookahead.
-        delay = 0;
-    }
-
-    if (aecm->fixedDelay >= 0)
-    {
-        // Use fixed delay
-        delay = aecm->fixedDelay;
-    }
-
-    // Get aligned far end spectrum
-    far_spectrum_ptr = AlignedFarend(aecm, &far_q, delay);
-    zerosXBuf = (int16_t) far_q;
-    if (far_spectrum_ptr == NULL)
-    {
-        return -1;
-    }
-
-    // Calculate log(energy) and update energy threshold levels
-    WebRtcAecm_CalcEnergies(aecm,
-                            far_spectrum_ptr,
-                            zerosXBuf,
-                            dfaNoisySum,
-                            echoEst32);
-
-    // Calculate stepsize
-    mu = WebRtcAecm_CalcStepSize(aecm);
-
-    // Update counters
-    aecm->totCount++;
-
-    // This is the channel estimation algorithm.
-    // It is base on NLMS but has a variable step length, which was calculated above.
-    WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu, echoEst32);
-    supGain = CalcSuppressionGain(aecm);
-
-
-    // Calculate Wiener filter hnl[]
-    for (i = 0; i < PART_LEN1; i++)
-    {
-        // Far end signal through channel estimate in Q8
-        // How much can we shift right to preserve resolution
-        tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
-        aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8);
-
-        zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
-        zeros16 = WebRtcSpl_NormW16(supGain) + 1;
-        if (zeros32 + zeros16 > 16)
-        {
-            // Multiplication is safe
-            // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
-            echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
-                                                    (uint16_t)supGain);
-            resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
-            resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
-        } else
-        {
-            tmp16no1 = 17 - zeros32 - zeros16;
-            resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
-            resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
-            if (zeros32 > tmp16no1)
-            {
-                echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
-                        (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain,
-                                tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
-            } else
-            {
-                // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
-                echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
-                        (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1),
-                        (uint16_t)supGain);
-            }
-        }
-
-        zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
-        if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
-                & (aecm->nearFilt[i]))
-        {
-            tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
-            qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
-        } else
-        {
-            tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i],
-                                            aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld);
-            qDomainDiff = 0;
-        }
-        tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
-        tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
-        tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
-        tmp16no2 += tmp16no1;
-        zeros16 = WebRtcSpl_NormW16(tmp16no2);
-        if ((tmp16no2) & (-qDomainDiff > zeros16))
-        {
-            aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
-        } else
-        {
-            aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
-        }
-
-        // Wiener filter coefficients, resulting hnl in Q14
-        if (echoEst32Gained == 0)
-        {
-            hnl[i] = ONE_Q14;
-        } else if (aecm->nearFilt[i] == 0)
-        {
-            hnl[i] = 0;
-        } else
-        {
-            // Multiply the suppression gain
-            // Rounding
-            echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
-            tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
-
-            // Current resolution is
-            // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32))
-            // Make sure we are in Q14
-            tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
-            if (tmp32no1 > ONE_Q14)
-            {
-                hnl[i] = 0;
-            } else if (tmp32no1 < 0)
-            {
-                hnl[i] = ONE_Q14;
-            } else
-            {
-                // 1-echoEst/dfa
-                hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
-                if (hnl[i] < 0)
-                {
-                    hnl[i] = 0;
-                }
-            }
-        }
-        if (hnl[i])
-        {
-            numPosCoef++;
-        }
-    }
-    // Only in wideband. Prevent the gain in upper band from being larger than
-    // in lower band.
-    if (aecm->mult == 2)
-    {
-        // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
-        //               speech distortion in double-talk.
-        for (i = 0; i < PART_LEN1; i++)
-        {
-            hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14);
-        }
-
-        for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
-        {
-            avgHnl32 += (int32_t)hnl[i];
-        }
-        assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
-        avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
-
-        for (i = kMaxPrefBand; i < PART_LEN1; i++)
-        {
-            if (hnl[i] > (int16_t)avgHnl32)
-            {
-                hnl[i] = (int16_t)avgHnl32;
-            }
-        }
-    }
-
-    // Calculate NLP gain, result is in Q14
-    if (aecm->nlpFlag)
-    {
-        for (i = 0; i < PART_LEN1; i++)
-        {
-            // Truncate values close to zero and one.
-            if (hnl[i] > NLP_COMP_HIGH)
-            {
-                hnl[i] = ONE_Q14;
-            } else if (hnl[i] < NLP_COMP_LOW)
-            {
-                hnl[i] = 0;
-            }
-
-            // Remove outliers
-            if (numPosCoef < 3)
-            {
-                nlpGain = 0;
-            } else
-            {
-                nlpGain = ONE_Q14;
-            }
-
-            // NLP
-            if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
-            {
-                hnl[i] = ONE_Q14;
-            } else
-            {
-                hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14);
-            }
-
-            // multiply with Wiener coefficients
-            efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
-                                                                            hnl[i], 14));
-            efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
-                                                                            hnl[i], 14));
-        }
-    }
-    else
-    {
-        // multiply with Wiener coefficients
-        for (i = 0; i < PART_LEN1; i++)
-        {
-            efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
-                                                                           hnl[i], 14));
-            efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
-                                                                           hnl[i], 14));
-        }
-    }
-
-    if (aecm->cngMode == AecmTrue)
-    {
-        ComfortNoise(aecm, ptrDfaClean, efw, hnl);
-    }
-
-    InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
-
-    return 0;
-}
-
-
-// Generate comfort noise and add to output signal.
-//
-// \param[in]     aecm     Handle of the AECM instance.
-// \param[in]     dfa     Absolute value of the nearend signal (Q[aecm->dfaQDomain]).
-// \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]).
-// \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]).
-// \param[in]     lambda  Suppression gain with which to scale the noise level (Q14).
-//
-static void ComfortNoise(AecmCore_t* aecm,
-                         const uint16_t* dfa,
-                         complex16_t* out,
-                         const int16_t* lambda)
-{
-    int16_t i;
-    int16_t tmp16;
-    int32_t tmp32;
-
-    int16_t randW16[PART_LEN];
-    int16_t uReal[PART_LEN1];
-    int16_t uImag[PART_LEN1];
-    int32_t outLShift32;
-    int16_t noiseRShift16[PART_LEN1];
-
-    int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
-    int16_t minTrackShift;
-
-    assert(shiftFromNearToNoise >= 0);
-    assert(shiftFromNearToNoise < 16);
-
-    if (aecm->noiseEstCtr < 100)
-    {
-        // Track the minimum more quickly initially.
-        aecm->noiseEstCtr++;
-        minTrackShift = 6;
-    } else
-    {
-        minTrackShift = 9;
-    }
-
-    // Estimate noise power.
-    for (i = 0; i < PART_LEN1; i++)
-    {
-
-        // Shift to the noise domain.
-        tmp32 = (int32_t)dfa[i];
-        outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
-
-        if (outLShift32 < aecm->noiseEst[i])
-        {
-            // Reset "too low" counter
-            aecm->noiseEstTooLowCtr[i] = 0;
-            // Track the minimum.
-            if (aecm->noiseEst[i] < (1 << minTrackShift))
-            {
-                // For small values, decrease noiseEst[i] every
-                // |kNoiseEstIncCount| block. The regular approach below can not
-                // go further down due to truncation.
-                aecm->noiseEstTooHighCtr[i]++;
-                if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
-                {
-                    aecm->noiseEst[i]--;
-                    aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
-                }
-            }
-            else
-            {
-                aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) >> minTrackShift);
-            }
-        } else
-        {
-            // Reset "too high" counter
-            aecm->noiseEstTooHighCtr[i] = 0;
-            // Ramp slowly upwards until we hit the minimum again.
-            if ((aecm->noiseEst[i] >> 19) > 0)
-            {
-                // Avoid overflow.
-                // Multiplication with 2049 will cause wrap around. Scale
-                // down first and then multiply
-                aecm->noiseEst[i] >>= 11;
-                aecm->noiseEst[i] *= 2049;
-            }
-            else if ((aecm->noiseEst[i] >> 11) > 0)
-            {
-                // Large enough for relative increase
-                aecm->noiseEst[i] *= 2049;
-                aecm->noiseEst[i] >>= 11;
-            }
-            else
-            {
-                // Make incremental increases based on size every
-                // |kNoiseEstIncCount| block
-                aecm->noiseEstTooLowCtr[i]++;
-                if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
-                {
-                    aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
-                    aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
-                }
-            }
-        }
-    }
-
-    for (i = 0; i < PART_LEN1; i++)
-    {
-        tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise);
-        if (tmp32 > 32767)
-        {
-            tmp32 = 32767;
-            aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
-        }
-        noiseRShift16[i] = (int16_t)tmp32;
-
-        tmp16 = ONE_Q14 - lambda[i];
-        noiseRShift16[i]
-                = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14);
-    }
-
-    // Generate a uniform random array on [0 2^15-1].
-    WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
-
-    // Generate noise according to estimated energy.
-    uReal[0] = 0; // Reject LF noise.
-    uImag[0] = 0;
-    for (i = 1; i < PART_LEN1; i++)
-    {
-        // Get a random index for the cos and sin tables over [0 359].
-        tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15);
-
-        // Tables are in Q13.
-        uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i],
-                kCosTable[tmp16], 13);
-        uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i],
-                kSinTable[tmp16], 13);
-    }
-    uImag[PART_LEN] = 0;
-
-    for (i = 0; i < PART_LEN1; i++)
-    {
-        out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]);
-        out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]);
-    }
-}
-
 void WebRtcAecm_BufferFarFrame(AecmCore_t* const aecm,
                                const int16_t* const farend,
                                const int farLen)
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h
index 64251d522..e56ede690 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.h
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.h
@@ -272,6 +272,125 @@ void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm,
                               int16_t * const farend,
                               const int farLen, const int knownDelay);
 
+
+// All the functions below are intended to be private
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateFarHistory()
+//
+// Moves the pointer to the next entry and inserts |far_spectrum| and
+// corresponding Q-domain in its buffer.
+//
+// Inputs:
+//      - self          : Pointer to the AECM instance
+//      - far_spectrum  : Pointer to the far end spectrum
+//      - far_q         : Q-domain of far end spectrum
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore_t* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_AlignedFarend()
+//
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
+// called before AlignedFarend(...). Otherwise, you get the pointer to the
+// previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+//      - self              : Pointer to the AECM instance.
+//      - delay             : Current delay estimate.
+//
+// Output:
+//      - far_q             : The Q-domain of the aligned far end spectrum
+//
+// Return value:
+//      - far_spectrum      : Pointer to the aligned far end spectrum
+//                            NULL - Error
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore_t* self,
+                                         int* far_q,
+                                         int delay);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcSuppressionGain()
+//
+// This function calculates the suppression gain that is used in the
+// Wiener filter.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - supGain           : Suppression gain with which to scale the noise
+//                            level (Q14).
+//
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcEnergies()
+//
+// This function calculates the log of energies for nearend, farend and
+// estimated echoes. There is also an update of energy decision levels,
+// i.e. internal VAD.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Pointer to farend spectrum.
+//      - far_q             : Q-domain of farend spectrum.
+//      - nearEner          : Near end energy for current block in
+//                            Q(aecm->dfaQDomain).
+//
+// Output:
+//     - echoEst            : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore_t * aecm,
+                             const uint16_t* far_spectrum,
+                             const int16_t far_q,
+                             const uint32_t nearEner,
+                             int32_t * echoEst);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcStepSize()
+//
+// This function calculates the step size used in channel estimation
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - mu                : Stepsize in log2(), i.e. number of shifts.
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore_t * const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation.
+// NLMS and decision on channel storage.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Absolute value of the farend signal in Q(far_q)
+//      - far_q             : Q-domain of the farend signal
+//      - dfa               : Absolute value of the nearend signal
+//                            (Q[aecm->dfaQDomain])
+//      - mu                : NLMS step size.
+// Input/Output:
+//      - echoEst           : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore_t * aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t * const dfa,
+                              const int16_t mu,
+                              int32_t * echoEst);
+
+extern const int16_t WebRtcAecm_kCosTable[];
+extern const int16_t WebRtcAecm_kSinTable[];
+
 ///////////////////////////////////////////////////////////////////////////////
 // Some function pointers, for internal functions shared by ARM NEON and
 // generic C code.
@@ -312,4 +431,20 @@ void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore_t* aecm,
 void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
 #endif
 
+#if defined(MIPS32_LE)
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm,
+                                          const uint16_t* far_spectrum,
+                                          int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm);
+#endif
+#endif
+
 #endif
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/webrtc/modules/audio_processing/aecm/aecm_core_c.c
new file mode 100644
index 000000000..63d4ac902
--- /dev/null
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_c.c
@@ -0,0 +1,792 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/modules/audio_processing/utility/ring_buffer.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// Square root of Hanning window in Q14.
+#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
+// Table is defined in an ARM assembly file.
+extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
+#else
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+#endif
+
+#ifdef AECM_WITH_ABS_APPROX
+//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+static const int16_t kNoiseEstQDomain = 15;  // Q-domain of aecm->noiseEst[].
+static const int16_t kNoiseEstIncCount = 5;  // Blocks between small +/- steps.
+
+static void ComfortNoise(AecmCore_t* aecm,
+                         const uint16_t* dfa,
+                         complex16_t* out,
+                         const int16_t* lambda);
+
+// Windows the 2 * PART_LEN samples of |time_signal| (pre-scaled by
+// 2^|time_signal_scaling|) with the sqrt-Hanning table, stores them in |fft|
+// and writes the forward real FFT to |freq_signal|, with the imaginary parts
+// of bins 0..PART_LEN-1 negated. The second half of the block is windowed
+// with the mirrored table index (PART_LEN - i).
+static void WindowAndFFT(AecmCore_t* aecm,
+                          int16_t* fft,
+                          const int16_t* time_signal,
+                          complex16_t* freq_signal,
+                          int time_signal_scaling) {
+  // Scale by multiplying: left-shifting a negative sample is undefined in C.
+  const int scale = 1 << time_signal_scaling;
+  int i = 0;
+
+  for (i = 0; i < PART_LEN; i++) {
+    fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+        (time_signal[i] * scale), WebRtcAecm_kSqrtHanning[i], 14);
+    fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+        (time_signal[i + PART_LEN] * scale),
+        WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
+  }
+
+  // Forward FFT, then negate the imaginary part of the first PART_LEN bins.
+  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
+  for (i = 0; i < PART_LEN; i++) {
+    freq_signal[i].imag = -freq_signal[i].imag;
+  }
+}
+
+static void InverseFFTAndWindow(AecmCore_t* aecm,   // in/out: outBuf, x/d bufs
+                                int16_t* fft,       // scratch: IFFT input
+                                complex16_t* efw,   // in: spectrum; overwritten
+                                int16_t* output,    // out: PART_LEN samples
+                                const int16_t* nearendClean)  // may be NULL
+{
+  int i, j, outCFFT;
+  int32_t tmp32no1;
+  // Reuse |efw| for the inverse FFT output after transferring
+  // the contents to |fft|.
+  int16_t* ifft_out = (int16_t*)efw;
+
+  // Synthesis: pack |efw| into |fft| as interleaved (real, -imag) pairs.
+  for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
+    fft[j] = efw[i].real;
+    fft[j + 1] = -efw[i].imag;
+  }
+  fft[0] = efw[0].real;
+  fft[1] = -efw[0].imag;
+
+  fft[PART_LEN2] = efw[PART_LEN].real;
+  fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
+
+  // Inverse FFT. Keep outCFFT to scale the samples in the next block.
+  outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
+  for (i = 0; i < PART_LEN; i++) {
+    ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
+    tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
+                                     outCFFT - aecm->dfaCleanQDomain);
+    output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                        tmp32no1 + aecm->outBuf[i],
+                                        WEBRTC_SPL_WORD16_MIN);
+    // Window the second half of the IFFT output and store it in aecm->outBuf.
+    tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i],
+                                         WebRtcAecm_kSqrtHanning[PART_LEN - i],
+                                         14);
+    tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
+                                    outCFFT - aecm->dfaCleanQDomain);
+    aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                                tmp32no1,
+                                                WEBRTC_SPL_WORD16_MIN);
+  }
+
+  // Copy the current block to the old position
+  // (aecm->outBuf is shifted elsewhere)
+  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy,
+         aecm->dBufNoisy + PART_LEN,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL)
+  {
+    memcpy(aecm->dBufClean,
+           aecm->dBufClean + PART_LEN,
+           sizeof(int16_t) * PART_LEN);
+  }
+}
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// aecm                 [in]    Pointer to the AECM instance
+// time_signal          [in]    Pointer to time domain signal
+// freq_signal          [out]   Pointer to complex frequency domain array
+//                              (bins 0..PART_LEN)
+// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
+//                              array
+// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
+//                              the frequency domain array
+// return value                 The Q-domain of current frequency values
+//
+static int TimeToFrequencyDomain(AecmCore_t* aecm,
+                                 const int16_t* time_signal,
+                                 complex16_t* freq_signal,
+                                 uint16_t* freq_signal_abs,
+                                 uint32_t* freq_signal_sum_abs)
+{
+  int i = 0;
+  int time_signal_scaling = 0;
+
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+
+  // In fft_buf, +16 for 32-byte alignment.
+  int16_t fft_buf[PART_LEN4 + 16];
+  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+  int16_t tmp16no1;
+#if !defined(WEBRTC_ARCH_ARM_V7) || defined(AECM_WITH_ABS_APPROX)
+  int16_t tmp16no2;  // Also needed by the magnitude approximation path.
+#endif
+#ifdef AECM_WITH_ABS_APPROX
+  int16_t max_value = 0;
+  int16_t min_value = 0;
+  uint16_t alpha = 0;
+  uint16_t beta = 0;
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+  // Extract imaginary and real part, calculate the magnitude for
+  // all frequency bins
+  freq_signal[0].imag = 0;
+  freq_signal[PART_LEN].imag = 0;
+  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+                                freq_signal[PART_LEN].real);
+  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+                           (uint32_t)(freq_signal_abs[PART_LEN]);
+
+  for (i = 1; i < PART_LEN; i++)
+  {
+    if (freq_signal[i].real == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+    }
+    else if (freq_signal[i].imag == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+    }
+    else
+    {
+      // Approximation for magnitude of complex fft output
+      // magn = sqrt(real^2 + imag^2)
+      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
+      //
+      // The parameters alpha and beta are stored in Q15
+
+#ifdef AECM_WITH_ABS_APPROX
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+
+      if(tmp16no1 > tmp16no2)
+      {
+        max_value = tmp16no1;
+        min_value = tmp16no2;
+      } else
+      {
+        max_value = tmp16no2;
+        min_value = tmp16no1;
+      }
+
+      // Magnitude in Q(-6)
+      if ((max_value >> 2) > min_value)
+      {
+        alpha = kAlpha1;
+        beta = kBeta1;
+      } else if ((max_value >> 1) > min_value)
+      {
+        alpha = kAlpha2;
+        beta = kBeta2;
+      } else
+      {
+        alpha = kAlpha3;
+        beta = kBeta3;
+      }
+      tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value, alpha, 15);
+      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value, beta, 15);
+      freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
+#else
+#ifdef WEBRTC_ARCH_ARM_V7
+      __asm __volatile(
+        "smulbb %[tmp32no1], %[real], %[real]\n\t"
+        "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
+        :[tmp32no1]"+r"(tmp32no1),
+         [tmp32no2]"=r"(tmp32no2)
+        :[real]"r"(freq_signal[i].real),
+         [imag]"r"(freq_signal[i].imag)
+      );
+#else
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+      tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
+      tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
+      tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
+#endif // WEBRTC_ARCH_ARM_V7
+      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+      freq_signal_abs[i] = (uint16_t)tmp32no1;
+#endif // AECM_WITH_ABS_APPROX
+    }
+    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+  }
+
+  return time_signal_scaling;
+}
+
+// Processes one block: transforms far and near end to the frequency domain,
+// aligns the far end with the estimated (or fixed) delay, updates the channel
+// estimate and applies the Wiener/NLP gain (plus optional comfort noise).
+// |nearendClean| may be NULL, in which case the noisy near end is used.
+// Returns 0 on success, -1 if delay estimation or far-end alignment fails.
+int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
+                            const int16_t * farend,
+                            const int16_t * nearendNoisy,
+                            const int16_t * nearendClean,
+                            int16_t * output)
+{
+  int i;
+  uint32_t xfaSum;
+  uint32_t dfaNoisySum;
+  uint32_t dfaCleanSum;
+  uint32_t echoEst32Gained;
+  uint32_t tmpU32;
+  int32_t tmp32no1;
+  uint16_t xfa[PART_LEN1];
+  uint16_t dfaNoisy[PART_LEN1];
+  uint16_t dfaClean[PART_LEN1];
+  uint16_t* ptrDfaClean = dfaClean;
+  const uint16_t* far_spectrum_ptr = NULL;
+
+  // 32 byte aligned buffers (with +8 or +16).
+  // TODO (kma): define fft with complex16_t.
+  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+  int32_t echoEst32_buf[PART_LEN1 + 8];
+  int32_t dfw_buf[PART_LEN2 + 8];
+  int32_t efw_buf[PART_LEN2 + 8];
+
+  int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
+  int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
+  complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31);
+  complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31);
+
+  int16_t hnl[PART_LEN1];
+  int16_t numPosCoef = 0;
+  int16_t nlpGain = ONE_Q14;
+  int delay;
+  int16_t tmp16no1;
+  int16_t tmp16no2;
+  int16_t mu;
+  int16_t supGain;
+  int16_t zeros32, zeros16;
+  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+  int far_q;
+  int16_t resolutionDiff, qDomainDiff;
+
+  const int kMinPrefBand = 4;
+  const int kMaxPrefBand = 24;
+  int32_t avgHnl32 = 0;
+
+  // Determine startup state. There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2)
+  {
+    aecm->startupState = (aecm->totCount >= CONV_LEN) +
+                         (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL)
+  {
+    memcpy(aecm->dBufClean + PART_LEN,
+           nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  far_q = TimeToFrequencyDomain(aecm,
+                                aecm->xBuf,
+                                dfw,
+                                xfa,
+                                &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+                                         aecm->dBufNoisy,
+                                         dfw,
+                                         dfaNoisy,
+                                         &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+  if (nearendClean == NULL)
+  {
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else
+  {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm,
+                                           aecm->dBufClean,
+                                           dfw,
+                                           dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
+                               xfa,
+                               PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+                                          dfaNoisy,
+                                          PART_LEN1,
+                                          zerosDBufNoisy);
+  if (delay == -1)
+  {
+    return -1;
+  }
+  else if (delay == -2)
+  {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0)
+  {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+  if (far_spectrum_ptr == NULL)
+  {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1,
+                                                                    50), 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16)
+    {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else
+    {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1)
+      {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                (uint16_t)WEBRTC_SPL_RSHIFT_W16(
+                                                  supGain,
+                                                  tmp16no1)
+                                                );
+      } else
+      {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)WEBRTC_SPL_RSHIFT_W32(
+                                                  aecm->echoFilt[i],
+                                                  tmp16no1),
+                                                (uint16_t)supGain);
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
+        && (aecm->nearFilt[i]))  // Logical AND: any non-zero value counts.
+    {
+      tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
+      qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
+    } else
+    {
+      tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i],
+                                      aecm->dfaCleanQDomain -
+                                      aecm->dfaCleanQDomainOld);
+      qDomainDiff = 0;
+    }
+    tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) && (-qDomainDiff > zeros16))  // Shift would not fit: clamp.
+    {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else
+    {
+      aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0)
+    {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0)
+    {
+      hnl[i] = 0;
+    } else
+    {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14)
+      {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0)
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0)
+        {
+          hnl[i] = 0;
+        }
+      }
+    }
+    if (hnl[i])
+    {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2)
+  {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    //               speech distortion in double-talk.
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14);
+    }
+
+    for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
+    {
+      avgHnl32 += (int32_t)hnl[i];
+    }
+    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+    for (i = kMaxPrefBand; i < PART_LEN1; i++)
+    {
+      if (hnl[i] > (int16_t)avgHnl32)
+      {
+        hnl[i] = (int16_t)avgHnl32;
+      }
+    }
+  }
+
+  // Calculate NLP gain, result is in Q14
+  if (aecm->nlpFlag)
+  {
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      // Truncate values close to zero and one.
+      if (hnl[i] > NLP_COMP_HIGH)
+      {
+        hnl[i] = ONE_Q14;
+      } else if (hnl[i] < NLP_COMP_LOW)
+      {
+        hnl[i] = 0;
+      }
+
+      // Remove outliers
+      if (numPosCoef < 3)
+      {
+        nlpGain = 0;
+      } else
+      {
+        nlpGain = ONE_Q14;
+      }
+
+      // NLP
+      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14);
+      }
+
+      // multiply with Wiener coefficients
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+  else
+  {
+    // multiply with Wiener coefficients
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+
+  if (aecm->cngMode == AecmTrue)
+  {
+    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+  }
+
+  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+  return 0;
+}
+
+
+static void ComfortNoise(AecmCore_t* aecm,       // in/out: noiseEst state, seed
+                         const uint16_t* dfa,    // in: near-end magnitudes
+                         complex16_t* out,       // in/out: spectrum, noise added
+                         const int16_t* lambda)  // in: per-bin gain (Q14)
+{
+  int16_t i;
+  int16_t tmp16;
+  int32_t tmp32;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+  int16_t noiseRShift16[PART_LEN1];
+  // Left shift that brings |dfa| values up to the Q-domain of aecm->noiseEst.
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift;
+
+  assert(shiftFromNearToNoise >= 0);
+  assert(shiftFromNearToNoise < 16);
+
+  if (aecm->noiseEstCtr < 100)
+  {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  } else
+  {
+    minTrackShift = 9;
+  }
+
+  // Estimate noise power.
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Shift to the noise domain.
+    tmp32 = (int32_t)dfa[i];
+    outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
+
+    if (outLShift32 < aecm->noiseEst[i])
+    {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (aecm->noiseEst[i] < (1 << minTrackShift))
+      {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i]--;
+          aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
+        }
+      }
+      else
+      {
+        aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
+                              >> minTrackShift);
+      }
+    } else
+    {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((aecm->noiseEst[i] >> 19) > 0)
+      {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        aecm->noiseEst[i] >>= 11;
+        aecm->noiseEst[i] *= 2049;
+      }
+      else if ((aecm->noiseEst[i] >> 11) > 0)
+      {
+        // Large enough for relative increase
+        aecm->noiseEst[i] *= 2049;
+        aecm->noiseEst[i] >>= 11;
+      }
+      else
+      {
+        // Make incremental increases based on size every
+        // |kNoiseEstIncCount| block
+        aecm->noiseEstTooLowCtr[i]++;
+        if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
+          aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+        }
+      }
+    }
+  }
+  // Shift back to the near-end domain, cap at 32767, scale by (ONE_Q14 - lambda).
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise);
+    if (tmp32 > 32767)
+    {
+      tmp32 = 32767;
+      aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
+    }
+    noiseRShift16[i] = (int16_t)tmp32;
+
+    tmp16 = ONE_Q14 - lambda[i];
+    noiseRShift16[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16,
+                                                          noiseRShift16[i],
+                                                          14);
+  }
+
+  // Generate a uniform random array on [0 2^15-1].
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+
+  // Generate noise according to estimated energy.
+  uReal[0] = 0; // Reject LF noise.
+  uImag[0] = 0;
+  for (i = 1; i < PART_LEN1; i++)
+  {
+    // Get a random index for the cos and sin tables over [0 359].
+    tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15);
+
+    // Tables are in Q13.
+    uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i],
+                                                  WebRtcAecm_kCosTable[tmp16],
+                                                  13);
+    uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i],
+                                                  WebRtcAecm_kSinTable[tmp16],
+                                                  13);
+  }
+  uImag[PART_LEN] = 0;
+  // Add the generated noise to |out| using WEBRTC_SPL_ADD_SAT_W16.
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]);
+    out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]);
+  }
+}
+
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
new file mode 100644
index 000000000..6a231b384
--- /dev/null
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
@@ -0,0 +1,1571 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};  // Square root of Hanning window in Q14: 65 rising entries, 0 .. 16384.
+
+static const int16_t kNoiseEstQDomain = 15;  // NOTE(review): presumably the Q-domain of the noise estimate; no use is visible nearby - confirm.
+static const int16_t kNoiseEstIncCount = 5;  // NOTE(review): presumably a counter threshold for raising the noise estimate; no use is visible nearby - confirm.
+
+static int16_t coefTable[] = {  // Byte offsets into fft, read pairwise by WindowAndFFT.
+   0,   4, 256, 260, 128, 132, 384, 388,
+  64,  68, 320, 324, 192, 196, 448, 452,
+  32,  36, 288, 292, 160, 164, 416, 420,
+  96, 100, 352, 356, 224, 228, 480, 484,
+  16,  20, 272, 276, 144, 148, 400, 404,
+  80,  84, 336, 340, 208, 212, 464, 468,
+  48,  52, 304, 308, 176, 180, 432, 436,
+ 112, 116, 368, 372, 240, 244, 496, 500,
+   8,  12, 264, 268, 136, 140, 392, 396,
+  72,  76, 328, 332, 200, 204, 456, 460,
+  40,  44, 296, 300, 168, 172, 424, 428,
+ 104, 108, 360, 364, 232, 236, 488, 492,
+  24,  28, 280, 284, 152, 156, 408, 412,
+  88,  92, 344, 348, 216, 220, 472, 476,
+  56,  60, 312, 316, 184, 188, 440, 444,
+ 120, 124, 376, 380, 248, 252, 504, 508
+};  // Pair k holds the store offsets for samples k and k + 64. NOTE(review): only read in WindowAndFFT; could be const if unused elsewhere.
+
+static int16_t coefTable_ifft[] = {  // Byte offsets into fft, read pairwise by InverseFFTAndWindow.
+    0, 512, 256, 508, 128, 252, 384, 380,  // 512 bytes is fft[256..257]. NOTE(review): confirm the caller's fft buffer has room for it.
+   64, 124, 320, 444, 192, 188, 448, 316,
+   32,  60, 288, 476, 160, 220, 416, 348,
+   96,  92, 352, 412, 224, 156, 480, 284,
+   16,  28, 272, 492, 144, 236, 400, 364,
+   80, 108, 336, 428, 208, 172, 464, 300,
+   48,  44, 304, 460, 176, 204, 432, 332,
+  112,  76, 368, 396, 240, 140, 496, 268,
+    8,  12, 264, 500, 136, 244, 392, 372,
+   72, 116, 328, 436, 200, 180, 456, 308,
+   40,  52, 296, 468, 168, 212, 424, 340,
+  104,  84, 360, 404, 232, 148, 488, 276,
+   24,  20, 280, 484, 152, 228, 408, 356,
+   88, 100, 344, 420, 216, 164, 472, 292,
+   56,  36, 312, 452, 184, 196, 440, 324,
+  120,  68, 376, 388, 248, 132, 504, 260
+};  // Bin k of efw is stored as (re, -im) at entry 2k and as (re, im) at entry 2k + 1.
+
+static void ComfortNoise(AecmCore_t* aecm,
+                         const uint16_t* dfa,
+                         complex16_t* out,
+                         const int16_t* lambda);  // Forward declaration only; the body is not adjacent to this prototype.
+
+static void WindowAndFFT(AecmCore_t* aecm,  // aecm is not referenced in this body.
+                         int16_t* fft,
+                         const int16_t* time_signal,
+                         complex16_t* freq_signal,
+                         int time_signal_scaling) {
+  int i, j;
+  int32_t tmp1, tmp2, tmp3, tmp4;
+  int16_t* pfrfi;
+  complex16_t* pfreq_signal;
+  int16_t  f_coef, s_coef;
+  int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
+  int32_t hann, hann1, coefs;
+  // Window time_signal into fft, run the FFT, then copy to freq_signal.
+  memset(fft, 0, sizeof(int16_t) * PART_LEN4);
+
+  // Windowing: 64 passes, each scaling sample k and sample k + 64.
+  __asm __volatile (
+    ".set        push                                                    \n\t"
+    ".set        noreorder                                               \n\t"
+    "addiu       %[shift],          %[time_signal_scaling], -14          \n\t"  // shift = scaling - 14 (window is Q14)
+    "addiu       %[i],              $zero,                  64           \n\t"  // 64 passes
+    "addiu       %[load_ptr],       %[time_signal],         0            \n\t"
+    "addiu       %[hann],           %[hanning],             0            \n\t"
+    "addiu       %[hann1],          %[hanning],             128          \n\t"  // entry 64 of the window, walks down
+    "addiu       %[coefs],          %[coefTable],           0            \n\t"
+    "bltz        %[shift],          2f                                   \n\t"  // negative shift: use loop 2
+    " negu       %[shift1],         %[shift]                             \n\t"  // delay slot: shift1 = -shift
+   "1:                                                                   \n\t"  // shift >= 0
+    "lh          %[tmp1],           0(%[load_ptr])                       \n\t"
+    "lh          %[tmp2],           0(%[hann])                           \n\t"
+    "lh          %[tmp3],           128(%[load_ptr])                     \n\t"  // sample k + 64
+    "lh          %[tmp4],           0(%[hann1])                          \n\t"
+    "addiu       %[i],              %[i],                   -1           \n\t"
+    "mul         %[tmp1],           %[tmp1],                %[tmp2]      \n\t"
+    "mul         %[tmp3],           %[tmp3],                %[tmp4]      \n\t"
+    "lh          %[f_coef],         0(%[coefs])                          \n\t"
+    "lh          %[s_coef],         2(%[coefs])                          \n\t"
+    "addiu       %[load_ptr],       %[load_ptr],            2            \n\t"
+    "addiu       %[hann],           %[hann],                2            \n\t"
+    "addiu       %[hann1],          %[hann1],               -2           \n\t"
+    "addu        %[store_ptr1],     %[fft],                 %[f_coef]    \n\t"  // fft + byte offset from coefTable
+    "addu        %[store_ptr2],     %[fft],                 %[s_coef]    \n\t"
+    "sllv        %[tmp1],           %[tmp1],                %[shift]     \n\t"  // scale with a left shift
+    "sllv        %[tmp3],           %[tmp3],                %[shift]     \n\t"
+    "sh          %[tmp1],           0(%[store_ptr1])                     \n\t"
+    "sh          %[tmp3],           0(%[store_ptr2])                     \n\t"
+    "bgtz        %[i],              1b                                   \n\t"
+    " addiu      %[coefs],          %[coefs],               4            \n\t"  // delay slot: next offset pair
+    "b           3f                                                      \n\t"
+    " nop                                                                \n\t"
+   "2:                                                                   \n\t"  // shift < 0
+    "lh          %[tmp1],           0(%[load_ptr])                       \n\t"
+    "lh          %[tmp2],           0(%[hann])                           \n\t"
+    "lh          %[tmp3],           128(%[load_ptr])                     \n\t"
+    "lh          %[tmp4],           0(%[hann1])                          \n\t"
+    "addiu       %[i],              %[i],                   -1           \n\t"
+    "mul         %[tmp1],           %[tmp1],                %[tmp2]      \n\t"
+    "mul         %[tmp3],           %[tmp3],                %[tmp4]      \n\t"
+    "lh          %[f_coef],         0(%[coefs])                          \n\t"
+    "lh          %[s_coef],         2(%[coefs])                          \n\t"
+    "addiu       %[load_ptr],       %[load_ptr],            2            \n\t"
+    "addiu       %[hann],           %[hann],                2            \n\t"
+    "addiu       %[hann1],          %[hann1],               -2           \n\t"
+    "addu        %[store_ptr1],     %[fft],                 %[f_coef]    \n\t"
+    "addu        %[store_ptr2],     %[fft],                 %[s_coef]    \n\t"
+    "srav        %[tmp1],           %[tmp1],                %[shift1]    \n\t"  // arithmetic right shift by -shift
+    "srav        %[tmp3],           %[tmp3],                %[shift1]    \n\t"
+    "sh          %[tmp1],           0(%[store_ptr1])                     \n\t"
+    "sh          %[tmp3],           0(%[store_ptr2])                     \n\t"
+    "bgtz        %[i],              2b                                   \n\t"
+    " addiu      %[coefs],          %[coefs],               4            \n\t"
+   "3:                                                                   \n\t"
+    ".set        pop                                                     \n\t"
+    : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),
+      [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),
+      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),
+      [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),
+      [store_ptr2] "=&r" (store_ptr2)
+    : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),
+      [time_signal_scaling] "r" (time_signal_scaling),
+      [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)
+    : "memory", "hi", "lo"
+  );
+  // FFT on the scattered buffer; the result is read back from fft below.
+  WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
+  pfrfi = fft;
+  pfreq_signal = freq_signal;
+  // Copy 128 halfwords to freq_signal, negating each odd one (presumably .imag).
+  __asm __volatile (
+    ".set        push                                                     \n\t"
+    ".set        noreorder                                                \n\t"
+    "addiu       %[j],              $zero,                 128            \n\t"  // 128 halfwords in total
+   "1:                                                                    \n\t"
+    "lh          %[tmp1],           0(%[pfrfi])                           \n\t"
+    "lh          %[tmp2],           2(%[pfrfi])                           \n\t"
+    "lh          %[tmp3],           4(%[pfrfi])                           \n\t"
+    "lh          %[tmp4],           6(%[pfrfi])                           \n\t"
+    "subu        %[tmp2],           $zero,                 %[tmp2]        \n\t"  // negate the odd halfword
+    "sh          %[tmp1],           0(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp2],           2(%[pfreq_signal])                    \n\t"
+    "subu        %[tmp4],           $zero,                 %[tmp4]        \n\t"
+    "sh          %[tmp3],           4(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp4],           6(%[pfreq_signal])                    \n\t"
+    "lh          %[tmp1],           8(%[pfrfi])                           \n\t"
+    "lh          %[tmp2],           10(%[pfrfi])                          \n\t"
+    "lh          %[tmp3],           12(%[pfrfi])                          \n\t"
+    "lh          %[tmp4],           14(%[pfrfi])                          \n\t"
+    "addiu       %[j],              %[j],                  -8             \n\t"  // 8 halfwords per pass
+    "subu        %[tmp2],           $zero,                 %[tmp2]        \n\t"
+    "sh          %[tmp1],           8(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp2],           10(%[pfreq_signal])                   \n\t"
+    "subu        %[tmp4],           $zero,                 %[tmp4]        \n\t"
+    "sh          %[tmp3],           12(%[pfreq_signal])                   \n\t"
+    "sh          %[tmp4],           14(%[pfreq_signal])                   \n\t"
+    "addiu       %[pfreq_signal],   %[pfreq_signal],       16             \n\t"
+    "bgtz        %[j],              1b                                    \n\t"
+    " addiu      %[pfrfi],          %[pfrfi],              16             \n\t"  // delay slot: advance source
+    ".set        pop                                                      \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),
+      [tmp4] "=&r" (tmp4)
+    :
+    : "memory"
+  );
+}
+
+static void InverseFFTAndWindow(AecmCore_t* aecm,
+                                int16_t* fft,
+                                complex16_t* efw,
+                                int16_t* output,
+                                const int16_t* nearendClean) {
+  int i, outCFFT;
+  int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
+  int16_t* pcoefTable_ifft = coefTable_ifft;
+  int16_t* pfft = fft;
+  int16_t* ppfft = fft;
+  complex16_t* pefw = efw;
+  int32_t out_aecm;
+  int16_t* paecm_buf = aecm->outBuf;
+  const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;
+  const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];
+  int16_t* output1 = output;
+  // Scatter 64 efw bins into fft: each is stored as (re, im) and as (re, -im).
+  __asm __volatile (
+    ".set      push                                                        \n\t"
+    ".set      noreorder                                                   \n\t"
+    "addiu     %[i],                $zero,                   64            \n\t"  // 64 bins, 4 per pass
+   "1:                                                                     \n\t"
+    "lh        %[tmp1],             0(%[pcoefTable_ifft])                  \n\t"  // offset for the (re, -im) copy
+    "lh        %[tmp2],             2(%[pcoefTable_ifft])                  \n\t"  // offset for the (re, im) copy
+    "lh        %[tmp_re],           0(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           2(%[pefw])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             4(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp2],             6(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp_re],           4(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           6(%[pefw])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             8(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp2],             10(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp_re],           8(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           10(%[pefw])                            \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             12(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp2],             14(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp_re],           12(%[pefw])                            \n\t"
+    "lh        %[tmp_im],           14(%[pefw])                            \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addiu     %[pcoefTable_ifft],  %[pcoefTable_ifft],      16            \n\t"
+    "addiu     %[i],                %[i],                    -4            \n\t"
+    "bgtz      %[i],                1b                                     \n\t"
+    " addiu    %[pefw],             %[pefw],                 16            \n\t"  // delay slot: next 4 bins
+    ".set      pop                                                         \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),
+      [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),
+      [fft] "+r" (fft)
+    :
+    : "memory"
+  );
+  // Bin PART_LEN goes to fft[2..3]; byte offset 4 is absent from coefTable_ifft.
+  fft[2] = efw[PART_LEN].real;
+  fft[3] = -efw[PART_LEN].imag;
+
+  outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
+  pfft = fft;
+  // Compact in place: fft[n] = fft[2n] for n = 0..127 (presumably real parts).
+  __asm __volatile (
+    ".set       push                                               \n\t"
+    ".set       noreorder                                          \n\t"
+    "addiu      %[i],            $zero,               128          \n\t"  // 128 outputs, 4 per pass
+   "1:                                                             \n\t"
+    "lh         %[tmp1],         0(%[ppfft])                       \n\t"
+    "lh         %[tmp2],         4(%[ppfft])                       \n\t"
+    "lh         %[tmp3],         8(%[ppfft])                       \n\t"
+    "lh         %[tmp4],         12(%[ppfft])                      \n\t"
+    "addiu      %[i],            %[i],                -4           \n\t"
+    "sh         %[tmp1],         0(%[pfft])                        \n\t"
+    "sh         %[tmp2],         2(%[pfft])                        \n\t"
+    "sh         %[tmp3],         4(%[pfft])                        \n\t"
+    "sh         %[tmp4],         6(%[pfft])                        \n\t"
+    "addiu      %[ppfft],        %[ppfft],            16           \n\t"
+    "bgtz       %[i],            1b                                \n\t"
+    " addiu     %[pfft],         %[pfft],             8            \n\t"  // delay slot: advance destination
+    ".set       pop                                                \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+      [ppfft] "+r" (ppfft)
+    :
+    : "memory"
+  );
+  // Net shift = IFFT return value - dfaCleanQDomain: left if >= 0, else right.
+  pfft = fft;
+  out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);
+  // Window, rescale, add aecm->outBuf, saturate to 16 bits; 2 samples per pass.
+  __asm __volatile (
+    ".set       push                                                       \n\t"
+    ".set       noreorder                                                  \n\t"
+    "addiu      %[i],                $zero,                  64            \n\t"  // 64 samples, 2 per pass
+   "11:                                                                    \n\t"
+    "lh         %[tmp1],             0(%[pfft])                            \n\t"
+    "lh         %[tmp2],             0(%[p_kSqrtHanning])                  \n\t"
+    "addiu      %[i],                %[i],                   -2            \n\t"
+    "mul        %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "lh         %[tmp3],             2(%[pfft])                            \n\t"
+    "lh         %[tmp4],             2(%[p_kSqrtHanning])                  \n\t"
+    "mul        %[tmp3],             %[tmp3],                %[tmp4]       \n\t"
+    "addiu      %[tmp1],             %[tmp1],                8192          \n\t"  // + 2^13: round before >> 14
+    "sra        %[tmp1],             %[tmp1],                14            \n\t"
+    "addiu      %[tmp3],             %[tmp3],                8192          \n\t"
+    "sra        %[tmp3],             %[tmp3],                14            \n\t"
+    "bgez       %[out_aecm],         1f                                    \n\t"
+    " negu      %[tmp2],             %[out_aecm]                           \n\t"  // delay slot: tmp2 = -out_aecm
+    "srav       %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "b          2f                                                         \n\t"
+    " srav      %[tmp3],             %[tmp3],                %[tmp2]       \n\t"  // delay slot
+   "1:                                                                     \n\t"
+    "sllv       %[tmp1],             %[tmp1],                %[out_aecm]   \n\t"
+    "sllv       %[tmp3],             %[tmp3],                %[out_aecm]   \n\t"
+   "2:                                                                     \n\t"
+    "lh         %[tmp4],             0(%[paecm_buf])                       \n\t"  // previous content of aecm->outBuf
+    "lh         %[tmp2],             2(%[paecm_buf])                       \n\t"
+    "addu       %[tmp3],             %[tmp3],                %[tmp2]       \n\t"
+    "addu       %[tmp1],             %[tmp1],                %[tmp4]       \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[tmp1],             %[tmp1],                16            \n\t"  // saturating << 16, then >> 16: clamp to int16
+    "sra        %[tmp1],             %[tmp1],                16            \n\t"
+    "shll_s.w   %[tmp3],             %[tmp3],                16            \n\t"
+    "sra        %[tmp3],             %[tmp3],                16            \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sra        %[tmp4],             %[tmp1],                31            \n\t"  // manual clamp: sign word vs. bits 31..15
+    "sra        %[tmp2],             %[tmp1],                15            \n\t"
+    "beq        %[tmp4],             %[tmp2],                3f            \n\t"
+    " ori       %[tmp2],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp1],             %[tmp2],                %[tmp4]       \n\t"  // 0x7fff or 0xffff8000 depending on sign
+   "3:                                                                     \n\t"
+    "sra        %[tmp2],             %[tmp3],                31            \n\t"
+    "sra        %[tmp4],             %[tmp3],                15            \n\t"
+    "beq        %[tmp2],             %[tmp4],                4f            \n\t"
+    " ori       %[tmp4],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp3],             %[tmp4],                %[tmp2]       \n\t"
+   "4:                                                                     \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[tmp1],             0(%[pfft])                            \n\t"
+    "sh         %[tmp1],             0(%[output1])                         \n\t"
+    "sh         %[tmp3],             2(%[pfft])                            \n\t"
+    "sh         %[tmp3],             2(%[output1])                         \n\t"
+    "lh         %[tmp1],             128(%[pfft])                          \n\t"  // second half: 64 samples further on
+    "lh         %[tmp2],             0(%[pp_kSqrtHanning])                 \n\t"  // window read backwards from entry PART_LEN
+    "mul        %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "lh         %[tmp3],             130(%[pfft])                          \n\t"
+    "lh         %[tmp4],             -2(%[pp_kSqrtHanning])                \n\t"
+    "mul        %[tmp3],             %[tmp3],                %[tmp4]       \n\t"
+    "sra        %[tmp1],             %[tmp1],                14            \n\t"  // no rounding term on this half
+    "sra        %[tmp3],             %[tmp3],                14            \n\t"
+    "bgez       %[out_aecm],         5f                                    \n\t"
+    " negu      %[tmp2],             %[out_aecm]                           \n\t"
+    "srav       %[tmp3],             %[tmp3],                %[tmp2]       \n\t"
+    "b          6f                                                         \n\t"
+    " srav      %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+   "5:                                                                     \n\t"
+    "sllv       %[tmp1],             %[tmp1],                %[out_aecm]   \n\t"
+    "sllv       %[tmp3],             %[tmp3],                %[out_aecm]   \n\t"
+   "6:                                                                     \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[tmp1],             %[tmp1],                16            \n\t"
+    "sra        %[tmp1],             %[tmp1],                16            \n\t"
+    "shll_s.w   %[tmp3],             %[tmp3],                16            \n\t"
+    "sra        %[tmp3],             %[tmp3],                16            \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sra        %[tmp4],             %[tmp1],                31            \n\t"
+    "sra        %[tmp2],             %[tmp1],                15            \n\t"
+    "beq        %[tmp4],             %[tmp2],                7f            \n\t"
+    " ori       %[tmp2],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp1],             %[tmp2],                %[tmp4]       \n\t"
+   "7:                                                                     \n\t"
+    "sra        %[tmp2],             %[tmp3],                31            \n\t"
+    "sra        %[tmp4],             %[tmp3],                15            \n\t"
+    "beq        %[tmp2],             %[tmp4],                8f            \n\t"
+    " ori       %[tmp4],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp3],             %[tmp4],                %[tmp2]       \n\t"
+   "8:                                                                     \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[tmp1],             0(%[paecm_buf])                       \n\t"  // second-half result replaces aecm->outBuf
+    "sh         %[tmp3],             2(%[paecm_buf])                       \n\t"
+    "addiu      %[output1],          %[output1],             4             \n\t"
+    "addiu      %[paecm_buf],        %[paecm_buf],           4             \n\t"
+    "addiu      %[pfft],             %[pfft],                4             \n\t"
+    "addiu      %[p_kSqrtHanning],   %[p_kSqrtHanning],      4             \n\t"
+    "bgtz       %[i],                11b                                   \n\t"
+    " addiu     %[pp_kSqrtHanning],  %[pp_kSqrtHanning],     -4            \n\t"  // delay slot
+    ".set       pop                                                        \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+      [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),
+      [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),
+      [p_kSqrtHanning] "+r" (p_kSqrtHanning)
+    : [out_aecm] "r" (out_aecm),
+      [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)  // NOTE(review): this operand is never referenced in the template above.
+    : "hi", "lo","memory"
+  );
+
+  // Copy the current block to the old position
+  // (aecm->outBuf is shifted elsewhere)
+  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy,
+         aecm->dBufNoisy + PART_LEN,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {  // dBufClean is only shifted when a clean near-end signal was supplied
+    memcpy(aecm->dBufClean,
+           aecm->dBufClean + PART_LEN,
+           sizeof(int16_t) * PART_LEN);
+  }
+}
+
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored) {
+  int i;
+  uint32_t par1 = (*far_energy);
+  uint32_t par2 = (*echo_energy_adapt);
+  uint32_t par3 = (*echo_energy_stored);
+  int16_t* ch_stored_p = &(aecm->channelStored[0]);
+  int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
+  uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
+  int32_t* echo_p = &(echo_est[0]);
+  int32_t temp0, stored0, echo0, adept0, spectrum0;
+  int32_t stored1, adept1, spectrum1, echo1, temp1;
+  // par1/par2/par3 continue the incoming far, adaptive-echo and stored-echo sums.
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  for (i = 0; i < PART_LEN; i+= 4) {  // 4 bins per pass
+    __asm __volatile (
+      ".set           push                                            \n\t"
+      ".set           noreorder                                       \n\t"
+      "lh             %[stored0],     0(%[ch_stored_p])               \n\t"  // channelStored is loaded sign-extended
+      "lhu            %[adept0],      0(%[ch_adapt_p])                \n\t"  // channelAdapt16 is loaded zero-extended
+      "lhu            %[spectrum0],   0(%[spectrum_p])                \n\t"
+      "lh             %[stored1],     2(%[ch_stored_p])               \n\t"
+      "lhu            %[adept1],      2(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum1],   2(%[spectrum_p])                \n\t"
+      "mul            %[echo0],       %[stored0],     %[spectrum0]    \n\t"  // value written to echo_est
+      "mul            %[temp0],       %[adept0],      %[spectrum0]    \n\t"  // only summed into par2
+      "mul            %[echo1],       %[stored1],     %[spectrum1]    \n\t"
+      "mul            %[temp1],       %[adept1],      %[spectrum1]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum0]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum1]    \n\t"
+      "addiu          %[echo_p],      %[echo_p],      16              \n\t"  // advanced early; stores use negative offsets
+      "addu           %[par3],        %[par3],        %[echo0]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp0]        \n\t"
+      "addu           %[par3],        %[par3],        %[echo1]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp1]        \n\t"
+      "usw            %[echo0],       -16(%[echo_p])                  \n\t"  // usw: store word without alignment requirement
+      "usw            %[echo1],       -12(%[echo_p])                  \n\t"
+      "lh             %[stored0],     4(%[ch_stored_p])               \n\t"
+      "lhu            %[adept0],      4(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum0],   4(%[spectrum_p])                \n\t"
+      "lh             %[stored1],     6(%[ch_stored_p])               \n\t"
+      "lhu            %[adept1],      6(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum1],   6(%[spectrum_p])                \n\t"
+      "mul            %[echo0],       %[stored0],     %[spectrum0]    \n\t"
+      "mul            %[temp0],       %[adept0],      %[spectrum0]    \n\t"
+      "mul            %[echo1],       %[stored1],     %[spectrum1]    \n\t"
+      "mul            %[temp1],       %[adept1],      %[spectrum1]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum0]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum1]    \n\t"
+      "addiu          %[ch_stored_p], %[ch_stored_p], 8               \n\t"
+      "addiu          %[ch_adapt_p],  %[ch_adapt_p],  8               \n\t"
+      "addiu          %[spectrum_p],  %[spectrum_p],  8               \n\t"
+      "addu           %[par3],        %[par3],        %[echo0]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp0]        \n\t"
+      "addu           %[par3],        %[par3],        %[echo1]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp1]        \n\t"
+      "usw            %[echo0],       -8(%[echo_p])                   \n\t"
+      "usw            %[echo1],       -4(%[echo_p])                   \n\t"
+      ".set           pop                                             \n\t"
+      : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),
+        [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),
+        [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),
+        [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),
+        [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),
+        [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),
+        [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),
+        [spectrum_p] "+r" (spectrum_p)
+      :
+      : "hi", "lo", "memory"
+    );
+  }
+  // The last bin (index PART_LEN) is outside the loop range and is done in C.
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+  par1 += (uint32_t)(far_spectrum[PART_LEN]);
+  par2 += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[PART_LEN],
+                                far_spectrum[PART_LEN]);
+  par3 += (uint32_t)echo_est[PART_LEN];
+
+  (*far_energy) = par1;
+  (*echo_energy_adapt) = par2;
+  (*echo_energy_stored) = par3;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm,  // Copies channelAdapt16 into channelStored, then refills echo_est.
+                                          const uint16_t* far_spectrum,  // [in] far-end magnitudes, one per bin
+                                          int32_t* echo_est) {  // [out] channelStored[k] * far_spectrum[k] per bin
+  int i;
+  int16_t* temp1;  // Walks aecm->channelStored.
+  uint16_t* temp8;  // Walks far_spectrum (const is cast away; only loaded from).
+  int32_t temp0, temp2, temp3, temp4, temp5, temp6;  // Asm scratch values.
+  int32_t* temp7 = &(echo_est[0]);  // Walks echo_est.
+  temp1 = &(aecm->channelStored[0]);
+  temp8 = (uint16_t*)(&far_spectrum[0]);
+
+  // During startup we store the channel every block.
+  memcpy(aecm->channelStored, aecm->channelAdapt16,
+         sizeof(int16_t) * PART_LEN1);
+  // Recalculate echo estimate, four bins (two packed words) per iteration.
+  for (i = 0; i < PART_LEN; i += 4) {
+    __asm __volatile (  // NOTE(review): muleq_s.w.ph* is a signed Q15 multiply per the MIPS DSP ASE; confirm far_spectrum stays below 0x8000.
+      "ulw            %[temp0],   0(%[temp8])               \n\t"  // far_spectrum[i], [i+1]
+      "ulw            %[temp2],   0(%[temp1])               \n\t"  // channelStored[i], [i+1]
+      "ulw            %[temp4],   4(%[temp8])               \n\t"  // far_spectrum[i+2], [i+3]
+      "ulw            %[temp5],   4(%[temp1])               \n\t"  // channelStored[i+2], [i+3]
+      "muleq_s.w.phl  %[temp3],   %[temp2],     %[temp0]    \n\t"  // upper halfwords -> value stored to echo_est[i+1]
+      "muleq_s.w.phr  %[temp0],   %[temp2],     %[temp0]    \n\t"  // lower halfwords -> value stored to echo_est[i]
+      "muleq_s.w.phl  %[temp6],   %[temp5],     %[temp4]    \n\t"  // upper halfwords -> value stored to echo_est[i+3]
+      "muleq_s.w.phr  %[temp4],   %[temp5],     %[temp4]    \n\t"  // lower halfwords -> value stored to echo_est[i+2]
+      "addiu          %[temp7],   %[temp7],     16          \n\t"  // Pointers advance before the stores,
+      "addiu          %[temp1],   %[temp1],     8           \n\t"  // hence the negative store offsets below.
+      "addiu          %[temp8],   %[temp8],     8           \n\t"
+      "sra            %[temp3],   %[temp3],     1           \n\t"  // Halve each product (presumably undoes the
+      "sra            %[temp0],   %[temp0],     1           \n\t"  // doubling of the fractional multiply - TODO confirm).
+      "sra            %[temp6],   %[temp6],     1           \n\t"
+      "sra            %[temp4],   %[temp4],     1           \n\t"
+      "usw            %[temp3],   -12(%[temp7])             \n\t"  // echo_est[i+1]
+      "usw            %[temp0],   -16(%[temp7])             \n\t"  // echo_est[i]
+      "usw            %[temp6],   -4(%[temp7])              \n\t"  // echo_est[i+3]
+      "usw            %[temp4],   -8(%[temp7])              \n\t"  // echo_est[i+2]
+      : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+        [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)
+      :
+      : "hi", "lo", "memory"
+    );
+  }
+  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+                                      far_spectrum[i]);  // Tail bin in plain C; i == PART_LEN only if PART_LEN % 4 == 0 - TODO confirm.
+}
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm) {  // Overwrites channelAdapt16/channelAdapt32 from channelStored.
+  int i;
+  int32_t* temp3;  // Walks aecm->channelAdapt32 (destination).
+  int16_t* temp0;  // Walks aecm->channelStored (source).
+  int32_t temp1, temp2, temp4, temp5;  // Asm scratch values.
+
+  temp0 = &(aecm->channelStored[0]);
+  temp3 = &(aecm->channelAdapt32[0]);
+
+  // The stored channel has a significantly lower MSE than the adaptive one for
+  // two consecutive calculations. Reset the adaptive channel.
+  memcpy(aecm->channelAdapt16,
+         aecm->channelStored,
+         sizeof(int16_t) * PART_LEN1);
+
+  // Restore the W32 channel, four coefficients per iteration.
+  for (i = 0; i < PART_LEN; i += 4) {
+    __asm __volatile (
+      "ulw            %[temp1], 0(%[temp0])           \n\t"  // channelStored[i], [i+1]
+      "ulw            %[temp4], 4(%[temp0])           \n\t"  // channelStored[i+2], [i+3]
+      "preceq.w.phl   %[temp2], %[temp1]              \n\t"  // Upper halfword widened to a word (presumably value << 16,
+      "preceq.w.phr   %[temp1], %[temp1]              \n\t"  // matching the C tail below - TODO confirm against the DSP ASE).
+      "preceq.w.phl   %[temp5], %[temp4]              \n\t"
+      "preceq.w.phr   %[temp4], %[temp4]              \n\t"
+      "addiu          %[temp0], %[temp0], 8           \n\t"
+      "usw            %[temp2], 4(%[temp3])           \n\t"  // channelAdapt32[i+1]
+      "usw            %[temp1], 0(%[temp3])           \n\t"  // channelAdapt32[i]
+      "usw            %[temp5], 12(%[temp3])          \n\t"  // channelAdapt32[i+3]
+      "usw            %[temp4], 8(%[temp3])           \n\t"  // channelAdapt32[i+2]
+      "addiu          %[temp3], %[temp3], 16          \n\t"
+      : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
+        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+        [temp3] "+r" (temp3), [temp0] "+r" (temp0)
+      :
+      : "memory"
+    );
+  }
+
+  aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
+                              (int32_t)aecm->channelStored[i], 16);  // Tail coefficient; i == PART_LEN only if PART_LEN % 4 == 0 - TODO confirm.
+}
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// aecm                 [in]    Pointer to the AECM instance (forwarded to
+//                              WindowAndFFT)
+// time_signal          [in]    Pointer to time domain signal
+// freq_signal          [out]   Pointer to complex frequency domain array
+// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
+//                              array
+// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
+//                              the frequency domain array
+// return value                 The Q-domain of current frequency values
+//                              (0 unless AECM_DYNAMIC_Q is defined)
+static int TimeToFrequencyDomain(AecmCore_t* aecm,
+                                 const int16_t* time_signal,
+                                 complex16_t* freq_signal,
+                                 uint16_t* freq_signal_abs,
+                                 uint32_t* freq_signal_sum_abs)
+{
+  int i = 0;
+  int time_signal_scaling = 0;
+
+  // In fft_buf, +16 for 32-byte alignment.
+  int16_t fft_buf[PART_LEN4 + 16];
+  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+  int16_t tmp16no1;
+#if !defined(MIPS_DSP_R2_LE)
+  int32_t tmp32no1;
+  int32_t tmp32no2;
+  int16_t tmp16no2;
+#else
+  int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;  // Square roots (magnitudes).
+  int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;  // Values extracted from the accumulators.
+  int16_t* freqp;  // Walks freq_signal as packed (real, imag) halfword pairs.
+  uint16_t* freqabsp;  // Walks freq_signal_abs.
+  uint32_t freqt0, freqt1, freqt2, freqt3;  // Packed bins loaded from freqp.
+  uint32_t freqs;  // Running sum of magnitudes.
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+  // Bins 0 and PART_LEN are treated as purely real: their imaginary parts
+  // are zeroed and their magnitude is just |real|.
+  freq_signal[0].imag = 0;
+  freq_signal[PART_LEN].imag = 0;
+  freq_signal[PART_LEN].real = fft[PART_LEN2];
+  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+    freq_signal[PART_LEN].real);
+  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+    (uint32_t)(freq_signal_abs[PART_LEN]);
+
+#if !defined(MIPS_DSP_R2_LE)
+  for (i = 1; i < PART_LEN; i++) {  // Portable C path for bins 1..PART_LEN-1.
+    if (freq_signal[i].real == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+        freq_signal[i].imag);
+    }
+    else if (freq_signal[i].imag == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+        freq_signal[i].real);
+    }
+    else
+    {
+      // Floored magnitude of the complex fft output:
+      // magn = floor(sqrt(real^2 + imag^2))
+      // The sum of squares is added with 32-bit saturation before the
+      // square root is taken; no alpha/beta approximation is used on
+      // this path.
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+      tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
+      tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
+      tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
+      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+      freq_signal_abs[i] = (uint16_t)tmp32no1;
+    }
+    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+  }
+#else // #if !defined(MIPS_DSP_R2_LE)
+  freqs = (uint32_t)(freq_signal_abs[0]) +
+          (uint32_t)(freq_signal_abs[PART_LEN]);
+  freqp = &(freq_signal[1].real);  // Bin 0 is already done; start at bin 1.
+
+  __asm __volatile (  // Bins 1..3 are handled here so the loop below can start at bin 4 in groups of four.
+    "lw             %[freqt0],      0(%[freqp])             \n\t"  // bin 1 (real, imag)
+    "lw             %[freqt1],      4(%[freqp])             \n\t"  // bin 2
+    "lw             %[freqt2],      8(%[freqp])             \n\t"  // bin 3
+    "mult           $ac0,           $zero,      $zero       \n\t"  // Clear the accumulators.
+    "mult           $ac1,           $zero,      $zero       \n\t"
+    "mult           $ac2,           $zero,      $zero       \n\t"
+    "dpaq_s.w.ph    $ac0,           %[freqt0],  %[freqt0]   \n\t"  // Dot product of the pair with itself
+    "dpaq_s.w.ph    $ac1,           %[freqt1],  %[freqt1]   \n\t"  // (presumably 2*(real^2 + imag^2) - TODO
+    "dpaq_s.w.ph    $ac2,           %[freqt2],  %[freqt2]   \n\t"  // confirm against the DSP ASE manual).
+    "addiu          %[freqp],       %[freqp],   12          \n\t"
+    "extr.w         %[tmp32no20],   $ac0,       1           \n\t"  // Extract with a right shift of 1.
+    "extr.w         %[tmp32no21],   $ac1,       1           \n\t"
+    "extr.w         %[tmp32no22],   $ac2,       1           \n\t"
+    : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+      [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),
+      [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+      [tmp32no22] "=r" (tmp32no22)
+    :
+    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"
+  );
+
+  tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+  tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+  tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+  freq_signal_abs[1] = (uint16_t)tmp32no10;
+  freq_signal_abs[2] = (uint16_t)tmp32no11;
+  freq_signal_abs[3] = (uint16_t)tmp32no12;
+  freqs += (uint32_t)tmp32no10;
+  freqs += (uint32_t)tmp32no11;
+  freqs += (uint32_t)tmp32no12;
+  freqabsp = &(freq_signal_abs[4]);
+  for (i = 4; i < PART_LEN; i+=4)  // Remaining bins, four per iteration.
+  {
+    __asm __volatile (
+      "ulw            %[freqt0],      0(%[freqp])                 \n\t"  // bins i .. i+3
+      "ulw            %[freqt1],      4(%[freqp])                 \n\t"
+      "ulw            %[freqt2],      8(%[freqp])                 \n\t"
+      "ulw            %[freqt3],      12(%[freqp])                \n\t"
+      "mult           $ac0,           $zero,          $zero       \n\t"  // Clear the accumulators.
+      "mult           $ac1,           $zero,          $zero       \n\t"
+      "mult           $ac2,           $zero,          $zero       \n\t"
+      "mult           $ac3,           $zero,          $zero       \n\t"
+      "dpaq_s.w.ph    $ac0,           %[freqt0],      %[freqt0]   \n\t"
+      "dpaq_s.w.ph    $ac1,           %[freqt1],      %[freqt1]   \n\t"
+      "dpaq_s.w.ph    $ac2,           %[freqt2],      %[freqt2]   \n\t"
+      "dpaq_s.w.ph    $ac3,           %[freqt3],      %[freqt3]   \n\t"
+      "addiu          %[freqp],       %[freqp],       16          \n\t"
+      "addiu          %[freqabsp],    %[freqabsp],    8           \n\t"  // Advanced early; the stores below use negative offsets.
+      "extr.w         %[tmp32no20],   $ac0,           1           \n\t"
+      "extr.w         %[tmp32no21],   $ac1,           1           \n\t"
+      "extr.w         %[tmp32no22],   $ac2,           1           \n\t"
+      "extr.w         %[tmp32no23],   $ac3,           1           \n\t"
+      : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+        [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),
+        [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+        [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),
+        [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)
+      :
+      : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+        "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+    );
+
+    tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+    tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+    tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+    tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);
+
+    __asm __volatile (  // Store the four magnitudes and add them to the running sum.
+      "sh             %[tmp32no10],   -8(%[freqabsp])                 \n\t"  // freq_signal_abs[i]
+      "sh             %[tmp32no11],   -6(%[freqabsp])                 \n\t"  // freq_signal_abs[i+1]
+      "sh             %[tmp32no12],   -4(%[freqabsp])                 \n\t"  // freq_signal_abs[i+2]
+      "sh             %[tmp32no13],   -2(%[freqabsp])                 \n\t"  // freq_signal_abs[i+3]
+      "addu           %[freqs],       %[freqs],       %[tmp32no10]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no11]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no12]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no13]    \n\t"
+      : [freqs] "+r" (freqs)
+      : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),
+        [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),
+        [freqabsp] "r" (freqabsp)
+      : "memory"
+    );
+  }
+
+  (*freq_signal_sum_abs) = freqs;
+#endif
+
+  return time_signal_scaling;
+}
+
+int WebRtcAecm_ProcessBlock(AecmCore_t* aecm,  // Runs echo control on one PART_LEN-sample block; returns 0, or -1 on failure.
+                            const int16_t* farend,  // [in] PART_LEN far-end samples
+                            const int16_t* nearendNoisy,  // [in] PART_LEN near-end samples
+                            const int16_t* nearendClean,  // [in] PART_LEN clean near-end samples, or NULL
+                            int16_t* output) {  // [out] filled by InverseFFTAndWindow
+  int i;
+  uint32_t xfaSum;
+  uint32_t dfaNoisySum;
+  uint32_t dfaCleanSum;
+  uint32_t echoEst32Gained;
+  uint32_t tmpU32;
+  int32_t tmp32no1;
+
+  uint16_t xfa[PART_LEN1];
+  uint16_t dfaNoisy[PART_LEN1];
+  uint16_t dfaClean[PART_LEN1];
+  uint16_t* ptrDfaClean = dfaClean;
+  const uint16_t* far_spectrum_ptr = NULL;
+
+  // 32 byte aligned buffers (with +8 or +16).
+  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+  int32_t echoEst32_buf[PART_LEN1 + 8];
+  int32_t dfw_buf[PART_LEN2 + 8];
+  int32_t efw_buf[PART_LEN2 + 8];
+
+  int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~ 31);  // uintptr_t (as in TimeToFrequencyDomain) so the pointer is never truncated.
+  int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~ 31);
+  complex16_t* dfw = (complex16_t*)(((uintptr_t)dfw_buf + 31) & ~ 31);
+  complex16_t* efw = (complex16_t*)(((uintptr_t)efw_buf + 31) & ~ 31);
+
+  int16_t hnl[PART_LEN1];
+  int16_t numPosCoef = 0;
+  int delay;
+  int16_t tmp16no1;
+  int16_t tmp16no2;
+  int16_t mu;
+  int16_t supGain;
+  int16_t zeros32, zeros16;
+  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+  int far_q;
+  int16_t resolutionDiff, qDomainDiff;
+
+  const int kMinPrefBand = 4;
+  const int kMaxPrefBand = 24;
+  int32_t avgHnl32 = 0;
+
+  int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;  // Asm scratch values.
+  int16_t* ptr;  // Walks hnl[] in the NLP loop.
+  int16_t* ptr1;  // Walks hnl[] in the wideband squaring loops.
+  int16_t* er_ptr;  // Walks efw[] as (real, imag) halfword pairs.
+  int16_t* dr_ptr;  // Walks dfw[] as (real, imag) halfword pairs.
+
+  ptr = &hnl[0];
+  ptr1 = &hnl[0];
+  er_ptr = &efw[0].real;
+  dr_ptr = &dfw[0].real;
+
+  // Determine startup state. There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2) {
+    aecm->startupState = (aecm->totCount >= CONV_LEN) +
+                         (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN,
+         nearendNoisy,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {
+    memcpy(aecm->dBufClean + PART_LEN,
+           nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  far_q = TimeToFrequencyDomain(aecm,
+                                aecm->xBuf,
+                                dfw,  // Used as scratch here; overwritten by the near-end transforms below.
+                                xfa,
+                                &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+                                         aecm->dBufNoisy,
+                                         dfw,
+                                         dfaNoisy,
+                                         &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+  if (nearendClean == NULL) {
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm,
+                                           aecm->dBufClean,
+                                           dfw,
+                                           dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+                                          dfaNoisy,
+                                          PART_LEN1,
+                                          zerosDBufNoisy);
+  if (delay == -1) {
+    return -1;
+  }
+  else if (delay == -2) {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0) {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+
+  if (far_spectrum_ptr == NULL) {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(
+                           WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
+                            (uint32_t)aecm->echoFilt[i],
+                            (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain, tmp16no1));
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
+                            (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i],
+                                                            tmp16no1),
+                            (uint16_t)supGain);
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
+         & (aecm->nearFilt[i])) {  // NOTE(review): bitwise '&' only tests bit 0 of nearFilt[i]; '&&' may be intended. Left as is to keep output unchanged.
+      tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
+      qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
+      tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
+    } else {
+      tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i],
+                                      aecm->dfaCleanQDomain
+                                        - aecm->dfaCleanQDomainOld);
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {  // NOTE(review): same bitwise '&' pattern as above; only bit 0 of tmp16no2 is tested.
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+      numPosCoef++;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
+      //    - max(0, 17 - zeros16 - zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+        numPosCoef++;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] <= 0) {
+          hnl[i] = 0;
+        } else {
+          numPosCoef++;
+        }
+      }
+    }
+  }
+
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    //               speech distortion in double-talk.
+    for (i = 0; i < (PART_LEN1 >> 3); i++) {  // hnl[k] = (hnl[k] * hnl[k]) >> 14, eight coefficients per iteration.
+      __asm __volatile (
+        "lh         %[temp1],       0(%[ptr1])                  \n\t"
+        "lh         %[temp2],       2(%[ptr1])                  \n\t"
+        "lh         %[temp3],       4(%[ptr1])                  \n\t"
+        "lh         %[temp4],       6(%[ptr1])                  \n\t"
+        "lh         %[temp5],       8(%[ptr1])                  \n\t"
+        "lh         %[temp6],       10(%[ptr1])                 \n\t"
+        "lh         %[temp7],       12(%[ptr1])                 \n\t"
+        "lh         %[temp8],       14(%[ptr1])                 \n\t"
+        "mul        %[temp1],       %[temp1],       %[temp1]    \n\t"
+        "mul        %[temp2],       %[temp2],       %[temp2]    \n\t"
+        "mul        %[temp3],       %[temp3],       %[temp3]    \n\t"
+        "mul        %[temp4],       %[temp4],       %[temp4]    \n\t"
+        "mul        %[temp5],       %[temp5],       %[temp5]    \n\t"
+        "mul        %[temp6],       %[temp6],       %[temp6]    \n\t"
+        "mul        %[temp7],       %[temp7],       %[temp7]    \n\t"
+        "mul        %[temp8],       %[temp8],       %[temp8]    \n\t"
+        "sra        %[temp1],       %[temp1],       14          \n\t"
+        "sra        %[temp2],       %[temp2],       14          \n\t"
+        "sra        %[temp3],       %[temp3],       14          \n\t"
+        "sra        %[temp4],       %[temp4],       14          \n\t"
+        "sra        %[temp5],       %[temp5],       14          \n\t"
+        "sra        %[temp6],       %[temp6],       14          \n\t"
+        "sra        %[temp7],       %[temp7],       14          \n\t"
+        "sra        %[temp8],       %[temp8],       14          \n\t"
+        "sh         %[temp1],       0(%[ptr1])                  \n\t"
+        "sh         %[temp2],       2(%[ptr1])                  \n\t"
+        "sh         %[temp3],       4(%[ptr1])                  \n\t"
+        "sh         %[temp4],       6(%[ptr1])                  \n\t"
+        "sh         %[temp5],       8(%[ptr1])                  \n\t"
+        "sh         %[temp6],       10(%[ptr1])                 \n\t"
+        "sh         %[temp7],       12(%[ptr1])                 \n\t"
+        "sh         %[temp8],       14(%[ptr1])                 \n\t"
+        "addiu      %[ptr1],        %[ptr1],        16          \n\t"
+        : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+          [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+          [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)
+        :
+        : "memory", "hi", "lo"
+      );
+    }
+    for(i = 0; i < (PART_LEN1 & 7); i++) {  // Same squaring for the PART_LEN1 % 8 leftover coefficients.
+      __asm __volatile (
+        "lh         %[temp1],       0(%[ptr1])                  \n\t"
+        "mul        %[temp1],       %[temp1],       %[temp1]    \n\t"
+        "sra        %[temp1],       %[temp1],       14          \n\t"
+        "sh         %[temp1],       0(%[ptr1])                  \n\t"
+        "addiu      %[ptr1],        %[ptr1],        2           \n\t"
+        : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)
+        :
+        : "memory", "hi", "lo"
+      );
+    }
+
+    for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
+      avgHnl32 += (int32_t)hnl[i];
+    }
+
+    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+    for (i = kMaxPrefBand; i < PART_LEN1; i++) {
+      if (hnl[i] > (int16_t)avgHnl32) {
+        hnl[i] = (int16_t)avgHnl32;
+      }
+    }
+  }
+
+  // Calculate NLP gain, result is in Q14
+  if (aecm->nlpFlag) {
+    if (numPosCoef < 3) {
+      for (i = 0; i < PART_LEN1; i++) {
+        efw[i].real = 0;
+        efw[i].imag = 0;
+        hnl[i] = 0;
+      }
+    } else {
+      for (i = 0; i < PART_LEN1; i++) {  // Per bin: clamp hnl, then efw = round(dfw * hnl >> 14).
+#if defined(MIPS_DSP_R1_LE)
+        __asm __volatile (
+          ".set       push                                        \n\t"
+          ".set       noreorder                                   \n\t"
+          "lh         %[temp1],       0(%[ptr])                   \n\t"  // hnl[i]
+          "lh         %[temp2],       0(%[dr_ptr])                \n\t"  // dfw[i].real
+          "lh         %[temp3],       2(%[dr_ptr])                \n\t"  // dfw[i].imag
+          "slti       %[temp4],       %[temp1],       0x4001      \n\t"
+          "beqz       %[temp4],       3f                          \n\t"  // hnl > 0x4000: clamp at label 3.
+          " addiu     %[dr_ptr],      %[dr_ptr],      4           \n\t"  // Delay slot: dr_ptr now advances on every path.
+          "slti       %[temp5],       %[temp1],       3277        \n\t"
+          "bnez       %[temp5],       2f                          \n\t"  // hnl < 3277: zero everything at label 2.
+          " mul       %[temp2],       %[temp2],       %[temp1]    \n\t"  // Delay slot: result is overwritten at label 2 if taken.
+          "mul        %[temp3],       %[temp3],       %[temp1]    \n\t"
+          "shra_r.w   %[temp2],       %[temp2],       14          \n\t"
+          "shra_r.w   %[temp3],       %[temp3],       14          \n\t"
+          "b          4f                                          \n\t"
+          " nop                                                   \n\t"
+         "2:                                                      \n\t"
+          "addu       %[temp1],       $zero,          $zero       \n\t"
+          "addu       %[temp2],       $zero,          $zero       \n\t"
+          "addu       %[temp3],       $zero,          $zero       \n\t"
+          "b          1f                                          \n\t"
+          " nop                                                   \n\t"
+         "3:                                                      \n\t"
+          "addiu      %[temp1],       $0,             0x4000      \n\t"  // efw[i] is stored as dfw[i] unchanged on this path.
+         "1:                                                      \n\t"
+          "sh         %[temp1],       0(%[ptr])                   \n\t"  // Write back the clamped hnl[i].
+         "4:                                                      \n\t"
+          "sh         %[temp2],       0(%[er_ptr])                \n\t"  // efw[i].real
+          "sh         %[temp3],       2(%[er_ptr])                \n\t"  // efw[i].imag
+          "addiu      %[ptr],         %[ptr],         2           \n\t"
+          "addiu      %[er_ptr],      %[er_ptr],      4           \n\t"
+          ".set       pop                                         \n\t"
+          : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+            [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+            [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+          :
+          : "memory", "hi", "lo"
+        );
+#else
+        __asm __volatile (
+          ".set       push                                        \n\t"
+          ".set       noreorder                                   \n\t"
+          "lh         %[temp1],       0(%[ptr])                   \n\t"  // hnl[i]
+          "lh         %[temp2],       0(%[dr_ptr])                \n\t"  // dfw[i].real
+          "lh         %[temp3],       2(%[dr_ptr])                \n\t"  // dfw[i].imag
+          "slti       %[temp4],       %[temp1],       0x4001      \n\t"
+          "beqz       %[temp4],       3f                          \n\t"  // hnl > 0x4000: clamp at label 3.
+          " addiu     %[dr_ptr],      %[dr_ptr],      4           \n\t"  // Delay slot: dr_ptr now advances on every path.
+          "slti       %[temp5],       %[temp1],       3277        \n\t"
+          "bnez       %[temp5],       2f                          \n\t"  // hnl < 3277: zero everything at label 2.
+          " mul       %[temp2],       %[temp2],       %[temp1]    \n\t"  // Delay slot: result is overwritten at label 2 if taken.
+          "mul        %[temp3],       %[temp3],       %[temp1]    \n\t"
+          "addiu      %[temp2],       %[temp2],       0x2000      \n\t"  // Rounding offset before the >> 14.
+          "addiu      %[temp3],       %[temp3],       0x2000      \n\t"
+          "sra        %[temp2],       %[temp2],       14          \n\t"
+          "sra        %[temp3],       %[temp3],       14          \n\t"
+          "b          4f                                          \n\t"
+          " nop                                                   \n\t"
+         "2:                                                      \n\t"
+          "addu       %[temp1],       $zero,          $zero       \n\t"
+          "addu       %[temp2],       $zero,          $zero       \n\t"
+          "addu       %[temp3],       $zero,          $zero       \n\t"
+          "b          1f                                          \n\t"
+          " nop                                                   \n\t"
+         "3:                                                      \n\t"
+          "addiu      %[temp1],       $0,             0x4000      \n\t"  // efw[i] is stored as dfw[i] unchanged on this path.
+         "1:                                                      \n\t"
+          "sh         %[temp1],       0(%[ptr])                   \n\t"  // Write back the clamped hnl[i].
+         "4:                                                      \n\t"
+          "sh         %[temp2],       0(%[er_ptr])                \n\t"  // efw[i].real
+          "sh         %[temp3],       2(%[er_ptr])                \n\t"  // efw[i].imag
+          "addiu      %[ptr],         %[ptr],         2           \n\t"
+          "addiu      %[er_ptr],      %[er_ptr],      4           \n\t"
+          ".set       pop                                         \n\t"
+          : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+            [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+            [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+          :
+          : "memory", "hi", "lo"
+        );
+#endif
+      }
+    }
+  }
+  else {
+    // multiply with Wiener coefficients
+    for (i = 0; i < PART_LEN1; i++) {
+      efw[i].real = (int16_t)
+                      (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                            hnl[i],
+                                                            14));
+      efw[i].imag = (int16_t)
+                      (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                            hnl[i],
+                                                            14));
+    }
+  }
+
+  if (aecm->cngMode == AecmTrue) {
+    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+  }
+
+  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+  return 0;
+}
+
+// Generate comfort noise and add it to the frequency-domain signal |out|.
+// |dfa| is shifted into the noise-estimate domain and used to update
+// aecm->noiseEst; |lambda| is subtracted from ONE_Q14 to weight the noise.
+// Bins 1..PART_LEN are handled two per iteration; sums saturate to int16_t.
+static void ComfortNoise(AecmCore_t* aecm,
+                         const uint16_t* dfa,
+                         complex16_t* out,
+                         const int16_t* lambda) {
+  int16_t i, tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;
+  int32_t tmp32, tmp321, tmp322, tmp323, tnoise, tnoise1, *tmp1;
+  int16_t *dfap, *lambdap;
+  const int32_t c2049 = 2049;
+  const int32_t c359 = 359;
+  const int32_t c114 = ONE_Q14;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift = 9;
+
+  assert(shiftFromNearToNoise >= 0);
+  assert(shiftFromNearToNoise < 16);
+
+  if (aecm->noiseEstCtr < 100) {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  }
+
+  // Generate a uniform random array on [0 2^15-1].
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+  int16_t* randW16p = (int16_t*)randW16;
+#if defined (MIPS_DSP_R1_LE)
+  int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;
+  int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;
+#endif   // #if defined(MIPS_DSP_R1_LE)
+  tmp1 = (int32_t*)aecm->noiseEst + 1;
+  dfap = (int16_t*)dfa + 1;
+  lambdap = (int16_t*)lambda + 1;
+  // Estimate noise power.
+  for (i = 1; i < PART_LEN1; i+=2) {
+    // Shift to the noise domain; |dfa| is unsigned, so load it with lhu.
+    __asm __volatile (
+      "lhu    %[tmp32],       0(%[dfap])                              \n\t"
+      "lw     %[tnoise],      0(%[tmp1])                              \n\t"
+      "sllv   %[outLShift32], %[tmp32],   %[shiftFromNearToNoise]     \n\t"
+      : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),
+        [tnoise] "=&r" (tnoise)
+      : [tmp1] "r" (tmp1), [dfap] "r" (dfap),
+        [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+
+    if (outLShift32 < tnoise) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (tnoise < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
+          tnoise--;
+          aecm->noiseEstTooHighCtr[i] = 0;  // Reset the counter
+        }
+      } else {
+        __asm __volatile (
+          "subu   %[tmp32],       %[tnoise],      %[outLShift32]      \n\t"
+          "srav   %[tmp32],       %[tmp32],       %[minTrackShift]    \n\t"
+          "subu   %[tnoise],      %[tnoise],      %[tmp32]            \n\t"
+          : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)
+          : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+        );
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((tnoise >> 19) <= 0) {
+        if ((tnoise >> 11) > 0) {
+          // Large enough for relative increase
+          __asm __volatile (
+            "mul    %[tnoise],  %[tnoise],  %[c2049]    \n\t"
+            "sra    %[tnoise],  %[tnoise],  11          \n\t"
+            : [tnoise] "+r" (tnoise)
+            : [c2049] "r" (c2049)
+            : "hi", "lo"
+          );
+        } else {
+          // Make incremental increases based on size every
+          // |kNoiseEstIncCount| block
+          aecm->noiseEstTooLowCtr[i]++;
+          if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
+            __asm __volatile (
+              "sra    %[tmp32],   %[tnoise],  9           \n\t"
+              "addi   %[tnoise],  %[tnoise],  1           \n\t"
+              "addu   %[tnoise],  %[tnoise],  %[tmp32]    \n\t"
+              : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)
+              :
+            );
+            aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+          }
+        }
+      } else {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        __asm __volatile (
+          "sra    %[tnoise],  %[tnoise],  11          \n\t"
+          "mul    %[tnoise],  %[tnoise],  %[c2049]    \n\t"
+          : [tnoise] "+r" (tnoise)
+          : [c2049] "r" (c2049)
+          : "hi", "lo"
+        );
+      }
+    }
+
+    // Same for the second bin of the pair; also advances |dfap| by two bins.
+    __asm __volatile (
+      "lhu    %[tmp32],       2(%[dfap])                              \n\t"
+      "lw     %[tnoise1],     4(%[tmp1])                              \n\t"
+      "addiu  %[dfap],        %[dfap],    4                           \n\t"
+      "sllv   %[outLShift32], %[tmp32],   %[shiftFromNearToNoise]     \n\t"
+      : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),
+        [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)
+      : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+
+    if (outLShift32 < tnoise1) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i + 1] = 0;
+      // Track the minimum.
+      if (tnoise1 < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i + 1] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i + 1]++;
+        if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {
+          tnoise1--;
+          aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter
+        }
+      } else {
+        __asm __volatile (
+          "subu   %[tmp32],       %[tnoise1],     %[outLShift32]      \n\t"
+          "srav   %[tmp32],       %[tmp32],       %[minTrackShift]    \n\t"
+          "subu   %[tnoise1],     %[tnoise1],     %[tmp32]            \n\t"
+          : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)
+          : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+        );
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i + 1] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((tnoise1 >> 19) <= 0) {
+        if ((tnoise1 >> 11) > 0) {
+          // Large enough for relative increase
+          __asm __volatile (
+            "mul    %[tnoise1], %[tnoise1], %[c2049]   \n\t"
+            "sra    %[tnoise1], %[tnoise1], 11         \n\t"
+            : [tnoise1] "+r" (tnoise1)
+            : [c2049] "r" (c2049)
+            : "hi", "lo"
+          );
+        } else {
+          // Make incremental increases based on size every
+          // |kNoiseEstIncCount| block
+          aecm->noiseEstTooLowCtr[i + 1]++;
+          if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {
+            __asm __volatile (
+              "sra    %[tmp32],   %[tnoise1], 9           \n\t"
+              "addi   %[tnoise1], %[tnoise1], 1           \n\t"
+              "addu   %[tnoise1], %[tnoise1], %[tmp32]    \n\t"
+              : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)
+              :
+            );
+            aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter
+          }
+        }
+      } else {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        __asm __volatile (
+          "sra    %[tnoise1], %[tnoise1], 11          \n\t"
+          "mul    %[tnoise1], %[tnoise1], %[c2049]    \n\t"
+          : [tnoise1] "+r" (tnoise1)
+          : [c2049] "r" (c2049)
+          : "hi", "lo"
+        );
+      }
+    }
+    // Write back both estimates, form ONE_Q14 - lambda, shift the noise back.
+    __asm __volatile (
+      "lh     %[tmp16],   0(%[lambdap])                           \n\t"
+      "lh     %[tmp161],  2(%[lambdap])                           \n\t"
+      "sw     %[tnoise],  0(%[tmp1])                              \n\t"
+      "sw     %[tnoise1], 4(%[tmp1])                              \n\t"
+      "subu   %[tmp16],   %[c114],        %[tmp16]                \n\t"
+      "subu   %[tmp161],  %[c114],        %[tmp161]               \n\t"
+      "srav   %[tmp32],   %[tnoise],      %[shiftFromNearToNoise] \n\t"
+      "srav   %[tmp321],  %[tnoise1],     %[shiftFromNearToNoise] \n\t"
+      "addiu  %[lambdap], %[lambdap],     4                       \n\t"
+      "addiu  %[tmp1],    %[tmp1],        8                       \n\t"
+      : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),
+        [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)
+      : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),
+        [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+    // Cap the shifted estimates at 32767 and store the cap back in noiseEst.
+    if (tmp32 > 32767) {
+      tmp32 = 32767;
+      aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
+    }
+    if (tmp321 > 32767) {
+      tmp321 = 32767;
+      aecm->noiseEst[i+1] = WEBRTC_SPL_LSHIFT_W32(tmp321, shiftFromNearToNoise);
+    }
+    // Weight the noise (>> 14). tmp32/tmp321 are overwritten, hence "+r".
+    __asm __volatile (
+      "mul    %[tmp32],   %[tmp32],       %[tmp16]                \n\t"
+      "mul    %[tmp321],  %[tmp321],      %[tmp161]               \n\t"
+      "sra    %[nrsh1],   %[tmp32],       14                      \n\t"
+      "sra    %[nrsh2],   %[tmp321],      14                      \n\t"
+      : [nrsh1] "=&r" (nrsh1), [nrsh2] "=&r" (nrsh2), [tmp32] "+r" (tmp32),
+        [tmp321] "+r" (tmp321)
+      : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161)
+      : "memory", "hi", "lo"
+    );
+    // Turn two random values into table indices: (rand * 359) >> 15.
+    __asm __volatile (
+      "lh     %[tmp32],       0(%[randW16p])              \n\t"
+      "lh     %[tmp321],      2(%[randW16p])              \n\t"
+      "addiu  %[randW16p],    %[randW16p],    4           \n\t"
+      "mul    %[tmp32],       %[tmp32],       %[c359]     \n\t"
+      "mul    %[tmp321],      %[tmp321],      %[c359]     \n\t"
+      "sra    %[tmp16],       %[tmp32],       15          \n\t"
+      "sra    %[tmp161],      %[tmp321],      15          \n\t"
+      : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),
+        [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)
+      : [c359] "r" (c359)
+      : "memory", "hi", "lo"
+    );
+    // Look up the cosine/sine table entries for both indices.
+#if !defined(MIPS_DSP_R1_LE)
+    tmp32 = WebRtcAecm_kCosTable[tmp16];
+    tmp321 = WebRtcAecm_kSinTable[tmp16];
+    tmp322 = WebRtcAecm_kCosTable[tmp161];
+    tmp323 = WebRtcAecm_kSinTable[tmp161];
+#else
+    __asm __volatile (
+      "sll    %[tmp16],       %[tmp16],                   1           \n\t"
+      "sll    %[tmp161],      %[tmp161],                  1           \n\t"
+      "lhx    %[tmp32],       %[tmp16](%[kCosTablep])                 \n\t"
+      "lhx    %[tmp321],      %[tmp16](%[kSinTablep])                 \n\t"
+      "lhx    %[tmp322],      %[tmp161](%[kCosTablep])                \n\t"
+      "lhx    %[tmp323],      %[tmp161](%[kSinTablep])                \n\t"
+      : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),
+        [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323),
+        [tmp16] "+r" (tmp16), [tmp161] "+r" (tmp161)
+      : [kCosTablep] "r" (kCosTablep), [kSinTablep] "r" (kSinTablep)
+      : "memory"
+    );
+#endif
+    __asm __volatile (
+      "mul    %[tmp32],       %[tmp32],                   %[nrsh1]    \n\t"
+      "negu   %[tmp162],      %[nrsh1]                                \n\t"
+      "mul    %[tmp322],      %[tmp322],                  %[nrsh2]    \n\t"
+      "negu   %[tmp163],      %[nrsh2]                                \n\t"
+      "sra    %[tmp32],       %[tmp32],                   13          \n\t"
+      "mul    %[tmp321],      %[tmp321],                  %[tmp162]   \n\t"
+      "sra    %[tmp322],      %[tmp322],                  13          \n\t"
+      "mul    %[tmp323],      %[tmp323],                  %[tmp163]   \n\t"
+      "sra    %[tmp321],      %[tmp321],                  13          \n\t"
+      "sra    %[tmp323],      %[tmp323],                  13          \n\t"
+      : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),
+        [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)
+      : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)
+      : "hi", "lo"
+    );
+    // Tables are in Q13.
+    uReal[i] = (int16_t)tmp32;
+    uImag[i] = (int16_t)tmp321;
+    uReal[i + 1] = (int16_t)tmp322;
+    uImag[i + 1] = (int16_t)tmp323;
+  }
+  // Add the noise to |out|; no noise goes into bin 0 or out[PART_LEN].imag.
+  int32_t tt, sgn;  // On overflow, 32767 ^ sgn yields 32767 or -32768.
+  tt = out[0].real;
+  sgn = ((int)tt) >> 31;
+  out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+  tt = out[0].imag;
+  sgn = ((int)tt) >> 31;
+  out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+  for (i = 1; i < PART_LEN; i++) {
+    tt = out[i].real + uReal[i];
+    sgn = ((int)tt) >> 31;
+    out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+    tt = out[i].imag + uImag[i];
+    sgn = ((int)tt) >> 31;
+    out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+  }
+  tt = out[PART_LEN].real + uReal[PART_LEN];
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+  tt = out[PART_LEN].imag;
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (32767 ^ sgn);
+}
+
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 4d9988aec..555bdc346 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -120,6 +120,15 @@
         ['(target_arch=="arm" and armv7==1) or target_arch=="armv7"', {
           'dependencies': ['audio_processing_neon',],
         }],
+        ['target_arch=="mipsel"', {  # mipsel: add the MIPS AECM core source.
+          'sources': [
+            'aecm/aecm_core_mips.c',
+          ],
+        }, {  # Any other target_arch: add the C AECM core source instead.
+          'sources': [
+            'aecm/aecm_core_c.c',
+          ],
+        }],
       ],
       # TODO(jschuh): Bug 1348: fix size_t to int truncations.
       'msvs_disabled_warnings': [ 4267, ],