diff --git a/LICENSE_THIRD_PARTY b/LICENSE_THIRD_PARTY
index 2086d2844..31d27e8a0 100644
--- a/LICENSE_THIRD_PARTY
+++ b/LICENSE_THIRD_PARTY
@@ -15,6 +15,7 @@ modules/audio_device/main/source/Mac/portaudio/pa_memorybarrier.h
 modules/audio_device/main/source/Mac/portaudio/pa_ringbuffer.h
 modules/audio_device/main/source/Mac/portaudio/pa_ringbuffer.c
 modules/audio_processing/utility/fft4g.c
+modules/audio_processing/aec/main/source/aec_core_rdft.c
 system_wrappers/interface/fix_interlocked_exchange_pointer_windows.h
 system_wrappers/source/condition_variable_windows.cc
 system_wrappers/source/spreadsortlib/constants.hpp
diff --git a/modules/audio_processing/aec/main/source/aec.gyp b/modules/audio_processing/aec/main/source/aec.gyp
index 96f9bd75b..ed33d7f71 100644
--- a/modules/audio_processing/aec/main/source/aec.gyp
+++ b/modules/audio_processing/aec/main/source/aec.gyp
@@ -31,6 +31,7 @@
         'echo_cancellation.c',
         'aec_core.c',
         'aec_core_sse2.c',
+        'aec_core_rdft.c',
         'aec_core.h',
         'resampler.c',
         'resampler.h',
diff --git a/modules/audio_processing/aec/main/source/aec_core.c b/modules/audio_processing/aec/main/source/aec_core.c
index 3303a8a88..eb8a90fc1 100644
--- a/modules/audio_processing/aec/main/source/aec_core.c
+++ b/modules/audio_processing/aec/main/source/aec_core.c
@@ -291,7 +291,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1],
                    -aec->xfBuf[1][xPos + PART_LEN],
                    ef[0][PART_LEN], ef[1][PART_LEN]);
 
-    rdft(PART_LEN2, -1, fft, ip, wfft);
+    aec_rdft_128(-1, fft, ip, wfft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
 
     // fft scaling
@@ -301,7 +301,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1],
         fft[j] *= scale;
       }
     }
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     aec->wfBuf[0][pos] += fft[0];
     aec->wfBuf[0][pos + PART_LEN] += fft[1];
@@ -613,7 +613,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
 
     // Setting this on the first call initializes work arrays.
     ip[0] = 0;
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     // Far fft
     xf[1][0] = 0;
@@ -628,7 +628,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
 
     // Near fft
     memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
     df[0][1] = 0;
     df[PART_LEN][1] = 0;
     df[0][0] = fft[0];
@@ -704,7 +704,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
         fft[2 * i] = yf[0][i];
         fft[2 * i + 1] = yf[1][i];
     }
-    rdft(PART_LEN2, -1, fft, ip, wfft);
+    aec_rdft_128(-1, fft, ip, wfft);
 
     scale = 2.0f / PART_LEN2;
     for (i = 0; i < PART_LEN; i++) {
@@ -719,7 +719,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
     memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
     memset(fft, 0, sizeof(float) * PART_LEN);
     memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     ef[1][0] = 0;
     ef[1][PART_LEN] = 0;
@@ -842,7 +842,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output,
         fft[i] = aec->xBuf[i] * sqrtHanning[i];
         fft[PART_LEN + i] = aec->xBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
     }
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     xfw[0][1] = 0;
     xfw[PART_LEN][1] = 0;
@@ -864,7 +864,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output,
         fft[i] = aec->dBuf[i] * sqrtHanning[i];
         fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
     }
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     dfw[1][0] = 0;
     dfw[1][PART_LEN] = 0;
@@ -880,7 +880,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output,
         fft[i] = aec->eBuf[i] * sqrtHanning[i];
         fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
     }
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
     efw[1][0] = 0;
     efw[1][PART_LEN] = 0;
     efw[0][0] = fft[0];
@@ -1057,7 +1057,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output,
         // Sign change required by Ooura fft.
         fft[2*i + 1] = -efw[1][i];
     }
-    rdft(PART_LEN2, -1, fft, ip, wfft);
+    aec_rdft_128(-1, fft, ip, wfft);
 
     // Overlap and add to obtain output.
     scale = 2.0f / PART_LEN2;
@@ -1089,7 +1089,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output,
                 fft[2*i] = comfortNoiseHband[i][0];
                 fft[2*i + 1] = comfortNoiseHband[i][1];
             }
-            rdft(PART_LEN2, -1, fft, ip, wfft);
+            aec_rdft_128(-1, fft, ip, wfft);
             scale = 2.0f / PART_LEN2;
         }
 
diff --git a/modules/audio_processing/aec/main/source/aec_core.h b/modules/audio_processing/aec/main/source/aec_core.h
index ae043038a..2a619085d 100644
--- a/modules/audio_processing/aec/main/source/aec_core.h
+++ b/modules/audio_processing/aec/main/source/aec_core.h
@@ -16,7 +16,6 @@
 #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
 
 #include <stdio.h>
-#include "fft4g.h"
 #include "typedefs.h"
 #include "signal_processing_library.h"
 
@@ -191,5 +190,9 @@ void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend,
                        short *out, short *outH,
                        int knownDelay);
 
+// aec_core_rdft.c
+void aec_rdft_128(int, float *, int *, float *);
+
+
 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
 
diff --git a/modules/audio_processing/aec/main/source/aec_core_rdft.c b/modules/audio_processing/aec/main/source/aec_core_rdft.c
new file mode 100644
index 000000000..3f4d30049
--- /dev/null
+++ b/modules/audio_processing/aec/main/source/aec_core_rdft.c
@@ -0,0 +1,511 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ *    - Trivial type modifications.
+ *    - Minimal code subset to do rdft of length 128.
+ *    - Optimizations because of known length.
+ *
+ *  All changes are covered by the WebRTC license and IP grant:
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "aec_core.h"
+
+static void bitrv2_32or128(int n, int *ip, float *a) {
+  // n is 32 or 128
+  int j, j1, k, k1, m, m2;
+  float xr, xi, yr, yi;
+
+  ip[0] = 0;
+  {
+    int l = n;
+    m = 1;
+    while ((m << 3) < l) {
+      l >>= 1;
+      for (j = 0; j < m; j++) {
+        ip[m + j] = ip[j] + l;
+      }
+      m <<= 1;
+    }
+  }
+  m2 = 2 * m;
+  for (k = 0; k < m; k++) {
+    for (j = 0; j < k; j++) {
+      j1 = 2 * j + ip[k];
+      k1 = 2 * k + ip[j];
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 += 2 * m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 -= m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 += 2 * m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+    }
+    j1 = 2 * k + m2 + ip[k];
+    k1 = j1 + m2;
+    xr = a[j1];
+    xi = a[j1 + 1];
+    yr = a[k1];
+    yi = a[k1 + 1];
+    a[j1] = yr;
+    a[j1 + 1] = yi;
+    a[k1] = xr;
+    a[k1 + 1] = xi;
+  }
+}
+
+static void makewt(int *ip, float *w) {
+  const int nw = 32;
+  int j, nwh;
+  float delta, x, y;
+
+  ip[0] = nw;
+  ip[1] = 1;
+  nwh = nw >> 1;
+  delta = atanf(1.0f) / nwh;
+  w[0] = 1;
+  w[1] = 0;
+  w[nwh] = cosf(delta * nwh);
+  w[nwh + 1] = w[nwh];
+  for (j = 2; j < nwh; j += 2) {
+    x = cosf(delta * j);
+    y = sinf(delta * j);
+    w[j] = x;
+    w[j + 1] = y;
+    w[nw - j] = y;
+    w[nw - j + 1] = x;
+  }
+  bitrv2_32or128(nw, ip + 2, w);
+}
+
+static void makect_32(int *ip, float *c) {
+  const int nc = 32;
+  int j, nch;
+  float delta;
+
+  ip[1] = nc;
+  nch = nc >> 1;
+  delta = atanf(1.0f) / nch;
+  c[0] = cosf(delta * nch);
+  c[nch] = 0.5f * c[0];
+  for (j = 1; j < nch; j++) {
+    c[j] = 0.5f * cosf(delta * j);
+    c[nc - j] = 0.5f * sinf(delta * j);
+  }
+}
+
+static void cft1st_128(float *a, float *w) {
+  const int n = 128;
+  int j, k1, k2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  x0r = a[0] + a[2];
+  x0i = a[1] + a[3];
+  x1r = a[0] - a[2];
+  x1i = a[1] - a[3];
+  x2r = a[4] + a[6];
+  x2i = a[5] + a[7];
+  x3r = a[4] - a[6];
+  x3i = a[5] - a[7];
+  a[0] = x0r + x2r;
+  a[1] = x0i + x2i;
+  a[4] = x0r - x2r;
+  a[5] = x0i - x2i;
+  a[2] = x1r - x3i;
+  a[3] = x1i + x3r;
+  a[6] = x1r + x3i;
+  a[7] = x1i - x3r;
+  wk1r = w[2];
+  x0r = a[8] + a[10];
+  x0i = a[9] + a[11];
+  x1r = a[8] - a[10];
+  x1i = a[9] - a[11];
+  x2r = a[12] + a[14];
+  x2i = a[13] + a[15];
+  x3r = a[12] - a[14];
+  x3i = a[13] - a[15];
+  a[8] = x0r + x2r;
+  a[9] = x0i + x2i;
+  a[12] = x2i - x0i;
+  a[13] = x0r - x2r;
+  x0r = x1r - x3i;
+  x0i = x1i + x3r;
+  a[10] = wk1r * (x0r - x0i);
+  a[11] = wk1r * (x0r + x0i);
+  x0r = x3i + x1r;
+  x0i = x3r - x1i;
+  a[14] = wk1r * (x0i - x0r);
+  a[15] = wk1r * (x0i + x0r);
+  k1 = 0;
+  for (j = 16; j < n; j += 16) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = w[k1];
+    wk2i = w[k1 + 1];
+    wk1r = w[k2];
+    wk1i = w[k2 + 1];
+    wk3r = wk1r - 2 * wk2i * wk1i;
+    wk3i = 2 * wk2i * wk1r - wk1i;
+    x0r = a[j] + a[j + 2];
+    x0i = a[j + 1] + a[j + 3];
+    x1r = a[j] - a[j + 2];
+    x1i = a[j + 1] - a[j + 3];
+    x2r = a[j + 4] + a[j + 6];
+    x2i = a[j + 5] + a[j + 7];
+    x3r = a[j + 4] - a[j + 6];
+    x3i = a[j + 5] - a[j + 7];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 4] = wk2r * x0r - wk2i * x0i;
+    a[j + 5] = wk2r * x0i + wk2i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 2] = wk1r * x0r - wk1i * x0i;
+    a[j + 3] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 6] = wk3r * x0r - wk3i * x0i;
+    a[j + 7] = wk3r * x0i + wk3i * x0r;
+    wk1r = w[k2 + 2];
+    wk1i = w[k2 + 3];
+    wk3r = wk1r - 2 * wk2r * wk1i;
+    wk3i = 2 * wk2r * wk1r - wk1i;
+    x0r = a[j + 8] + a[j + 10];
+    x0i = a[j + 9] + a[j + 11];
+    x1r = a[j + 8] - a[j + 10];
+    x1i = a[j + 9] - a[j + 11];
+    x2r = a[j + 12] + a[j + 14];
+    x2i = a[j + 13] + a[j + 15];
+    x3r = a[j + 12] - a[j + 14];
+    x3i = a[j + 13] - a[j + 15];
+    a[j + 8] = x0r + x2r;
+    a[j + 9] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 12] = -wk2i * x0r - wk2r * x0i;
+    a[j + 13] = -wk2i * x0i + wk2r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 10] = wk1r * x0r - wk1i * x0i;
+    a[j + 11] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 14] = wk3r * x0r - wk3i * x0i;
+    a[j + 15] = wk3r * x0i + wk3i * x0r;
+  }
+}
+
+static void cftmdl_128(int l, float *a, float *w) {
+  const int n = 128;
+  int j, j1, j2, j3, k, k1, k2, m, m2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  m = l << 2;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+  wk1r = w[2];
+  for (j = m; j < l + m; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x2i - x0i;
+    a[j2 + 1] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j1] = wk1r * (x0r - x0i);
+    a[j1 + 1] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[j3] = wk1r * (x0i - x0r);
+    a[j3 + 1] = wk1r * (x0i + x0r);
+  }
+  k1 = 0;
+  m2 = 2 * m;
+  for (k = m2; k < n; k += m2) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = w[k1];
+    wk2i = w[k1 + 1];
+    wk1r = w[k2];
+    wk1i = w[k2 + 1];
+    wk3r = wk1r - 2 * wk2i * wk1i;
+    wk3i = 2 * wk2i * wk1r - wk1i;
+    for (j = k; j < l + k; j += 2) {
+      j1 = j + l;
+      j2 = j1 + l;
+      j3 = j2 + l;
+      x0r = a[j] + a[j1];
+      x0i = a[j + 1] + a[j1 + 1];
+      x1r = a[j] - a[j1];
+      x1i = a[j + 1] - a[j1 + 1];
+      x2r = a[j2] + a[j3];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2] - a[j3];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j] = x0r + x2r;
+      a[j + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2] = wk2r * x0r - wk2i * x0i;
+      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+    wk1r = w[k2 + 2];
+    wk1i = w[k2 + 3];
+    wk3r = wk1r - 2 * wk2r * wk1i;
+    wk3i = 2 * wk2r * wk1r - wk1i;
+    for (j = k + m; j < l + (k + m); j += 2) {
+      j1 = j + l;
+      j2 = j1 + l;
+      j3 = j2 + l;
+      x0r = a[j] + a[j1];
+      x0i = a[j + 1] + a[j1 + 1];
+      x1r = a[j] - a[j1];
+      x1i = a[j + 1] - a[j1 + 1];
+      x2r = a[j2] + a[j3];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2] - a[j3];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j] = x0r + x2r;
+      a[j + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2] = -wk2i * x0r - wk2r * x0i;
+      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+  }
+}
+
+static void cftfsub_128(float *a, float *w) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a, w);
+  cftmdl_128(8, a, w);
+  l = 32;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+
+static void cftbsub_128(float *a, float *w) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a, w);
+  cftmdl_128(8, a, w);
+  l = 32;
+
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = -a[j + 1] - a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = -a[j + 1] + a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i - x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i + x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i - x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i + x3r;
+  }
+}
+
+static void rftfsub_128(float *a, int nc, float *c) {
+  const int n = 128;
+  int j, k, kk, ks, m;
+  float wkr, wki, xr, xi, yr, yi;
+
+  m = n >> 1;
+  ks = 2 * nc / m;
+  kk = 0;
+  for (j = 2; j < m; j += 2) {
+    k = n - j;
+    kk += ks;
+    wkr = 0.5f - c[nc - kk];
+    wki = c[kk];
+    xr = a[j] - a[k];
+    xi = a[j + 1] + a[k + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j] -= yr;
+    a[j + 1] -= yi;
+    a[k] += yr;
+    a[k + 1] -= yi;
+  }
+}
+
+static void rftbsub_128(float *a, int nc, float *c) {
+  const int n = 128;
+  int j, k, kk, ks, m;
+  float wkr, wki, xr, xi, yr, yi;
+
+  a[1] = -a[1];
+  m = n >> 1;
+  ks = 2 * nc / m;
+  kk = 0;
+  for (j = 2; j < m; j += 2) {
+    k = n - j;
+    kk += ks;
+    wkr = 0.5f - c[nc - kk];
+    wki = c[kk];
+    xr = a[j] - a[k];
+    xi = a[j + 1] + a[k + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j] -= yr;
+    a[j + 1] = yi - a[j + 1];
+    a[k] += yr;
+    a[k + 1] = yi - a[k + 1];
+  }
+  a[m + 1] = -a[m + 1];
+}
+
+void aec_rdft_128(int isgn, float *a, int *ip, float *w)
+{
+  const int n = 128;
+  int nw, nc;
+  float xi;
+
+  nw = ip[0];
+  if (n > (nw << 2)) {
+    nw = n >> 2;
+    makewt(ip, w);
+  }
+  nc = ip[1];
+  if (n > (nc << 2)) {
+    nc = n >> 2;
+    makect_32(ip, w + nw);
+  }
+  if (isgn >= 0) {
+    bitrv2_32or128(n, ip + 2, a);
+    cftfsub_128(a, w);
+    rftfsub_128(a, nc, w + nw);
+    xi = a[0] - a[1];
+    a[0] += a[1];
+    a[1] = xi;
+  } else {
+    a[1] = 0.5f * (a[0] - a[1]);
+    a[0] -= a[1];
+    rftbsub_128(a, nc, w + nw);
+    bitrv2_32or128(n, ip + 2, a);
+    cftbsub_128(a, w);
+  }
+}
diff --git a/modules/audio_processing/aec/main/source/aec_core_sse2.c b/modules/audio_processing/aec/main/source/aec_core_sse2.c
index d7f30b58c..c020e7bb4 100644
--- a/modules/audio_processing/aec/main/source/aec_core_sse2.c
+++ b/modules/audio_processing/aec/main/source/aec_core_sse2.c
@@ -174,7 +174,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1],
                    -aec->xfBuf[1][xPos + PART_LEN],
                    ef[0][PART_LEN], ef[1][PART_LEN]);
 
-    rdft(PART_LEN2, -1, fft, ip, wfft);
+    aec_rdft_128(-1, fft, ip, wfft);
     memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN);
 
     // fft scaling
@@ -187,7 +187,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1],
         _mm_storeu_ps(&fft[j], fft_scale);
       }
     }
-    rdft(PART_LEN2, 1, fft, ip, wfft);
+    aec_rdft_128(1, fft, ip, wfft);
 
     {
       float wt1 = aec->wfBuf[1][pos];