MIPS optimizations for ISAC (patch #1)

Implemented functions: - WebRtcIsacfix_AutocorrMIPS - WebRtcIsacfix_FilterArLoop - WebRtcIsacfix_FilterMaLoopMIPS - WebRtcIsacfix_AllpassFilter2FixDec16MIPS (only MIPS DSP) - WebRtcIsacfix_PitchFilterCore (only MIPS DSPR2) Gain achieved: from approx. 15% (MIPS32) up to approx. 40% (MIPS DSPR2) R=andrew@webrtc.org, tina.legrand@webrtc.org Review URL: https://webrtc-codereview.appspot.com/17559005 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk@6387 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-06-10 18:13:15 +00:00
parent 0d7ab0a634
commit 919914d71b
8 changed files with 999 additions and 1 deletions
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h
@@ -179,6 +179,21 @@ void WebRtcIsacfix_FilterMaLoopNeon(int16_t input0,
                                    int32_t* ptr2);
 #endif

+#if defined(MIPS32_LE)
+int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,  // Out: r[0..order]
+                               const int16_t* __restrict x,  // In: samples
+                               int16_t N,  // Number of samples in x
+                               int16_t order,  // Highest lag computed
+                               int16_t* __restrict scale);  // Out: shift used
+
+void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,
+                                    int16_t input1,
+                                    int32_t input2,
+                                    int32_t* ptr0,
+                                    int32_t* ptr1,
+                                    int32_t* ptr2);
+#endif
+
 /* Function pointers associated with the above functions. */

 typedef int (*AutocorrFix)(int32_t* __restrict r,
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h
@@ -58,6 +58,17 @@ void WebRtcIsacfix_AllpassFilter2FixDec16Neon(
   int32_t *filter_state_ch2);
 #endif

+#if defined(MIPS_DSP_R1_LE)
+void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
+   int16_t *data_ch1,  // Input and output in channel 1, in Q0
+   int16_t *data_ch2,  // Input and output in channel 2, in Q0
+   const int16_t *factor_ch1,  // Scaling factor for channel 1, in Q15
+   const int16_t *factor_ch2,  // Scaling factor for channel 2, in Q15
+   const int length,  // Length of the data buffers
+   int32_t *filter_state_ch1,  // Filter state for channel 1, in Q16
+   int32_t *filter_state_ch2);  // Filter state for channel 2, in Q16
+#endif
+
 #if defined(__cplusplus) || defined(c_plusplus)
 }
 #endif
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
+
+// WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform
+// Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c
+void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
+    int16_t *data_ch1,  // Input and output in channel 1, in Q0
+    int16_t *data_ch2,  // Input and output in channel 2, in Q0
+    const int16_t *factor_ch1,  // Scaling factor for channel 1, in Q15
+    const int16_t *factor_ch2,  // Scaling factor for channel 2, in Q15
+    const int length,  // Length of the data buffers
+    int32_t *filter_state_ch1,  // Filter state for channel 1, in Q16
+    int32_t *filter_state_ch2) {  // Filter state for channel 2, in Q16
+  int remaining = length;  // asm-writable copy; "r" inputs must not be modified
+  int32_t st0_ch1, st1_ch1; // channel1 state variables
+  int32_t st0_ch2, st1_ch2; // channel2 state variables
+  int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables
+  int32_t r0, r1, r2, r3, r4, r5; // temporary register variables
+
+  __asm __volatile (
+    ".set           push                                                  \n\t"
+    ".set           noreorder                                             \n\t"
+    // Load the two factors and the two filter states of each channel
+    "lh             %[f_ch10],      0(%[factor_ch1])                      \n\t"
+    "lh             %[f_ch20],      0(%[factor_ch2])                      \n\t"
+    "lh             %[f_ch11],      2(%[factor_ch1])                      \n\t"
+    "lh             %[f_ch21],      2(%[factor_ch2])                      \n\t"
+    "lw             %[st0_ch1],     0(%[filter_state_ch1])                \n\t"
+    "lw             %[st1_ch1],     4(%[filter_state_ch1])                \n\t"
+    "lw             %[st0_ch2],     0(%[filter_state_ch2])                \n\t"
+    "lw             %[st1_ch2],     4(%[filter_state_ch2])                \n\t"
+    // Allpass filtering loop; bottom-tested, so length must be at least 1
+   "1:                                                                    \n\t"
+    "lh             %[r0],          0(%[data_ch1])                        \n\t"
+    "lh             %[r1],          0(%[data_ch2])                        \n\t"
+    "addiu          %[length],      %[length],              -1            \n\t"
+    "mul            %[r2],          %[r0],                  %[f_ch10]     \n\t"
+    "mul            %[r3],          %[r1],                  %[f_ch20]     \n\t"
+    "sll            %[r0],          %[r0],                  16            \n\t"
+    "sll            %[r1],          %[r1],                  16            \n\t"
+    "sll            %[r2],          %[r2],                  1             \n\t"
+    "addq_s.w       %[r2],          %[r2],                  %[st0_ch1]    \n\t"
+    "sll            %[r3],          %[r3],                  1             \n\t"
+    "addq_s.w       %[r3],          %[r3],                  %[st0_ch2]    \n\t"
+    "sra            %[r2],          %[r2],                  16            \n\t"
+    "mul            %[st0_ch1],     %[f_ch10],              %[r2]         \n\t"
+    "sra            %[r3],          %[r3],                  16            \n\t"
+    "mul            %[st0_ch2],     %[f_ch20],              %[r3]         \n\t"
+    "mul            %[r4],          %[r2],                  %[f_ch11]     \n\t"
+    "mul            %[r5],          %[r3],                  %[f_ch21]     \n\t"
+    "sll            %[st0_ch1],     %[st0_ch1],             1             \n\t"
+    "subq_s.w       %[st0_ch1],     %[r0],                  %[st0_ch1]    \n\t"
+    "sll            %[st0_ch2],     %[st0_ch2],             1             \n\t"
+    "subq_s.w       %[st0_ch2],     %[r1],                  %[st0_ch2]    \n\t"
+    "sll            %[r4],          %[r4],                  1             \n\t"
+    "addq_s.w       %[r4],          %[r4],                  %[st1_ch1]    \n\t"
+    "sll            %[r5],          %[r5],                  1             \n\t"
+    "addq_s.w       %[r5],          %[r5],                  %[st1_ch2]    \n\t"
+    "sra            %[r4],          %[r4],                  16            \n\t"
+    "mul            %[r0],          %[r4],                  %[f_ch11]     \n\t"
+    "sra            %[r5],          %[r5],                  16            \n\t"
+    "mul            %[r1],          %[r5],                  %[f_ch21]     \n\t"
+    "sh             %[r4],          0(%[data_ch1])                        \n\t"
+    "sh             %[r5],          0(%[data_ch2])                        \n\t"
+    "addiu          %[data_ch1],    %[data_ch1],            2             \n\t"
+    "sll            %[r2],          %[r2],                  16            \n\t"
+    "sll            %[r0],          %[r0],                  1             \n\t"
+    "subq_s.w       %[st1_ch1],     %[r2],                  %[r0]         \n\t"
+    "sll            %[r3],          %[r3],                  16            \n\t"
+    "sll            %[r1],          %[r1],                  1             \n\t"
+    "subq_s.w       %[st1_ch2],     %[r3],                  %[r1]         \n\t"
+    "bgtz           %[length],      1b                                    \n\t"
+    " addiu         %[data_ch2],    %[data_ch2],            2             \n\t"
+    // Write the updated channel states back for the next call
+    "sw             %[st0_ch1],     0(%[filter_state_ch1])                \n\t"
+    "sw             %[st1_ch1],     4(%[filter_state_ch1])                \n\t"
+    "sw             %[st0_ch2],     0(%[filter_state_ch2])                \n\t"
+    "sw             %[st1_ch2],     4(%[filter_state_ch2])                \n\t"
+    ".set           pop                                                   \n\t"
+    : [f_ch10] "=&r" (f_ch10), [f_ch20] "=&r" (f_ch20),
+      [f_ch11] "=&r" (f_ch11), [f_ch21] "=&r" (f_ch21),
+      [st0_ch1] "=&r" (st0_ch1), [st1_ch1] "=&r" (st1_ch1),
+      [st0_ch2] "=&r" (st0_ch2), [st1_ch2] "=&r" (st1_ch2),
+      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+      [data_ch1] "+r" (data_ch1), [data_ch2] "+r" (data_ch2),
+      [length] "+r" (remaining)
+    : [factor_ch1] "r" (factor_ch1), [factor_ch2] "r" (factor_ch2),
+      [filter_state_ch1] "r" (filter_state_ch1),
+      [filter_state_ch2] "r" (filter_state_ch2)
+    : "memory", "hi", "lo"
+  );
+}
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/filters_mips.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/filters_mips.c
@@ -0,0 +1,365 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
+
+// MIPS-optimized fixed-point autocorrelation: r[0..order], returns order + 1.
+// NOTE! Different from SPLIB-version in how it scales the signal (see *scale).
+int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,
+                               const int16_t* __restrict x,
+                               int16_t N,
+                               int16_t order,
+                               int16_t* __restrict scale) {
+  int i = 0;
+  int16_t scaling = 0;
+  int16_t* in = (int16_t*)x;
+  int loop_size = (int)(N >> 3);
+  int count = (int)(N & 7);
+  // Temporaries that the inline assembly uses as scratch registers.
+  int32_t r0, r1, r2, r3;
+#if !defined(MIPS_DSP_R2_LE)
+  // Without DSPR2 samples are loaded one at a time, so 4 more are needed.
+  int32_t r4, r5, r6, r7;
+#endif
+
+  // Calculate r[0] (sum of squares of x) and the scaling needed.
+  __asm __volatile (
+    ".set          push                                            \n\t"
+    ".set          noreorder                                       \n\t"
+    "mult          $0,             $0                              \n\t"
+    // Loop is unrolled 8 times, set accumulator to zero in branch delay slot.
+    "beqz          %[loop_size],   2f                              \n\t"
+    " mult         $0,             $0                              \n\t"
+   "1:                                                             \n\t"
+    // Load 8 samples per loop iteration.
+#if defined(MIPS_DSP_R2_LE)
+    "ulw           %[r0],          0(%[in])                        \n\t"
+    "ulw           %[r1],          4(%[in])                        \n\t"
+    "ulw           %[r2],          8(%[in])                        \n\t"
+    "ulw           %[r3],          12(%[in])                       \n\t"
+#else
+    "lh            %[r0],          0(%[in])                        \n\t"
+    "lh            %[r1],          2(%[in])                        \n\t"
+    "lh            %[r2],          4(%[in])                        \n\t"
+    "lh            %[r3],          6(%[in])                        \n\t"
+    "lh            %[r4],          8(%[in])                        \n\t"
+    "lh            %[r5],          10(%[in])                       \n\t"
+    "lh            %[r6],          12(%[in])                       \n\t"
+    "lh            %[r7],          14(%[in])                       \n\t"
+#endif
+    "addiu         %[loop_size],   %[loop_size],   -1              \n\t"
+    // Multiply and accumulate.
+#if defined(MIPS_DSP_R2_LE)
+    "dpa.w.ph      $ac0,           %[r0],          %[r0]           \n\t"
+    "dpa.w.ph      $ac0,           %[r1],          %[r1]           \n\t"
+    "dpa.w.ph      $ac0,           %[r2],          %[r2]           \n\t"
+    "dpa.w.ph      $ac0,           %[r3],          %[r3]           \n\t"
+#else
+    "madd          %[r0],          %[r0]                           \n\t"
+    "madd          %[r1],          %[r1]                           \n\t"
+    "madd          %[r2],          %[r2]                           \n\t"
+    "madd          %[r3],          %[r3]                           \n\t"
+    "madd          %[r4],          %[r4]                           \n\t"
+    "madd          %[r5],          %[r5]                           \n\t"
+    "madd          %[r6],          %[r6]                           \n\t"
+    "madd          %[r7],          %[r7]                           \n\t"
+#endif
+    "bnez          %[loop_size],   1b                              \n\t"
+    " addiu        %[in],          %[in],          16              \n\t"
+   "2:                                                             \n\t"
+    "beqz          %[count],       4f                              \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    " extr.w       %[r0],          $ac0,           31              \n\t"
+#else
+    " mfhi         %[r2]                                           \n\t"
+#endif
+    // Process remaining samples (if any).
+   "3:                                                             \n\t"
+    "lh            %[r0],          0(%[in])                        \n\t"
+    "addiu         %[count],       %[count],       -1              \n\t"
+    "madd          %[r0],          %[r0]                           \n\t"
+    "bnez          %[count],       3b                              \n\t"
+    " addiu        %[in],          %[in],          2               \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extr.w        %[r0],          $ac0,           31              \n\t"
+#else
+    "mfhi          %[r2]                                           \n\t"
+#endif
+   "4:                                                             \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+    "mflo          %[r3]                                           \n\t"
+    "sll           %[r0],          %[r2],          1               \n\t"
+    "srl           %[r1],          %[r3],          31              \n\t"
+    "addu          %[r0],          %[r0],          %[r1]           \n\t"
+#endif
+    // Calculate scaling: the right shift that makes the sum fit in 31 bits.
+    "clz           %[r1],          %[r0]                           \n\t"
+    "addiu         %[r1],          %[r1],          -32             \n\t"
+    "subu          %[scaling],     $0,             %[r1]           \n\t"
+    "slti          %[r1],          %[r0],          0x1             \n\t"
+    "movn          %[scaling],     $0,             %[r1]           \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extrv.w       %[r0],          $ac0,           %[scaling]      \n\t"
+    "mfhi          %[r2],          $ac0                            \n\t"
+#else
+    "addiu         %[r1],          %[scaling],     -32             \n\t"
+    "subu          %[r1],          $0,             %[r1]           \n\t"
+    "sllv          %[r1],          %[r2],          %[r1]           \n\t"
+    "srlv          %[r0],          %[r3],          %[scaling]      \n\t"
+    "addu          %[r0],          %[r0],          %[r1]           \n\t"
+#endif
+    "slti          %[r1],          %[scaling],     32              \n\t"
+    "movz          %[r0],          %[r2],          %[r1]           \n\t"
+    ".set          pop                                             \n\t"
+    : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0),
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+#if !defined(MIPS_DSP_R2_LE)
+      [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+#endif
+      [count] "+r" (count), [scaling] "=r" (scaling)
+    : [N] "r" (N)
+    : "memory", "hi", "lo"
+  );
+  r[0] = r0;
+
+  // Correlation calculation is divided in 3 cases depending on the scaling
+  // value (different accumulator manipulation needed). Three slightly different
+  // loops are written in order to avoid branches inside the loop.
+  if (scaling == 0) {
+    // In this case, the result will be in the low part of the accumulator.
+    for (i = 1; i < order + 1; i++) {
+      in = (int16_t*)x;
+      int16_t* in1 = (int16_t*)x + i;
+      count = N - i;
+      loop_size = (count) >> 2;
+      __asm  __volatile (
+        ".set        push                                          \n\t"
+        ".set        noreorder                                     \n\t"
+        "mult        $0,             $0                            \n\t"
+        "beqz        %[loop_size],   2f                            \n\t"
+        " andi       %[count],       %[count],       0x3           \n\t"
+        // Loop processing 4 pairs of samples per iteration.
+       "1:                                                         \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "ulw         %[r0],          0(%[in])                      \n\t"
+        "ulw         %[r1],          0(%[in1])                     \n\t"
+        "ulw         %[r2],          4(%[in])                      \n\t"
+        "ulw         %[r3],          4(%[in1])                     \n\t"
+#else
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "lh          %[r2],          2(%[in])                      \n\t"
+        "lh          %[r3],          2(%[in1])                     \n\t"
+        "lh          %[r4],          4(%[in])                      \n\t"
+        "lh          %[r5],          4(%[in1])                     \n\t"
+        "lh          %[r6],          6(%[in])                      \n\t"
+        "lh          %[r7],          6(%[in1])                     \n\t"
+#endif
+        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
+        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
+#else
+        "madd        %[r0],          %[r1]                         \n\t"
+        "madd        %[r2],          %[r3]                         \n\t"
+        "madd        %[r4],          %[r5]                         \n\t"
+        "madd        %[r6],          %[r7]                         \n\t"
+#endif
+        "addiu       %[in],          %[in],          8             \n\t"
+        "bnez        %[loop_size],   1b                            \n\t"
+        " addiu      %[in1],         %[in1],         8             \n\t"
+       "2:                                                         \n\t"
+        "beqz        %[count],       4f                            \n\t"
+        " mflo       %[r0]                                         \n\t"
+        // Process remaining samples (if any).
+       "3:                                                         \n\t"
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "addiu       %[count],       %[count],       -1            \n\t"
+        "addiu       %[in],          %[in],          2             \n\t"
+        "madd        %[r0],          %[r1]                         \n\t"
+        "bnez        %[count],       3b                            \n\t"
+        " addiu      %[in1],         %[in1],         2             \n\t"
+        "mflo        %[r0]                                         \n\t"
+       "4:                                                         \n\t"
+        ".set        pop                                           \n\t"
+        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
+#if !defined(MIPS_DSP_R2_LE)
+          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+#endif
+          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+          [count] "+r" (count)
+        :
+        : "memory", "hi", "lo"
+      );
+      r[i] = r0;
+    }
+  } else if (scaling == 32) {
+    // In this case, the result will be in the high part of the accumulator.
+    for (i = 1; i < order + 1; i++) {
+      in = (int16_t*)x;
+      int16_t* in1 = (int16_t*)x + i;
+      count = N - i;
+      loop_size = (count) >> 2;
+      __asm __volatile (
+        ".set        push                                          \n\t"
+        ".set        noreorder                                     \n\t"
+        "mult        $0,             $0                            \n\t"
+        "beqz        %[loop_size],   2f                            \n\t"
+        " andi       %[count],       %[count],       0x3           \n\t"
+        // Loop processing 4 pairs of samples per iteration.
+       "1:                                                         \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "ulw         %[r0],          0(%[in])                      \n\t"
+        "ulw         %[r1],          0(%[in1])                     \n\t"
+        "ulw         %[r2],          4(%[in])                      \n\t"
+        "ulw         %[r3],          4(%[in1])                     \n\t"
+#else
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "lh          %[r2],          2(%[in])                      \n\t"
+        "lh          %[r3],          2(%[in1])                     \n\t"
+        "lh          %[r4],          4(%[in])                      \n\t"
+        "lh          %[r5],          4(%[in1])                     \n\t"
+        "lh          %[r6],          6(%[in])                      \n\t"
+        "lh          %[r7],          6(%[in1])                     \n\t"
+#endif
+        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
+        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
+#else
+        "madd        %[r0],          %[r1]                         \n\t"
+        "madd        %[r2],          %[r3]                         \n\t"
+        "madd        %[r4],          %[r5]                         \n\t"
+        "madd        %[r6],          %[r7]                         \n\t"
+#endif
+        "addiu       %[in],          %[in],          8             \n\t"
+        "bnez        %[loop_size],   1b                            \n\t"
+        " addiu      %[in1],         %[in1],         8             \n\t"
+       "2:                                                         \n\t"
+        "beqz        %[count],       4f                            \n\t"
+        " mfhi       %[r0]                                         \n\t"
+        // Process remaining samples (if any).
+       "3:                                                         \n\t"
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "addiu       %[count],       %[count],       -1            \n\t"
+        "addiu       %[in],          %[in],          2             \n\t"
+        "madd        %[r0],          %[r1]                         \n\t"
+        "bnez        %[count],       3b                            \n\t"
+        " addiu      %[in1],         %[in1],         2             \n\t"
+        "mfhi        %[r0]                                         \n\t"
+       "4:                                                         \n\t"
+        ".set        pop                                           \n\t"
+        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
+#if !defined(MIPS_DSP_R2_LE)
+          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+#endif
+          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+          [count] "+r" (count)
+        :
+        : "memory", "hi", "lo"
+      );
+      r[i] = r0;
+    }
+  } else {
+    // In this case, the result is obtained by combining low and high parts
+    // of the accumulator.
+#if !defined(MIPS_DSP_R1_LE)
+    int32_t tmp_shift = 32 - scaling;
+#endif
+    for (i = 1; i < order + 1; i++) {
+      in = (int16_t*)x;
+      int16_t* in1 = (int16_t*)x + i;
+      count = N - i;
+      loop_size = (count) >> 2;
+      __asm __volatile (
+        ".set        push                                          \n\t"
+        ".set        noreorder                                     \n\t"
+        "mult        $0,             $0                            \n\t"
+        "beqz        %[loop_size],   2f                            \n\t"
+        " andi       %[count],       %[count],       0x3           \n\t"
+       "1:                                                         \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "ulw         %[r0],          0(%[in])                      \n\t"
+        "ulw         %[r1],          0(%[in1])                     \n\t"
+        "ulw         %[r2],          4(%[in])                      \n\t"
+        "ulw         %[r3],          4(%[in1])                     \n\t"
+#else
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "lh          %[r2],          2(%[in])                      \n\t"
+        "lh          %[r3],          2(%[in1])                     \n\t"
+        "lh          %[r4],          4(%[in])                      \n\t"
+        "lh          %[r5],          4(%[in1])                     \n\t"
+        "lh          %[r6],          6(%[in])                      \n\t"
+        "lh          %[r7],          6(%[in1])                     \n\t"
+#endif
+        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
+#if defined(MIPS_DSP_R2_LE)
+        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
+        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
+#else
+        "madd        %[r0],          %[r1]                         \n\t"
+        "madd        %[r2],          %[r3]                         \n\t"
+        "madd        %[r4],          %[r5]                         \n\t"
+        "madd        %[r6],          %[r7]                         \n\t"
+#endif
+        "addiu       %[in],          %[in],          8             \n\t"
+        "bnez        %[loop_size],   1b                            \n\t"
+        " addiu      %[in1],         %[in1],         8             \n\t"
+       "2:                                                         \n\t"
+        "beqz        %[count],       4f                            \n\t"
+#if defined(MIPS_DSP_R1_LE)
+        " extrv.w    %[r0],          $ac0,           %[scaling]    \n\t"
+#else
+        " mfhi       %[r0]                                         \n\t"
+#endif
+       "3:                                                         \n\t"
+        "lh          %[r0],          0(%[in])                      \n\t"
+        "lh          %[r1],          0(%[in1])                     \n\t"
+        "addiu       %[count],       %[count],       -1            \n\t"
+        "addiu       %[in],          %[in],          2             \n\t"
+        "madd        %[r0],          %[r1]                         \n\t"
+        "bnez        %[count],       3b                            \n\t"
+        " addiu      %[in1],         %[in1],         2             \n\t"
+#if defined(MIPS_DSP_R1_LE)
+        "extrv.w     %[r0],          $ac0,           %[scaling]    \n\t"
+#else
+        "mfhi        %[r0]                                         \n\t"
+#endif
+       "4:                                                         \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+        "mflo        %[r1]                                         \n\t"
+        "sllv        %[r0],          %[r0],          %[tmp_shift]  \n\t"
+        "srlv        %[r1],          %[r1],          %[scaling]    \n\t"
+        "addu        %[r0],          %[r0],          %[r1]         \n\t"
+#endif
+        ".set        pop                                           \n\t"
+        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
+#if !defined(MIPS_DSP_R2_LE)
+          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+#endif
+          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+          [count] "+r" (count)
+        : [scaling] "r" (scaling)
+#if !defined(MIPS_DSP_R1_LE)
+        , [tmp_shift] "r" (tmp_shift)
+#endif
+        : "memory", "hi", "lo"
+      );
+      r[i] = r0;
+    }
+  }
+  *scale = scaling;
+
+  return (order + 1);
+}
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@@ -179,7 +179,7 @@ int16_t WebRtcIsacfix_FreeInternal(ISACFIX_MainStruct *ISAC_main_inst)
 }

 /****************************************************************************
- * WebRtcAecm_InitNeon(...)
+ * WebRtcIsacfix_InitNeon(...)
 *
 * This function initializes function pointers for ARM Neon platform.
 */
@@ -199,6 +199,23 @@ static void WebRtcIsacfix_InitNeon(void) {
 }
 #endif

+/****************************************************************************
+ * WebRtcIsacfix_InitMIPS(...)
+ * Points WebRtcIsacfix_AutocorrFix and WebRtcIsacfix_FilterMaLoopFix at their
+ * MIPS versions; the allpass filter too, but only under MIPS_DSP_R1_LE.
+ */
+
+#if defined(MIPS32_LE)
+static void WebRtcIsacfix_InitMIPS(void) {
+  WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrMIPS;
+  WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopMIPS;
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcIsacfix_AllpassFilter2FixDec16 =
+      WebRtcIsacfix_AllpassFilter2FixDec16MIPS;
+#endif  // MIPS_DSP_R1_LE
+}
+#endif  // MIPS32_LE
+
 /****************************************************************************
 * WebRtcIsacfix_EncoderInit(...)
 *
@@ -296,6 +313,10 @@ int16_t WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
  WebRtcIsacfix_InitNeon();
 #endif

+#if defined(MIPS32_LE)
+  WebRtcIsacfix_InitMIPS();
+#endif
+
  return statusInit;
 }

--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
@@ -85,6 +85,30 @@
            'pitch_filter_c.c',
          ],
        }],
+        ['target_arch=="mipsel"', {
+          'sources': [
+            'filters_mips.c',
+            'lattice_mips.c',
+          ],
+          'sources!': [
+            'lattice_c.c',
+          ],
+          'conditions': [
+            ['mips_dsp_rev>0', {
+              'sources': [
+                'filterbanks_mips.c',
+              ],
+            }],
+            ['mips_dsp_rev>1', {
+              'sources': [
+                'pitch_filter_mips.c',
+              ],
+              'sources!': [
+                'pitch_filter_c.c',
+              ],
+            }],
+          ],
+        }],
      ],
    },
  ],
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/lattice_mips.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/lattice_mips.c
@@ -0,0 +1,327 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
+#include "webrtc/typedefs.h"
+
+// Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
+// cth_Q15[] and sth_Q15[].
+//
+// For every n in [0, HALF_SUBFRAMELEN - 1) the inline assembly walks k from
+// order_coef - 1 down to 0 and computes, with t_ar = ar_f_Q0[n + 1]:
+//   t0 = (cth_Q15[k] * t_ar - sth_Q15[k] * ar_g_Q0[k]), rounded, >> 15
+//   t1 = (sth_Q15[k] * t_ar + cth_Q15[k] * ar_g_Q0[k]), rounded, >> 15
+//   ar_g_Q0[k + 1] = t1 saturated to 16 bits
+//   t_ar           = t0 saturated to 16 bits
+// After the inner loop, t_ar is stored to both ar_f_Q0[n + 1] and ar_g_Q0[0].
+// The highest index written in ar_g_Q0 is order_coef, so that buffer needs
+// room for order_coef + 1 elements.
+void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,     // Input samples
+                                int16_t* ar_f_Q0,     // Input samples
+                                int16_t* cth_Q15,     // Filter coefficients
+                                int16_t* sth_Q15,     // Filter coefficients
+                                int16_t order_coef) { // order of the filter
+  int n = 0;
+
+  for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
+    // Inner-loop index k; counts down from order_coef - 1 to 0.
+    int count = order_coef - 1;
+    int offset;
+#if !defined(MIPS_DSP_R1_LE)
+    // Without the DSP ASE the element addresses are computed explicitly and
+    // saturation is done by comparing against these 16-bit limits.
+    int16_t* tmp_cth;
+    int16_t* tmp_sth;
+    int16_t* tmp_arg;
+    int32_t max_q16 = 0x7fff;
+    int32_t min_q16 = 0xffff8000;
+#endif
+    // Declare variables used as temporary registers.
+    int32_t r0, r1, r2, t0, t1, t2, t_ar;
+
+    __asm __volatile (
+      ".set          push                                                \n\t"
+      ".set          noreorder                                           \n\t"
+      // Skip the inner loop when count < 0. The load of ar_f_Q0[n + 1] sits
+      // in the branch delay slot, so it is executed either way.
+      "bltz          %[count],     2f                                    \n\t"
+      " lh           %[t_ar],      0(%[tmp])                             \n\t"
+      // Inner loop
+     "1:                                                                 \n\t"
+      // offset = count * sizeof(int16_t)
+      "sll           %[offset],    %[count],               1             \n\t"
+#if defined(MIPS_DSP_R1_LE)
+      // Indexed halfword loads of cth_Q15[k], sth_Q15[k] and ar_g_Q0[k].
+      "lhx           %[r0],        %[offset](%[cth_Q15])                 \n\t"
+      "lhx           %[r1],        %[offset](%[sth_Q15])                 \n\t"
+      "lhx           %[r2],        %[offset](%[ar_g_Q0])                 \n\t"
+#else
+      "addu          %[tmp_cth],   %[cth_Q15],             %[offset]     \n\t"
+      "addu          %[tmp_sth],   %[sth_Q15],             %[offset]     \n\t"
+      "addu          %[tmp_arg],   %[ar_g_Q0],             %[offset]     \n\t"
+      "lh            %[r0],        0(%[tmp_cth])                         \n\t"
+      "lh            %[r1],        0(%[tmp_sth])                         \n\t"
+      "lh            %[r2],        0(%[tmp_arg])                         \n\t"
+#endif
+      // t0 = cth * t_ar - sth * ar_g;  t1 = sth * t_ar + cth * ar_g
+      "mul           %[t0],        %[r0],                  %[t_ar]       \n\t"
+      "mul           %[t1],        %[r1],                  %[t_ar]       \n\t"
+      "mul           %[t2],        %[r1],                  %[r2]         \n\t"
+      "mul           %[r0],        %[r0],                  %[r2]         \n\t"
+      "subu          %[t0],        %[t0],                  %[t2]         \n\t"
+      "addu          %[t1],        %[t1],                  %[r0]         \n\t"
+      // Rounded arithmetic shift right by 15.
+#if defined(MIPS_DSP_R1_LE)
+      "shra_r.w      %[t1],        %[t1],                  15            \n\t"
+      "shra_r.w      %[t0],        %[t0],                  15            \n\t"
+#else
+      "addiu         %[t1],        %[t1],                  0x4000        \n\t"
+      "sra           %[t1],        %[t1],                  15            \n\t"
+      "addiu         %[t0],        %[t0],                  0x4000        \n\t"
+      "sra           %[t0],        %[t0],                  15            \n\t"
+#endif
+      // offset now addresses element k + 1 (completed by the addu below).
+      "addiu         %[offset],    %[offset],              2             \n\t"
+      // Saturate t1 and t0 to the 16-bit range. The DSP variant uses a
+      // saturating shift left by 16 followed by an arithmetic shift right by
+      // 16; the plain variant clamps against max_q16 / min_q16.
+#if defined(MIPS_DSP_R1_LE)
+      "shll_s.w      %[t1],        %[t1],                  16            \n\t"
+      "shll_s.w      %[t_ar],      %[t0],                  16            \n\t"
+#else
+      "slt           %[r0],        %[t1],                  %[max_q16]    \n\t"
+      "slt           %[r1],        %[t0],                  %[max_q16]    \n\t"
+      "movz          %[t1],        %[max_q16],             %[r0]         \n\t"
+      "movz          %[t0],        %[max_q16],             %[r1]         \n\t"
+#endif
+      "addu          %[offset],    %[offset],              %[ar_g_Q0]    \n\t"
+#if defined(MIPS_DSP_R1_LE)
+      "sra           %[t1],        %[t1],                  16            \n\t"
+      "sra           %[t_ar],      %[t_ar],                16            \n\t"
+#else
+      "slt           %[r0],        %[t1],                  %[min_q16]    \n\t"
+      "slt           %[r1],        %[t0],                  %[min_q16]    \n\t"
+      "movn          %[t1],        %[min_q16],             %[r0]         \n\t"
+      "movn          %[t0],        %[min_q16],             %[r1]         \n\t"
+      "addu          %[t_ar],      $zero,                  %[t0]         \n\t"
+#endif
+      // ar_g_Q0[k + 1] = t1
+      "sh            %[t1],        0(%[offset])                          \n\t"
+      // Loop while count > 0; the decrement is in the branch delay slot.
+      "bgtz          %[count],     1b                                    \n\t"
+      " addiu        %[count],     %[count],               -1            \n\t"
+     "2:                                                                 \n\t"
+      // ar_f_Q0[n + 1] = t_ar;  ar_g_Q0[0] = t_ar
+      "sh            %[t_ar],      0(%[tmp])                             \n\t"
+      "sh            %[t_ar],      0(%[ar_g_Q0])                         \n\t"
+      ".set          pop                                                 \n\t"
+      : [t_ar] "=&r" (t_ar), [count] "+r" (count), [offset] "=&r" (offset),
+        [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [t0] "=&r" (t0),
+#if !defined(MIPS_DSP_R1_LE)
+        [tmp_cth] "=&r" (tmp_cth), [tmp_sth] "=&r" (tmp_sth),
+        [tmp_arg] "=&r" (tmp_arg),
+#endif
+        [t1] "=&r" (t1), [t2] "=&r" (t2)
+      : [tmp] "r" (&ar_f_Q0[n+1]), [cth_Q15] "r" (cth_Q15),
+#if !defined(MIPS_DSP_R1_LE)
+        [max_q16] "r" (max_q16), [min_q16] "r" (min_q16),
+#endif
+        [sth_Q15] "r" (sth_Q15), [ar_g_Q0] "r" (ar_g_Q0)
+      : "memory", "hi", "lo"
+    );
+  }
+}
+
+// MIPS optimization of the inner loop used for function
+// WebRtcIsacfix_NormLatticeFilterMa(). It does:
+//
+// for 0 <= n < HALF_SUBFRAMELEN - 1:
+//   *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
+//   *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
+//
+// with ptr0, ptr1 and ptr2 each advanced by one element per iteration. The
+// products with input0/input1 are scaled down by 15 bits and the product with
+// input2 by 16 bits (see the shift amounts in the assembly below).
+//
+// Note, functions WebRtcIsacfix_FilterMaLoopMIPS and
+// WebRtcIsacfix_FilterMaLoopC are not bit-exact. The accuracy of the MIPS
+// function is same or better.
+void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,  // Filter coefficient
+                                    int16_t input1,  // Filter coefficient
+                                    int32_t input2,  // Inverse coeff (1/input1)
+                                    int32_t* ptr0,   // Sample buffer
+                                    int32_t* ptr1,   // Sample buffer
+                                    int32_t* ptr2) { // Sample buffer
+#if defined(MIPS_DSP_R2_LE)
+  // MIPS DSPR2 version. 4 available accumulators allows loop unrolling 4 times.
+  // This variant is not bit-exact with WebRtcIsacfix_FilterMaLoopC, since we
+  // are exploiting 64-bit accumulators. The accuracy of the MIPS DSPR2 function
+  // is same or better.
+  //
+  // n counts the 4x unrolled iterations, m the leftover single iterations.
+  // The unrolled loop has no entry guard and always runs at least once, so
+  // this code relies on HALF_SUBFRAMELEN - 1 being at least 4.
+  int n = (HALF_SUBFRAMELEN - 1) >> 2;
+  int m = (HALF_SUBFRAMELEN - 1) & 3;
+
+  int r0, r1, r2, r3;
+  int t0, t1, t2, t3;
+  int s0, s1, s2, s3;
+
+  __asm __volatile (
+    ".set          push                                      \n\t"
+    ".set          noreorder                                 \n\t"
+    // Unrolled loop: four samples per pass.
+   "1:                                                       \n\t"
+    "lw            %[r0],        0(%[ptr0])                  \n\t"
+    "lw            %[r1],        4(%[ptr0])                  \n\t"
+    "lw            %[r2],        8(%[ptr0])                  \n\t"
+    "lw            %[r3],        12(%[ptr0])                 \n\t"
+    // s = (input0 * *ptr0), rounded and saturated, >> 15
+    "mult          $ac0,         %[r0],        %[input0]     \n\t"
+    "mult          $ac1,         %[r1],        %[input0]     \n\t"
+    "mult          $ac2,         %[r2],        %[input0]     \n\t"
+    "mult          $ac3,         %[r3],        %[input0]     \n\t"
+    "lw            %[t0],        0(%[ptr2])                  \n\t"
+    "extr_rs.w     %[s0],        $ac0,         15            \n\t"
+    "extr_rs.w     %[s1],        $ac1,         15            \n\t"
+    "extr_rs.w     %[s2],        $ac2,         15            \n\t"
+    "extr_rs.w     %[s3],        $ac3,         15            \n\t"
+    "lw            %[t1],        4(%[ptr2])                  \n\t"
+    "lw            %[t2],        8(%[ptr2])                  \n\t"
+    "lw            %[t3],        12(%[ptr2])                 \n\t"
+    // t = *ptr2 + s
+    "addu          %[t0],        %[t0],        %[s0]         \n\t"
+    "addu          %[t1],        %[t1],        %[s1]         \n\t"
+    "addu          %[t2],        %[t2],        %[s2]         \n\t"
+    "addu          %[t3],        %[t3],        %[s3]         \n\t"
+    // t = (input2 * t), rounded and saturated, >> 16  -> new *ptr2
+    "mult          $ac0,         %[t0],        %[input2]     \n\t"
+    "mult          $ac1,         %[t1],        %[input2]     \n\t"
+    "mult          $ac2,         %[t2],        %[input2]     \n\t"
+    "mult          $ac3,         %[t3],        %[input2]     \n\t"
+    "addiu         %[ptr0],      %[ptr0],      16            \n\t"
+    "extr_rs.w     %[t0],        $ac0,         16            \n\t"
+    "extr_rs.w     %[t1],        $ac1,         16            \n\t"
+    "extr_rs.w     %[t2],        $ac2,         16            \n\t"
+    "extr_rs.w     %[t3],        $ac3,         16            \n\t"
+    "addiu         %[n],         %[n],         -1            \n\t"
+    // s = (input1 * *ptr0), rounded and saturated, >> 15
+    "mult          $ac0,         %[r0],        %[input1]     \n\t"
+    "mult          $ac1,         %[r1],        %[input1]     \n\t"
+    "mult          $ac2,         %[r2],        %[input1]     \n\t"
+    "mult          $ac3,         %[r3],        %[input1]     \n\t"
+    "sw            %[t0],        0(%[ptr2])                  \n\t"
+    "extr_rs.w     %[s0],        $ac0,         15            \n\t"
+    "extr_rs.w     %[s1],        $ac1,         15            \n\t"
+    "extr_rs.w     %[s2],        $ac2,         15            \n\t"
+    "extr_rs.w     %[s3],        $ac3,         15            \n\t"
+    "sw            %[t1],        4(%[ptr2])                  \n\t"
+    "sw            %[t2],        8(%[ptr2])                  \n\t"
+    "sw            %[t3],        12(%[ptr2])                 \n\t"
+    // t = (input0 * new *ptr2), rounded and saturated, >> 15
+    "mult          $ac0,         %[t0],        %[input0]     \n\t"
+    "mult          $ac1,         %[t1],        %[input0]     \n\t"
+    "mult          $ac2,         %[t2],        %[input0]     \n\t"
+    "mult          $ac3,         %[t3],        %[input0]     \n\t"
+    "addiu         %[ptr2],      %[ptr2],      16            \n\t"
+    "extr_rs.w     %[t0],        $ac0,         15            \n\t"
+    "extr_rs.w     %[t1],        $ac1,         15            \n\t"
+    "extr_rs.w     %[t2],        $ac2,         15            \n\t"
+    "extr_rs.w     %[t3],        $ac3,         15            \n\t"
+    // *ptr1 = t + s
+    "addu          %[t0],        %[t0],        %[s0]         \n\t"
+    "addu          %[t1],        %[t1],        %[s1]         \n\t"
+    "addu          %[t2],        %[t2],        %[s2]         \n\t"
+    "addu          %[t3],        %[t3],        %[s3]         \n\t"
+    "sw            %[t0],        0(%[ptr1])                  \n\t"
+    "sw            %[t1],        4(%[ptr1])                  \n\t"
+    "sw            %[t2],        8(%[ptr1])                  \n\t"
+    "sw            %[t3],        12(%[ptr1])                 \n\t"
+    "bgtz          %[n],         1b                          \n\t"
+    " addiu        %[ptr1],      %[ptr1],      16            \n\t"
+    // Skip the remainder loop when there are no leftover samples. This must
+    // compare against the hardware zero register: "%0" would expand to asm
+    // operand 0 (the r0 scratch register, holding the last loaded sample).
+    "beq           %[m],         $zero,        3f            \n\t"
+    " nop                                                    \n\t"
+    // Remainder loop: one sample per pass, same arithmetic as above.
+   "2:                                                       \n\t"
+    "lw            %[r0],        0(%[ptr0])                  \n\t"
+    "lw            %[t0],        0(%[ptr2])                  \n\t"
+    "addiu         %[ptr0],      %[ptr0],      4             \n\t"
+    "mult          $ac0,         %[r0],        %[input0]     \n\t"
+    "mult          $ac1,         %[r0],        %[input1]     \n\t"
+    "extr_rs.w     %[r1],        $ac0,         15            \n\t"
+    "extr_rs.w     %[t1],        $ac1,         15            \n\t"
+    "addu          %[t0],        %[t0],        %[r1]         \n\t"
+    "mult          $ac0,         %[t0],        %[input2]     \n\t"
+    "extr_rs.w     %[t0],        $ac0,         16            \n\t"
+    "sw            %[t0],        0(%[ptr2])                  \n\t"
+    "mult          $ac0,         %[t0],        %[input0]     \n\t"
+    "addiu         %[ptr2],      %[ptr2],      4             \n\t"
+    "addiu         %[m],         %[m],         -1            \n\t"
+    "extr_rs.w     %[t0],        $ac0,         15            \n\t"
+    "addu          %[t0],        %[t0],        %[t1]         \n\t"
+    "sw            %[t0],        0(%[ptr1])                  \n\t"
+    "bgtz          %[m],         2b                          \n\t"
+    " addiu        %[ptr1],      %[ptr1],      4             \n\t"
+   "3:                                                       \n\t"
+    ".set          pop                                       \n\t"
+    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [t0] "=&r" (t0), [t1] "=&r" (t1),
+      [t2] "=&r" (t2), [t3] "=&r" (t3), [s0] "=&r" (s0),
+      [s1] "=&r" (s1), [s2] "=&r" (s2), [s3] "=&r" (s3),
+      [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1), [m] "+r" (m),
+      [ptr2] "+r" (ptr2), [n] "+r" (n)
+    : [input0] "r" (input0), [input1] "r" (input1),
+      [input2] "r" (input2)
+    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi",
+      "$ac2lo", "$ac3hi", "$ac3lo"
+  );
+#else
+  // Non-DSPR2 version of the function. Avoiding the accumulator usage due to
+  // large latencies. This variant is bit-exact with C code.
+  int n = HALF_SUBFRAMELEN - 1;
+  int32_t t16a, t16b;
+  int32_t r0, r1, r2, r3, r4;
+
+  __asm __volatile (
+    ".set          push                                      \n\t"
+    ".set          noreorder                                 \n\t"
+    // Split input2 into a high part (t16a) and a sign-extended low 16-bit
+    // part (t16b); t16a is incremented when t16b is negative to compensate.
+    "sra           %[t16a],      %[input2],     16           \n\t"
+    "andi          %[t16b],      %[input2],     0xFFFF       \n\t"
+    // Sign-extend t16b, input0 and input1 from 16 to 32 bits. input0 and
+    // input1 are written here, so they are declared as "+r" operands below.
+#if defined(MIPS32R2_LE)
+    "seh           %[t16b],      %[t16b]                     \n\t"
+    "seh           %[input0],    %[input0]                   \n\t"
+    "seh           %[input1],    %[input1]                   \n\t"
+#else
+    "sll           %[t16b],      %[t16b],       16           \n\t"
+    "sra           %[t16b],      %[t16b],       16           \n\t"
+    "sll           %[input0],    %[input0],     16           \n\t"
+    "sra           %[input0],    %[input0],     16           \n\t"
+    "sll           %[input1],    %[input1],     16           \n\t"
+    "sra           %[input1],    %[input1],     16           \n\t"
+#endif
+    "addiu         %[r0],        %[t16a],       1            \n\t"
+    "slt           %[r1],        %[t16b],       $zero        \n\t"
+    "movn          %[t16a],      %[r0],         %[r1]        \n\t"
+   "1:                                                       \n\t"
+    "lw            %[r0],        0(%[ptr0])                  \n\t"
+    "lw            %[r1],        0(%[ptr2])                  \n\t"
+    "addiu         %[ptr0],      %[ptr0],       4            \n\t"
+    // Split *ptr0 into high (r2) and low (r0) halves and multiply each half
+    // by input0 (r3, r4) and by input1 (r2, r0).
+    "sra           %[r2],        %[r0],         16           \n\t"
+    "andi          %[r0],        %[r0],         0xFFFF       \n\t"
+    "mul           %[r3],        %[r2],         %[input0]    \n\t"
+    "mul           %[r4],        %[r0],         %[input0]    \n\t"
+    "mul           %[r2],        %[r2],         %[input1]    \n\t"
+    "mul           %[r0],        %[r0],         %[input1]    \n\t"
+    "addiu         %[ptr2],      %[ptr2],       4            \n\t"
+    // r3 = input0 * *ptr0 scaled down by 15 bits (with rounding of the
+    // low-half product); r1 = *ptr2 + r3.
+    "sll           %[r3],        %[r3],         1            \n\t"
+    "sra           %[r4],        %[r4],         1            \n\t"
+    "addiu         %[r4],        %[r4],         0x2000       \n\t"
+    "sra           %[r4],        %[r4],         14           \n\t"
+    "addu          %[r3],        %[r3],         %[r4]        \n\t"
+    "addu          %[r1],        %[r1],         %[r3]        \n\t"
+    // Multiply r1 by input2 using the t16a / t16b split.
+    "sra           %[r3],        %[r1],         16           \n\t"
+    "andi          %[r4],        %[r1],         0xFFFF       \n\t"
+    "sra           %[r4],        %[r4],         1            \n\t"
+    "mul           %[r1],        %[r1],         %[t16a]      \n\t"
+    "mul           %[r3],        %[r3],         %[t16b]      \n\t"
+    "mul           %[r4],        %[r4],         %[t16b]      \n\t"
+    // Meanwhile finish input1 * *ptr0 scaled down by 15 bits in r0.
+    "sll           %[r2],        %[r2],         1            \n\t"
+    "sra           %[r0],        %[r0],         1            \n\t"
+    "addiu         %[r0],        %[r0],         0x2000       \n\t"
+    "sra           %[r0],        %[r0],         14           \n\t"
+    "addu          %[r0],        %[r0],         %[r2]        \n\t"
+    "addiu         %[n],         %[n],          -1           \n\t"
+    "addu          %[r1],        %[r1],         %[r3]        \n\t"
+    "addiu         %[r4],        %[r4],         0x4000       \n\t"
+    "sra           %[r4],        %[r4],         15           \n\t"
+    "addu          %[r1],        %[r1],         %[r4]        \n\t"
+    // r1 is the new *ptr2; multiply it by input0 (split in halves again).
+    "sra           %[r2],        %[r1],         16           \n\t"
+    "andi          %[r3],        %[r1],         0xFFFF       \n\t"
+    "mul           %[r3],        %[r3],         %[input0]    \n\t"
+    "mul           %[r2],        %[r2],         %[input0]    \n\t"
+    // ptr2 was already advanced, hence the -4 offset.
+    "sw            %[r1],        -4(%[ptr2])                 \n\t"
+    "sra           %[r3],        %[r3],         1            \n\t"
+    "addiu         %[r3],        %[r3],         0x2000       \n\t"
+    "sra           %[r3],        %[r3],         14           \n\t"
+    "addu          %[r0],        %[r0],         %[r3]        \n\t"
+    "sll           %[r2],        %[r2],         1            \n\t"
+    "addu          %[r0],        %[r0],         %[r2]        \n\t"
+    // *ptr1 = input1 * *ptr0 + input0 * new *ptr2 (both scaled).
+    "sw            %[r0],        0(%[ptr1])                  \n\t"
+    "bgtz          %[n],         1b                          \n\t"
+    " addiu        %[ptr1],      %[ptr1],       4            \n\t"
+    ".set          pop                                       \n\t"
+    : [t16a] "=&r" (t16a), [t16b] "=&r" (t16b), [r0] "=&r" (r0),
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [r4] "=&r" (r4), [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1),
+      [ptr2] "+r" (ptr2), [n] "+r" (n),
+      [input0] "+r" (input0), [input1] "+r" (input1)
+    : [input2] "r" (input2)
+    : "hi", "lo", "memory"
+  );
+#endif
+}
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_filter_mips.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_filter_mips.c
@@ -0,0 +1,133 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
+
+// MIPS DSP implementation of the pitch filter core loop.
+//
+// Runs loopNumber iterations starting at position *index2. Each iteration:
+//   1. Computes a 9-tap dot product of outputBuf2 samples (starting at
+//      outputBuf2[PITCH_BUFFSIZE - (index + 2) + pos]) with coefficient[0..8],
+//      adds the rounding constant 8192 and extracts the result shifted right
+//      by 14 with 16-bit saturation.
+//   2. Multiplies that value by gain and applies a rounding shift right by 12.
+//   3. Shifts inputState[0..3] into inputState[1..4] and stores the new value
+//      in inputState[0].
+//   4. Applies the 5-tap damping filter (taps packed in kDampF0..kDampF2) to
+//      inputState, adds the rounding constant 16384 and extracts the result
+//      shifted right by 15 with 16-bit saturation.
+//   5. Multiplies by sign, subtracts the product from inputBuf[pos],
+//      saturates to 16 bits and writes outputBuf[pos]; then adds inputBuf[pos]
+//      again, saturates and writes outputBuf2[pos + PITCH_BUFFSIZE].
+// On return *index2 has been advanced by loopNumber.
+void WebRtcIsacfix_PitchFilterCore(int loopNumber,
+                                   int16_t gain,
+                                   int index,
+                                   int16_t sign,
+                                   int16_t* inputState,
+                                   int16_t* outputBuf2,
+                                   const int16_t* coefficient,
+                                   int16_t* inputBuf,
+                                   int16_t* outputBuf,
+                                   int* index2) {
+  int ind2t = *index2;
+  int i = 0;
+  // First of the nine outputBuf2 samples read by the fractional pitch filter.
+  int16_t* out2_pos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)] + ind2t;
+  int32_t w1, w2, w3, w4, w5, gain32, sign32;
+  int32_t coef1, coef2, coef3, coef4, coef5 = 0;
+  // Define damp factors as int32_t (pair of int16_t)
+  // Halfword pairs: kDampF0 = {0, 0xF70A}, kDampF1 = {0x51EC, 0x2000},
+  // kDampF2 = {0xF70A, 0x2000} (upper, lower).
+  int32_t kDampF0 = 0x0000F70A;
+  int32_t kDampF1 = 0x51EC2000;
+  int32_t kDampF2 = 0xF70A2000;
+  int16_t* input1 = inputBuf + ind2t;
+  int16_t* output1 = outputBuf + ind2t;
+  int16_t* output2 = outputBuf2 + ind2t + PITCH_BUFFSIZE;
+
+  // Load coefficients outside the loop and sign-extend gain and sign
+  // The lwl/lwr pairs load coefficient[0..7] as four 32-bit words, each
+  // holding two halfwords, without requiring word alignment; coefficient[8]
+  // is loaded zero-extended into coef5.
+  __asm __volatile (
+    ".set     push                                        \n\t"
+    ".set     noreorder                                   \n\t"
+    "lwl      %[coef1],       3(%[coefficient])           \n\t"
+    "lwl      %[coef2],       7(%[coefficient])           \n\t"
+    "lwl      %[coef3],       11(%[coefficient])          \n\t"
+    "lwl      %[coef4],       15(%[coefficient])          \n\t"
+    "lwr      %[coef1],       0(%[coefficient])           \n\t"
+    "lwr      %[coef2],       4(%[coefficient])           \n\t"
+    "lwr      %[coef3],       8(%[coefficient])           \n\t"
+    "lwr      %[coef4],       12(%[coefficient])          \n\t"
+    "lhu      %[coef5],       16(%[coefficient])          \n\t"
+    "seh      %[gain32],      %[gain]                     \n\t"
+    "seh      %[sign32],      %[sign]                     \n\t"
+    ".set     pop                                         \n\t"
+    : [coef1] "=&r" (coef1), [coef2] "=&r" (coef2), [coef3] "=&r" (coef3),
+      [coef4] "=&r" (coef4), [coef5] "=&r" (coef5), [gain32] "=&r" (gain32),
+      [sign32] "=&r" (sign32)
+    : [coefficient] "r" (coefficient), [gain] "r" (gain),
+      [sign] "r" (sign)
+    : "memory"
+  );
+
+  for (i = 0; i < loopNumber; i++) {
+    __asm __volatile (
+      ".set       push                                            \n\t"
+      ".set       noreorder                                       \n\t"
+      // Filter to get fractional pitch
+      // Preload $ac0 with the rounding constant 8192 (hi = 0, lo = 8192).
+      "li         %[w1],          8192                            \n\t"
+      "mtlo       %[w1]                                           \n\t"
+      "mthi       $0                                              \n\t"
+      // Load nine consecutive samples at out2_pos2: four halfword pairs via
+      // lwl/lwr plus one zero-extended halfword.
+      "lwl        %[w1],          3(%[out2_pos2])                 \n\t"
+      "lwl        %[w2],          7(%[out2_pos2])                 \n\t"
+      "lwl        %[w3],          11(%[out2_pos2])                \n\t"
+      "lwl        %[w4],          15(%[out2_pos2])                \n\t"
+      "lwr        %[w1],          0(%[out2_pos2])                 \n\t"
+      "lwr        %[w2],          4(%[out2_pos2])                 \n\t"
+      "lwr        %[w3],          8(%[out2_pos2])                 \n\t"
+      "lwr        %[w4],          12(%[out2_pos2])                \n\t"
+      "lhu        %[w5],          16(%[out2_pos2])                \n\t"
+      // Accumulate the paired halfword products into $ac0.
+      "dpa.w.ph   $ac0,           %[w1],              %[coef1]    \n\t"
+      "dpa.w.ph   $ac0,           %[w2],              %[coef2]    \n\t"
+      "dpa.w.ph   $ac0,           %[w3],              %[coef3]    \n\t"
+      "dpa.w.ph   $ac0,           %[w4],              %[coef4]    \n\t"
+      "dpa.w.ph   $ac0,           %[w5],              %[coef5]    \n\t"
+      "addiu      %[out2_pos2],   %[out2_pos2],       2           \n\t"
+      // Start preparing $ac1 for the low pass filter (hi = 0 here, lo is set
+      // to the rounding constant 16384 below).
+      "mthi       $0,             $ac1                            \n\t"
+      // Load inputState[0..3] as two halfword pairs.
+      "lwl        %[w2],          3(%[inputState])                \n\t"
+      "lwl        %[w3],          7(%[inputState])                \n\t"
+      // Fractional pitch shift & saturation
+      "extr_s.h   %[w1],          $ac0,               14          \n\t"
+      "li         %[w4],          16384                           \n\t"
+      "lwr        %[w2],          0(%[inputState])                \n\t"
+      "lwr        %[w3],          4(%[inputState])                \n\t"
+      "mtlo       %[w4],          $ac1                            \n\t"
+      // Shift low pass filter state
+      // The two pairs are written back one halfword further on, so the old
+      // inputState[0..3] become inputState[1..4].
+      "swl        %[w2],          5(%[inputState])                \n\t"
+      "swl        %[w3],          9(%[inputState])                \n\t"
+      // Apply the gain to the fractional pitch value.
+      "mul        %[w1],          %[gain32],          %[w1]       \n\t"
+      "swr        %[w2],          2(%[inputState])                \n\t"
+      "swr        %[w3],          6(%[inputState])                \n\t"
+      // Low pass filter accumulation
+      "dpa.w.ph   $ac1,           %[kDampF1],         %[w2]       \n\t"
+      "dpa.w.ph   $ac1,           %[kDampF2],         %[w3]       \n\t"
+      // w4 = current input sample.
+      "lh         %[w4],          0(%[input1])                    \n\t"
+      "addiu      %[input1],      %[input1],          2           \n\t"
+      // Rounding shift of the gained value; it becomes the new
+      // inputState[0] and contributes the last filter tap.
+      "shra_r.w   %[w1],          %[w1],              12          \n\t"
+      "sh         %[w1],          0(%[inputState])                \n\t"
+      "dpa.w.ph   $ac1,           %[kDampF0],         %[w1]       \n\t"
+      // Low pass filter shift & saturation
+      "extr_s.h   %[w2],          $ac1,               15          \n\t"
+      "mul        %[w2],          %[w2],              %[sign32]   \n\t"
+      // Buffer update
+      // outputBuf sample = saturate16(input - sign * filtered value).
+      "subu       %[w2],          %[w4],              %[w2]       \n\t"
+      "shll_s.w   %[w2],          %[w2],              16          \n\t"
+      "sra        %[w2],          %[w2],              16          \n\t"
+      "sh         %[w2],          0(%[output1])                   \n\t"
+      // outputBuf2 sample = saturate16(outputBuf sample + input).
+      "addu       %[w2],          %[w2],              %[w4]       \n\t"
+      "shll_s.w   %[w2],          %[w2],              16          \n\t"
+      "addiu      %[output1],     %[output1],         2           \n\t"
+      "sra        %[w2],          %[w2],              16          \n\t"
+      "sh         %[w2],          0(%[output2])                   \n\t"
+      "addiu      %[output2],     %[output2],         2           \n\t"
+      ".set       pop                                             \n\t"
+      : [w1] "=&r" (w1), [w2] "=&r" (w2), [w3] "=&r" (w3), [w4] "=&r" (w4),
+        [w5] "=&r" (w5), [input1] "+r" (input1), [out2_pos2] "+r" (out2_pos2),
+        [output1] "+r" (output1), [output2] "+r" (output2)
+      : [coefficient] "r" (coefficient), [inputState] "r" (inputState),
+        [gain32] "r" (gain32), [sign32] "r" (sign32), [kDampF0] "r" (kDampF0),
+        [kDampF1] "r" (kDampF1), [kDampF2] "r" (kDampF2),
+        [coef1] "r" (coef1), [coef2] "r" (coef2), [coef3] "r" (coef3),
+        [coef4] "r" (coef4), [coef5] "r" (coef5)
+      : "hi", "lo", "$ac1hi", "$ac1lo", "memory"
+    );
+  }
+  // Report the new buffer position back to the caller.
+  (*index2) += loopNumber;
+}