MIPS optimizations for NS audio processing module

R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/4139006 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk@5393 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-01-16 07:22:01 +00:00 · 2014-01-16 07:22:01 +00:00 · ea9392d5eb
commit ea9392d5eb
parent fb4e256d49
5 changed files with 1356 additions and 244 deletions
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@ -101,6 +101,17 @@
            'ns/nsx_core.h',
            'ns/nsx_defines.h',
          ],
+          'conditions': [
+            ['target_arch=="mipsel"', {
+              'sources': [
+                'ns/nsx_core_mips.c',
+              ],
+            }, {
+              'sources': [
+                'ns/nsx_core_c.c',
+              ],
+            }],
+          ],
        }, {
          'defines': ['WEBRTC_NS_FLOAT'],
          'sources': [
--- a/webrtc/modules/audio_processing/ns/nsx_core.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core.c
@ -70,11 +70,6 @@ static const int16_t WebRtcNsx_kLogTableFrac[256] = {
 // Skip first frequency bins during estimation. (0 <= value < 64)
 static const int kStartBand = 5;

-static const int16_t kIndicatorTable[17] = {
-  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
-  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
-};
-
 // hybrib Hanning & flat window
 static const int16_t kBlocks80w128x[128] = {
  0,    536,   1072,   1606,   2139,   2669,   3196,   3720,   4240,   4756,   5266,
@ -481,7 +476,7 @@ static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
 }

 // Denormalize the real-valued signal |in|, the output from inverse FFT.
-static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
+static void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) {
  int i = 0;
  int32_t tmp32 = 0;
  for (i = 0; i < inst->anaLen; i += 1) {
@ -546,9 +541,9 @@ static void AnalysisUpdateC(NsxInst_t* inst,
 }

 // Normalize the real-valued signal |in|, the input to forward FFT.
-static __inline void NormalizeRealBuffer(NsxInst_t* inst,
-                                         const int16_t* in,
-                                         int16_t* out) {
+static void NormalizeRealBufferC(NsxInst_t* inst,
+                                 const int16_t* in,
+                                 int16_t* out) {
  int i = 0;
  for (i = 0; i < inst->anaLen; ++i) {
    out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
@ -560,6 +555,8 @@ NoiseEstimation WebRtcNsx_NoiseEstimation;
 PrepareSpectrum WebRtcNsx_PrepareSpectrum;
 SynthesisUpdate WebRtcNsx_SynthesisUpdate;
 AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+Denormalize WebRtcNsx_Denormalize;
+NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;

 #if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
 // Initialize function pointers for ARM Neon platform.
@ -571,6 +568,19 @@ static void WebRtcNsx_InitNeon(void) {
 }
 #endif

+#if defined(MIPS32_LE)
+// Point the NS function pointers at their MIPS (_mips) implementations.
+static void WebRtcNsx_InitMips(void) {
+  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
+  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
+  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
+  WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
+#if defined(MIPS_DSP_R1_LE)  // Otherwise WebRtcNsx_Denormalize is left as is.
+  WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips;
+#endif
+}
+#endif
+
 void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
                                           int16_t pink_noise_exp_avg,
                                           int32_t pink_noise_num_avg,
@ -758,6 +768,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
  WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
  WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
  WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
+  WebRtcNsx_Denormalize = DenormalizeC;
+  WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;

 #ifdef WEBRTC_DETECT_ARM_NEON
  uint64_t features = WebRtc_GetCPUFeaturesARM();
@ -768,6 +780,10 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
  WebRtcNsx_InitNeon();
 #endif

+#if defined(MIPS32_LE)
+  WebRtcNsx_InitMips();
+#endif
+
  inst->initFlag = 1;

  return 0;
@ -1169,239 +1185,6 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, uint16_t* magnIn) {
  }
 }

-// Compute speech/noise probability
-// speech/noise probability is returned in: probSpeechFinal
-//snrLocPrior is the prior SNR for each frequency (in Q11)
-//snrLocPost is the post SNR for each frequency (in Q11)
-void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, uint16_t* nonSpeechProbFinal,
-                               uint32_t* priorLocSnr, uint32_t* postLocSnr) {
-  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
-
-  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
-  int32_t frac32, logTmp;
-  int32_t logLrtTimeAvgKsumFX;
-
-  int16_t indPriorFX16;
-  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
-
-  int i, normTmp, normTmp2, nShifts;
-
-  // compute feature based on average LR factor
-  // this is the average over all frequencies of the smooth log LRT
-  logLrtTimeAvgKsumFX = 0;
-  for (i = 0; i < inst->magnLen; i++) {
-    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
-    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
-    num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
-    if (normTmp > 10) {
-      den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
-    } else {
-      den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
-    }
-    if (den > 0) {
-      besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
-    } else {
-      besselTmpFX32 -= num; // Q11
-    }
-
-    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]);
-    // Here, LRT_TAVG = 0.5
-    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
-    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
-    tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
-    tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
-    tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
-    frac32 = tmp32 + 37;
-    // tmp32 = log2(priorLocSnr[i])
-    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
-    logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2)
-    tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12
-    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
-
-    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
-  }
-  inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2
-  // done with computation of LR factor
-
-  //
-  //compute the indicator functions
-  //
-
-  // average LRT feature
-  // FLOAT code
-  // indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
-  tmpIndFX = 16384; // Q14(1.0)
-  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
-  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
-  //use larger width in tanh map for pause regions
-  if (tmp32no1 < 0) {
-    tmpIndFX = 0;
-    tmp32no1 = -tmp32no1;
-    //widthPrior = widthPrior * 2.0;
-    nShifts++;
-  }
-  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
-  // compute indicator function: sigmoid map
-  tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
-  if ((tableIndex < 16) && (tableIndex >= 0)) {
-    tmp16no2 = kIndicatorTable[tableIndex];
-    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
-    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
-    tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
-    if (tmpIndFX == 0) {
-      tmpIndFX = 8192 - tmp16no2; // Q14
-    } else {
-      tmpIndFX = 8192 + tmp16no2; // Q14
-    }
-  }
-  indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
-
-  //spectral flatness feature
-  if (inst->weightSpecFlat) {
-    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
-    tmpIndFX = 16384; // Q14(1.0)
-    //use larger width in tanh map for pause regions
-    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
-    nShifts = 4;
-    if (inst->thresholdSpecFlat < tmpU32no1) {
-      tmpIndFX = 0;
-      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
-      //widthPrior = widthPrior * 2.0;
-      nShifts++;
-    }
-    tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
-                                                                        nShifts), 25); //Q14
-    tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14
-    // compute indicator function: sigmoid map
-    // FLOAT code
-    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0);
-    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
-    if (tableIndex < 16) {
-      tmp16no2 = kIndicatorTable[tableIndex];
-      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
-      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
-      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
-      if (tmpIndFX) {
-        tmpIndFX = 8192 + tmp16no2; // Q14
-      } else {
-        tmpIndFX = 8192 - tmp16no2; // Q14
-      }
-    }
-    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
-  }
-
-  //for template spectral-difference
-  if (inst->weightSpecDiff) {
-    tmpU32no1 = 0;
-    if (inst->featureSpecDiff) {
-      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
-                               WebRtcSpl_NormU32(inst->featureSpecDiff));
-      tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages)
-      tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages
-                                        - normTmp);
-      if (tmpU32no2 > 0) {
-        // Q(20 - inst->stages)
-        tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
-      } else {
-        tmpU32no1 = (uint32_t)(0x7fffffff);
-      }
-    }
-    tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25);
-    tmpU32no2 = tmpU32no1 - tmpU32no3;
-    nShifts = 1;
-    tmpIndFX = 16384; // Q14(1.0)
-    //use larger width in tanh map for pause regions
-    if (tmpU32no2 & 0x80000000) {
-      tmpIndFX = 0;
-      tmpU32no2 = tmpU32no3 - tmpU32no1;
-      //widthPrior = widthPrior * 2.0;
-      nShifts--;
-    }
-    tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
-    // compute indicator function: sigmoid map
-    /* FLOAT code
-     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
-     */
-    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
-    if (tableIndex < 16) {
-      tmp16no2 = kIndicatorTable[tableIndex];
-      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
-      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
-      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
-                    tmp16no1, frac, 14);
-      if (tmpIndFX) {
-        tmpIndFX = 8192 + tmp16no2;
-      } else {
-        tmpIndFX = 8192 - tmp16no2;
-      }
-    }
-    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
-  }
-
-  //combine the indicator function with the feature weights
-  // FLOAT code
-  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2);
-  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
-  // done with computing indicator function
-
-  //compute the prior probability
-  // FLOAT code
-  // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb);
-  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
-  inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
-                                PRIOR_UPDATE_Q14, tmp16, 14); // Q14
-
-  //final speech probability: combine prior model with LR factor:
-
-  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
-
-  if (inst->priorNonSpeechProb > 0) {
-    for (i = 0; i < inst->magnLen; i++) {
-      // FLOAT code
-      // invLrt = exp(inst->logLrtTimeAvg[i]);
-      // invLrt = inst->priorSpeechProb * invLrt;
-      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt);
-      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
-      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt);
-      if (inst->logLrtTimeAvgW32[i] < 65300) {
-        tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637),
-                                         14); // Q12
-        intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
-        if (intPart < -8) {
-          intPart = -8;
-        }
-        frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
-
-        // Quadratic approximation of 2^frac
-        tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
-        tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
-        invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
-                   + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
-
-        normTmp = WebRtcSpl_NormW32(invLrtFX);
-        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
-        if (normTmp + normTmp2 >= 7) {
-          if (normTmp + normTmp2 < 15) {
-            invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
-            // Q(normTmp+normTmp2-7)
-            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb));
-            // Q(normTmp+normTmp2+7)
-            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14
-          } else {
-            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22
-            invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
-          }
-
-          tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, 8); // Q22
-
-          nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
-              (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
-        }
-      }
-    }
-  }
-}
-
 // Transform input (speechFrame) to frequency domain magnitude (magnU16)
 void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU16) {

@ -1461,7 +1244,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU
  right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);

  // create realImag as winData interleaved with zeros (= imag. part), normalize it
-  NormalizeRealBuffer(inst, winData, realImag);
+  WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);

  // FFT output will be in winData[].
  WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
@ -1693,7 +1476,7 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
  // Inverse FFT output will be in rfft_out[].
  outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);

-  Denormalize(inst, rfft_out, outCIFFT);
+  WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);

  //scale factor: only do it after END_STARTUP_LONG time
  gainFactor = 8192; // 8192 = Q13(1.0)
--- a/webrtc/modules/audio_processing/ns/nsx_core.h
+++ b/webrtc/modules/audio_processing/ns/nsx_core.h
@ -201,6 +201,23 @@ typedef void (*AnalysisUpdate)(NsxInst_t* inst,
                               int16_t* new_speech);
 extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;

+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+typedef void (*Denormalize) (NsxInst_t* inst, int16_t* in, int factor);
+extern Denormalize WebRtcNsx_Denormalize;
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+typedef void (*NormalizeRealBuffer) (NsxInst_t* inst,
+                                     const int16_t* in,
+                                     int16_t* out);
+extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+// Compute the non-speech probability per frequency into |nonSpeechProbFinal|.
+// Intended to be private.
+void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr);
+
 #if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
 // For the above function pointers, functions for generic platforms are declared
 // and defined as static in file nsx_core.c, while those for ARM Neon platforms
@ -218,6 +235,26 @@ void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
 void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
 #endif

+#if defined(MIPS32_LE)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for MIPS platforms
+// are declared below and defined in file nsx_core_mips.c.
+void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
+                                    int16_t* out_frame,
+                                    int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
+                                   int16_t* out,
+                                   int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buff);
+void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
+                                        const int16_t* in,
+                                        int16_t* out);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor);
+#endif  // defined(MIPS_DSP_R1_LE)
+
+#endif  // defined(MIPS32_LE)
+
 #ifdef __cplusplus
 }
 #endif
--- a/webrtc/modules/audio_processing/ns/nsx_core_c.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core_c.c
@ -0,0 +1,273 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {  // Q14 sigmoid-map lookup table
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute the non-speech probability (Q8) of each frequency bin and store it
+// in |nonSpeechProbFinal|. Also updates logLrtTimeAvgW32[], featureLogLrt and
+// priorNonSpeechProb in |inst|.
+// |priorLocSnr| / |postLocSnr|: prior / post SNR for each frequency (in Q11).
+void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+
+  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
+  int32_t frac32, logTmp;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
+  int i, normTmp, normTmp2, nShifts;
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
+    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
+    num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
+    if (normTmp > 10) {
+      den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
+    } else {
+      den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
+    }
+    if (den > 0) {
+      besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
+    } else {
+      besselTmpFX32 -= num; // Q11
+    }
+
+    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
+    //                                       - inst->logLrtTimeAvg[i]);
+    // Here, LRT_TAVG = 0.5
+    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
+    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
+    tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
+    tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
+    tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
+    frac32 = tmp32 + 37;
+    // tmp32 = log2(priorLocSnr[i])
+    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
+    logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8);
+                                                  // log2(priorLocSnr[i])*log(2)
+    tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1);
+                                                  // Q12
+    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
+
+    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
+  }
+  inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
+                                              inst->stages + 10);
+                                                  // 5 = BIN_SIZE_LRT / 2
+  // done with computation of LR factor
+
+  //
+  //compute the indicator functions
+  //
+
+  // average LRT feature
+  // FLOAT code
+  // indicator0 = 0.5 * (tanh(widthPrior *
+  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+  tmpIndFX = 16384; // Q14(1.0)
+  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+  //use larger width in tanh map for pause regions
+  if (tmp32no1 < 0) {
+    tmpIndFX = 0;
+    tmp32no1 = -tmp32no1;
+    //widthPrior = widthPrior * 2.0;
+    nShifts++;
+  }
+  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+  // compute indicator function: sigmoid map
+  tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
+  if ((tableIndex < 16) && (tableIndex >= 0)) {
+    tmp16no2 = kIndicatorTable[tableIndex];
+    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+    tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+    if (tmpIndFX == 0) {
+      tmpIndFX = 8192 - tmp16no2; // Q14
+    } else {
+      tmpIndFX = 8192 + tmp16no2; // Q14
+    }
+  }
+  indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
+
+  //spectral flatness feature
+  if (inst->weightSpecFlat) {
+    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+    nShifts = 4;
+    if (inst->thresholdSpecFlat < tmpU32no1) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+      //widthPrior = widthPrior * 2.0;
+      nShifts++;
+    }
+    // Distance from the threshold, scaled to Q14. Only the unsigned quotient
+    // is needed below: it supplies both the table index (bits 14 and up) and
+    // the interpolation fraction (low 14 bits).
+    tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
+                                    25); //Q14
+    // compute indicator function: sigmoid map
+    // FLOAT code
+    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+    //                          (threshPrior1 - tmpFloat1)) + 1.0);
+    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+    if ((tableIndex < 16) && (tableIndex >= 0)) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2; // Q14
+      } else {
+        tmpIndFX = 8192 - tmp16no2; // Q14
+      }
+    }
+    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
+  }
+
+  //for template spectral-difference
+  if (inst->weightSpecDiff) {
+    tmpU32no1 = 0;
+    if (inst->featureSpecDiff) {
+      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+                               WebRtcSpl_NormU32(inst->featureSpecDiff));
+      tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
+                                                         // Q(normTmp-2*stages)
+      tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
+                                        20 - inst->stages - normTmp);
+      if (tmpU32no2 > 0) {
+        // Q(20 - inst->stages)
+        tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
+      } else {
+        tmpU32no1 = (uint32_t)(0x7fffffff);
+      }
+    }
+    tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
+                                                      17),
+                                25);
+    tmpU32no2 = tmpU32no1 - tmpU32no3;
+    nShifts = 1;
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    if (tmpU32no2 & 0x80000000) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no3 - tmpU32no1;
+      //widthPrior = widthPrior * 2.0;
+      nShifts--;
+    }
+    tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
+    // compute indicator function: sigmoid map
+    // FLOAT code
+    // indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+    // A quotient >= 32768 goes negative in the int16_t cast: keep saturation.
+    tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+    if ((tableIndex < 16) && (tableIndex >= 0)) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    tmp16no1, frac, 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2;
+      } else {
+        tmpIndFX = 8192 - tmp16no2;
+      }
+    }
+    indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
+  }
+
+  //combine the indicator function with the feature weights
+  // FLOAT code
+  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+  //                 indicator1 + weightIndPrior2 * indicator2);
+  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+  // done with computing indicator function
+
+  //compute the prior probability
+  // FLOAT code
+  // inst->priorNonSpeechProb += PRIOR_UPDATE *
+  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
+  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+  inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+                                PRIOR_UPDATE_Q14, tmp16, 14); // Q14
+
+  //final speech probability: combine prior model with LR factor:
+
+  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+  if (inst->priorNonSpeechProb > 0) {
+    for (i = 0; i < inst->magnLen; i++) {
+      // FLOAT code
+      // invLrt = exp(inst->logLrtTimeAvg[i]);
+      // invLrt = inst->priorSpeechProb * invLrt;
+      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
+      //                         (1.0 - inst->priorSpeechProb + invLrt);
+      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
+      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
+      //                         (inst->priorNonSpeechProb + invLrt);
+      if (inst->logLrtTimeAvgW32[i] < 65300) {
+        tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(
+                                           inst->logLrtTimeAvgW32[i], 23637),
+                                         14); // Q12
+        intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
+        if (intPart < -8) {
+          intPart = -8;
+        }
+        frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
+
+        // Quadratic approximation of 2^frac
+        tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
+        tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
+        invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
+                   + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
+
+        normTmp = WebRtcSpl_NormW32(invLrtFX);
+        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
+        if (normTmp + normTmp2 >= 7) {
+          if (normTmp + normTmp2 < 15) {
+            invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
+            // Q(normTmp+normTmp2-7)
+            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
+                                            (16384 - inst->priorNonSpeechProb));
+            // Q(normTmp+normTmp2+7)
+            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
+                                                                  // Q14
+          } else {
+            tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
+                                            (16384 - inst->priorNonSpeechProb));
+                                                                  // Q22
+            invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
+          }
+
+          tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb,
+                                           8); // Q22
+
+          nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
+              (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
+        }
+      }
+    }
+  }
+}
+
--- a/webrtc/modules/audio_processing/ns/nsx_core_mips.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core_mips.c