MIPS optimizations for NS audio processing module


Review URL: https://webrtc-codereview.appspot.com/4139006

Patch from Ljubomir Papuga <lpapuga@mips.com>.

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5393 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org 2014-01-16 07:22:01 +00:00
parent fb4e256d49
commit ea9392d5eb
5 changed files with 1356 additions and 244 deletions

View File

@ -101,6 +101,17 @@
'conditions': [
['target_arch=="mipsel"', {
'sources': [
}, {
'sources': [
}, {
'defines': ['WEBRTC_NS_FLOAT'],
'sources': [

View File

@ -70,11 +70,6 @@ static const int16_t WebRtcNsx_kLogTableFrac[256] = {
// Skip first frequency bins during estimation. (0 <= value < 64)
static const int kStartBand = 5;
static const int16_t kIndicatorTable[17] = {
0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
// hybrid Hanning & flat window
static const int16_t kBlocks80w128x[128] = {
0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266,
@ -481,7 +476,7 @@ static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
// Denormalize the real-valued signal |in|, the output from inverse FFT.
static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
static void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) {
int i = 0;
int32_t tmp32 = 0;
for (i = 0; i < inst->anaLen; i += 1) {
@ -546,9 +541,9 @@ static void AnalysisUpdateC(NsxInst_t* inst,
// Normalize the real-valued signal |in|, the input to forward FFT.
static __inline void NormalizeRealBuffer(NsxInst_t* inst,
const int16_t* in,
int16_t* out) {
static void NormalizeRealBufferC(NsxInst_t* inst,
const int16_t* in,
int16_t* out) {
int i = 0;
for (i = 0; i < inst->anaLen; ++i) {
out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
@ -560,6 +555,8 @@ NoiseEstimation WebRtcNsx_NoiseEstimation;
PrepareSpectrum WebRtcNsx_PrepareSpectrum;
SynthesisUpdate WebRtcNsx_SynthesisUpdate;
AnalysisUpdate WebRtcNsx_AnalysisUpdate;
Denormalize WebRtcNsx_Denormalize;
NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
// Initialize function pointers for ARM Neon platform.
@ -571,6 +568,19 @@ static void WebRtcNsx_InitNeon(void) {
#if defined(MIPS32_LE)
// Initialize function pointers for MIPS platform.
static void WebRtcNsx_InitMips(void) {
WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
#if defined(MIPS_DSP_R1_LE)
WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips;
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
int16_t pink_noise_exp_avg,
int32_t pink_noise_num_avg,
@ -758,6 +768,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
WebRtcNsx_Denormalize = DenormalizeC;
WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;
uint64_t features = WebRtc_GetCPUFeaturesARM();
@ -768,6 +780,10 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
#if defined(MIPS32_LE)
inst->initFlag = 1;
return 0;
@ -1169,239 +1185,6 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, uint16_t* magnIn) {
// Compute speech/noise probability
// The non-speech probability (in Q8) is returned in: nonSpeechProbFinal
// priorLocSnr is the prior SNR for each frequency (in Q11)
// postLocSnr is the post SNR for each frequency (in Q11)
void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr, uint32_t* postLocSnr) {
uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
int32_t frac32, logTmp;
int32_t logLrtTimeAvgKsumFX;
int16_t indPriorFX16;
int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
int i, normTmp, normTmp2, nShifts;
// compute feature based on average LR factor
// this is the average over all frequencies of the smooth log LRT
logLrtTimeAvgKsumFX = 0;
for (i = 0; i < inst->magnLen; i++) {
besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
if (normTmp > 10) {
den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
} else {
den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
if (den > 0) {
besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
} else {
besselTmpFX32 -= num; // Q11
// inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]);
// Here, LRT_TAVG = 0.5
zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
frac32 = tmp32 + 37;
// tmp32 = log2(priorLocSnr[i])
tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2)
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12
inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2
// done with computation of LR factor
//compute the indicator functions
// average LRT feature
// FLOAT code
// indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
tmpIndFX = 16384; // Q14(1.0)
tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
//use larger width in tanh map for pause regions
if (tmp32no1 < 0) {
tmpIndFX = 0;
tmp32no1 = -tmp32no1;
//widthPrior = widthPrior * 2.0;
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
// compute indicator function: sigmoid map
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
if ((tableIndex < 16) && (tableIndex >= 0)) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
if (tmpIndFX == 0) {
tmpIndFX = 8192 - tmp16no2; // Q14
} else {
tmpIndFX = 8192 + tmp16no2; // Q14
indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
//spectral flatness feature
if (inst->weightSpecFlat) {
tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
tmpIndFX = 16384; // Q14(1.0)
//use larger width in tanh map for pause regions
tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
nShifts = 4;
if (inst->thresholdSpecFlat < tmpU32no1) {
tmpIndFX = 0;
tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
//widthPrior = widthPrior * 2.0;
tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
nShifts), 25); //Q14
tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14
// compute indicator function: sigmoid map
// FLOAT code
// indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0);
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
if (tableIndex < 16) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
if (tmpIndFX) {
tmpIndFX = 8192 + tmp16no2; // Q14
} else {
tmpIndFX = 8192 - tmp16no2; // Q14
indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
//for template spectral-difference
if (inst->weightSpecDiff) {
tmpU32no1 = 0;
if (inst->featureSpecDiff) {
normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages)
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages
- normTmp);
if (tmpU32no2 > 0) {
// Q(20 - inst->stages)
tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
} else {
tmpU32no1 = (uint32_t)(0x7fffffff);
tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25);
tmpU32no2 = tmpU32no1 - tmpU32no3;
nShifts = 1;
tmpIndFX = 16384; // Q14(1.0)
//use larger width in tanh map for pause regions
if (tmpU32no2 & 0x80000000) {
tmpIndFX = 0;
tmpU32no2 = tmpU32no3 - tmpU32no1;
//widthPrior = widthPrior * 2.0;
tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
// compute indicator function: sigmoid map
// FLOAT code
// indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
if (tableIndex < 16) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
tmp16no1, frac, 14);
if (tmpIndFX) {
tmpIndFX = 8192 + tmp16no2;
} else {
tmpIndFX = 8192 - tmp16no2;
indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
//combine the indicator function with the feature weights
// FLOAT code
// indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2);
indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
// done with computing indicator function
//compute the prior probability
// FLOAT code
// inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb);
tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
//final speech probability: combine prior model with LR factor:
memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
if (inst->priorNonSpeechProb > 0) {
for (i = 0; i < inst->magnLen; i++) {
// FLOAT code
// invLrt = exp(inst->logLrtTimeAvg[i]);
// invLrt = inst->priorSpeechProb * invLrt;
// nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt);
// invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
// nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt);
if (inst->logLrtTimeAvgW32[i] < 65300) {
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637),
14); // Q12
intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
if (intPart < -8) {
intPart = -8;
frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
// Quadratic approximation of 2^frac
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
+ WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
normTmp = WebRtcSpl_NormW32(invLrtFX);
normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
if (normTmp + normTmp2 >= 7) {
if (normTmp + normTmp2 < 15) {
invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
// Q(normTmp+normTmp2-7)
tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb));
// Q(normTmp+normTmp2+7)
invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14
} else {
tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22
invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, 8); // Q22
nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
(int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
// Transform input (speechFrame) to frequency domain magnitude (magnU16)
void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU16) {
@ -1461,7 +1244,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
// create realImag as winData interleaved with zeros (= imag. part), normalize it
NormalizeRealBuffer(inst, winData, realImag);
WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);
// FFT output will be in winData[].
WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
@ -1693,7 +1476,7 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
// Inverse FFT output will be in rfft_out[].
outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
Denormalize(inst, rfft_out, outCIFFT);
WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
//scale factor: only do it after END_STARTUP_LONG time
gainFactor = 8192; // 8192 = Q13(1.0)

View File

@ -201,6 +201,23 @@ typedef void (*AnalysisUpdate)(NsxInst_t* inst,
int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
// Denormalize the real-valued signal |in|, the output from inverse FFT.
typedef void (*Denormalize) (NsxInst_t* inst, int16_t* in, int factor);
extern Denormalize WebRtcNsx_Denormalize;
// Normalize the real-valued signal |in|, the input to forward FFT.
typedef void (*NormalizeRealBuffer) (NsxInst_t* inst,
const int16_t* in,
int16_t* out);
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
// Compute speech/noise probability.
// Intended to be private.
void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr,
uint32_t* postLocSnr);
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
@ -218,6 +235,26 @@ void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
#if defined(MIPS32_LE)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for MIPS platforms
// are declared below and defined in file nsx_core_mips.c.
void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buff);
void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
const int16_t* in,
int16_t* out);
#if defined(MIPS_DSP_R1_LE)
void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor);
#ifdef __cplusplus

View File

@ -0,0 +1,273 @@
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
#include "webrtc/modules/audio_processing/ns/nsx_core.h"
static const int16_t kIndicatorTable[17] = {
0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
// Compute speech/noise probability
// The non-speech probability (in Q8) is returned in: nonSpeechProbFinal
// priorLocSnr is the prior SNR for each frequency (in Q11)
// postLocSnr is the post SNR for each frequency (in Q11)
void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr,
uint32_t* postLocSnr) {
uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
int32_t frac32, logTmp;
int32_t logLrtTimeAvgKsumFX;
int16_t indPriorFX16;
int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
int i, normTmp, normTmp2, nShifts;
// compute feature based on average LR factor
// this is the average over all frequencies of the smooth log LRT
logLrtTimeAvgKsumFX = 0;
for (i = 0; i < inst->magnLen; i++) {
besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
if (normTmp > 10) {
den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
} else {
den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
if (den > 0) {
besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
} else {
besselTmpFX32 -= num; // Q11
// inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
// - inst->logLrtTimeAvg[i]);
// Here, LRT_TAVG = 0.5
zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
frac32 = tmp32 + 37;
// tmp32 = log2(priorLocSnr[i])
tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8);
// log2(priorLocSnr[i])*log(2)
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1);
// Q12
inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
inst->stages + 10);
// 5 = BIN_SIZE_LRT / 2
// done with computation of LR factor
//compute the indicator functions
// average LRT feature
// FLOAT code
// indicator0 = 0.5 * (tanh(widthPrior *
// (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
tmpIndFX = 16384; // Q14(1.0)
tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
//use larger width in tanh map for pause regions
if (tmp32no1 < 0) {
tmpIndFX = 0;
tmp32no1 = -tmp32no1;
//widthPrior = widthPrior * 2.0;
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
// compute indicator function: sigmoid map
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
if ((tableIndex < 16) && (tableIndex >= 0)) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
if (tmpIndFX == 0) {
tmpIndFX = 8192 - tmp16no2; // Q14
} else {
tmpIndFX = 8192 + tmp16no2; // Q14
indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
//spectral flatness feature
if (inst->weightSpecFlat) {
tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
tmpIndFX = 16384; // Q14(1.0)
//use larger width in tanh map for pause regions
tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
nShifts = 4;
if (inst->thresholdSpecFlat < tmpU32no1) {
tmpIndFX = 0;
tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
//widthPrior = widthPrior * 2.0;
tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
nShifts), 25);
tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
25); //Q14
// compute indicator function: sigmoid map
// FLOAT code
// indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
// (threshPrior1 - tmpFloat1)) + 1.0);
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
if (tableIndex < 16) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
if (tmpIndFX) {
tmpIndFX = 8192 + tmp16no2; // Q14
} else {
tmpIndFX = 8192 - tmp16no2; // Q14
indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
//for template spectral-difference
if (inst->weightSpecDiff) {
tmpU32no1 = 0;
if (inst->featureSpecDiff) {
normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
// Q(normTmp-2*stages)
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
20 - inst->stages - normTmp);
if (tmpU32no2 > 0) {
// Q(20 - inst->stages)
tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
} else {
tmpU32no1 = (uint32_t)(0x7fffffff);
tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
tmpU32no2 = tmpU32no1 - tmpU32no3;
nShifts = 1;
tmpIndFX = 16384; // Q14(1.0)
//use larger width in tanh map for pause regions
if (tmpU32no2 & 0x80000000) {
tmpIndFX = 0;
tmpU32no2 = tmpU32no3 - tmpU32no1;
//widthPrior = widthPrior * 2.0;
tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
// compute indicator function: sigmoid map
// FLOAT code
// indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
if (tableIndex < 16) {
tmp16no2 = kIndicatorTable[tableIndex];
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
tmp16no1, frac, 14);
if (tmpIndFX) {
tmpIndFX = 8192 + tmp16no2;
} else {
tmpIndFX = 8192 - tmp16no2;
indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
//combine the indicator function with the feature weights
// FLOAT code
// indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
// indicator1 + weightIndPrior2 * indicator2);
indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
// done with computing indicator function
//compute the prior probability
// FLOAT code
// inst->priorNonSpeechProb += PRIOR_UPDATE *
// (indPriorNonSpeech - inst->priorNonSpeechProb);
tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
//final speech probability: combine prior model with LR factor:
memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
if (inst->priorNonSpeechProb > 0) {
for (i = 0; i < inst->magnLen; i++) {
// FLOAT code
// invLrt = exp(inst->logLrtTimeAvg[i]);
// invLrt = inst->priorSpeechProb * invLrt;
// nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
// (1.0 - inst->priorSpeechProb + invLrt);
// invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
// nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
// (inst->priorNonSpeechProb + invLrt);
if (inst->logLrtTimeAvgW32[i] < 65300) {
inst->logLrtTimeAvgW32[i], 23637),
14); // Q12
intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
if (intPart < -8) {
intPart = -8;
frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
// Quadratic approximation of 2^frac
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
+ WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
normTmp = WebRtcSpl_NormW32(invLrtFX);
normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
if (normTmp + normTmp2 >= 7) {
if (normTmp + normTmp2 < 15) {
invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
// Q(normTmp+normTmp2-7)
tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
(16384 - inst->priorNonSpeechProb));
// Q(normTmp+normTmp2+7)
invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
// Q14
} else {
tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
(16384 - inst->priorNonSpeechProb));
// Q22
invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb,
8); // Q22
nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
(int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8

File diff suppressed because it is too large Load Diff