Committing changes in CL 277002, due to file structure changes introduced during the review of the code.
Review URL: http://webrtc-codereview.appspot.com/246005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@805 4adac7df-926f-26a2-2b94-8c16560cd09d
kma@webrtc.org 2011-10-24 21:36:33 +00:00
parent 0d0037c2fd
commit 913644b92d
4 changed files with 890 additions and 319 deletions

View File

@ -107,7 +107,8 @@ LOCAL_MODULE_TAGS := tests
LOCAL_CPP_EXTENSION := .cc
LOCAL_SRC_FILES:= \
$(call all-proto-files-under, test) \
test/unit_test.cc
test/unit_test.cc \
../../../test/testsupport/fileutils.cc
# Flags passed to both C and C++ files.
LOCAL_CFLAGS := \
@ -118,6 +119,7 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/interface \
$(LOCAL_PATH)/../interface \
$(LOCAL_PATH)/../.. \
$(LOCAL_PATH)/../../../test \
$(LOCAL_PATH)/../../system_wrappers/interface \
$(LOCAL_PATH)/../../common_audio/signal_processing_library/main/interface \
external/gtest/include \
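This build change pulls the test support file utilities into the unit test target. As a rough illustration of what it enables (not shown in this CL), the test can now include the helpers through the added include path; the header name testsupport/fileutils.h is an assumption based on the fileutils.cc source listed above:
// In test/unit_test.cc (hypothetical usage, assuming the header matches
// the fileutils.cc source added to LOCAL_SRC_FILES):
#include "testsupport/fileutils.h"  // resolved via $(LOCAL_PATH)/../../../test
The helpers declared there can then be used from unit_test.cc, e.g. to locate test resource files.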

View File

@ -426,46 +426,6 @@ static const WebRtc_Word16 kDeterminantEstMatrix[66] = {
355, 330
};
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
WebRtc_Word32 tmp32no1 = 0;
WebRtc_Word32 tmp32no2 = 0;
WebRtc_Word16 tmp16no1 = 0;
WebRtc_Word16 tmp16no2 = 0;
const WebRtc_Word16 kExp2Const = 11819; // Q13
int i = 0;
tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
inst->magnLen);
// Guarantee a Q-domain as high as possible and still fit in int16
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
kExp2Const, tmp16no2, 21);
for (i = 0; i < inst->magnLen; i++) {
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
// in Q21
tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
inst->noiseEstLogQuantile[offset + i]);
tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
tmp16no1 = -(WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
tmp16no1 += 21;// shift 21 to get result in Q0
tmp16no1 -= (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise)
if (tmp16no1 > 0) {
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, tmp16no1);
} else {
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, -tmp16no1);
}
// TODO(bjornv): Replace with WebRtcSpl_SatW32ToW16(...) when available.
if (tmp32no1 > 32767) {
tmp32no1 = 32767;
} else if (tmp32no1 < -32768) {
tmp32no1 = -32768;
}
tmp16no1 = (WebRtc_Word16) tmp32no1;
inst->noiseEstQuantile[i] = tmp16no1;
}
}
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
WebRtc_Word16 pink_noise_exp_avg,
WebRtc_Word32 pink_noise_num_avg,
@ -675,128 +635,6 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) {
return 0;
}
#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise) {
WebRtc_Word32 numerator;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713; // Q15
WebRtc_Word16 widthFactor = 21845;
int i, s, offset;
numerator = FACTOR_Q16;
tabind = inst->stages - inst->normData;
assert(tabind < 9);
assert(tabind > -9);
if (tabind < 0) {
logval = -WebRtcNsx_kLogTable[-tabind];
} else {
logval = WebRtcNsx_kLogTable[tabind];
}
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
// log2(magn(i))
assert(frac < 256);
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
lmagn[i] = logval;//0;
}
}
// loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// Get counter values from state
counter = inst->noiseEstCounter[s];
assert(counter < 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// quant_est(...)
for (i = 0; i < inst->magnLen; i++) {
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
delta = FACTOR_Q7_STARTUP;
}
}
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// This is the smallest fixed point representation we can
// have, hence we limit the output.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor,
countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
}
inst->noiseEstCounter[s]++;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
}
#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
// Extract thresholds for feature parameters
// histograms are computed over some window_size (given by window_pars)
// thresholds and weights are extracted every window
@ -1322,7 +1160,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14
tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
tmp16no1, frac, 14);
tmp16no1, frac, 14);
if (tmpIndFX) {
tmpIndFX = 8192 + tmp16no2;
} else {
@ -1343,7 +1181,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin
// inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb);
tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
inst->priorNonSpeechProb += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
//final speech probability: combine prior model with LR factor:
@ -1424,18 +1262,9 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
int right_shifts_in_magnU16 = 0;
int right_shifts_in_initMagnEst = 0;
// For lower band do all processing
// update analysis buffer for L band
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
speechFrame, inst->blockLen10ms);
// Update analysis buffer for lower band, and window data before FFT.
WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame);
// Window data before FFT
for (i = 0; i < inst->anaLen; i++) {
winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->window[i], inst->analysisBuffer[i], 14); // Q0
}
// Get input energy
inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn));
@ -1459,11 +1288,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
// create realImag as winData interleaved with zeros (= imag. part), normalize it
for (i = 0; i < inst->anaLen; i++) {
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData)
realImag[j + 1] = 0; // Insert zeros in imaginary part
}
WebRtcNsx_CreateComplexBuffer(inst, winData, realImag);
// bit-reverse position of elements in array and FFT the array
WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages)
@ -1492,7 +1317,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]);
inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages)
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages)
}
} else {
@ -1541,7 +1366,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]);
inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages)
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages)
// Switch initMagnEst to Q(minNorm-stages)
@ -1607,8 +1432,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9);
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10);
tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4);
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(
(WebRtc_Word16)(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2);
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)
(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2);
matrix_determinant = (WebRtc_Word16)tmp_1_w32;
sum_log_i -= kSumLogIndex[65]; // Q5
sum_log_i_square -= kSumSquareLogIndex[65]; // Q2
@ -1684,40 +1509,16 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
inst->blockLen10ms);
return;
}
// Filter the data in the frequency domain
for (i = 0; i < inst->magnLen; i++) {
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
inst->real[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
inst->imag[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
}
// back to time domain
// Create spectrum
realImag[0] = inst->real[0];
realImag[1] = -inst->imag[0];
for (i = 1; i < inst->anaLen2; i++) {
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
tmp16no1 = (inst->anaLen << 1) - j;
realImag[j] = inst->real[i];
realImag[j + 1] = -inst->imag[i];
realImag[tmp16no1] = inst->real[i];
realImag[tmp16no1 + 1] = inst->imag[i];
}
realImag[inst->anaLen] = inst->real[inst->anaLen2];
realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
// Filter the data in the frequency domain, and create spectrum.
WebRtcNsx_PrepareSpectrum(inst, realImag);
// bit-reverse position of elements in array and IFFT it
WebRtcSpl_ComplexBitReverse(realImag, inst->stages);
outCIFFT = WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1);
for (i = 0; i < inst->anaLen; i++) {
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j],
outCIFFT - inst->normData);
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp32no1,
WEBRTC_SPL_WORD16_MIN);
}
// Denormalize.
WebRtcNsx_Denormalize(inst, realImag, outCIFFT);
//scale factor: only do it after END_STARTUP_LONG time
gainFactor = 8192; // 8192 = Q13(1.0)
@ -1754,26 +1555,8 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
gainFactor = tmp16no1 + tmp16no2; // Q13
} // out of flag_gain_map==1
// synthesis
for (i = 0; i < inst->anaLen; i++) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i],
inst->real[i], 14); // Q0, window in Q14
tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0
// Down shift with rounding
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1,
WEBRTC_SPL_WORD16_MIN); // Q0
inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0
}
// read out fully processed segment
for (i = 0; i < inst->blockLen10ms; i++) {
outFrame[i] = inst->synthesisBuffer[i]; // Q0
}
// update synthesis buffer
WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
inst->blockLen10ms);
// Synthesis, read out fully processed segment, and update synthesis buffer.
WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
}
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB,
@ -1815,6 +1598,12 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
int flag, sign;
int q_domain_to_use = 0;
// Code for ARMv7-Neon platform assumes the following:
assert(inst->anaLen % 16 == 0);
assert(inst->anaLen2 % 8 == 0);
assert(inst->blockLen10ms % 16 == 0);
assert(inst->magnLen == inst->anaLen2 + 1);
#ifdef NS_FILEDEBUG
fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile);
#endif
@ -2080,8 +1869,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) {
inst->featureSpecDiff = 0x007FFFFF;
} else {
inst->featureSpecDiff = WEBRTC_SPL_MIN(
0x007FFFFF, WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1));
inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF,
WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1));
}
}
@ -2317,7 +2106,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
}
avgProbSpeechHB = (WebRtc_Word16)(4096
- WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12
avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14
avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(
tmpU32no1, inst->stages - 3); // Q14
// // original FLOAT code
// // gain based on speech probability:
@ -2368,3 +2158,264 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
return 0;
}
#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
// Update the noise estimation information.
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
WebRtc_Word32 tmp32no1 = 0;
WebRtc_Word32 tmp32no2 = 0;
WebRtc_Word16 tmp16 = 0;
const WebRtc_Word16 kExp2Const = 11819; // Q13
int i = 0;
tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
inst->magnLen);
// Guarantee a Q-domain as high as possible and still fit in int16
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
kExp2Const, tmp16, 21);
for (i = 0; i < inst->magnLen; i++) {
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
// in Q21
tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
inst->noiseEstLogQuantile[offset + i]);
tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
tmp16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
tmp16 -= 21;// shift 21 to get result in Q0
tmp16 += (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise)
if (tmp16 < 0) {
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
} else {
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
}
inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
}
}
// Noise Estimation
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise) {
WebRtc_Word32 numerator = FACTOR_Q16;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
WebRtc_Word16 countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
const int16_t log2_const = 22713; // Q15
const int16_t width_factor = 21845;
int i, s, offset;
tabind = inst->stages - inst->normData;
assert(tabind < 9);
assert(tabind > -9);
if (tabind < 0) {
logval = -WebRtcNsx_kLogTable[-tabind];
} else {
logval = WebRtcNsx_kLogTable[tabind];
}
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros)
& 0x7FFFFFFF) >> 23);
// log2(magn(i))
assert(frac < 256);
log2 = (WebRtc_Word16)(((31 - zeros) << 8)
+ WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
lmagn[i] = logval;//0;
}
}
// loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// Get counter values from state
counter = inst->noiseEstCounter[s];
assert(counter < 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// quant_est(...)
for (i = 0; i < inst->magnLen; i++) {
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents the use of
// unrealistic values that could cause overflow.
delta = FACTOR_Q7_STARTUP;
}
}
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// This is the smallest fixed point representation we can
// have, hence we limit the output.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
width_factor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
UpdateNoiseEstimate(inst, offset);
}
}
inst->noiseEstCounter[s]++;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG) {
UpdateNoiseEstimate(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*q_noise) = (WebRtc_Word16)inst->qNoise;
}
// Filter the data in the frequency domain, and create spectrum.
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) {
int i = 0, j = 0;
int16_t tmp16 = 0;
for (i = 0; i < inst->magnLen; i++) {
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
(WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
(WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
}
freq_buf[0] = inst->real[0];
freq_buf[1] = -inst->imag[0];
for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
tmp16 = (inst->anaLen << 1) - j;
freq_buf[j] = inst->real[i];
freq_buf[j + 1] = -inst->imag[i];
freq_buf[tmp16] = inst->real[i];
freq_buf[tmp16 + 1] = inst->imag[i];
}
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}
// Denormalize the input buffer.
inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
int i = 0, j = 0;
int32_t tmp32 = 0;
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
tmp32 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)in[j],
factor - inst->normData);
inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
}
}
// For the noise suppression process, synthesis, read out fully processed
// segment, and update synthesis buffer.
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
int16_t* out_frame,
int16_t gain_factor) {
int i = 0;
int16_t tmp16a = 0;
int16_t tmp16b = 0;
int32_t tmp32 = 0;
// synthesis
for (i = 0; i < inst->anaLen; i++) {
tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->window[i], inst->real[i], 14); // Q0, window in Q14
tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
// Down shift with rounding
tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i],
tmp16b); // Q0
}
// read out fully processed segment
for (i = 0; i < inst->blockLen10ms; i++) {
out_frame[i] = inst->synthesisBuffer[i]; // Q0
}
// update synthesis buffer
WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
inst->synthesisBuffer + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+ inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
}
// Update analysis buffer for lower band, and window data before FFT.
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
int16_t* out,
int16_t* new_speech) {
int i = 0;
// For lower band update analysis buffer.
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
inst->analysisBuffer + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
+ inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
// Window data before FFT.
for (i = 0; i < inst->anaLen; i++) {
out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->window[i], inst->analysisBuffer[i], 14); // Q0
}
}
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
// zeros, and normalize it.
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
int16_t* in,
int16_t* out) {
int i = 0, j = 0;
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
out[j + 1] = 0; // Insert zeros in imaginary part
}
}
#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))

View File

@ -129,14 +129,14 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
*
* Output:
* - NS_inst : Initialized instance
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
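A minimal usage sketch for the two setup calls declared above; this is not part of the CL. It assumes the caller has already allocated an NsxInst_t through the public NS API (allocation is not declared in this header), and the helper name ConfigureNsx is purely illustrative:
// Illustrative only: configure an already-allocated instance.
static int ConfigureNsx(NsxInst_t* inst, WebRtc_UWord32 fs, int mode) {
  if (WebRtcNsx_InitCore(inst, fs) != 0) {
    return -1;  // initialization failed
  }
  // mode: 0 = Mild (6 dB), 1 = Medium (10 dB), 2 = Aggressive (15 dB).
  return WebRtcNsx_set_policy_core(inst, mode);
}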
@ -158,16 +158,47 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
short* outFrameLow, short* outFrameHigh);
int WebRtcNsx_ProcessCore(NsxInst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
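Similarly, a hedged sketch of driving the core one frame at a time; the 160-sample frame length is an assumption (10 ms at 16 kHz), DenoiseBuffer and its scratch buffers are illustrative names, and whether the high-band pointers are actually used depends on the sampling rate configured at init:
// Illustrative only: run the core over a buffer of consecutive 10 ms frames.
enum { kFrameLen = 160 };  // assumed 10 ms frame at 16 kHz
static int DenoiseBuffer(NsxInst_t* inst, short* in, short* out,
                         int num_frames) {
  short hb_in[kFrameLen] = {0};  // scratch high-band input (may be unused)
  short hb_out[kFrameLen];       // scratch high-band output
  int n;
  for (n = 0; n < num_frames; ++n) {
    if (WebRtcNsx_ProcessCore(inst, in + n * kFrameLen, hb_in,
                              out + n * kFrameLen, hb_out) != 0) {
      return -1;
    }
  }
  return 0;
}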
/****************************************************************************
* Internal functions and variable declarations shared with optimized code.
*/
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise);
// Noise Estimation.
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
// Filter the data in the frequency domain, and create spectrum.
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst,
int16_t* freq_buff);
// For the noise suppression process, synthesis, read out fully processed
// segment, and update synthesis buffer.
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
int16_t* out_frame,
int16_t gain_factor);
// Update analysis buffer for lower band, and window data before FFT.
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
int16_t* out,
int16_t* new_speech);
// Denormalize the input buffer.
inline void WebRtcNsx_Denormalize(NsxInst_t* inst,
int16_t* in,
int factor);
// Create a complex number buffer, as the input interleaved with zeros,
// and normalize it.
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
int16_t* in,
int16_t* out);
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];

View File

@ -15,19 +15,98 @@
#include <arm_neon.h>
#include <assert.h>
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise) {
WebRtc_Word32 numerator;
// Update the noise estimation information.
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
int i = 0;
const int16_t kExp2Const = 11819; // Q13
int16_t* ptr_noiseEstLogQuantile = NULL;
int16_t* ptr_noiseEstQuantile = NULL;
int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
int32x4_t twentyOne32x4 = vdupq_n_s32(21);
int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
int32x4_t constB32x4 = vdupq_n_s32(0x200000);
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713;
WebRtc_Word16 widthFactor = 21845;
int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
inst->magnLen);
// Guarantee a Q-domain as high as possible and still fit in int16
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
tmp16,
21);
int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
// tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
// inst->noiseEstLogQuantile[offset + i]);
int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
// tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
v32x4A = vorrq_s32(v32x4A, constB32x4);
// tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
v32x4B = vshrq_n_s32(v32x4B, 21);
// tmp16 -= 21;// shift 21 to get result in Q0
v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
// tmp16 += (int16_t) inst->qNoise;
// shift to get result in Q(qNoise)
v32x4B = vaddq_s32(v32x4B, qNoise32x4);
// if (tmp16 < 0) {
// tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
// } else {
// tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
// }
v32x4B = vshlq_s32(v32x4A, v32x4B);
// tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
v16x4 = vqmovn_s32(v32x4B);
//inst->noiseEstQuantile[i] = tmp16;
vst1_s16(ptr_noiseEstQuantile, v16x4);
}
// Last iteration:
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
// in Q21
int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
*ptr_noiseEstLogQuantile);
int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
tmp16 -= 21;// shift 21 to get result in Q0
tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
if (tmp16 < 0) {
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
} else {
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
}
*ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
}
// Noise Estimation
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise) {
int32_t numerator = FACTOR_Q16;
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
int16_t countProd, delta, zeros, frac;
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
const int16_t log2_const = 22713;
const int16_t width_factor = 21845;
int i, s, offset;
numerator = FACTOR_Q16;
tabind = inst->stages - inst->normData;
assert(tabind < 9);
assert(tabind > -9);
@ -45,13 +124,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
frac = (int16_t)((((uint32_t)magn[i] << zeros)
& 0x7FFFFFFF) >> 23);
assert(frac < 256);
// log2(magn(i))
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
log2 = (int16_t)(((31 - zeros) << 8)
+ WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
@ -61,9 +142,9 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
WebRtc_Word16 factor = FACTOR_Q7;
int16_t factor = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG)
factor = FACTOR_Q7_STARTUP;
@ -75,10 +156,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
counter = inst->noiseEstCounter[s];
assert(counter < 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// quant_est(...)
WebRtc_Word16 deltaBuff[8];
int16_t deltaBuff[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
@ -103,13 +184,13 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512) {
deltaBuff[j] = WebRtcSpl_DivW32W16ResW16(
numerator, inst->noiseEstDensity[offset + i + j]);
numerator, inst->noiseEstDensity[offset + i + j]);
}
}
// Update log quantile estimate
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
// tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
@ -142,17 +223,19 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// logval is the smallest fixed point representation we can have. Values below
// that will correspond to values in the interval [0, 1], which can't possibly
// occur.
// logval is the smallest fixed point representation we can have. Values
// below that will correspond to values in the interval [0, 1], which
// can't possibly occur.
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
// Do the if-else branches:
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
__asm__("vbit %q0, %q1, %q2"::
"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::
"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// Update density estimate
@ -165,61 +248,61 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
__asm__("vbit %q0, %q1, %q2"::
"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
for (; i < inst->magnLen; i++) {
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
delta = FACTOR_Q7_STARTUP;
}
// Last iteration over magnitude spectrum:
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents the use of
// unrealistic values that could cause overflow.
delta = FACTOR_Q7_STARTUP;
}
}
// update log quantile estimate
tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// logval is the smallest fixed point representation we can have.
// Values below that will correspond to values in the interval
// [0, 1], which can't possibly occur.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// logval is the smallest fixed point representation we can have.
// Values below that will correspond to values in the interval
// [0, 1], which can't possibly occur.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
width_factor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
widthFactor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
UpdateNoiseEstimate(inst, offset);
}
}
inst->noiseEstCounter[s]++;
@ -228,13 +311,417 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
UpdateNoiseEstimate(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
(*q_noise) = (int16_t)inst->qNoise;
}
// Filter the data in the frequency domain, and create spectrum.
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) {
// (1) Filtering.
// Fixed point C code for the next block is as follows:
// for (i = 0; i < inst->magnLen; i++) {
// inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
// (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
// inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
// (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
// }
int16_t* ptr_real = &inst->real[0];
int16_t* ptr_imag = &inst->imag[0];
uint16_t* ptr_noiseSupFilter = &inst->noiseSupFilter[0];
// Filter the rest in the frequency domain.
for (; ptr_real < &inst->real[inst->magnLen - 1]; ) {
// Loop unrolled once. Both pointers are incremented by 4 twice.
__asm__ __volatile__(
"vld1.16 d20, [%[ptr_real]]\n\t"
"vld1.16 d22, [%[ptr_imag]]\n\t"
"vld1.16 d23, [%[ptr_noiseSupFilter]]!\n\t"
"vmull.s16 q10, d20, d23\n\t"
"vmull.s16 q11, d22, d23\n\t"
"vshrn.s32 d20, q10, #14\n\t"
"vshrn.s32 d22, q11, #14\n\t"
"vst1.16 d20, [%[ptr_real]]!\n\t"
"vst1.16 d22, [%[ptr_imag]]!\n\t"
"vld1.16 d18, [%[ptr_real]]\n\t"
"vld1.16 d24, [%[ptr_imag]]\n\t"
"vld1.16 d25, [%[ptr_noiseSupFilter]]!\n\t"
"vmull.s16 q9, d18, d25\n\t"
"vmull.s16 q12, d24, d25\n\t"
"vshrn.s32 d18, q9, #14\n\t"
"vshrn.s32 d24, q12, #14\n\t"
"vst1.16 d18, [%[ptr_real]]!\n\t"
"vst1.16 d24, [%[ptr_imag]]!\n\t"
// Specify constraints.
:[ptr_imag]"+r"(ptr_imag),
[ptr_real]"+r"(ptr_real),
[ptr_noiseSupFilter]"+r"(ptr_noiseSupFilter)
:
:"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
"q9", "q10", "q11", "q12"
);
}
// Filter the last pair of elements in the frequency domain.
*ptr_real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_real,
(int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages)
*ptr_imag = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_imag,
(int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages)
// (2) Create spectrum.
// Fixed point C code for the rest of the function is as follows:
// freq_buf[0] = inst->real[0];
// freq_buf[1] = -inst->imag[0];
// for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
// tmp16 = (inst->anaLen << 1) - j;
// freq_buf[j] = inst->real[i];
// freq_buf[j + 1] = -inst->imag[i];
// freq_buf[tmp16] = inst->real[i];
// freq_buf[tmp16 + 1] = inst->imag[i];
// }
// freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
// freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
freq_buf[0] = inst->real[0];
freq_buf[1] = -inst->imag[0];
int offset = -16;
int16_t* ptr_realImag1 = &freq_buf[2];
int16_t* ptr_realImag2 = &freq_buf[(inst->anaLen << 1) - 8];
ptr_real = &inst->real[1];
ptr_imag = &inst->imag[1];
for (; ptr_real < &inst->real[inst->anaLen2 - 11]; ) {
// Loop unrolled once. All pointers are incremented twice.
__asm__ __volatile__(
"vld1.16 d22, [%[ptr_real]]!\n\t"
"vld1.16 d23, [%[ptr_imag]]!\n\t"
// Negate and interleave:
"vmov.s16 d20, d22\n\t"
"vneg.s16 d21, d23\n\t"
"vzip.16 d20, d21\n\t"
// Write 8 elements to &freq_buf[j]
"vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t"
// Interleave and reverse elements:
"vzip.16 d22, d23\n\t"
"vrev64.32 d18, d23\n\t"
"vrev64.32 d19, d22\n\t"
// Write 8 elements to &freq_buf[tmp16]
"vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t"
"vld1.16 d22, [%[ptr_real]]!\n\t"
"vld1.16 d23, [%[ptr_imag]]!\n\t"
// Negate and interleave:
"vmov.s16 d20, d22\n\t"
"vneg.s16 d21, d23\n\t"
"vzip.16 d20, d21\n\t"
// Write 8 elements to &freq_buf[j]
"vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t"
// Interleave and reverse elements:
"vzip.16 d22, d23\n\t"
"vrev64.32 d18, d23\n\t"
"vrev64.32 d19, d22\n\t"
// Write 8 elements to &freq_buf[tmp16]
"vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t"
// Specify constraints.
:[ptr_imag]"+r"(ptr_imag),
[ptr_real]"+r"(ptr_real),
[ptr_realImag1]"+r"(ptr_realImag1),
[ptr_realImag2]"+r"(ptr_realImag2)
:[offset]"r"(offset)
:"d18", "d19", "d20", "d21", "d22", "d23"
);
}
for (ptr_realImag2 += 6;
ptr_real <= &inst->real[inst->anaLen2];
ptr_real += 1, ptr_imag += 1, ptr_realImag1 += 2, ptr_realImag2 -= 2) {
*ptr_realImag1 = *ptr_real;
*(ptr_realImag1 + 1) = -(*ptr_imag);
*ptr_realImag2 = *ptr_real;
*(ptr_realImag2 + 1) = *ptr_imag;
}
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}
// Denormalize the input buffer.
inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
int16_t* ptr_real = &inst->real[0];
int16_t* ptr_in = &in[0];
__asm__ __volatile__("vdup.32 q10, %0" ::
"r"((int32_t)(factor - inst->normData)) : "q10");
for (; ptr_real < &inst->real[inst->anaLen]; ) {
// Loop unrolled once. Both pointers are incremented.
__asm__ __volatile__(
// tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
// factor - inst->normData);
"vld2.16 {d24, d25}, [%[ptr_in]]!\n\t"
"vmovl.s16 q12, d24\n\t"
"vshl.s32 q12, q10\n\t"
// inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
"vqmovn.s32 d24, q12\n\t"
"vst1.16 d24, [%[ptr_real]]!\n\t"
// tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
// factor - inst->normData);
"vld2.16 {d22, d23}, [%[ptr_in]]!\n\t"
"vmovl.s16 q11, d22\n\t"
"vshl.s32 q11, q10\n\t"
// inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
"vqmovn.s32 d22, q11\n\t"
"vst1.16 d22, [%[ptr_real]]!\n\t"
// Specify constraints.
:[ptr_in]"+r"(ptr_in),
[ptr_real]"+r"(ptr_real)
:
:"d22", "d23", "d24", "d25"
);
}
}
// For the noise suppression process, synthesis, read out fully processed segment,
// and update synthesis buffer.
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
int16_t* out_frame,
int16_t gain_factor) {
int16_t* ptr_real = &inst->real[0];
int16_t* ptr_syn = &inst->synthesisBuffer[0];
int16_t* ptr_window = &inst->window[0];
// synthesis
__asm__ __volatile__("vdup.16 d24, %0" : : "r"(gain_factor) : "d24");
// Loop unrolled once. All pointers are incremented in the assembly code.
for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) {
__asm__ __volatile__(
// Load variables.
"vld1.16 d22, [%[ptr_real]]!\n\t"
"vld1.16 d23, [%[ptr_window]]!\n\t"
"vld1.16 d25, [%[ptr_syn]]\n\t"
// tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// inst->window[i], inst->real[i], 14); // Q0, window in Q14
"vmull.s16 q11, d22, d23\n\t"
"vrshrn.i32 d22, q11, #14\n\t"
// tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
"vmull.s16 q11, d24, d22\n\t"
// tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
"vqrshrn.s32 d22, q11, #13\n\t"
// inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(
// inst->synthesisBuffer[i], tmp16b); // Q0
"vqadd.s16 d25, d22\n\t"
"vst1.16 d25, [%[ptr_syn]]!\n\t"
// Load variables.
"vld1.16 d26, [%[ptr_real]]!\n\t"
"vld1.16 d27, [%[ptr_window]]!\n\t"
"vld1.16 d28, [%[ptr_syn]]\n\t"
// tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// inst->window[i], inst->real[i], 14); // Q0, window in Q14
"vmull.s16 q13, d26, d27\n\t"
"vrshrn.i32 d26, q13, #14\n\t"
// tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
"vmull.s16 q13, d24, d26\n\t"
// tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
"vqrshrn.s32 d26, q13, #13\n\t"
// inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(
// inst->synthesisBuffer[i], tmp16b); // Q0
"vqadd.s16 d28, d26\n\t"
"vst1.16 d28, [%[ptr_syn]]!\n\t"
// Specify constraints.
:[ptr_real]"+r"(ptr_real),
[ptr_window]"+r"(ptr_window),
[ptr_syn]"+r"(ptr_syn)
:
:"d22", "d23", "d24", "d25", "d26", "d27", "d28", "q11", "q12", "q13"
);
}
int16_t* ptr_out = &out_frame[0];
ptr_syn = &inst->synthesisBuffer[0];
// read out fully processed segment
for (; ptr_syn < &inst->synthesisBuffer[inst->blockLen10ms]; ) {
// Loop unrolled once. Both pointers are incremented in the assembly code.
__asm__ __volatile__(
// out_frame[i] = inst->synthesisBuffer[i]; // Q0
"vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t"
"vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t"
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
"vst1.16 {d24, d25}, [%[ptr_out]]!\n\t"
:[ptr_syn]"+r"(ptr_syn),
[ptr_out]"+r"(ptr_out)
:
:"d22", "d23", "d24", "d25"
);
}
// Update synthesis buffer.
// C code:
// WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
// inst->synthesisBuffer + inst->blockLen10ms,
// inst->anaLen - inst->blockLen10ms);
ptr_out = &inst->synthesisBuffer[0],
ptr_syn = &inst->synthesisBuffer[inst->blockLen10ms];
for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) {
// Loop unrolled once. Both pointers are incremented in the assembly code.
__asm__ __volatile__(
"vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t"
"vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t"
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
"vst1.16 {d24, d25}, [%[ptr_out]]!\n\t"
:[ptr_syn]"+r"(ptr_syn),
[ptr_out]"+r"(ptr_out)
:
:"d22", "d23", "d24", "d25"
);
}
// C code:
// WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
// + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
__asm__ __volatile__("vdup.16 q10, %0" : : "r"(0) : "q10");
for (; ptr_out < &inst->synthesisBuffer[inst->anaLen]; ) {
// Loop unrolled once. Pointer is incremented in the assembly code.
__asm__ __volatile__(
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
:[ptr_out]"+r"(ptr_out)
:
:"d20", "d21"
);
}
}
// Update analysis buffer for lower band, and window data before FFT.
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
int16_t* out,
int16_t* new_speech) {
int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
int16_t* ptr_out = &inst->analysisBuffer[0];
// For lower band update analysis buffer.
// WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
// inst->analysisBuffer + inst->blockLen10ms,
// inst->anaLen - inst->blockLen10ms);
for (; ptr_out < &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms]; ) {
// Loop unrolled once, so both pointers are incremented by 8 twice.
__asm__ __volatile__(
"vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t"
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
"vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t"
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
:[ptr_ana]"+r"(ptr_ana),
[ptr_out]"+r"(ptr_out)
:
:"d20", "d21", "d22", "d23"
);
}
// WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
// + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
for (ptr_ana = new_speech; ptr_out < &inst->analysisBuffer[inst->anaLen]; ) {
// Loop unrolled once, so both pointers are incremented by 8 twice.
__asm__ __volatile__(
"vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t"
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
"vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t"
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
:[ptr_ana]"+r"(ptr_ana),
[ptr_out]"+r"(ptr_out)
:
:"d20", "d21", "d22", "d23"
);
}
// Window data before FFT
int16_t* ptr_window = &inst->window[0];
ptr_out = &out[0];
ptr_ana = &inst->analysisBuffer[0];
for (; ptr_out < &out[inst->anaLen]; ) {
// Loop unrolled once, so all pointers are incremented by 4 twice.
__asm__ __volatile__(
"vld1.16 d20, [%[ptr_ana]]!\n\t"
"vld1.16 d21, [%[ptr_window]]!\n\t"
// out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// inst->window[i], inst->analysisBuffer[i], 14); // Q0
"vmull.s16 q10, d20, d21\n\t"
"vrshrn.i32 d20, q10, #14\n\t"
"vst1.16 d20, [%[ptr_out]]!\n\t"
"vld1.16 d22, [%[ptr_ana]]!\n\t"
"vld1.16 d23, [%[ptr_window]]!\n\t"
// out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// inst->window[i], inst->analysisBuffer[i], 14); // Q0
"vmull.s16 q11, d22, d23\n\t"
"vrshrn.i32 d22, q11, #14\n\t"
"vst1.16 d22, [%[ptr_out]]!\n\t"
// Specify constraints.
:[ptr_ana]"+r"(ptr_ana),
[ptr_window]"+r"(ptr_window),
[ptr_out]"+r"(ptr_out)
:
:"d20", "d21", "d22", "d23", "q10", "q11"
);
}
}
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
// zeros, and normalize it.
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
int16_t* in,
int16_t* out) {
int16_t* ptr_out = &out[0];
int16_t* ptr_in = &in[0];
__asm__ __volatile__("vdup.16 d25, %0" : : "r"(0) : "d25");
__asm__ __volatile__("vdup.16 q10, %0" : : "r"(inst->normData) : "q10");
for (; ptr_in < &in[inst->anaLen]; ) {
// Loop unrolled once, so ptr_in is incremented by 8 twice,
// and ptr_out is incremented by 8 four times.
__asm__ __volatile__(
// out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
"vld1.16 {d22, d23}, [%[ptr_in]]!\n\t"
"vshl.s16 q11, q10\n\t"
"vmov d24, d23\n\t"
// out[j + 1] = 0; // Insert zeros in imaginary part
"vmov d23, d25\n\t"
"vst2.16 {d22, d23}, [%[ptr_out]]!\n\t"
"vst2.16 {d24, d25}, [%[ptr_out]]!\n\t"
// out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
"vld1.16 {d22, d23}, [%[ptr_in]]!\n\t"
"vshl.s16 q11, q10\n\t"
"vmov d24, d23\n\t"
// out[j + 1] = 0; // Insert zeros in imaginary part
"vmov d23, d25\n\t"
"vst2.16 {d22, d23}, [%[ptr_out]]!\n\t"
"vst2.16 {d24, d25}, [%[ptr_out]]!\n\t"
// Specify constraints.
:[ptr_in]"+r"(ptr_in),
[ptr_out]"+r"(ptr_out)
:
:"d22", "d23", "d24", "d25", "q10", "q11"
);
}
}
#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)