For commiting changes in CL 277002, due to file structure changes introduced during
the review of the code. Review URL: http://webrtc-codereview.appspot.com/246005 git-svn-id: http://webrtc.googlecode.com/svn/trunk@805 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
0d0037c2fd
commit
913644b92d
@ -107,7 +107,8 @@ LOCAL_MODULE_TAGS := tests
|
||||
LOCAL_CPP_EXTENSION := .cc
|
||||
LOCAL_SRC_FILES:= \
|
||||
$(call all-proto-files-under, test) \
|
||||
test/unit_test.cc
|
||||
test/unit_test.cc \
|
||||
../../../test/testsupport/fileutils.cc
|
||||
|
||||
# Flags passed to both C and C++ files.
|
||||
LOCAL_CFLAGS := \
|
||||
@ -118,6 +119,7 @@ LOCAL_C_INCLUDES := \
|
||||
$(LOCAL_PATH)/interface \
|
||||
$(LOCAL_PATH)/../interface \
|
||||
$(LOCAL_PATH)/../.. \
|
||||
$(LOCAL_PATH)/../../../test \
|
||||
$(LOCAL_PATH)/../../system_wrappers/interface \
|
||||
$(LOCAL_PATH)/../../common_audio/signal_processing_library/main/interface \
|
||||
external/gtest/include \
|
||||
|
@ -426,46 +426,6 @@ static const WebRtc_Word16 kDeterminantEstMatrix[66] = {
|
||||
355, 330
|
||||
};
|
||||
|
||||
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
||||
WebRtc_Word32 tmp32no1 = 0;
|
||||
WebRtc_Word32 tmp32no2 = 0;
|
||||
|
||||
WebRtc_Word16 tmp16no1 = 0;
|
||||
WebRtc_Word16 tmp16no2 = 0;
|
||||
const WebRtc_Word16 kExp2Const = 11819; // Q13
|
||||
|
||||
int i = 0;
|
||||
|
||||
tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
|
||||
inst->magnLen);
|
||||
// Guarantee a Q-domain as high as possible and still fit in int16
|
||||
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
kExp2Const, tmp16no2, 21);
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
|
||||
// in Q21
|
||||
tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
|
||||
inst->noiseEstLogQuantile[offset + i]);
|
||||
tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
tmp16no1 = -(WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
|
||||
tmp16no1 += 21;// shift 21 to get result in Q0
|
||||
tmp16no1 -= (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise)
|
||||
if (tmp16no1 > 0) {
|
||||
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, tmp16no1);
|
||||
} else {
|
||||
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, -tmp16no1);
|
||||
}
|
||||
// TODO(bjornv): Replace with WebRtcSpl_SatW32ToW16(...) when available.
|
||||
if (tmp32no1 > 32767) {
|
||||
tmp32no1 = 32767;
|
||||
} else if (tmp32no1 < -32768) {
|
||||
tmp32no1 = -32768;
|
||||
}
|
||||
tmp16no1 = (WebRtc_Word16) tmp32no1;
|
||||
inst->noiseEstQuantile[i] = tmp16no1;
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
|
||||
WebRtc_Word16 pink_noise_exp_avg,
|
||||
WebRtc_Word32 pink_noise_num_avg,
|
||||
@ -675,128 +635,6 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
|
||||
WebRtc_Word16* qNoise) {
|
||||
WebRtc_Word32 numerator;
|
||||
|
||||
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
|
||||
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||
WebRtc_Word16 log2Const = 22713; // Q15
|
||||
WebRtc_Word16 widthFactor = 21845;
|
||||
|
||||
int i, s, offset;
|
||||
|
||||
numerator = FACTOR_Q16;
|
||||
|
||||
tabind = inst->stages - inst->normData;
|
||||
assert(tabind < 9);
|
||||
assert(tabind > -9);
|
||||
if (tabind < 0) {
|
||||
logval = -WebRtcNsx_kLogTable[-tabind];
|
||||
} else {
|
||||
logval = WebRtcNsx_kLogTable[tabind];
|
||||
}
|
||||
|
||||
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
||||
// magn is in Q(-stages), and the real lmagn values are:
|
||||
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
|
||||
// lmagn in Q8
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
if (magn[i]) {
|
||||
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
|
||||
// log2(magn(i))
|
||||
assert(frac < 256);
|
||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
|
||||
// log2(magn(i))*log(2)
|
||||
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
|
||||
// + log(2^stages)
|
||||
lmagn[i] += logval;
|
||||
} else {
|
||||
lmagn[i] = logval;//0;
|
||||
}
|
||||
}
|
||||
|
||||
// loop over simultaneous estimates
|
||||
for (s = 0; s < SIMULT; s++) {
|
||||
offset = s * inst->magnLen;
|
||||
|
||||
// Get counter values from state
|
||||
counter = inst->noiseEstCounter[s];
|
||||
assert(counter < 201);
|
||||
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||
|
||||
// quant_est(...)
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
// compute delta
|
||||
if (inst->noiseEstDensity[offset + i] > 512) {
|
||||
delta = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||
inst->noiseEstDensity[offset + i]);
|
||||
} else {
|
||||
delta = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
delta = FACTOR_Q7_STARTUP;
|
||||
}
|
||||
}
|
||||
|
||||
// update log quantile estimate
|
||||
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
|
||||
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||
// CounterDiv=1/(inst->counter[s]+1) in Q15
|
||||
tmp16 += 2;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
|
||||
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
|
||||
} else {
|
||||
tmp16 += 1;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
|
||||
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
|
||||
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||
if (inst->noiseEstLogQuantile[offset + i] < logval) {
|
||||
// This is the smallest fixed point representation we can
|
||||
// have, hence we limit the output.
|
||||
inst->noiseEstLogQuantile[offset + i] = logval;
|
||||
}
|
||||
}
|
||||
|
||||
// update density estimate
|
||||
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||
< WIDTH_Q8) {
|
||||
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor,
|
||||
countDiv, 15);
|
||||
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||
}
|
||||
} // end loop over magnitude spectrum
|
||||
|
||||
if (counter >= END_STARTUP_LONG) {
|
||||
inst->noiseEstCounter[s] = 0;
|
||||
if (inst->blockIndex >= END_STARTUP_LONG) {
|
||||
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
}
|
||||
inst->noiseEstCounter[s]++;
|
||||
|
||||
} // end loop over simultaneous estimates
|
||||
|
||||
// Sequentially update the noise during startup
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||
}
|
||||
(*qNoise) = (WebRtc_Word16)inst->qNoise;
|
||||
}
|
||||
#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||
|
||||
// Extract thresholds for feature parameters
|
||||
// histograms are computed over some window_size (given by window_pars)
|
||||
// thresholds and weights are extracted every window
|
||||
@ -1322,7 +1160,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin
|
||||
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
|
||||
frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14
|
||||
tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
tmp16no1, frac, 14);
|
||||
tmp16no1, frac, 14);
|
||||
if (tmpIndFX) {
|
||||
tmpIndFX = 8192 + tmp16no2;
|
||||
} else {
|
||||
@ -1343,7 +1181,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin
|
||||
// inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb);
|
||||
tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
|
||||
inst->priorNonSpeechProb += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
|
||||
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
|
||||
PRIOR_UPDATE_Q14, tmp16, 14); // Q14
|
||||
|
||||
//final speech probability: combine prior model with LR factor:
|
||||
|
||||
@ -1424,18 +1262,9 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
|
||||
int right_shifts_in_magnU16 = 0;
|
||||
int right_shifts_in_initMagnEst = 0;
|
||||
|
||||
// For lower band do all processing
|
||||
// update analysis buffer for L band
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
|
||||
inst->anaLen - inst->blockLen10ms);
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
|
||||
speechFrame, inst->blockLen10ms);
|
||||
// Update analysis buffer for lower band, and window data before FFT.
|
||||
WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame);
|
||||
|
||||
// Window data before FFT
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->window[i], inst->analysisBuffer[i], 14); // Q0
|
||||
}
|
||||
// Get input energy
|
||||
inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn));
|
||||
|
||||
@ -1459,11 +1288,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
|
||||
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
|
||||
|
||||
// create realImag as winData interleaved with zeros (= imag. part), normalize it
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
|
||||
realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData)
|
||||
realImag[j + 1] = 0; // Insert zeros in imaginary part
|
||||
}
|
||||
WebRtcNsx_CreateComplexBuffer(inst, winData, realImag);
|
||||
|
||||
// bit-reverse position of elements in array and FFT the array
|
||||
WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages)
|
||||
@ -1492,7 +1317,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
|
||||
tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]);
|
||||
inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
|
||||
|
||||
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages)
|
||||
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
|
||||
inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages)
|
||||
}
|
||||
} else {
|
||||
@ -1541,7 +1366,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
|
||||
tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]);
|
||||
inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
|
||||
|
||||
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages)
|
||||
magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
|
||||
inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages)
|
||||
|
||||
// Switch initMagnEst to Q(minNorm-stages)
|
||||
@ -1607,8 +1432,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16*
|
||||
tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9);
|
||||
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10);
|
||||
tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4);
|
||||
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(
|
||||
(WebRtc_Word16)(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2);
|
||||
tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)
|
||||
(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2);
|
||||
matrix_determinant = (WebRtc_Word16)tmp_1_w32;
|
||||
sum_log_i -= kSumLogIndex[65]; // Q5
|
||||
sum_log_i_square -= kSumSquareLogIndex[65]; // Q2
|
||||
@ -1684,40 +1509,16 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
|
||||
inst->blockLen10ms);
|
||||
return;
|
||||
}
|
||||
// Filter the data in the frequency domain
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
|
||||
inst->real[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
|
||||
inst->imag[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
}
|
||||
// back to time domain
|
||||
// Create spectrum
|
||||
realImag[0] = inst->real[0];
|
||||
realImag[1] = -inst->imag[0];
|
||||
for (i = 1; i < inst->anaLen2; i++) {
|
||||
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
|
||||
tmp16no1 = (inst->anaLen << 1) - j;
|
||||
realImag[j] = inst->real[i];
|
||||
realImag[j + 1] = -inst->imag[i];
|
||||
realImag[tmp16no1] = inst->real[i];
|
||||
realImag[tmp16no1 + 1] = inst->imag[i];
|
||||
}
|
||||
realImag[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
WebRtcNsx_PrepareSpectrum(inst, realImag);
|
||||
|
||||
// bit-reverse position of elements in array and IFFT it
|
||||
WebRtcSpl_ComplexBitReverse(realImag, inst->stages);
|
||||
outCIFFT = WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1);
|
||||
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
j = WEBRTC_SPL_LSHIFT_W16(i, 1);
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j],
|
||||
outCIFFT - inst->normData);
|
||||
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||
tmp32no1,
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
}
|
||||
// Denormalize.
|
||||
WebRtcNsx_Denormalize(inst, realImag, outCIFFT);
|
||||
|
||||
//scale factor: only do it after END_STARTUP_LONG time
|
||||
gainFactor = 8192; // 8192 = Q13(1.0)
|
||||
@ -1754,26 +1555,8 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
|
||||
gainFactor = tmp16no1 + tmp16no2; // Q13
|
||||
} // out of flag_gain_map==1
|
||||
|
||||
// synthesis
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i],
|
||||
inst->real[i], 14); // Q0, window in Q14
|
||||
tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0
|
||||
// Down shift with rounding
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1,
|
||||
WEBRTC_SPL_WORD16_MIN); // Q0
|
||||
inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0
|
||||
}
|
||||
|
||||
// read out fully processed segment
|
||||
for (i = 0; i < inst->blockLen10ms; i++) {
|
||||
outFrame[i] = inst->synthesisBuffer[i]; // Q0
|
||||
}
|
||||
// update synthesis buffer
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
|
||||
inst->anaLen - inst->blockLen10ms);
|
||||
WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
|
||||
inst->blockLen10ms);
|
||||
// Synthesis, read out fully processed segment, and update synthesis buffer.
|
||||
WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
|
||||
}
|
||||
|
||||
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB,
|
||||
@ -1815,6 +1598,12 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
|
||||
int flag, sign;
|
||||
int q_domain_to_use = 0;
|
||||
|
||||
// Code for ARMv7-Neon platform assumes the following:
|
||||
assert(inst->anaLen % 16 == 0);
|
||||
assert(inst->anaLen2 % 8 == 0);
|
||||
assert(inst->blockLen10ms % 16 == 0);
|
||||
assert(inst->magnLen == inst->anaLen2 + 1);
|
||||
|
||||
#ifdef NS_FILEDEBUG
|
||||
fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile);
|
||||
#endif
|
||||
@ -2080,8 +1869,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
|
||||
if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) {
|
||||
inst->featureSpecDiff = 0x007FFFFF;
|
||||
} else {
|
||||
inst->featureSpecDiff = WEBRTC_SPL_MIN(
|
||||
0x007FFFFF, WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1));
|
||||
inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF,
|
||||
WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2317,7 +2106,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
|
||||
}
|
||||
avgProbSpeechHB = (WebRtc_Word16)(4096
|
||||
- WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12
|
||||
avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14
|
||||
avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(
|
||||
tmpU32no1, inst->stages - 3); // Q14
|
||||
|
||||
// // original FLOAT code
|
||||
// // gain based on speech probability:
|
||||
@ -2368,3 +2158,264 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||
|
||||
// Update the noise estimation information.
|
||||
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
||||
WebRtc_Word32 tmp32no1 = 0;
|
||||
WebRtc_Word32 tmp32no2 = 0;
|
||||
WebRtc_Word16 tmp16 = 0;
|
||||
const WebRtc_Word16 kExp2Const = 11819; // Q13
|
||||
|
||||
int i = 0;
|
||||
|
||||
tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
|
||||
inst->magnLen);
|
||||
// Guarantee a Q-domain as high as possible and still fit in int16
|
||||
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
kExp2Const, tmp16, 21);
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
|
||||
// in Q21
|
||||
tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
|
||||
inst->noiseEstLogQuantile[offset + i]);
|
||||
tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
tmp16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
|
||||
tmp16 -= 21;// shift 21 to get result in Q0
|
||||
tmp16 += (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise)
|
||||
if (tmp16 < 0) {
|
||||
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
|
||||
} else {
|
||||
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
|
||||
}
|
||||
inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
|
||||
}
|
||||
}
|
||||
|
||||
// Noise Estimation
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise) {
|
||||
WebRtc_Word32 numerator = FACTOR_Q16;
|
||||
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
|
||||
WebRtc_Word16 countProd, delta, zeros, frac;
|
||||
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||
const int16_t log2_const = 22713; // Q15
|
||||
const int16_t width_factor = 21845;
|
||||
|
||||
int i, s, offset;
|
||||
|
||||
tabind = inst->stages - inst->normData;
|
||||
assert(tabind < 9);
|
||||
assert(tabind > -9);
|
||||
if (tabind < 0) {
|
||||
logval = -WebRtcNsx_kLogTable[-tabind];
|
||||
} else {
|
||||
logval = WebRtcNsx_kLogTable[tabind];
|
||||
}
|
||||
|
||||
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
||||
// magn is in Q(-stages), and the real lmagn values are:
|
||||
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
|
||||
// lmagn in Q8
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
if (magn[i]) {
|
||||
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros)
|
||||
& 0x7FFFFFFF) >> 23);
|
||||
// log2(magn(i))
|
||||
assert(frac < 256);
|
||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8)
|
||||
+ WebRtcNsx_kLogTableFrac[frac]);
|
||||
// log2(magn(i))*log(2)
|
||||
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15);
|
||||
// + log(2^stages)
|
||||
lmagn[i] += logval;
|
||||
} else {
|
||||
lmagn[i] = logval;//0;
|
||||
}
|
||||
}
|
||||
|
||||
// loop over simultaneous estimates
|
||||
for (s = 0; s < SIMULT; s++) {
|
||||
offset = s * inst->magnLen;
|
||||
|
||||
// Get counter values from state
|
||||
counter = inst->noiseEstCounter[s];
|
||||
assert(counter < 201);
|
||||
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||
|
||||
// quant_est(...)
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
// compute delta
|
||||
if (inst->noiseEstDensity[offset + i] > 512) {
|
||||
delta = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||
inst->noiseEstDensity[offset + i]);
|
||||
} else {
|
||||
delta = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
delta = FACTOR_Q7_STARTUP;
|
||||
}
|
||||
}
|
||||
|
||||
// update log quantile estimate
|
||||
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
|
||||
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||
// CounterDiv=1/(inst->counter[s]+1) in Q15
|
||||
tmp16 += 2;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
|
||||
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
|
||||
} else {
|
||||
tmp16 += 1;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
|
||||
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
|
||||
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||
if (inst->noiseEstLogQuantile[offset + i] < logval) {
|
||||
// This is the smallest fixed point representation we can
|
||||
// have, hence we limit the output.
|
||||
inst->noiseEstLogQuantile[offset + i] = logval;
|
||||
}
|
||||
}
|
||||
|
||||
// update density estimate
|
||||
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||
< WIDTH_Q8) {
|
||||
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
width_factor, countDiv, 15);
|
||||
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||
}
|
||||
} // end loop over magnitude spectrum
|
||||
|
||||
if (counter >= END_STARTUP_LONG) {
|
||||
inst->noiseEstCounter[s] = 0;
|
||||
if (inst->blockIndex >= END_STARTUP_LONG) {
|
||||
UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
}
|
||||
inst->noiseEstCounter[s]++;
|
||||
|
||||
} // end loop over simultaneous estimates
|
||||
|
||||
// Sequentially update the noise during startup
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||
}
|
||||
(*q_noise) = (WebRtc_Word16)inst->qNoise;
|
||||
}
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) {
|
||||
int i = 0, j = 0;
|
||||
int16_t tmp16 = 0;
|
||||
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
|
||||
(WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
|
||||
(WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
}
|
||||
|
||||
freq_buf[0] = inst->real[0];
|
||||
freq_buf[1] = -inst->imag[0];
|
||||
for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
|
||||
tmp16 = (inst->anaLen << 1) - j;
|
||||
freq_buf[j] = inst->real[i];
|
||||
freq_buf[j + 1] = -inst->imag[i];
|
||||
freq_buf[tmp16] = inst->real[i];
|
||||
freq_buf[tmp16 + 1] = inst->imag[i];
|
||||
}
|
||||
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
}
|
||||
|
||||
// Denormalize the input buffer.
|
||||
inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
|
||||
int i = 0, j = 0;
|
||||
int32_t tmp32 = 0;
|
||||
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
|
||||
tmp32 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)in[j],
|
||||
factor - inst->normData);
|
||||
inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
}
|
||||
}
|
||||
|
||||
// For the noise supression process, synthesis, read out fully processed
|
||||
// segment, and update synthesis buffer.
|
||||
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor) {
|
||||
int i = 0;
|
||||
int16_t tmp16a = 0;
|
||||
int16_t tmp16b = 0;
|
||||
int32_t tmp32 = 0;
|
||||
|
||||
// synthesis
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->window[i], inst->real[i], 14); // Q0, window in Q14
|
||||
tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
|
||||
// Down shift with rounding
|
||||
tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i],
|
||||
tmp16b); // Q0
|
||||
}
|
||||
|
||||
// read out fully processed segment
|
||||
for (i = 0; i < inst->blockLen10ms; i++) {
|
||||
out_frame[i] = inst->synthesisBuffer[i]; // Q0
|
||||
}
|
||||
|
||||
// update synthesis buffer
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
|
||||
inst->synthesisBuffer + inst->blockLen10ms,
|
||||
inst->anaLen - inst->blockLen10ms);
|
||||
WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
|
||||
+ inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
|
||||
}
|
||||
|
||||
// Update analysis buffer for lower band, and window data before FFT.
|
||||
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech) {
|
||||
int i = 0;
|
||||
|
||||
// For lower band update analysis buffer.
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
|
||||
inst->analysisBuffer + inst->blockLen10ms,
|
||||
inst->anaLen - inst->blockLen10ms);
|
||||
WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
|
||||
+ inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
|
||||
|
||||
// Window data before FFT.
|
||||
for (i = 0; i < inst->anaLen; i++) {
|
||||
out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->window[i], inst->analysisBuffer[i], 14); // Q0
|
||||
}
|
||||
}
|
||||
|
||||
// Create a complex number buffer (out[]) as the intput (in[]) interleaved with
|
||||
// zeros, and normalize it.
|
||||
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
|
||||
int16_t* in,
|
||||
int16_t* out) {
|
||||
int i = 0, j = 0;
|
||||
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
|
||||
out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
|
||||
out[j + 1] = 0; // Insert zeros in imaginary part
|
||||
}
|
||||
}
|
||||
|
||||
#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||
|
@ -129,14 +129,14 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
|
||||
* This changes the aggressiveness of the noise suppression method.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Instance that should be initialized
|
||||
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
|
||||
* - inst : Instance that should be initialized
|
||||
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
|
||||
*
|
||||
* Output:
|
||||
* - NS_inst : Initialized instance
|
||||
* - inst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
|
||||
|
||||
@ -158,16 +158,47 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
|
||||
* Return value : 0 - OK
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
|
||||
short* outFrameLow, short* outFrameHigh);
|
||||
int WebRtcNsx_ProcessCore(NsxInst_t* inst,
|
||||
short* inFrameLow,
|
||||
short* inFrameHigh,
|
||||
short* outFrameLow,
|
||||
short* outFrameHigh);
|
||||
|
||||
/****************************************************************************
|
||||
* Internal functions and variable declarations shared with optimized code.
|
||||
*/
|
||||
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
|
||||
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
|
||||
WebRtc_Word16* qNoise);
|
||||
// Noise Estimation.
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise);
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst,
|
||||
int16_t* freq_buff);
|
||||
|
||||
// For the noise supression process, synthesis, read out fully processed
|
||||
// segment, and update synthesis buffer.
|
||||
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor);
|
||||
|
||||
// Update analysis buffer for lower band, and window data before FFT.
|
||||
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech);
|
||||
|
||||
// Denormalize the input buffer.
|
||||
inline void WebRtcNsx_Denormalize(NsxInst_t* inst,
|
||||
int16_t* in,
|
||||
int factor);
|
||||
|
||||
// Create a complex number buffer, as the intput interleaved with zeros,
|
||||
// and normalize it.
|
||||
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
|
||||
int16_t* in,
|
||||
int16_t* out);
|
||||
|
||||
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
|
||||
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
|
||||
|
@ -15,19 +15,98 @@
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
|
||||
WebRtc_Word16* qNoise) {
|
||||
WebRtc_Word32 numerator;
|
||||
// Update the noise estimation information.
|
||||
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
||||
int i = 0;
|
||||
const int16_t kExp2Const = 11819; // Q13
|
||||
int16_t* ptr_noiseEstLogQuantile = NULL;
|
||||
int16_t* ptr_noiseEstQuantile = NULL;
|
||||
int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
|
||||
int32x4_t twentyOne32x4 = vdupq_n_s32(21);
|
||||
int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
|
||||
int32x4_t constB32x4 = vdupq_n_s32(0x200000);
|
||||
|
||||
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
|
||||
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||
WebRtc_Word16 log2Const = 22713;
|
||||
WebRtc_Word16 widthFactor = 21845;
|
||||
int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
|
||||
inst->magnLen);
|
||||
|
||||
// Guarantee a Q-domain as high as possible and still fit in int16
|
||||
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
|
||||
tmp16,
|
||||
21);
|
||||
|
||||
int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
|
||||
|
||||
for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
|
||||
ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
|
||||
ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
|
||||
ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
|
||||
|
||||
// tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
|
||||
// inst->noiseEstLogQuantile[offset + i]);
|
||||
int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
|
||||
int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
|
||||
|
||||
// tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
|
||||
v32x4A = vorrq_s32(v32x4A, constB32x4);
|
||||
|
||||
// tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
|
||||
v32x4B = vshrq_n_s32(v32x4B, 21);
|
||||
|
||||
// tmp16 -= 21;// shift 21 to get result in Q0
|
||||
v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
|
||||
|
||||
// tmp16 += (int16_t) inst->qNoise;
|
||||
// shift to get result in Q(qNoise)
|
||||
v32x4B = vaddq_s32(v32x4B, qNoise32x4);
|
||||
|
||||
// if (tmp16 < 0) {
|
||||
// tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
|
||||
// } else {
|
||||
// tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
|
||||
// }
|
||||
v32x4B = vshlq_s32(v32x4A, v32x4B);
|
||||
|
||||
// tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
|
||||
v16x4 = vqmovn_s32(v32x4B);
|
||||
|
||||
//inst->noiseEstQuantile[i] = tmp16;
|
||||
vst1_s16(ptr_noiseEstQuantile, v16x4);
|
||||
}
|
||||
|
||||
// Last iteration:
|
||||
|
||||
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
|
||||
// in Q21
|
||||
int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
|
||||
*ptr_noiseEstLogQuantile);
|
||||
int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
|
||||
tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
|
||||
tmp16 -= 21;// shift 21 to get result in Q0
|
||||
tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
|
||||
if (tmp16 < 0) {
|
||||
tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
|
||||
} else {
|
||||
tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
|
||||
}
|
||||
*ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
|
||||
}
|
||||
|
||||
// Noise Estimation
|
||||
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise) {
|
||||
int32_t numerator = FACTOR_Q16;
|
||||
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
|
||||
int16_t countProd, delta, zeros, frac;
|
||||
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||
const int16_t log2_const = 22713;
|
||||
const int16_t width_factor = 21845;
|
||||
|
||||
int i, s, offset;
|
||||
|
||||
numerator = FACTOR_Q16;
|
||||
|
||||
tabind = inst->stages - inst->normData;
|
||||
assert(tabind < 9);
|
||||
assert(tabind > -9);
|
||||
@ -45,13 +124,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
// lmagn in Q8
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
if (magn[i]) {
|
||||
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
|
||||
zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
|
||||
frac = (int16_t)((((uint32_t)magn[i] << zeros)
|
||||
& 0x7FFFFFFF) >> 23);
|
||||
assert(frac < 256);
|
||||
// log2(magn(i))
|
||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
|
||||
log2 = (int16_t)(((31 - zeros) << 8)
|
||||
+ WebRtcNsx_kLogTableFrac[frac]);
|
||||
// log2(magn(i))*log(2)
|
||||
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
|
||||
lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15);
|
||||
// + log(2^stages)
|
||||
lmagn[i] += logval;
|
||||
} else {
|
||||
@ -61,9 +142,9 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
|
||||
int16x4_t Q3_16x4 = vdup_n_s16(3);
|
||||
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
|
||||
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
|
||||
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
|
||||
|
||||
WebRtc_Word16 factor = FACTOR_Q7;
|
||||
int16_t factor = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG)
|
||||
factor = FACTOR_Q7_STARTUP;
|
||||
|
||||
@ -75,10 +156,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
counter = inst->noiseEstCounter[s];
|
||||
assert(counter < 201);
|
||||
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||
countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||
|
||||
// quant_est(...)
|
||||
WebRtc_Word16 deltaBuff[8];
|
||||
int16_t deltaBuff[8];
|
||||
int16x4_t tmp16x4_0;
|
||||
int16x4_t tmp16x4_1;
|
||||
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
|
||||
@ -103,13 +184,13 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
for (j = 0; j < 8; j++) {
|
||||
if (inst->noiseEstDensity[offset + i + j] > 512) {
|
||||
deltaBuff[j] = WebRtcSpl_DivW32W16ResW16(
|
||||
numerator, inst->noiseEstDensity[offset + i + j]);
|
||||
numerator, inst->noiseEstDensity[offset + i + j]);
|
||||
}
|
||||
}
|
||||
|
||||
// Update log quantile estimate
|
||||
|
||||
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
// tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
|
||||
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
|
||||
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
|
||||
@ -142,17 +223,19 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
|
||||
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
|
||||
|
||||
// logval is the smallest fixed point representation we can have. Values below
|
||||
// that will correspond to values in the interval [0, 1], which can't possibly
|
||||
// occur.
|
||||
// logval is the smallest fixed point representation we can have. Values
|
||||
// below that will correspond to values in the interval [0, 1], which
|
||||
// can't possibly occur.
|
||||
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
|
||||
|
||||
// Do the if-else branches:
|
||||
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
|
||||
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
|
||||
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
|
||||
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
|
||||
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||
__asm__("vbit %q0, %q1, %q2"::
|
||||
"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
|
||||
__asm__("vbif %q0, %q1, %q2"::
|
||||
"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
|
||||
|
||||
// Update density estimate
|
||||
@ -165,61 +248,61 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
|
||||
tmp16x8_3 = vabsq_s16(tmp16x8_3);
|
||||
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
|
||||
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||
__asm__("vbit %q0, %q1, %q2"::
|
||||
"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
|
||||
} // End loop over magnitude spectrum
|
||||
|
||||
for (; i < inst->magnLen; i++) {
|
||||
// compute delta
|
||||
if (inst->noiseEstDensity[offset + i] > 512) {
|
||||
delta = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||
inst->noiseEstDensity[offset + i]);
|
||||
} else {
|
||||
delta = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
delta = FACTOR_Q7_STARTUP;
|
||||
}
|
||||
// Last iteration over magnitude spectrum:
|
||||
// compute delta
|
||||
if (inst->noiseEstDensity[offset + i] > 512) {
|
||||
delta = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||
inst->noiseEstDensity[offset + i]);
|
||||
} else {
|
||||
delta = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
delta = FACTOR_Q7_STARTUP;
|
||||
}
|
||||
}
|
||||
// update log quantile estimate
|
||||
tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
|
||||
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||
// CounterDiv=1/(inst->counter[s]+1) in Q15
|
||||
tmp16 += 2;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
|
||||
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
|
||||
} else {
|
||||
tmp16 += 1;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
|
||||
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
|
||||
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||
if (inst->noiseEstLogQuantile[offset + i] < logval) {
|
||||
// logval is the smallest fixed point representation we can have.
|
||||
// Values below that will correspond to values in the interval
|
||||
// [0, 1], which can't possibly occur.
|
||||
inst->noiseEstLogQuantile[offset + i] = logval;
|
||||
}
|
||||
}
|
||||
|
||||
// update log quantile estimate
|
||||
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
|
||||
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||
// CounterDiv=1/(inst->counter[s]+1) in Q15
|
||||
tmp16 += 2;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
|
||||
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
|
||||
} else {
|
||||
tmp16 += 1;
|
||||
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
|
||||
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
|
||||
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||
if (inst->noiseEstLogQuantile[offset + i] < logval) {
|
||||
// logval is the smallest fixed point representation we can have.
|
||||
// Values below that will correspond to values in the interval
|
||||
// [0, 1], which can't possibly occur.
|
||||
inst->noiseEstLogQuantile[offset + i] = logval;
|
||||
}
|
||||
}
|
||||
// update density estimate
|
||||
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||
< WIDTH_Q8) {
|
||||
tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
width_factor, countDiv, 15);
|
||||
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||
}
|
||||
|
||||
// update density estimate
|
||||
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||
< WIDTH_Q8) {
|
||||
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
widthFactor, countDiv, 15);
|
||||
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||
}
|
||||
} // end loop over magnitude spectrum
|
||||
|
||||
if (counter >= END_STARTUP_LONG) {
|
||||
inst->noiseEstCounter[s] = 0;
|
||||
if (inst->blockIndex >= END_STARTUP_LONG) {
|
||||
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||
UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
}
|
||||
inst->noiseEstCounter[s]++;
|
||||
@ -228,13 +311,417 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo
|
||||
|
||||
// Sequentially update the noise during startup
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||
UpdateNoiseEstimate(inst, offset);
|
||||
}
|
||||
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||
noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||
}
|
||||
(*qNoise) = (WebRtc_Word16)inst->qNoise;
|
||||
(*q_noise) = (int16_t)inst->qNoise;
|
||||
}
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) {
|
||||
|
||||
// (1) Filtering.
|
||||
|
||||
// Fixed point C code for the next block is as follows:
|
||||
// for (i = 0; i < inst->magnLen; i++) {
|
||||
// inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
|
||||
// (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
// inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
|
||||
// (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
|
||||
// }
|
||||
|
||||
int16_t* ptr_real = &inst->real[0];
|
||||
int16_t* ptr_imag = &inst->imag[0];
|
||||
uint16_t* ptr_noiseSupFilter = &inst->noiseSupFilter[0];
|
||||
|
||||
// Filter the rest in the frequency domain.
|
||||
for (; ptr_real < &inst->real[inst->magnLen - 1]; ) {
|
||||
// Loop unrolled once. Both pointers are incremented by 4 twice.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 d20, [%[ptr_real]]\n\t"
|
||||
"vld1.16 d22, [%[ptr_imag]]\n\t"
|
||||
"vld1.16 d23, [%[ptr_noiseSupFilter]]!\n\t"
|
||||
"vmull.s16 q10, d20, d23\n\t"
|
||||
"vmull.s16 q11, d22, d23\n\t"
|
||||
"vshrn.s32 d20, q10, #14\n\t"
|
||||
"vshrn.s32 d22, q11, #14\n\t"
|
||||
"vst1.16 d20, [%[ptr_real]]!\n\t"
|
||||
"vst1.16 d22, [%[ptr_imag]]!\n\t"
|
||||
|
||||
"vld1.16 d18, [%[ptr_real]]\n\t"
|
||||
"vld1.16 d24, [%[ptr_imag]]\n\t"
|
||||
"vld1.16 d25, [%[ptr_noiseSupFilter]]!\n\t"
|
||||
"vmull.s16 q9, d18, d25\n\t"
|
||||
"vmull.s16 q12, d24, d25\n\t"
|
||||
"vshrn.s32 d18, q9, #14\n\t"
|
||||
"vshrn.s32 d24, q12, #14\n\t"
|
||||
"vst1.16 d18, [%[ptr_real]]!\n\t"
|
||||
"vst1.16 d24, [%[ptr_imag]]!\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_imag]"+r"(ptr_imag),
|
||||
[ptr_real]"+r"(ptr_real),
|
||||
[ptr_noiseSupFilter]"+r"(ptr_noiseSupFilter)
|
||||
:
|
||||
:"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
|
||||
"q9", "q10", "q11", "q12"
|
||||
);
|
||||
}
|
||||
|
||||
// Filter the last pair of elements in the frequency domain.
|
||||
*ptr_real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_real,
|
||||
(int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages)
|
||||
*ptr_imag = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_imag,
|
||||
(int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages)
|
||||
|
||||
// (2) Create spectrum.
|
||||
|
||||
// Fixed point C code for the rest of the function is as follows:
|
||||
// freq_buf[0] = inst->real[0];
|
||||
// freq_buf[1] = -inst->imag[0];
|
||||
// for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
|
||||
// tmp16 = (inst->anaLen << 1) - j;
|
||||
// freq_buf[j] = inst->real[i];
|
||||
// freq_buf[j + 1] = -inst->imag[i];
|
||||
// freq_buf[tmp16] = inst->real[i];
|
||||
// freq_buf[tmp16 + 1] = inst->imag[i];
|
||||
// }
|
||||
// freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
// freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
|
||||
freq_buf[0] = inst->real[0];
|
||||
freq_buf[1] = -inst->imag[0];
|
||||
|
||||
int offset = -16;
|
||||
int16_t* ptr_realImag1 = &freq_buf[2];
|
||||
int16_t* ptr_realImag2 = ptr_realImag2 = &freq_buf[(inst->anaLen << 1) - 8];
|
||||
ptr_real = &inst->real[1];
|
||||
ptr_imag = &inst->imag[1];
|
||||
for (; ptr_real < &inst->real[inst->anaLen2 - 11]; ) {
|
||||
// Loop unrolled once. All pointers are incremented twice.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 d22, [%[ptr_real]]!\n\t"
|
||||
"vld1.16 d23, [%[ptr_imag]]!\n\t"
|
||||
// Negate and interleave:
|
||||
"vmov.s16 d20, d22\n\t"
|
||||
"vneg.s16 d21, d23\n\t"
|
||||
"vzip.16 d20, d21\n\t"
|
||||
// Write 8 elements to &freq_buf[j]
|
||||
"vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t"
|
||||
// Interleave and reverse elements:
|
||||
"vzip.16 d22, d23\n\t"
|
||||
"vrev64.32 d18, d23\n\t"
|
||||
"vrev64.32 d19, d22\n\t"
|
||||
// Write 8 elements to &freq_buf[tmp16]
|
||||
"vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t"
|
||||
|
||||
"vld1.16 d22, [%[ptr_real]]!\n\t"
|
||||
"vld1.16 d23, [%[ptr_imag]]!\n\t"
|
||||
// Negate and interleave:
|
||||
"vmov.s16 d20, d22\n\t"
|
||||
"vneg.s16 d21, d23\n\t"
|
||||
"vzip.16 d20, d21\n\t"
|
||||
// Write 8 elements to &freq_buf[j]
|
||||
"vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t"
|
||||
// Interleave and reverse elements:
|
||||
"vzip.16 d22, d23\n\t"
|
||||
"vrev64.32 d18, d23\n\t"
|
||||
"vrev64.32 d19, d22\n\t"
|
||||
// Write 8 elements to &freq_buf[tmp16]
|
||||
"vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_imag]"+r"(ptr_imag),
|
||||
[ptr_real]"+r"(ptr_real),
|
||||
[ptr_realImag1]"+r"(ptr_realImag1),
|
||||
[ptr_realImag2]"+r"(ptr_realImag2)
|
||||
:[offset]"r"(offset)
|
||||
:"d18", "d19", "d20", "d21", "d22", "d23"
|
||||
);
|
||||
}
|
||||
for (ptr_realImag2 += 6;
|
||||
ptr_real <= &inst->real[inst->anaLen2];
|
||||
ptr_real += 1, ptr_imag += 1, ptr_realImag1 += 2, ptr_realImag2 -= 2) {
|
||||
*ptr_realImag1 = *ptr_real;
|
||||
*(ptr_realImag1 + 1) = -(*ptr_imag);
|
||||
*ptr_realImag2 = *ptr_real;
|
||||
*(ptr_realImag2 + 1) = *ptr_imag;
|
||||
}
|
||||
|
||||
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
}
|
||||
|
||||
// Denormalize the input buffer.
|
||||
inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
|
||||
int16_t* ptr_real = &inst->real[0];
|
||||
int16_t* ptr_in = &in[0];
|
||||
|
||||
__asm__ __volatile__("vdup.32 q10, %0" ::
|
||||
"r"((int32_t)(factor - inst->normData)) : "q10");
|
||||
for (; ptr_real < &inst->real[inst->anaLen]; ) {
|
||||
|
||||
// Loop unrolled once. Both pointers are incremented.
|
||||
__asm__ __volatile__(
|
||||
// tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
|
||||
// factor - inst->normData);
|
||||
"vld2.16 {d24, d25}, [%[ptr_in]]!\n\t"
|
||||
"vmovl.s16 q12, d24\n\t"
|
||||
"vshl.s32 q12, q10\n\t"
|
||||
// inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
"vqmovn.s32 d24, q12\n\t"
|
||||
"vst1.16 d24, [%[ptr_real]]!\n\t"
|
||||
|
||||
// tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
|
||||
// factor - inst->normData);
|
||||
"vld2.16 {d22, d23}, [%[ptr_in]]!\n\t"
|
||||
"vmovl.s16 q11, d22\n\t"
|
||||
"vshl.s32 q11, q10\n\t"
|
||||
// inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
"vqmovn.s32 d22, q11\n\t"
|
||||
"vst1.16 d22, [%[ptr_real]]!\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_in]"+r"(ptr_in),
|
||||
[ptr_real]"+r"(ptr_real)
|
||||
:
|
||||
:"d22", "d23", "d24", "d25"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// For the noise supress process, synthesis, read out fully processed segment,
|
||||
// and update synthesis buffer.
|
||||
void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor) {
|
||||
int16_t* ptr_real = &inst->real[0];
|
||||
int16_t* ptr_syn = &inst->synthesisBuffer[0];
|
||||
int16_t* ptr_window = &inst->window[0];
|
||||
|
||||
// synthesis
|
||||
__asm__ __volatile__("vdup.16 d24, %0" : : "r"(gain_factor) : "d24");
|
||||
// Loop unrolled once. All pointers are incremented in the assembly code.
|
||||
for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) {
|
||||
__asm__ __volatile__(
|
||||
// Load variables.
|
||||
"vld1.16 d22, [%[ptr_real]]!\n\t"
|
||||
"vld1.16 d23, [%[ptr_window]]!\n\t"
|
||||
"vld1.16 d25, [%[ptr_syn]]\n\t"
|
||||
// tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
// inst->window[i], inst->real[i], 14); // Q0, window in Q14
|
||||
"vmull.s16 q11, d22, d23\n\t"
|
||||
"vrshrn.i32 d22, q11, #14\n\t"
|
||||
// tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
|
||||
"vmull.s16 q11, d24, d22\n\t"
|
||||
// tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
"vqrshrn.s32 d22, q11, #13\n\t"
|
||||
// inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(
|
||||
// inst->synthesisBuffer[i], tmp16b); // Q0
|
||||
"vqadd.s16 d25, d22\n\t"
|
||||
"vst1.16 d25, [%[ptr_syn]]!\n\t"
|
||||
|
||||
// Load variables.
|
||||
"vld1.16 d26, [%[ptr_real]]!\n\t"
|
||||
"vld1.16 d27, [%[ptr_window]]!\n\t"
|
||||
"vld1.16 d28, [%[ptr_syn]]\n\t"
|
||||
// tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
// inst->window[i], inst->real[i], 14); // Q0, window in Q14
|
||||
"vmull.s16 q13, d26, d27\n\t"
|
||||
"vrshrn.i32 d26, q13, #14\n\t"
|
||||
// tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
|
||||
"vmull.s16 q13, d24, d26\n\t"
|
||||
// tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
"vqrshrn.s32 d26, q13, #13\n\t"
|
||||
// inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(
|
||||
// inst->synthesisBuffer[i], tmp16b); // Q0
|
||||
"vqadd.s16 d28, d26\n\t"
|
||||
"vst1.16 d28, [%[ptr_syn]]!\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_real]"+r"(ptr_real),
|
||||
[ptr_window]"+r"(ptr_window),
|
||||
[ptr_syn]"+r"(ptr_syn)
|
||||
:
|
||||
:"d22", "d23", "d24", "d25", "d26", "d27", "d28", "q11", "q12", "q13"
|
||||
);
|
||||
}
|
||||
|
||||
int16_t* ptr_out = &out_frame[0];
|
||||
ptr_syn = &inst->synthesisBuffer[0];
|
||||
// read out fully processed segment
|
||||
for (; ptr_syn < &inst->synthesisBuffer[inst->blockLen10ms]; ) {
|
||||
// Loop unrolled once. Both pointers are incremented in the assembly code.
|
||||
__asm__ __volatile__(
|
||||
// out_frame[i] = inst->synthesisBuffer[i]; // Q0
|
||||
"vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t"
|
||||
"vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t"
|
||||
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
"vst1.16 {d24, d25}, [%[ptr_out]]!\n\t"
|
||||
:[ptr_syn]"+r"(ptr_syn),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d22", "d23", "d24", "d25"
|
||||
);
|
||||
}
|
||||
|
||||
// Update synthesis buffer.
|
||||
// C code:
|
||||
// WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
|
||||
// inst->synthesisBuffer + inst->blockLen10ms,
|
||||
// inst->anaLen - inst->blockLen10ms);
|
||||
ptr_out = &inst->synthesisBuffer[0],
|
||||
ptr_syn = &inst->synthesisBuffer[inst->blockLen10ms];
|
||||
for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) {
|
||||
// Loop unrolled once. Both pointers are incremented in the assembly code.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t"
|
||||
"vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t"
|
||||
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
"vst1.16 {d24, d25}, [%[ptr_out]]!\n\t"
|
||||
:[ptr_syn]"+r"(ptr_syn),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d22", "d23", "d24", "d25"
|
||||
);
|
||||
}
|
||||
|
||||
// C code:
|
||||
// WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
|
||||
// + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
|
||||
__asm__ __volatile__("vdup.16 q10, %0" : : "r"(0) : "q10");
|
||||
for (; ptr_out < &inst->synthesisBuffer[inst->anaLen]; ) {
|
||||
// Loop unrolled once. Pointer is incremented in the assembly code.
|
||||
__asm__ __volatile__(
|
||||
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
|
||||
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
|
||||
:[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d20", "d21"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Update analysis buffer for lower band, and window data before FFT.
|
||||
void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech) {
|
||||
|
||||
int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
|
||||
int16_t* ptr_out = &inst->analysisBuffer[0];
|
||||
|
||||
// For lower band update analysis buffer.
|
||||
// WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
|
||||
// inst->analysisBuffer + inst->blockLen10ms,
|
||||
// inst->anaLen - inst->blockLen10ms);
|
||||
for (; ptr_out < &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms]; ) {
|
||||
// Loop unrolled once, so both pointers are incremented by 8 twice.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t"
|
||||
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
|
||||
"vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t"
|
||||
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
:[ptr_ana]"+r"(ptr_ana),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d20", "d21", "d22", "d23"
|
||||
);
|
||||
}
|
||||
|
||||
// WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
|
||||
// + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
|
||||
for (ptr_ana = new_speech; ptr_out < &inst->analysisBuffer[inst->anaLen]; ) {
|
||||
// Loop unrolled once, so both pointers are incremented by 8 twice.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t"
|
||||
"vst1.16 {d20, d21}, [%[ptr_out]]!\n\t"
|
||||
"vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t"
|
||||
"vst1.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
:[ptr_ana]"+r"(ptr_ana),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d20", "d21", "d22", "d23"
|
||||
);
|
||||
}
|
||||
|
||||
// Window data before FFT
|
||||
int16_t* ptr_window = &inst->window[0];
|
||||
ptr_out = &out[0];
|
||||
ptr_ana = &inst->analysisBuffer[0];
|
||||
for (; ptr_out < &out[inst->anaLen]; ) {
|
||||
|
||||
// Loop unrolled once, so all pointers are incremented by 4 twice.
|
||||
__asm__ __volatile__(
|
||||
"vld1.16 d20, [%[ptr_ana]]!\n\t"
|
||||
"vld1.16 d21, [%[ptr_window]]!\n\t"
|
||||
// out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
// inst->window[i], inst->analysisBuffer[i], 14); // Q0
|
||||
"vmull.s16 q10, d20, d21\n\t"
|
||||
"vrshrn.i32 d20, q10, #14\n\t"
|
||||
"vst1.16 d20, [%[ptr_out]]!\n\t"
|
||||
|
||||
"vld1.16 d22, [%[ptr_ana]]!\n\t"
|
||||
"vld1.16 d23, [%[ptr_window]]!\n\t"
|
||||
// out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
// inst->window[i], inst->analysisBuffer[i], 14); // Q0
|
||||
"vmull.s16 q11, d22, d23\n\t"
|
||||
"vrshrn.i32 d22, q11, #14\n\t"
|
||||
"vst1.16 d22, [%[ptr_out]]!\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_ana]"+r"(ptr_ana),
|
||||
[ptr_window]"+r"(ptr_window),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d20", "d21", "d22", "d23", "q10", "q11"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Create a complex number buffer (out[]) as the intput (in[]) interleaved with
|
||||
// zeros, and normalize it.
|
||||
inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst,
|
||||
int16_t* in,
|
||||
int16_t* out) {
|
||||
int16_t* ptr_out = &out[0];
|
||||
int16_t* ptr_in = &in[0];
|
||||
|
||||
__asm__ __volatile__("vdup.16 d25, %0" : : "r"(0) : "d25");
|
||||
__asm__ __volatile__("vdup.16 q10, %0" : : "r"(inst->normData) : "q10");
|
||||
for (; ptr_in < &in[inst->anaLen]; ) {
|
||||
|
||||
// Loop unrolled once, so ptr_in is incremented by 8 twice,
|
||||
// and ptr_out is incremented by 8 four times.
|
||||
__asm__ __volatile__(
|
||||
// out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
|
||||
"vld1.16 {d22, d23}, [%[ptr_in]]!\n\t"
|
||||
"vshl.s16 q11, q10\n\t"
|
||||
"vmov d24, d23\n\t"
|
||||
|
||||
// out[j + 1] = 0; // Insert zeros in imaginary part
|
||||
"vmov d23, d25\n\t"
|
||||
"vst2.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
"vst2.16 {d24, d25}, [%[ptr_out]]!\n\t"
|
||||
|
||||
// out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
|
||||
"vld1.16 {d22, d23}, [%[ptr_in]]!\n\t"
|
||||
"vshl.s16 q11, q10\n\t"
|
||||
"vmov d24, d23\n\t"
|
||||
|
||||
// out[j + 1] = 0; // Insert zeros in imaginary part
|
||||
"vmov d23, d25\n\t"
|
||||
"vst2.16 {d22, d23}, [%[ptr_out]]!\n\t"
|
||||
"vst2.16 {d24, d25}, [%[ptr_out]]!\n\t"
|
||||
|
||||
// Specify constraints.
|
||||
:[ptr_in]"+r"(ptr_in),
|
||||
[ptr_out]"+r"(ptr_out)
|
||||
:
|
||||
:"d22", "d23", "d24", "d25", "q10", "q11"
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
|
||||
|
Loading…
x
Reference in New Issue
Block a user