diff --git a/src/modules/audio_processing/Android.mk b/src/modules/audio_processing/Android.mk index 4c9ebaf09..5957fb364 100644 --- a/src/modules/audio_processing/Android.mk +++ b/src/modules/audio_processing/Android.mk @@ -107,7 +107,8 @@ LOCAL_MODULE_TAGS := tests LOCAL_CPP_EXTENSION := .cc LOCAL_SRC_FILES:= \ $(call all-proto-files-under, test) \ - test/unit_test.cc + test/unit_test.cc \ + ../../../test/testsupport/fileutils.cc # Flags passed to both C and C++ files. LOCAL_CFLAGS := \ @@ -118,6 +119,7 @@ LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/interface \ $(LOCAL_PATH)/../interface \ $(LOCAL_PATH)/../.. \ + $(LOCAL_PATH)/../../../test \ $(LOCAL_PATH)/../../system_wrappers/interface \ $(LOCAL_PATH)/../../common_audio/signal_processing_library/main/interface \ external/gtest/include \ diff --git a/src/modules/audio_processing/ns/nsx_core.c b/src/modules/audio_processing/ns/nsx_core.c index 967d8499b..616e62631 100644 --- a/src/modules/audio_processing/ns/nsx_core.c +++ b/src/modules/audio_processing/ns/nsx_core.c @@ -426,46 +426,6 @@ static const WebRtc_Word16 kDeterminantEstMatrix[66] = { 355, 330 }; -void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset) { - WebRtc_Word32 tmp32no1 = 0; - WebRtc_Word32 tmp32no2 = 0; - - WebRtc_Word16 tmp16no1 = 0; - WebRtc_Word16 tmp16no2 = 0; - const WebRtc_Word16 kExp2Const = 11819; // Q13 - - int i = 0; - - tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, - inst->magnLen); - // Guarantee a Q-domain as high as possible and still fit in int16 - inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - kExp2Const, tmp16no2, 21); - for (i = 0; i < inst->magnLen; i++) { - // inst->quantile[i]=exp(inst->lquantile[offset+i]); - // in Q21 - tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, - inst->noiseEstLogQuantile[offset + i]); - tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac - tmp16no1 = -(WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); - tmp16no1 += 21;// shift 21 to get result in Q0 - tmp16no1 -= (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise) - if (tmp16no1 > 0) { - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, tmp16no1); - } else { - tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, -tmp16no1); - } - // TODO(bjornv): Replace with WebRtcSpl_SatW32ToW16(...) when available. 
- if (tmp32no1 > 32767) { - tmp32no1 = 32767; - } else if (tmp32no1 < -32768) { - tmp32no1 = -32768; - } - tmp16no1 = (WebRtc_Word16) tmp32no1; - inst->noiseEstQuantile[i] = tmp16no1; - } -} - void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst, WebRtc_Word16 pink_noise_exp_avg, WebRtc_Word32 pink_noise_num_avg, @@ -675,128 +635,6 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) { return 0; } -#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) -void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, - WebRtc_Word16* qNoise) { - WebRtc_Word32 numerator; - - WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; - WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; - WebRtc_Word16 log2Const = 22713; // Q15 - WebRtc_Word16 widthFactor = 21845; - - int i, s, offset; - - numerator = FACTOR_Q16; - - tabind = inst->stages - inst->normData; - assert(tabind < 9); - assert(tabind > -9); - if (tabind < 0) { - logval = -WebRtcNsx_kLogTable[-tabind]; - } else { - logval = WebRtcNsx_kLogTable[tabind]; - } - - // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) - // magn is in Q(-stages), and the real lmagn values are: - // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) - // lmagn in Q8 - for (i = 0; i < inst->magnLen; i++) { - if (magn[i]) { - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); - // log2(magn(i)) - assert(frac < 256); - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); - // log2(magn(i))*log(2) - lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); - // + log(2^stages) - lmagn[i] += logval; - } else { - lmagn[i] = logval;//0; - } - } - - // loop over simultaneous estimates - for (s = 0; s < SIMULT; s++) { - offset = s * inst->magnLen; - - // Get counter values from state - counter = inst->noiseEstCounter[s]; - assert(counter < 201); - countDiv = WebRtcNsx_kCounterDiv[counter]; - countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); - - // quant_est(...) - for (i = 0; i < inst->magnLen; i++) { - // compute delta - if (inst->noiseEstDensity[offset + i] > 512) { - delta = WebRtcSpl_DivW32W16ResW16(numerator, - inst->noiseEstDensity[offset + i]); - } else { - delta = FACTOR_Q7; - if (inst->blockIndex < END_STARTUP_LONG) { - // Smaller step size during startup. This prevents from using - // unrealistic values causing overflow. - delta = FACTOR_Q7_STARTUP; - } - } - - // update log quantile estimate - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); - if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { - // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 - // CounterDiv=1/(inst->counter[s]+1) in Q15 - tmp16 += 2; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); - inst->noiseEstLogQuantile[offset + i] += tmp16no1; - } else { - tmp16 += 1; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); - // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); - inst->noiseEstLogQuantile[offset + i] -= tmp16no2; - if (inst->noiseEstLogQuantile[offset + i] < logval) { - // This is the smallest fixed point representation we can - // have, hence we limit the output. 
- inst->noiseEstLogQuantile[offset + i] = logval; - } - } - - // update density estimate - if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) - < WIDTH_Q8) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->noiseEstDensity[offset + i], countProd, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor, - countDiv, 15); - inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; - } - } // end loop over magnitude spectrum - - if (counter >= END_STARTUP_LONG) { - inst->noiseEstCounter[s] = 0; - if (inst->blockIndex >= END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); - } - } - inst->noiseEstCounter[s]++; - - } // end loop over simultaneous estimates - - // Sequentially update the noise during startup - if (inst->blockIndex < END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); - } - - for (i = 0; i < inst->magnLen; i++) { - noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) - } - (*qNoise) = (WebRtc_Word16)inst->qNoise; -} -#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) - // Extract thresholds for feature parameters // histograms are computed over some window_size (given by window_pars) // thresholds and weights are extracted every window @@ -1322,7 +1160,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14 tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - tmp16no1, frac, 14); + tmp16no1, frac, 14); if (tmpIndFX) { tmpIndFX = 8192 + tmp16no2; } else { @@ -1343,7 +1181,7 @@ void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFin // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb); tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 inst->priorNonSpeechProb += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - PRIOR_UPDATE_Q14, tmp16, 14); // Q14 + PRIOR_UPDATE_Q14, tmp16, 14); // Q14 //final speech probability: combine prior model with LR factor: @@ -1424,18 +1262,9 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* int right_shifts_in_magnU16 = 0; int right_shifts_in_initMagnEst = 0; - // For lower band do all processing - // update analysis buffer for L band - WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); - WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, - speechFrame, inst->blockLen10ms); + // Update analysis buffer for lower band, and window data before FFT. + WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame); - // Window data before FFT - for (i = 0; i < inst->anaLen; i++) { - winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->window[i], inst->analysisBuffer[i], 14); // Q0 - } // Get input energy inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn)); @@ -1459,11 +1288,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); // create realImag as winData interleaved with zeros (= imag. 
part), normalize it - for (i = 0; i < inst->anaLen; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData) - realImag[j + 1] = 0; // Insert zeros in imaginary part - } + WebRtcNsx_CreateComplexBuffer(inst, winData, realImag); // bit-reverse position of elements in array and FFT the array WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages) @@ -1492,7 +1317,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) - magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) + magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) } } else { @@ -1541,7 +1366,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) - magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) + magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) // Switch initMagnEst to Q(minNorm-stages) @@ -1607,8 +1432,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9); tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10); tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4); - tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT( - (WebRtc_Word16)(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2); + tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16) + (inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2); matrix_determinant = (WebRtc_Word16)tmp_1_w32; sum_log_i -= kSumLogIndex[65]; // Q5 sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 @@ -1684,40 +1509,16 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { inst->blockLen10ms); return; } - // Filter the data in the frequency domain - for (i = 0; i < inst->magnLen; i++) { - inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - inst->real[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) - inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - inst->imag[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) - } - // back to time domain - // Create spectrum - realImag[0] = inst->real[0]; - realImag[1] = -inst->imag[0]; - for (i = 1; i < inst->anaLen2; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - tmp16no1 = (inst->anaLen << 1) - j; - realImag[j] = inst->real[i]; - realImag[j + 1] = -inst->imag[i]; - realImag[tmp16no1] = inst->real[i]; - realImag[tmp16no1 + 1] = inst->imag[i]; - } - realImag[inst->anaLen] = inst->real[inst->anaLen2]; - realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; + + // Filter the data in the frequency domain, and create spectrum. 
+ WebRtcNsx_PrepareSpectrum(inst, realImag); // bit-reverse position of elements in array and IFFT it WebRtcSpl_ComplexBitReverse(realImag, inst->stages); outCIFFT = WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1); - for (i = 0; i < inst->anaLen; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j], - outCIFFT - inst->normData); - inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, - tmp32no1, - WEBRTC_SPL_WORD16_MIN); - } + // Denormalize. + WebRtcNsx_Denormalize(inst, realImag, outCIFFT); //scale factor: only do it after END_STARTUP_LONG time gainFactor = 8192; // 8192 = Q13(1.0) @@ -1754,26 +1555,8 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { gainFactor = tmp16no1 + tmp16no2; // Q13 } // out of flag_gain_map==1 - // synthesis - for (i = 0; i < inst->anaLen; i++) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i], - inst->real[i], 14); // Q0, window in Q14 - tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0 - // Down shift with rounding - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, - WEBRTC_SPL_WORD16_MIN); // Q0 - inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0 - } - - // read out fully processed segment - for (i = 0; i < inst->blockLen10ms; i++) { - outFrame[i] = inst->synthesisBuffer[i]; // Q0 - } - // update synthesis buffer - WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); - WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, - inst->blockLen10ms); + // Synthesis, read out fully processed segment, and update synthesis buffer. + WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor); } int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB, @@ -1815,6 +1598,12 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram int flag, sign; int q_domain_to_use = 0; + // Code for ARMv7-Neon platform assumes the following: + assert(inst->anaLen % 16 == 0); + assert(inst->anaLen2 % 8 == 0); + assert(inst->blockLen10ms % 16 == 0); + assert(inst->magnLen == inst->anaLen2 + 1); + #ifdef NS_FILEDEBUG fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile); #endif @@ -2080,8 +1869,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) { inst->featureSpecDiff = 0x007FFFFF; } else { - inst->featureSpecDiff = WEBRTC_SPL_MIN( - 0x007FFFFF, WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); + inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, + WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); } } @@ -2317,7 +2106,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram } avgProbSpeechHB = (WebRtc_Word16)(4096 - WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12 - avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14 + avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32( + tmpU32no1, inst->stages - 3); // Q14 // // original FLOAT code // // gain based on speech probability: @@ -2368,3 +2158,264 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram return 0; } + +#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) + +// Update the noise estimation information. 
+static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) { + WebRtc_Word32 tmp32no1 = 0; + WebRtc_Word32 tmp32no2 = 0; + WebRtc_Word16 tmp16 = 0; + const WebRtc_Word16 kExp2Const = 11819; // Q13 + + int i = 0; + + tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + inst->magnLen); + // Guarantee a Q-domain as high as possible and still fit in int16 + inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + kExp2Const, tmp16, 21); + for (i = 0; i < inst->magnLen; i++) { + // inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 + tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, + inst->noiseEstLogQuantile[offset + i]); + tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + tmp16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); + } else { + tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); + } + inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1); + } +} + +// Noise Estimation +void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise) { + WebRtc_Word32 numerator = FACTOR_Q16; + WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv; + WebRtc_Word16 countProd, delta, zeros, frac; + WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + const int16_t log2_const = 22713; // Q15 + const int16_t width_factor = 21845; + + int i, s, offset; + + tabind = inst->stages - inst->normData; + assert(tabind < 9); + assert(tabind > -9); + if (tabind < 0) { + logval = -WebRtcNsx_kLogTable[-tabind]; + } else { + logval = WebRtcNsx_kLogTable[tabind]; + } + + // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) + // magn is in Q(-stages), and the real lmagn values are: + // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) + // lmagn in Q8 + for (i = 0; i < inst->magnLen; i++) { + if (magn[i]) { + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); + frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) + & 0x7FFFFFFF) >> 23); + // log2(magn(i)) + assert(frac < 256); + log2 = (WebRtc_Word16)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); + // log2(magn(i))*log(2) + lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15); + // + log(2^stages) + lmagn[i] += logval; + } else { + lmagn[i] = logval;//0; + } + } + + // loop over simultaneous estimates + for (s = 0; s < SIMULT; s++) { + offset = s * inst->magnLen; + + // Get counter values from state + counter = inst->noiseEstCounter[s]; + assert(counter < 201); + countDiv = WebRtcNsx_kCounterDiv[counter]; + countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); + + // quant_est(...) + for (i = 0; i < inst->magnLen; i++) { + // compute delta + if (inst->noiseEstDensity[offset + i] > 512) { + delta = WebRtcSpl_DivW32W16ResW16(numerator, + inst->noiseEstDensity[offset + i]); + } else { + delta = FACTOR_Q7; + if (inst->blockIndex < END_STARTUP_LONG) { + // Smaller step size during startup. This prevents from using + // unrealistic values causing overflow. 
+ delta = FACTOR_Q7_STARTUP; + } + } + + // update log quantile estimate + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); + if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { + // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 + // CounterDiv=1/(inst->counter[s]+1) in Q15 + tmp16 += 2; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); + inst->noiseEstLogQuantile[offset + i] += tmp16no1; + } else { + tmp16 += 1; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); + // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); + inst->noiseEstLogQuantile[offset + i] -= tmp16no2; + if (inst->noiseEstLogQuantile[offset + i] < logval) { + // This is the smallest fixed point representation we can + // have, hence we limit the output. + inst->noiseEstLogQuantile[offset + i] = logval; + } + } + + // update density estimate + if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) + < WIDTH_Q8) { + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->noiseEstDensity[offset + i], countProd, 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + width_factor, countDiv, 15); + inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; + } + } // end loop over magnitude spectrum + + if (counter >= END_STARTUP_LONG) { + inst->noiseEstCounter[s] = 0; + if (inst->blockIndex >= END_STARTUP_LONG) { + UpdateNoiseEstimate(inst, offset); + } + } + inst->noiseEstCounter[s]++; + + } // end loop over simultaneous estimates + + // Sequentially update the noise during startup + if (inst->blockIndex < END_STARTUP_LONG) { + UpdateNoiseEstimate(inst, offset); + } + + for (i = 0; i < inst->magnLen; i++) { + noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) + } + (*q_noise) = (WebRtc_Word16)inst->qNoise; +} + +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) { + int i = 0, j = 0; + int16_t tmp16 = 0; + + for (i = 0; i < inst->magnLen; i++) { + inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i], + (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i], + (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + } + + freq_buf[0] = inst->real[0]; + freq_buf[1] = -inst->imag[0]; + for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + tmp16 = (inst->anaLen << 1) - j; + freq_buf[j] = inst->real[i]; + freq_buf[j + 1] = -inst->imag[i]; + freq_buf[tmp16] = inst->real[i]; + freq_buf[tmp16 + 1] = inst->imag[i]; + } + freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; +} + +// Denormalize the input buffer. +inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) { + int i = 0, j = 0; + int32_t tmp32 = 0; + for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) { + tmp32 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)in[j], + factor - inst->normData); + inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + } +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. 
+void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, + int16_t* out_frame, + int16_t gain_factor) { + int i = 0; + int16_t tmp16a = 0; + int16_t tmp16b = 0; + int32_t tmp32 = 0; + + // synthesis + for (i = 0; i < inst->anaLen; i++) { + tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->real[i], 14); // Q0, window in Q14 + tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0 + // Down shift with rounding + tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], + tmp16b); // Q0 + } + + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) { + out_frame[i] = inst->synthesisBuffer[i]; // Q0 + } + + // update synthesis buffer + WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, + inst->synthesisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, + int16_t* out, + int16_t* new_speech) { + int i = 0; + + // For lower band update analysis buffer. + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, + inst->analysisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); + + // Window data before FFT. + for (i = 0; i < inst->anaLen; i++) { + out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->analysisBuffer[i], 14); // Q0 + } +} + +// Create a complex number buffer (out[]) as the intput (in[]) interleaved with +// zeros, and normalize it. +inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, + int16_t* in, + int16_t* out) { + int i = 0, j = 0; + for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) { + out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) + out[j + 1] = 0; // Insert zeros in imaginary part + } +} + +#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) diff --git a/src/modules/audio_processing/ns/nsx_core.h b/src/modules/audio_processing/ns/nsx_core.h index d5766ab8d..28772223d 100644 --- a/src/modules/audio_processing/ns/nsx_core.h +++ b/src/modules/audio_processing/ns/nsx_core.h @@ -129,14 +129,14 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs); * This changes the aggressiveness of the noise suppression method. 
* * Input: - * - inst : Instance that should be initialized - * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) + * - inst : Instance that should be initialized + * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) * * Output: - * - NS_inst : Initialized instance + * - inst : Initialized instance * - * Return value : 0 - Ok - * -1 - Error + * Return value : 0 - Ok + * -1 - Error */ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode); @@ -158,16 +158,47 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode); * Return value : 0 - OK * -1 - Error */ -int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh, - short* outFrameLow, short* outFrameHigh); +int WebRtcNsx_ProcessCore(NsxInst_t* inst, + short* inFrameLow, + short* inFrameHigh, + short* outFrameLow, + short* outFrameHigh); /**************************************************************************** * Internal functions and variable declarations shared with optimized code. */ -void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset); -void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, - WebRtc_Word16* qNoise); +// Noise Estimation. +void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise); + +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, + int16_t* freq_buff); + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, + int16_t* out_frame, + int16_t gain_factor); + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, + int16_t* out, + int16_t* new_speech); + +// Denormalize the input buffer. +inline void WebRtcNsx_Denormalize(NsxInst_t* inst, + int16_t* in, + int factor); + +// Create a complex number buffer, as the intput interleaved with zeros, +// and normalize it. +inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, + int16_t* in, + int16_t* out); extern const WebRtc_Word16 WebRtcNsx_kLogTable[9]; extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256]; diff --git a/src/modules/audio_processing/ns/nsx_core_neon.c b/src/modules/audio_processing/ns/nsx_core_neon.c index 82f02aea7..e6163e312 100644 --- a/src/modules/audio_processing/ns/nsx_core_neon.c +++ b/src/modules/audio_processing/ns/nsx_core_neon.c @@ -15,19 +15,98 @@ #include #include -void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, - WebRtc_Word16* qNoise) { - WebRtc_Word32 numerator; +// Update the noise estimation information. 
+static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) { + int i = 0; + const int16_t kExp2Const = 11819; // Q13 + int16_t* ptr_noiseEstLogQuantile = NULL; + int16_t* ptr_noiseEstQuantile = NULL; + int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); + int32x4_t twentyOne32x4 = vdupq_n_s32(21); + int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); + int32x4_t constB32x4 = vdupq_n_s32(0x200000); - WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; - WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; - WebRtc_Word16 log2Const = 22713; - WebRtc_Word16 widthFactor = 21845; + int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + inst->magnLen); + + // Guarantee a Q-domain as high as possible and still fit in int16 + inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, + tmp16, + 21); + + int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); + + for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], + ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; + ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; + ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { + + // tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, + // inst->noiseEstLogQuantile[offset + i]); + int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); + int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); + + // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); + v32x4A = vorrq_s32(v32x4A, constB32x4); + + // tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); + v32x4B = vshrq_n_s32(v32x4B, 21); + + // tmp16 -= 21;// shift 21 to get result in Q0 + v32x4B = vsubq_s32(v32x4B, twentyOne32x4); + + // tmp16 += (int16_t) inst->qNoise; + // shift to get result in Q(qNoise) + v32x4B = vaddq_s32(v32x4B, qNoise32x4); + + // if (tmp16 < 0) { + // tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); + // } else { + // tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); + // } + v32x4B = vshlq_s32(v32x4A, v32x4B); + + // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); + v16x4 = vqmovn_s32(v32x4B); + + //inst->noiseEstQuantile[i] = tmp16; + vst1_s16(ptr_noiseEstQuantile, v16x4); + } + + // Last iteration: + + // inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 + int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, + *ptr_noiseEstLogQuantile); + int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + + tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); + } else { + tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); + } + *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); +} + +// Noise Estimation +void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise) { + int32_t numerator = FACTOR_Q16; + int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; + int16_t countProd, delta, zeros, frac; + int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + const int16_t log2_const = 22713; + const int16_t width_factor = 21845; int i, s, offset; - numerator = FACTOR_Q16; - tabind = inst->stages - inst->normData; assert(tabind < 9); assert(tabind > -9); @@ -45,13 +124,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo // lmagn in Q8 for (i = 0; i < inst->magnLen; i++) { if (magn[i]) 
{ - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)((((uint32_t)magn[i] << zeros) + & 0x7FFFFFFF) >> 23); assert(frac < 256); // log2(magn(i)) - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); + log2 = (int16_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); // log2(magn(i))*log(2) - lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); + lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15); // + log(2^stages) lmagn[i] += logval; } else { @@ -61,9 +142,9 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo int16x4_t Q3_16x4 = vdup_n_s16(3); int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); - int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor); + int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); - WebRtc_Word16 factor = FACTOR_Q7; + int16_t factor = FACTOR_Q7; if (inst->blockIndex < END_STARTUP_LONG) factor = FACTOR_Q7_STARTUP; @@ -75,10 +156,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo counter = inst->noiseEstCounter[s]; assert(counter < 201); countDiv = WebRtcNsx_kCounterDiv[counter]; - countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); + countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv); // quant_est(...) - WebRtc_Word16 deltaBuff[8]; + int16_t deltaBuff[8]; int16x4_t tmp16x4_0; int16x4_t tmp16x4_1; int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); @@ -103,13 +184,13 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo for (j = 0; j < 8; j++) { if (inst->noiseEstDensity[offset + i + j] > 512) { deltaBuff[j] = WebRtcSpl_DivW32W16ResW16( - numerator, inst->noiseEstDensity[offset + i + j]); + numerator, inst->noiseEstDensity[offset + i + j]); } } // Update log quantile estimate - // tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); + // tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4); tmp16x4_1 = vshrn_n_s32(tmp32x4, 14); tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4); @@ -142,17 +223,19 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0); - // logval is the smallest fixed point representation we can have. Values below - // that will correspond to values in the interval [0, 1], which can't possibly - // occur. + // logval is the smallest fixed point representation we can have. Values + // below that will correspond to values in the interval [0, 1], which + // can't possibly occur. 
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8); // Do the if-else branches: tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2); __asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5)); - __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); - __asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); + __asm__("vbit %q0, %q1, %q2":: + "w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); + __asm__("vbif %q0, %q1, %q2":: + "w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2); // Update density estimate @@ -165,61 +248,61 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2); tmp16x8_3 = vabsq_s16(tmp16x8_3); tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3); - __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); + __asm__("vbit %q0, %q1, %q2":: + "w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1); } // End loop over magnitude spectrum - for (; i < inst->magnLen; i++) { - // compute delta - if (inst->noiseEstDensity[offset + i] > 512) { - delta = WebRtcSpl_DivW32W16ResW16(numerator, - inst->noiseEstDensity[offset + i]); - } else { - delta = FACTOR_Q7; - if (inst->blockIndex < END_STARTUP_LONG) { - // Smaller step size during startup. This prevents from using - // unrealistic values causing overflow. - delta = FACTOR_Q7_STARTUP; - } + // Last iteration over magnitude spectrum: + // compute delta + if (inst->noiseEstDensity[offset + i] > 512) { + delta = WebRtcSpl_DivW32W16ResW16(numerator, + inst->noiseEstDensity[offset + i]); + } else { + delta = FACTOR_Q7; + if (inst->blockIndex < END_STARTUP_LONG) { + // Smaller step size during startup. This prevents from using + // unrealistic values causing overflow. + delta = FACTOR_Q7_STARTUP; } + } + // update log quantile estimate + tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); + if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { + // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 + // CounterDiv=1/(inst->counter[s]+1) in Q15 + tmp16 += 2; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); + inst->noiseEstLogQuantile[offset + i] += tmp16no1; + } else { + tmp16 += 1; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); + // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 + tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); + inst->noiseEstLogQuantile[offset + i] -= tmp16no2; + if (inst->noiseEstLogQuantile[offset + i] < logval) { + // logval is the smallest fixed point representation we can have. + // Values below that will correspond to values in the interval + // [0, 1], which can't possibly occur. 
+ inst->noiseEstLogQuantile[offset + i] = logval; + } + } - // update log quantile estimate - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); - if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { - // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 - // CounterDiv=1/(inst->counter[s]+1) in Q15 - tmp16 += 2; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); - inst->noiseEstLogQuantile[offset + i] += tmp16no1; - } else { - tmp16 += 1; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); - // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); - inst->noiseEstLogQuantile[offset + i] -= tmp16no2; - if (inst->noiseEstLogQuantile[offset + i] < logval) { - // logval is the smallest fixed point representation we can have. - // Values below that will correspond to values in the interval - // [0, 1], which can't possibly occur. - inst->noiseEstLogQuantile[offset + i] = logval; - } - } + // update density estimate + if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) + < WIDTH_Q8) { + tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->noiseEstDensity[offset + i], countProd, 15); + tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + width_factor, countDiv, 15); + inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; + } - // update density estimate - if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) - < WIDTH_Q8) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->noiseEstDensity[offset + i], countProd, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - widthFactor, countDiv, 15); - inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; - } - } // end loop over magnitude spectrum if (counter >= END_STARTUP_LONG) { inst->noiseEstCounter[s] = 0; if (inst->blockIndex >= END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); + UpdateNoiseEstimate(inst, offset); } } inst->noiseEstCounter[s]++; @@ -228,13 +311,417 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo // Sequentially update the noise during startup if (inst->blockIndex < END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); + UpdateNoiseEstimate(inst, offset); } for (i = 0; i < inst->magnLen; i++) { - noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) + noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) } - (*qNoise) = (WebRtc_Word16)inst->qNoise; + (*q_noise) = (int16_t)inst->qNoise; } +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) { + + // (1) Filtering. + + // Fixed point C code for the next block is as follows: + // for (i = 0; i < inst->magnLen; i++) { + // inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i], + // (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + // inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i], + // (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + // } + + int16_t* ptr_real = &inst->real[0]; + int16_t* ptr_imag = &inst->imag[0]; + uint16_t* ptr_noiseSupFilter = &inst->noiseSupFilter[0]; + + // Filter the rest in the frequency domain. + for (; ptr_real < &inst->real[inst->magnLen - 1]; ) { + // Loop unrolled once. Both pointers are incremented by 4 twice. 
+ __asm__ __volatile__( + "vld1.16 d20, [%[ptr_real]]\n\t" + "vld1.16 d22, [%[ptr_imag]]\n\t" + "vld1.16 d23, [%[ptr_noiseSupFilter]]!\n\t" + "vmull.s16 q10, d20, d23\n\t" + "vmull.s16 q11, d22, d23\n\t" + "vshrn.s32 d20, q10, #14\n\t" + "vshrn.s32 d22, q11, #14\n\t" + "vst1.16 d20, [%[ptr_real]]!\n\t" + "vst1.16 d22, [%[ptr_imag]]!\n\t" + + "vld1.16 d18, [%[ptr_real]]\n\t" + "vld1.16 d24, [%[ptr_imag]]\n\t" + "vld1.16 d25, [%[ptr_noiseSupFilter]]!\n\t" + "vmull.s16 q9, d18, d25\n\t" + "vmull.s16 q12, d24, d25\n\t" + "vshrn.s32 d18, q9, #14\n\t" + "vshrn.s32 d24, q12, #14\n\t" + "vst1.16 d18, [%[ptr_real]]!\n\t" + "vst1.16 d24, [%[ptr_imag]]!\n\t" + + // Specify constraints. + :[ptr_imag]"+r"(ptr_imag), + [ptr_real]"+r"(ptr_real), + [ptr_noiseSupFilter]"+r"(ptr_noiseSupFilter) + : + :"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", + "q9", "q10", "q11", "q12" + ); + } + + // Filter the last pair of elements in the frequency domain. + *ptr_real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_real, + (int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages) + *ptr_imag = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_imag, + (int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages) + + // (2) Create spectrum. + + // Fixed point C code for the rest of the function is as follows: + // freq_buf[0] = inst->real[0]; + // freq_buf[1] = -inst->imag[0]; + // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + // tmp16 = (inst->anaLen << 1) - j; + // freq_buf[j] = inst->real[i]; + // freq_buf[j + 1] = -inst->imag[i]; + // freq_buf[tmp16] = inst->real[i]; + // freq_buf[tmp16 + 1] = inst->imag[i]; + // } + // freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; + + freq_buf[0] = inst->real[0]; + freq_buf[1] = -inst->imag[0]; + + int offset = -16; + int16_t* ptr_realImag1 = &freq_buf[2]; + int16_t* ptr_realImag2 = ptr_realImag2 = &freq_buf[(inst->anaLen << 1) - 8]; + ptr_real = &inst->real[1]; + ptr_imag = &inst->imag[1]; + for (; ptr_real < &inst->real[inst->anaLen2 - 11]; ) { + // Loop unrolled once. All pointers are incremented twice. + __asm__ __volatile__( + "vld1.16 d22, [%[ptr_real]]!\n\t" + "vld1.16 d23, [%[ptr_imag]]!\n\t" + // Negate and interleave: + "vmov.s16 d20, d22\n\t" + "vneg.s16 d21, d23\n\t" + "vzip.16 d20, d21\n\t" + // Write 8 elements to &freq_buf[j] + "vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t" + // Interleave and reverse elements: + "vzip.16 d22, d23\n\t" + "vrev64.32 d18, d23\n\t" + "vrev64.32 d19, d22\n\t" + // Write 8 elements to &freq_buf[tmp16] + "vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t" + + "vld1.16 d22, [%[ptr_real]]!\n\t" + "vld1.16 d23, [%[ptr_imag]]!\n\t" + // Negate and interleave: + "vmov.s16 d20, d22\n\t" + "vneg.s16 d21, d23\n\t" + "vzip.16 d20, d21\n\t" + // Write 8 elements to &freq_buf[j] + "vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t" + // Interleave and reverse elements: + "vzip.16 d22, d23\n\t" + "vrev64.32 d18, d23\n\t" + "vrev64.32 d19, d22\n\t" + // Write 8 elements to &freq_buf[tmp16] + "vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t" + + // Specify constraints. 
+ :[ptr_imag]"+r"(ptr_imag), + [ptr_real]"+r"(ptr_real), + [ptr_realImag1]"+r"(ptr_realImag1), + [ptr_realImag2]"+r"(ptr_realImag2) + :[offset]"r"(offset) + :"d18", "d19", "d20", "d21", "d22", "d23" + ); + } + for (ptr_realImag2 += 6; + ptr_real <= &inst->real[inst->anaLen2]; + ptr_real += 1, ptr_imag += 1, ptr_realImag1 += 2, ptr_realImag2 -= 2) { + *ptr_realImag1 = *ptr_real; + *(ptr_realImag1 + 1) = -(*ptr_imag); + *ptr_realImag2 = *ptr_real; + *(ptr_realImag2 + 1) = *ptr_imag; + } + + freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; +} + +// Denormalize the input buffer. +inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) { + int16_t* ptr_real = &inst->real[0]; + int16_t* ptr_in = &in[0]; + + __asm__ __volatile__("vdup.32 q10, %0" :: + "r"((int32_t)(factor - inst->normData)) : "q10"); + for (; ptr_real < &inst->real[inst->anaLen]; ) { + + // Loop unrolled once. Both pointers are incremented. + __asm__ __volatile__( + // tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j], + // factor - inst->normData); + "vld2.16 {d24, d25}, [%[ptr_in]]!\n\t" + "vmovl.s16 q12, d24\n\t" + "vshl.s32 q12, q10\n\t" + // inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + "vqmovn.s32 d24, q12\n\t" + "vst1.16 d24, [%[ptr_real]]!\n\t" + + // tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j], + // factor - inst->normData); + "vld2.16 {d22, d23}, [%[ptr_in]]!\n\t" + "vmovl.s16 q11, d22\n\t" + "vshl.s32 q11, q10\n\t" + // inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + "vqmovn.s32 d22, q11\n\t" + "vst1.16 d22, [%[ptr_real]]!\n\t" + + // Specify constraints. + :[ptr_in]"+r"(ptr_in), + [ptr_real]"+r"(ptr_real) + : + :"d22", "d23", "d24", "d25" + ); + } +} + +// For the noise supress process, synthesis, read out fully processed segment, +// and update synthesis buffer. +void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, + int16_t* out_frame, + int16_t gain_factor) { + int16_t* ptr_real = &inst->real[0]; + int16_t* ptr_syn = &inst->synthesisBuffer[0]; + int16_t* ptr_window = &inst->window[0]; + + // synthesis + __asm__ __volatile__("vdup.16 d24, %0" : : "r"(gain_factor) : "d24"); + // Loop unrolled once. All pointers are incremented in the assembly code. + for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) { + __asm__ __volatile__( + // Load variables. + "vld1.16 d22, [%[ptr_real]]!\n\t" + "vld1.16 d23, [%[ptr_window]]!\n\t" + "vld1.16 d25, [%[ptr_syn]]\n\t" + // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + // inst->window[i], inst->real[i], 14); // Q0, window in Q14 + "vmull.s16 q11, d22, d23\n\t" + "vrshrn.i32 d22, q11, #14\n\t" + // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); + "vmull.s16 q11, d24, d22\n\t" + // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + "vqrshrn.s32 d22, q11, #13\n\t" + // inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16( + // inst->synthesisBuffer[i], tmp16b); // Q0 + "vqadd.s16 d25, d22\n\t" + "vst1.16 d25, [%[ptr_syn]]!\n\t" + + // Load variables. 
+ "vld1.16 d26, [%[ptr_real]]!\n\t" + "vld1.16 d27, [%[ptr_window]]!\n\t" + "vld1.16 d28, [%[ptr_syn]]\n\t" + // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + // inst->window[i], inst->real[i], 14); // Q0, window in Q14 + "vmull.s16 q13, d26, d27\n\t" + "vrshrn.i32 d26, q13, #14\n\t" + // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); + "vmull.s16 q13, d24, d26\n\t" + // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + "vqrshrn.s32 d26, q13, #13\n\t" + // inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16( + // inst->synthesisBuffer[i], tmp16b); // Q0 + "vqadd.s16 d28, d26\n\t" + "vst1.16 d28, [%[ptr_syn]]!\n\t" + + // Specify constraints. + :[ptr_real]"+r"(ptr_real), + [ptr_window]"+r"(ptr_window), + [ptr_syn]"+r"(ptr_syn) + : + :"d22", "d23", "d24", "d25", "d26", "d27", "d28", "q11", "q12", "q13" + ); + } + + int16_t* ptr_out = &out_frame[0]; + ptr_syn = &inst->synthesisBuffer[0]; + // read out fully processed segment + for (; ptr_syn < &inst->synthesisBuffer[inst->blockLen10ms]; ) { + // Loop unrolled once. Both pointers are incremented in the assembly code. + __asm__ __volatile__( + // out_frame[i] = inst->synthesisBuffer[i]; // Q0 + "vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t" + "vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t" + "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" + "vst1.16 {d24, d25}, [%[ptr_out]]!\n\t" + :[ptr_syn]"+r"(ptr_syn), + [ptr_out]"+r"(ptr_out) + : + :"d22", "d23", "d24", "d25" + ); + } + + // Update synthesis buffer. + // C code: + // WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, + // inst->synthesisBuffer + inst->blockLen10ms, + // inst->anaLen - inst->blockLen10ms); + ptr_out = &inst->synthesisBuffer[0], + ptr_syn = &inst->synthesisBuffer[inst->blockLen10ms]; + for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) { + // Loop unrolled once. Both pointers are incremented in the assembly code. + __asm__ __volatile__( + "vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t" + "vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t" + "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" + "vst1.16 {d24, d25}, [%[ptr_out]]!\n\t" + :[ptr_syn]"+r"(ptr_syn), + [ptr_out]"+r"(ptr_out) + : + :"d22", "d23", "d24", "d25" + ); + } + + // C code: + // WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + // + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); + __asm__ __volatile__("vdup.16 q10, %0" : : "r"(0) : "q10"); + for (; ptr_out < &inst->synthesisBuffer[inst->anaLen]; ) { + // Loop unrolled once. Pointer is incremented in the assembly code. + __asm__ __volatile__( + "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" + "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" + :[ptr_out]"+r"(ptr_out) + : + :"d20", "d21" + ); + } +} + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, + int16_t* out, + int16_t* new_speech) { + + int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms]; + int16_t* ptr_out = &inst->analysisBuffer[0]; + + // For lower band update analysis buffer. + // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, + // inst->analysisBuffer + inst->blockLen10ms, + // inst->anaLen - inst->blockLen10ms); + for (; ptr_out < &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms]; ) { + // Loop unrolled once, so both pointers are incremented by 8 twice. 
+ __asm__ __volatile__( + "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" + "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" + "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" + "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" + :[ptr_ana]"+r"(ptr_ana), + [ptr_out]"+r"(ptr_out) + : + :"d20", "d21", "d22", "d23" + ); + } + + // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + // + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); + for (ptr_ana = new_speech; ptr_out < &inst->analysisBuffer[inst->anaLen]; ) { + // Loop unrolled once, so both pointers are incremented by 8 twice. + __asm__ __volatile__( + "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" + "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" + "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" + "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" + :[ptr_ana]"+r"(ptr_ana), + [ptr_out]"+r"(ptr_out) + : + :"d20", "d21", "d22", "d23" + ); + } + + // Window data before FFT + int16_t* ptr_window = &inst->window[0]; + ptr_out = &out[0]; + ptr_ana = &inst->analysisBuffer[0]; + for (; ptr_out < &out[inst->anaLen]; ) { + + // Loop unrolled once, so all pointers are incremented by 4 twice. + __asm__ __volatile__( + "vld1.16 d20, [%[ptr_ana]]!\n\t" + "vld1.16 d21, [%[ptr_window]]!\n\t" + // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + // inst->window[i], inst->analysisBuffer[i], 14); // Q0 + "vmull.s16 q10, d20, d21\n\t" + "vrshrn.i32 d20, q10, #14\n\t" + "vst1.16 d20, [%[ptr_out]]!\n\t" + + "vld1.16 d22, [%[ptr_ana]]!\n\t" + "vld1.16 d23, [%[ptr_window]]!\n\t" + // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + // inst->window[i], inst->analysisBuffer[i], 14); // Q0 + "vmull.s16 q11, d22, d23\n\t" + "vrshrn.i32 d22, q11, #14\n\t" + "vst1.16 d22, [%[ptr_out]]!\n\t" + + // Specify constraints. + :[ptr_ana]"+r"(ptr_ana), + [ptr_window]"+r"(ptr_window), + [ptr_out]"+r"(ptr_out) + : + :"d20", "d21", "d22", "d23", "q10", "q11" + ); + } +} + +// Create a complex number buffer (out[]) as the intput (in[]) interleaved with +// zeros, and normalize it. +inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, + int16_t* in, + int16_t* out) { + int16_t* ptr_out = &out[0]; + int16_t* ptr_in = &in[0]; + + __asm__ __volatile__("vdup.16 d25, %0" : : "r"(0) : "d25"); + __asm__ __volatile__("vdup.16 q10, %0" : : "r"(inst->normData) : "q10"); + for (; ptr_in < &in[inst->anaLen]; ) { + + // Loop unrolled once, so ptr_in is incremented by 8 twice, + // and ptr_out is incremented by 8 four times. + __asm__ __volatile__( + // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) + "vld1.16 {d22, d23}, [%[ptr_in]]!\n\t" + "vshl.s16 q11, q10\n\t" + "vmov d24, d23\n\t" + + // out[j + 1] = 0; // Insert zeros in imaginary part + "vmov d23, d25\n\t" + "vst2.16 {d22, d23}, [%[ptr_out]]!\n\t" + "vst2.16 {d24, d25}, [%[ptr_out]]!\n\t" + + // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) + "vld1.16 {d22, d23}, [%[ptr_in]]!\n\t" + "vshl.s16 q11, q10\n\t" + "vmov d24, d23\n\t" + + // out[j + 1] = 0; // Insert zeros in imaginary part + "vmov d23, d25\n\t" + "vst2.16 {d22, d23}, [%[ptr_out]]!\n\t" + "vst2.16 {d24, d25}, [%[ptr_out]]!\n\t" + + // Specify constraints. + :[ptr_in]"+r"(ptr_in), + [ptr_out]"+r"(ptr_out) + : + :"d22", "d23", "d24", "d25", "q10", "q11" + ); + } +} #endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
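Note: the refactored scalar and NEON paths both call WebRtcSpl_SatW32ToW16() where the removed WebRtcNsx_UpdateNoiseEstimate() clamped by hand (see the deleted TODO near the top of the nsx_core.c hunk). A minimal stand-alone sketch of that saturation step, mirroring the deleted clamp rather than the WebRtcSpl library source:

#include <stdint.h>

// Illustration only: reproduces the clamp removed from
// WebRtcNsx_UpdateNoiseEstimate(); not the WebRtcSpl implementation.
static int16_t SatW32ToW16Sketch(int32_t value) {
  if (value > 32767) {          // WEBRTC_SPL_WORD16_MAX
    value = 32767;
  } else if (value < -32768) {  // WEBRTC_SPL_WORD16_MIN
    value = -32768;
  }
  return (int16_t)value;
}

In the new static UpdateNoiseEstimate() this shows up as inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1), and the NEON version obtains the same saturation from the vqmovn.s32 narrowing instruction, as its inline comments indicate.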