For committing changes in CL 277002, due to file structure changes introduced during
the review of the code. Review URL: http://webrtc-codereview.appspot.com/246005 git-svn-id: http://webrtc.googlecode.com/svn/trunk@805 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -107,7 +107,8 @@ LOCAL_MODULE_TAGS := tests | ||||
| LOCAL_CPP_EXTENSION := .cc | ||||
| LOCAL_SRC_FILES:= \ | ||||
|     $(call all-proto-files-under, test) \ | ||||
|     test/unit_test.cc | ||||
|     test/unit_test.cc \ | ||||
|     ../../../test/testsupport/fileutils.cc | ||||
|  | ||||
| # Flags passed to both C and C++ files. | ||||
| LOCAL_CFLAGS := \ | ||||
| @@ -118,6 +119,7 @@ LOCAL_C_INCLUDES := \ | ||||
|     $(LOCAL_PATH)/interface \ | ||||
|     $(LOCAL_PATH)/../interface \ | ||||
|     $(LOCAL_PATH)/../.. \ | ||||
|     $(LOCAL_PATH)/../../../test \ | ||||
|     $(LOCAL_PATH)/../../system_wrappers/interface \ | ||||
|     $(LOCAL_PATH)/../../common_audio/signal_processing_library/main/interface \ | ||||
|     external/gtest/include \ | ||||
|   | ||||
| @@ -426,46 +426,6 @@ static const WebRtc_Word16 kDeterminantEstMatrix[66] = { | ||||
|   355,    330 | ||||
| }; | ||||
|  | ||||
| void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset) { | ||||
|   WebRtc_Word32 tmp32no1 = 0; | ||||
|   WebRtc_Word32 tmp32no2 = 0; | ||||
|  | ||||
|   WebRtc_Word16 tmp16no1 = 0; | ||||
|   WebRtc_Word16 tmp16no2 = 0; | ||||
|   const WebRtc_Word16 kExp2Const = 11819; // Q13 | ||||
|  | ||||
|   int i = 0; | ||||
|  | ||||
|   tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, | ||||
|                                    inst->magnLen); | ||||
|   // Guarantee a Q-domain as high as possible and still fit in int16 | ||||
|   inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       kExp2Const, tmp16no2, 21); | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     // inst->quantile[i]=exp(inst->lquantile[offset+i]); | ||||
|     // in Q21 | ||||
|     tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, | ||||
|                                     inst->noiseEstLogQuantile[offset + i]); | ||||
|     tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac | ||||
|     tmp16no1 = -(WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); | ||||
|     tmp16no1 += 21;// shift 21 to get result in Q0 | ||||
|     tmp16no1 -= (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise) | ||||
|     if (tmp16no1 > 0) { | ||||
|       tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, tmp16no1); | ||||
|     } else { | ||||
|       tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, -tmp16no1); | ||||
|     } | ||||
|     // TODO(bjornv): Replace with WebRtcSpl_SatW32ToW16(...) when available. | ||||
|     if (tmp32no1 > 32767) { | ||||
|       tmp32no1 = 32767; | ||||
|     } else if (tmp32no1 < -32768) { | ||||
|       tmp32no1 = -32768; | ||||
|     } | ||||
|     tmp16no1 = (WebRtc_Word16) tmp32no1; | ||||
|     inst->noiseEstQuantile[i] = tmp16no1; | ||||
|   } | ||||
| } | ||||
|  | ||||
| void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst, | ||||
|                                            WebRtc_Word16 pink_noise_exp_avg, | ||||
|                                            WebRtc_Word32 pink_noise_num_avg, | ||||
| @@ -675,128 +635,6 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) { | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| #if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, | ||||
|                                WebRtc_Word16* qNoise) { | ||||
|   WebRtc_Word32 numerator; | ||||
|  | ||||
|   WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; | ||||
|   WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; | ||||
|   WebRtc_Word16 log2Const = 22713; // Q15 | ||||
|   WebRtc_Word16 widthFactor = 21845; | ||||
|  | ||||
|   int i, s, offset; | ||||
|  | ||||
|   numerator = FACTOR_Q16; | ||||
|  | ||||
|   tabind = inst->stages - inst->normData; | ||||
|   assert(tabind < 9); | ||||
|   assert(tabind > -9); | ||||
|   if (tabind < 0) { | ||||
|     logval = -WebRtcNsx_kLogTable[-tabind]; | ||||
|   } else { | ||||
|     logval = WebRtcNsx_kLogTable[tabind]; | ||||
|   } | ||||
|  | ||||
|   // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) | ||||
|   // magn is in Q(-stages), and the real lmagn values are: | ||||
|   // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) | ||||
|   // lmagn in Q8 | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     if (magn[i]) { | ||||
|       zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); | ||||
|       frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); | ||||
|       // log2(magn(i)) | ||||
|       assert(frac < 256); | ||||
|       log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); | ||||
|       // log2(magn(i))*log(2) | ||||
|       lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); | ||||
|       // + log(2^stages) | ||||
|       lmagn[i] += logval; | ||||
|     } else { | ||||
|       lmagn[i] = logval;//0; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // loop over simultaneous estimates | ||||
|   for (s = 0; s < SIMULT; s++) { | ||||
|     offset = s * inst->magnLen; | ||||
|  | ||||
|     // Get counter values from state | ||||
|     counter = inst->noiseEstCounter[s]; | ||||
|     assert(counter < 201); | ||||
|     countDiv = WebRtcNsx_kCounterDiv[counter]; | ||||
|     countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); | ||||
|  | ||||
|     // quant_est(...) | ||||
|     for (i = 0; i < inst->magnLen; i++) { | ||||
|       // compute delta | ||||
|       if (inst->noiseEstDensity[offset + i] > 512) { | ||||
|         delta = WebRtcSpl_DivW32W16ResW16(numerator, | ||||
|                                           inst->noiseEstDensity[offset + i]); | ||||
|       } else { | ||||
|         delta = FACTOR_Q7; | ||||
|         if (inst->blockIndex < END_STARTUP_LONG) { | ||||
|           // Smaller step size during startup. This prevents from using | ||||
|           // unrealistic values causing overflow. | ||||
|           delta = FACTOR_Q7_STARTUP; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // update log quantile estimate | ||||
|       tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|       if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { | ||||
|         // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 | ||||
|         // CounterDiv=1/(inst->counter[s]+1) in Q15 | ||||
|         tmp16 += 2; | ||||
|         tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); | ||||
|         inst->noiseEstLogQuantile[offset + i] += tmp16no1; | ||||
|       } else { | ||||
|         tmp16 += 1; | ||||
|         tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); | ||||
|         // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); | ||||
|         inst->noiseEstLogQuantile[offset + i] -= tmp16no2; | ||||
|         if (inst->noiseEstLogQuantile[offset + i] < logval) { | ||||
|           // This is the smallest fixed point representation we can | ||||
|           // have, hence we limit the output. | ||||
|           inst->noiseEstLogQuantile[offset + i] = logval; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // update density estimate | ||||
|       if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) | ||||
|           < WIDTH_Q8) { | ||||
|         tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                      inst->noiseEstDensity[offset + i], countProd, 15); | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor, | ||||
|                                                                        countDiv, 15); | ||||
|         inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; | ||||
|       } | ||||
|     } // end loop over magnitude spectrum | ||||
|  | ||||
|     if (counter >= END_STARTUP_LONG) { | ||||
|       inst->noiseEstCounter[s] = 0; | ||||
|       if (inst->blockIndex >= END_STARTUP_LONG) { | ||||
|         WebRtcNsx_UpdateNoiseEstimate(inst, offset); | ||||
|       } | ||||
|     } | ||||
|     inst->noiseEstCounter[s]++; | ||||
|  | ||||
|   } // end loop over simultaneous estimates | ||||
|  | ||||
|   // Sequentially update the noise during startup | ||||
|   if (inst->blockIndex < END_STARTUP_LONG) { | ||||
|     WebRtcNsx_UpdateNoiseEstimate(inst, offset); | ||||
|   } | ||||
|  | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) | ||||
|   } | ||||
|   (*qNoise) = (WebRtc_Word16)inst->qNoise; | ||||
| } | ||||
| #endif  // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) | ||||
|  | ||||
| // Extract thresholds for feature parameters | ||||
| // histograms are computed over some window_size (given by window_pars) | ||||
| // thresholds and weights are extracted every window | ||||
| @@ -1424,18 +1262,9 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* | ||||
|   int right_shifts_in_magnU16 = 0; | ||||
|   int right_shifts_in_initMagnEst = 0; | ||||
|  | ||||
|   // For lower band do all processing | ||||
|   // update analysis buffer for L band | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, | ||||
|                         inst->anaLen - inst->blockLen10ms); | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, | ||||
|                         speechFrame, inst->blockLen10ms); | ||||
|   // Update analysis buffer for lower band, and window data before FFT. | ||||
|   WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame); | ||||
|  | ||||
|   // Window data before FFT | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|         inst->window[i], inst->analysisBuffer[i], 14); // Q0 | ||||
|   } | ||||
|   // Get input energy | ||||
|   inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn)); | ||||
|  | ||||
| @@ -1459,11 +1288,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* | ||||
|   right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); | ||||
|  | ||||
|   // create realImag as winData interleaved with zeros (= imag. part), normalize it | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     j = WEBRTC_SPL_LSHIFT_W16(i, 1); | ||||
|     realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData) | ||||
|     realImag[j + 1] = 0; // Insert zeros in imaginary part | ||||
|   } | ||||
|   WebRtcNsx_CreateComplexBuffer(inst, winData, realImag); | ||||
|  | ||||
|   // bit-reverse position of elements in array and FFT the array | ||||
|   WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages) | ||||
| @@ -1492,7 +1317,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* | ||||
|       tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); | ||||
|       inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) | ||||
|  | ||||
|       magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) | ||||
|       magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) | ||||
|       inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) | ||||
|     } | ||||
|   } else { | ||||
| @@ -1541,7 +1366,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* | ||||
|       tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); | ||||
|       inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) | ||||
|  | ||||
|       magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) | ||||
|       magnU16[i] = (WebRtc_UWord16)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) | ||||
|       inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) | ||||
|  | ||||
|       // Switch initMagnEst to Q(minNorm-stages) | ||||
| @@ -1607,8 +1432,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* | ||||
|       tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9); | ||||
|       tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10); | ||||
|       tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4); | ||||
|       tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT( | ||||
|           (WebRtc_Word16)(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2); | ||||
|       tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16) | ||||
|                        (inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2); | ||||
|       matrix_determinant = (WebRtc_Word16)tmp_1_w32; | ||||
|       sum_log_i -= kSumLogIndex[65]; // Q5 | ||||
|       sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 | ||||
| @@ -1684,40 +1509,16 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { | ||||
|                             inst->blockLen10ms); | ||||
|     return; | ||||
|   } | ||||
|   // Filter the data in the frequency domain | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( | ||||
|         inst->real[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|     inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( | ||||
|         inst->imag[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|   } | ||||
|   // back to time domain | ||||
|   // Create spectrum | ||||
|   realImag[0] = inst->real[0]; | ||||
|   realImag[1] = -inst->imag[0]; | ||||
|   for (i = 1; i < inst->anaLen2; i++) { | ||||
|     j = WEBRTC_SPL_LSHIFT_W16(i, 1); | ||||
|     tmp16no1 = (inst->anaLen << 1) - j; | ||||
|     realImag[j] = inst->real[i]; | ||||
|     realImag[j + 1] = -inst->imag[i]; | ||||
|     realImag[tmp16no1] = inst->real[i]; | ||||
|     realImag[tmp16no1 + 1] = inst->imag[i]; | ||||
|   } | ||||
|   realImag[inst->anaLen] = inst->real[inst->anaLen2]; | ||||
|   realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; | ||||
|  | ||||
|   // Filter the data in the frequency domain, and create spectrum. | ||||
|   WebRtcNsx_PrepareSpectrum(inst, realImag); | ||||
|  | ||||
|   // bit-reverse position of elements in array and IFFT it | ||||
|   WebRtcSpl_ComplexBitReverse(realImag, inst->stages); | ||||
|   outCIFFT = WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1); | ||||
|  | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     j = WEBRTC_SPL_LSHIFT_W16(i, 1); | ||||
|     tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j], | ||||
|                                     outCIFFT - inst->normData); | ||||
|     inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, | ||||
|                                                   tmp32no1, | ||||
|                                                   WEBRTC_SPL_WORD16_MIN); | ||||
|   } | ||||
|   // Denormalize. | ||||
|   WebRtcNsx_Denormalize(inst, realImag, outCIFFT); | ||||
|  | ||||
|   //scale factor: only do it after END_STARTUP_LONG time | ||||
|   gainFactor = 8192; // 8192 = Q13(1.0) | ||||
| @@ -1754,26 +1555,8 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { | ||||
|     gainFactor = tmp16no1 + tmp16no2; // Q13 | ||||
|   } // out of flag_gain_map==1 | ||||
|  | ||||
|   // synthesis | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i], | ||||
|                                                                    inst->real[i], 14); // Q0, window in Q14 | ||||
|     tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0 | ||||
|     // Down shift with rounding | ||||
|     tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, | ||||
|                                              WEBRTC_SPL_WORD16_MIN); // Q0 | ||||
|     inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0 | ||||
|   } | ||||
|  | ||||
|   // read out fully processed segment | ||||
|   for (i = 0; i < inst->blockLen10ms; i++) { | ||||
|     outFrame[i] = inst->synthesisBuffer[i]; // Q0 | ||||
|   } | ||||
|   // update synthesis buffer | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, | ||||
|                         inst->anaLen - inst->blockLen10ms); | ||||
|   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, | ||||
|                           inst->blockLen10ms); | ||||
|   // Synthesis, read out fully processed segment, and update synthesis buffer. | ||||
|   WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor); | ||||
| } | ||||
|  | ||||
| int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB, | ||||
| @@ -1815,6 +1598,12 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram | ||||
|   int flag, sign; | ||||
|   int q_domain_to_use = 0; | ||||
|  | ||||
|   // Code for ARMv7-Neon platform assumes the following: | ||||
|   assert(inst->anaLen % 16 == 0); | ||||
|   assert(inst->anaLen2 % 8 == 0); | ||||
|   assert(inst->blockLen10ms % 16 == 0); | ||||
|   assert(inst->magnLen == inst->anaLen2 + 1); | ||||
|  | ||||
| #ifdef NS_FILEDEBUG | ||||
|   fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile); | ||||
| #endif | ||||
| @@ -2080,8 +1869,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram | ||||
|       if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) { | ||||
|         inst->featureSpecDiff = 0x007FFFFF; | ||||
|       } else { | ||||
|         inst->featureSpecDiff = WEBRTC_SPL_MIN( | ||||
|             0x007FFFFF, WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); | ||||
|         inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, | ||||
|             WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); | ||||
|       } | ||||
|     } | ||||
|  | ||||
| @@ -2317,7 +2106,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram | ||||
|     } | ||||
|     avgProbSpeechHB = (WebRtc_Word16)(4096 | ||||
|         - WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12 | ||||
|     avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14 | ||||
|     avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32( | ||||
|         tmpU32no1, inst->stages - 3); // Q14 | ||||
|  | ||||
|     // // original FLOAT code | ||||
|     // // gain based on speech probability: | ||||
| @@ -2368,3 +2158,264 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram | ||||
|  | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| #if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) | ||||
|  | ||||
| // Recompute inst->qNoise and inst->noiseEstQuantile[] from the inst->magnLen log-quantile values starting at inst->noiseEstLogQuantile[offset]. | ||||
| static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) { | ||||
|   WebRtc_Word32 tmp32no1 = 0; | ||||
|   WebRtc_Word32 tmp32no2 = 0; | ||||
|   WebRtc_Word16 tmp16 = 0; | ||||
|   const WebRtc_Word16 kExp2Const = 11819; // Q13 | ||||
|  | ||||
|   int i = 0; | ||||
|  | ||||
|   tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, | ||||
|                                    inst->magnLen); | ||||
|   // Pick the Q-domain from the largest log-quantile: as high as possible while still fitting in int16 | ||||
|   inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                    kExp2Const, tmp16, 21); | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     // inst->quantile[i]=exp(inst->lquantile[offset+i]); | ||||
|     // product below is in Q21: the low 21 bits are the fraction, the bits above are the integer exponent | ||||
|     tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, | ||||
|                                     inst->noiseEstLogQuantile[offset + i]); | ||||
|     tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac | ||||
|     tmp16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); | ||||
|     tmp16 -= 21;// subtract 21 to get the result in Q0 | ||||
|     tmp16 += (WebRtc_Word16) inst->qNoise; // add qNoise to get the result in Q(qNoise); tmp16 is now the net shift | ||||
|     if (tmp16 < 0) { | ||||
|       tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); | ||||
|     } else { | ||||
|       tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); | ||||
|     } | ||||
|     inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Noise estimation: updates the SIMULT log-quantile and density estimates in inst from magn[], then copies inst->noiseEstQuantile[] into noise[] and inst->qNoise into *q_noise. | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, | ||||
|                                uint16_t* magn, | ||||
|                                uint32_t* noise, | ||||
|                                int16_t* q_noise) { | ||||
|   WebRtc_Word32 numerator = FACTOR_Q16; | ||||
|   WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv; | ||||
|   WebRtc_Word16 countProd, delta, zeros, frac; | ||||
|   WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; | ||||
|   const int16_t log2_const = 22713; // Q15 | ||||
|   const int16_t width_factor = 21845; | ||||
|  | ||||
|   int i, s, offset; | ||||
|  | ||||
|   tabind = inst->stages - inst->normData; | ||||
|   assert(tabind < 9); | ||||
|   assert(tabind > -9); | ||||
|   if (tabind < 0) { | ||||
|     logval = -WebRtcNsx_kLogTable[-tabind]; | ||||
|   } else { | ||||
|     logval = WebRtcNsx_kLogTable[tabind]; | ||||
|   } | ||||
|  | ||||
|   // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) | ||||
|   // magn is in Q(-stages), and the real lmagn values are: | ||||
|   // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) | ||||
|   // lmagn in Q8 | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     if (magn[i]) { | ||||
|       zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); | ||||
|       frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) | ||||
|                               & 0x7FFFFFFF) >> 23); | ||||
|       // log2(magn(i)) | ||||
|       assert(frac < 256); | ||||
|       log2 = (WebRtc_Word16)(((31 - zeros) << 8) | ||||
|                              + WebRtcNsx_kLogTableFrac[frac]); | ||||
|       // log2(magn(i))*log(2) | ||||
|       lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15); | ||||
|       // + log(2^stages) | ||||
|       lmagn[i] += logval; | ||||
|     } else { | ||||
|       lmagn[i] = logval;//0; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // loop over the SIMULT simultaneous estimates | ||||
|   for (s = 0; s < SIMULT; s++) { | ||||
|     offset = s * inst->magnLen; | ||||
|  | ||||
|     // Get counter values from state | ||||
|     counter = inst->noiseEstCounter[s]; | ||||
|     assert(counter < 201); | ||||
|     countDiv = WebRtcNsx_kCounterDiv[counter]; | ||||
|     countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); | ||||
|  | ||||
|     // quant_est(...) | ||||
|     for (i = 0; i < inst->magnLen; i++) { | ||||
|       // compute delta | ||||
|       if (inst->noiseEstDensity[offset + i] > 512) { | ||||
|         delta = WebRtcSpl_DivW32W16ResW16(numerator, | ||||
|                                           inst->noiseEstDensity[offset + i]); | ||||
|       } else { | ||||
|         delta = FACTOR_Q7; | ||||
|         if (inst->blockIndex < END_STARTUP_LONG) { | ||||
|           // Smaller step size during startup. This avoids using | ||||
|           // unrealistic values that cause overflow. | ||||
|           delta = FACTOR_Q7_STARTUP; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // update log quantile estimate | ||||
|       tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|       if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { | ||||
|         // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 | ||||
|         // CounterDiv=1/(inst->counter[s]+1) in Q15 | ||||
|         tmp16 += 2; | ||||
|         tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); | ||||
|         inst->noiseEstLogQuantile[offset + i] += tmp16no1; | ||||
|       } else { | ||||
|         tmp16 += 1; | ||||
|         tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); | ||||
|         // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); | ||||
|         inst->noiseEstLogQuantile[offset + i] -= tmp16no2; | ||||
|         if (inst->noiseEstLogQuantile[offset + i] < logval) { | ||||
|           // This is the smallest fixed point representation we can | ||||
|           // have, hence we limit the output. | ||||
|           inst->noiseEstLogQuantile[offset + i] = logval; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // update density estimate | ||||
|       if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) | ||||
|           < WIDTH_Q8) { | ||||
|         tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                      inst->noiseEstDensity[offset + i], countProd, 15); | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                      width_factor, countDiv, 15); | ||||
|         inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; | ||||
|       } | ||||
|     } // end loop over magnitude spectrum | ||||
|  | ||||
|     if (counter >= END_STARTUP_LONG) { | ||||
|       inst->noiseEstCounter[s] = 0; | ||||
|       if (inst->blockIndex >= END_STARTUP_LONG) { | ||||
|         UpdateNoiseEstimate(inst, offset); | ||||
|       } | ||||
|     } | ||||
|     inst->noiseEstCounter[s]++; | ||||
|  | ||||
|   } // end loop over simultaneous estimates | ||||
|  | ||||
|   // Sequentially update the noise during startup; offset still holds the value assigned in the last loop iteration above | ||||
|   if (inst->blockIndex < END_STARTUP_LONG) { | ||||
|     UpdateNoiseEstimate(inst, offset); | ||||
|   } | ||||
|  | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) | ||||
|   } | ||||
|   (*q_noise) = (WebRtc_Word16)inst->qNoise; | ||||
| } | ||||
|  | ||||
| // Filter the data in the frequency domain (inst->real[]/inst->imag[] times inst->noiseSupFilter[], right-shifted by 14), then build the interleaved re/im spectrum in freq_buf: bins 1..anaLen2-1 are also mirrored into the upper half with the opposite sign on the imaginary part. | ||||
| void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) { | ||||
|   int i = 0, j = 0; | ||||
|   int16_t tmp16 = 0; | ||||
|  | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i], | ||||
|         (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|     inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i], | ||||
|         (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|   } | ||||
|  | ||||
|   freq_buf[0] = inst->real[0]; | ||||
|   freq_buf[1] = -inst->imag[0]; | ||||
|   for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { | ||||
|     tmp16 = (inst->anaLen << 1) - j; | ||||
|     freq_buf[j] = inst->real[i]; | ||||
|     freq_buf[j + 1] = -inst->imag[i]; | ||||
|     freq_buf[tmp16] = inst->real[i]; | ||||
|     freq_buf[tmp16 + 1] = inst->imag[i]; | ||||
|   } | ||||
|   freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; | ||||
|   freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; | ||||
| } | ||||
|  | ||||
| // Denormalize the input buffer: each even-indexed element of in[] is shifted by (factor - inst->normData), saturated to 16 bits and stored in inst->real[]. NOTE(review): plain `inline` without static/extern provides no external definition under C99 inline rules - confirm the C dialect used by the build. | ||||
| inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) { | ||||
|   int i = 0, j = 0; | ||||
|   int32_t tmp32 = 0; | ||||
|   for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) { | ||||
|     tmp32 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)in[j], | ||||
|                                  factor - inst->normData); | ||||
|     inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|   } | ||||
| } | ||||
|  | ||||
| // For the noise suppression process: synthesize from inst->real[], read out | ||||
| // the fully processed segment to out_frame, and update the synthesis buffer. | ||||
| void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, | ||||
|                                int16_t* out_frame, | ||||
|                                int16_t gain_factor) { | ||||
|   int i = 0; | ||||
|   int16_t tmp16a = 0; | ||||
|   int16_t tmp16b = 0; | ||||
|   int32_t tmp32 = 0; | ||||
|  | ||||
|   // synthesis: window the frame, apply the gain, and add (saturating) into the synthesis buffer | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                  inst->window[i], inst->real[i], 14); // Q0, window in Q14 | ||||
|     tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0 | ||||
|     // Saturate the gained sample to 16 bits | ||||
|     tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|     inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], | ||||
|                                                       tmp16b); // Q0 | ||||
|   } | ||||
|  | ||||
|   // read out fully processed segment (the first blockLen10ms samples) | ||||
|   for (i = 0; i < inst->blockLen10ms; i++) { | ||||
|     out_frame[i] = inst->synthesisBuffer[i]; // Q0 | ||||
|   } | ||||
|  | ||||
|   // update synthesis buffer: move the remaining samples to the front and zero the last blockLen10ms samples | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, | ||||
|                         inst->synthesisBuffer + inst->blockLen10ms, | ||||
|                         inst->anaLen - inst->blockLen10ms); | ||||
|   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer | ||||
|       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); | ||||
| } | ||||
|  | ||||
| // Update the analysis buffer for the lower band with new_speech, then write the windowed buffer (inst->anaLen samples) to out before the FFT. | ||||
| void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, | ||||
|                               int16_t* out, | ||||
|                               int16_t* new_speech) { | ||||
|   int i = 0; | ||||
|  | ||||
|   // For lower band update analysis buffer: drop the oldest blockLen10ms samples and append new_speech at the end. | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, | ||||
|                         inst->analysisBuffer + inst->blockLen10ms, | ||||
|                         inst->anaLen - inst->blockLen10ms); | ||||
|   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer | ||||
|       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); | ||||
|  | ||||
|   // Window data before FFT (product with inst->window[] right-shifted by 14 with rounding). | ||||
|   for (i = 0; i < inst->anaLen; i++) { | ||||
|     out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                inst->window[i], inst->analysisBuffer[i], 14); // Q0 | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Create a complex number buffer (out[]) as the input (in[]) interleaved with | ||||
| // zeros, and normalize it by left-shifting each sample by inst->normData. NOTE(review): plain `inline` without static/extern provides no external definition under C99 inline rules - confirm the C dialect used by the build. | ||||
| inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, | ||||
|                                           int16_t* in, | ||||
|                                           int16_t* out) { | ||||
|   int i = 0, j = 0; | ||||
|   for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) { | ||||
|     out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) | ||||
|     out[j + 1] = 0; // Insert zeros in imaginary part | ||||
|   } | ||||
| } | ||||
|  | ||||
| #endif  // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) | ||||
|   | ||||
| @@ -133,7 +133,7 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs); | ||||
|  *      - mode       : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) | ||||
|  * | ||||
|  * Output: | ||||
|  *      - NS_inst      : Initialized instance | ||||
|  *      - inst       : Initialized instance | ||||
|  * | ||||
|  * Return value      :  0 - Ok | ||||
|  *                     -1 - Error | ||||
| @@ -158,16 +158,47 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode); | ||||
|  * Return value         :  0 - OK | ||||
|  *                        -1 - Error | ||||
|  */ | ||||
| int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh, | ||||
|                           short* outFrameLow, short* outFrameHigh); | ||||
| int WebRtcNsx_ProcessCore(NsxInst_t* inst, | ||||
|                           short* inFrameLow, | ||||
|                           short* inFrameHigh, | ||||
|                           short* outFrameLow, | ||||
|                           short* outFrameHigh); | ||||
|  | ||||
| /**************************************************************************** | ||||
|  * Internal functions and variable declarations shared with optimized code. | ||||
|  */ | ||||
| void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset); | ||||
|  | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, | ||||
|                                WebRtc_Word16* qNoise); | ||||
| // Noise Estimation. | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, | ||||
|                                uint16_t* magn, | ||||
|                                uint32_t* noise, | ||||
|                                int16_t* q_noise); | ||||
|  | ||||
| // Filter the data in the frequency domain, and create spectrum. | ||||
| void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, | ||||
|                                int16_t* freq_buff); | ||||
|  | ||||
| // For the noise suppression process, synthesis, read out fully processed | ||||
| // segment, and update synthesis buffer. | ||||
| void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, | ||||
|                                int16_t* out_frame, | ||||
|                                int16_t gain_factor); | ||||
|  | ||||
| // Update analysis buffer for lower band, and window data before FFT. | ||||
| void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, | ||||
|                               int16_t* out, | ||||
|                               int16_t* new_speech); | ||||
|  | ||||
| // Denormalize the input buffer. | ||||
| inline void WebRtcNsx_Denormalize(NsxInst_t* inst, | ||||
|                                   int16_t* in, | ||||
|                                   int factor); | ||||
|  | ||||
| // Create a complex number buffer, as the input interleaved with zeros, | ||||
| // and normalize it. | ||||
| inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, | ||||
|                                           int16_t* in, | ||||
|                                           int16_t* out); | ||||
|  | ||||
| extern const WebRtc_Word16 WebRtcNsx_kLogTable[9]; | ||||
| extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256]; | ||||
|   | ||||
| @@ -15,19 +15,98 @@ | ||||
| #include <arm_neon.h> | ||||
| #include <assert.h> | ||||
|  | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, | ||||
|                                WebRtc_Word16* qNoise) { | ||||
|   WebRtc_Word32 numerator; | ||||
| // Update the noise estimation information. | ||||
| // | ||||
| // Derives inst->qNoise from the largest value found in | ||||
| // inst->noiseEstLogQuantile[offset .. offset + inst->magnLen - 1], then fills | ||||
| // inst->noiseEstQuantile[] with the exponentiated log-quantiles, saturated to | ||||
| // 16 bits and expressed in Q(qNoise). Elements are handled four at a time with | ||||
| // NEON intrinsics; exactly one trailing element is handled by scalar code. | ||||
| // | ||||
| //   inst   - instance whose noiseEstLogQuantile[] is read and whose qNoise | ||||
| //            and noiseEstQuantile[] are written. | ||||
| //   offset - index of the first log-quantile entry to use. | ||||
| static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) { | ||||
|   int i = 0; | ||||
|   const int16_t kExp2Const = 11819; // Q13 | ||||
|   int16_t* ptr_noiseEstLogQuantile = NULL; | ||||
|   int16_t* ptr_noiseEstQuantile = NULL; | ||||
|   // Constants broadcast to every vector lane for the loop below. | ||||
|   int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); | ||||
|   int32x4_t twentyOne32x4 = vdupq_n_s32(21); | ||||
|   int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); | ||||
|   int32x4_t constB32x4 = vdupq_n_s32(0x200000); | ||||
|  | ||||
|   // NOTE(review): the four declarations below redeclare names such as tmp16 | ||||
|   // that are declared again further down in this scope; they look like | ||||
|   // removed-side diff lines that ended up in this listing -- confirm against | ||||
|   // the real file. | ||||
|   WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; | ||||
|   WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; | ||||
|   WebRtc_Word16 log2Const = 22713; | ||||
|   WebRtc_Word16 widthFactor = 21845; | ||||
|   int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, | ||||
|                                         inst->magnLen); | ||||
|  | ||||
|   // Guarantee a Q-domain as high as possible and still fit in int16 | ||||
|   inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, | ||||
|                                                                  tmp16, | ||||
|                                                                  21); | ||||
|  | ||||
|   int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); | ||||
|  | ||||
|   // Vector part: four elements per iteration, running while at least four | ||||
|   // elements before index magnLen - 1 remain. Each commented-out statement | ||||
|   // is the scalar operation that the following intrinsic performs per lane. | ||||
|   for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], | ||||
|        ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; | ||||
|        ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; | ||||
|        ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { | ||||
|  | ||||
|     // tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, | ||||
|     //                                inst->noiseEstLogQuantile[offset + i]); | ||||
|     int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); | ||||
|     int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); | ||||
|  | ||||
|     // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac | ||||
|     int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); | ||||
|     v32x4A = vorrq_s32(v32x4A, constB32x4); | ||||
|  | ||||
|     // tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); | ||||
|     v32x4B = vshrq_n_s32(v32x4B, 21); | ||||
|  | ||||
|     // tmp16 -= 21;// shift 21 to get result in Q0 | ||||
|     v32x4B = vsubq_s32(v32x4B, twentyOne32x4); | ||||
|  | ||||
|     // tmp16 += (int16_t) inst->qNoise; | ||||
|     // shift to get result in Q(qNoise) | ||||
|     v32x4B = vaddq_s32(v32x4B, qNoise32x4); | ||||
|  | ||||
|     // if (tmp16 < 0) { | ||||
|     //   tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); | ||||
|     // } else { | ||||
|     //   tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); | ||||
|     // } | ||||
|     // A single vector shift stands in for both branches here. | ||||
|     v32x4B = vshlq_s32(v32x4A, v32x4B); | ||||
|  | ||||
|     // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); | ||||
|     v16x4 = vqmovn_s32(v32x4B); | ||||
|  | ||||
|     //inst->noiseEstQuantile[i] = tmp16; | ||||
|     vst1_s16(ptr_noiseEstQuantile, v16x4); | ||||
|   } | ||||
|  | ||||
|   // Last iteration: | ||||
|   // Scalar handling of the single element both pointers now reference. | ||||
|   // NOTE(review): this presumably relies on inst->magnLen being of the form | ||||
|   // 4 * k + 1, so that exactly one element is left over -- confirm. | ||||
|    | ||||
|   // inst->quantile[i]=exp(inst->lquantile[offset+i]); | ||||
|   // in Q21 | ||||
|   int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, | ||||
|                                           *ptr_noiseEstLogQuantile); | ||||
|   int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac | ||||
|  | ||||
|   tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); | ||||
|   tmp16 -= 21;// shift 21 to get result in Q0 | ||||
|   tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) | ||||
|   if (tmp16 < 0) { | ||||
|     tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); | ||||
|   } else { | ||||
|     tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); | ||||
|   } | ||||
|   *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); | ||||
| } | ||||
|  | ||||
| // Noise Estimation | ||||
| void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, | ||||
|                                uint16_t* magn, | ||||
|                                uint32_t* noise, | ||||
|                                int16_t* q_noise) { | ||||
|   int32_t numerator = FACTOR_Q16; | ||||
|   int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; | ||||
|   int16_t countProd, delta, zeros, frac; | ||||
|   int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; | ||||
|   const int16_t log2_const = 22713; | ||||
|   const int16_t width_factor = 21845; | ||||
|  | ||||
|   int i, s, offset; | ||||
|  | ||||
|   numerator = FACTOR_Q16; | ||||
|  | ||||
|   tabind = inst->stages - inst->normData; | ||||
|   assert(tabind < 9); | ||||
|   assert(tabind > -9); | ||||
| @@ -45,13 +124,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|   // lmagn in Q8 | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     if (magn[i]) { | ||||
|       zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); | ||||
|       frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); | ||||
|       zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); | ||||
|       frac = (int16_t)((((uint32_t)magn[i] << zeros) | ||||
|                               & 0x7FFFFFFF) >> 23); | ||||
|       assert(frac < 256); | ||||
|       // log2(magn(i)) | ||||
|       log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); | ||||
|       log2 = (int16_t)(((31 - zeros) << 8) | ||||
|                              + WebRtcNsx_kLogTableFrac[frac]); | ||||
|       // log2(magn(i))*log(2) | ||||
|       lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); | ||||
|       lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15); | ||||
|       // + log(2^stages) | ||||
|       lmagn[i] += logval; | ||||
|     } else { | ||||
| @@ -61,9 +142,9 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|  | ||||
|   int16x4_t Q3_16x4  = vdup_n_s16(3); | ||||
|   int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); | ||||
|   int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor); | ||||
|   int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); | ||||
|  | ||||
|   WebRtc_Word16 factor = FACTOR_Q7; | ||||
|   int16_t factor = FACTOR_Q7; | ||||
|   if (inst->blockIndex < END_STARTUP_LONG) | ||||
|     factor = FACTOR_Q7_STARTUP; | ||||
|  | ||||
| @@ -75,10 +156,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|     counter = inst->noiseEstCounter[s]; | ||||
|     assert(counter < 201); | ||||
|     countDiv = WebRtcNsx_kCounterDiv[counter]; | ||||
|     countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); | ||||
|     countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv); | ||||
|  | ||||
|     // quant_est(...) | ||||
|     WebRtc_Word16 deltaBuff[8]; | ||||
|     int16_t deltaBuff[8]; | ||||
|     int16x4_t tmp16x4_0; | ||||
|     int16x4_t tmp16x4_1; | ||||
|     int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); | ||||
| @@ -109,7 +190,7 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|  | ||||
|       // Update log quantile estimate | ||||
|  | ||||
|       // tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|       // tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|       tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4); | ||||
|       tmp16x4_1 = vshrn_n_s32(tmp32x4, 14); | ||||
|       tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4); | ||||
| @@ -142,17 +223,19 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|       tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep | ||||
|       tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0); | ||||
|  | ||||
|       // logval is the smallest fixed point representation we can have. Values below | ||||
|       // that will correspond to values in the interval [0, 1], which can't possibly | ||||
|       // occur. | ||||
|       // logval is the smallest fixed point representation we can have. Values | ||||
|       // below that will correspond to values in the interval [0, 1], which | ||||
|       // can't possibly occur. | ||||
|       tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8); | ||||
|  | ||||
|       // Do the if-else branches: | ||||
|       tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines | ||||
|       tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2); | ||||
|       __asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5)); | ||||
|       __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); | ||||
|       __asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); | ||||
|       __asm__("vbit %q0, %q1, %q2":: | ||||
|               "w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); | ||||
|       __asm__("vbif %q0, %q1, %q2":: | ||||
|               "w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); | ||||
|       vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2); | ||||
|  | ||||
|       // Update density estimate | ||||
| @@ -165,11 +248,12 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|       tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2); | ||||
|       tmp16x8_3 = vabsq_s16(tmp16x8_3); | ||||
|       tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3); | ||||
|       __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); | ||||
|       __asm__("vbit %q0, %q1, %q2":: | ||||
|               "w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); | ||||
|       vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1); | ||||
|     } // End loop over magnitude spectrum | ||||
|  | ||||
|     for (; i < inst->magnLen; i++) { | ||||
|     // Last iteration over magnitude spectrum: | ||||
|     // compute delta | ||||
|     if (inst->noiseEstDensity[offset + i] > 512) { | ||||
|       delta = WebRtcSpl_DivW32W16ResW16(numerator, | ||||
| @@ -182,9 +266,8 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|         delta = FACTOR_Q7_STARTUP; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // update log quantile estimate | ||||
|       tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|     tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); | ||||
|     if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { | ||||
|       // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 | ||||
|       // CounterDiv=1/(inst->counter[s]+1) in Q15 | ||||
| @@ -195,7 +278,7 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|       tmp16 += 1; | ||||
|       tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); | ||||
|       // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); | ||||
|       tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); | ||||
|       inst->noiseEstLogQuantile[offset + i] -= tmp16no2; | ||||
|       if (inst->noiseEstLogQuantile[offset + i] < logval) { | ||||
|         // logval is the smallest fixed point representation we can have. | ||||
| @@ -208,18 +291,18 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|     // update density estimate | ||||
|     if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) | ||||
|         < WIDTH_Q8) { | ||||
|         tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                    inst->noiseEstDensity[offset + i], countProd, 15); | ||||
|         tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                      widthFactor, countDiv, 15); | ||||
|       tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|                    width_factor, countDiv, 15); | ||||
|       inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; | ||||
|     } | ||||
|     } // end loop over magnitude spectrum | ||||
|  | ||||
|  | ||||
|     if (counter >= END_STARTUP_LONG) { | ||||
|       inst->noiseEstCounter[s] = 0; | ||||
|       if (inst->blockIndex >= END_STARTUP_LONG) { | ||||
|         WebRtcNsx_UpdateNoiseEstimate(inst, offset); | ||||
|         UpdateNoiseEstimate(inst, offset); | ||||
|       } | ||||
|     } | ||||
|     inst->noiseEstCounter[s]++; | ||||
| @@ -228,13 +311,417 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo | ||||
|  | ||||
|   // Sequentially update the noise during startup | ||||
|   if (inst->blockIndex < END_STARTUP_LONG) { | ||||
|     WebRtcNsx_UpdateNoiseEstimate(inst, offset); | ||||
|     UpdateNoiseEstimate(inst, offset); | ||||
|   } | ||||
|  | ||||
|   for (i = 0; i < inst->magnLen; i++) { | ||||
|     noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) | ||||
|     noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) | ||||
|   } | ||||
|   (*qNoise) = (WebRtc_Word16)inst->qNoise; | ||||
|   (*q_noise) = (int16_t)inst->qNoise; | ||||
| } | ||||
|  | ||||
| // Filter the data in the frequency domain, and create spectrum. | ||||
| // | ||||
| // Step (1) multiplies inst->real[] and inst->imag[] in place by | ||||
| // inst->noiseSupFilter[] (product shifted right by 14) for inst->magnLen | ||||
| // bins. Step (2) writes the interleaved spectrum to freq_buf[]: bin i goes | ||||
| // to freq_buf[2 * i] as (real, -imag) and is mirrored at | ||||
| // freq_buf[2 * inst->anaLen - 2 * i] as (real, imag). | ||||
| // | ||||
| //   inst     - instance; real[], imag[] are updated, noiseSupFilter[], | ||||
| //              magnLen, anaLen and anaLen2 are read. | ||||
| //   freq_buf - output buffer; indices 0 .. 2 * inst->anaLen - 1 are written. | ||||
| // | ||||
| // NOTE(review): the vector loops handle 8 bins per iteration with a fixed | ||||
| // scalar tail, so they presumably rely on inst->magnLen - 1 and | ||||
| // inst->anaLen2 being multiples of 8 -- confirm against the init code. | ||||
| void WebRtcNsx_PrepareSpectrum(NsxInst_t* inst, int16_t* freq_buf) { | ||||
|  | ||||
|   // (1) Filtering. | ||||
|  | ||||
|   // Fixed point C code for the next block is as follows: | ||||
|   // for (i = 0; i < inst->magnLen; i++) { | ||||
|   //   inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i], | ||||
|   //      (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|   //   inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i], | ||||
|   //      (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) | ||||
|   // } | ||||
|  | ||||
|   int16_t* ptr_real = &inst->real[0]; | ||||
|   int16_t* ptr_imag = &inst->imag[0]; | ||||
|   uint16_t* ptr_noiseSupFilter = &inst->noiseSupFilter[0]; | ||||
|  | ||||
|   // Filter all bins except the last one in the frequency domain. | ||||
|   for (; ptr_real < &inst->real[inst->magnLen - 1]; ) { | ||||
|     // Loop unrolled once. Both pointers are incremented by 4 twice. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 d20, [%[ptr_real]]\n\t" | ||||
|       "vld1.16 d22, [%[ptr_imag]]\n\t" | ||||
|       "vld1.16 d23, [%[ptr_noiseSupFilter]]!\n\t" | ||||
|       "vmull.s16 q10, d20, d23\n\t" | ||||
|       "vmull.s16 q11, d22, d23\n\t" | ||||
|       "vshrn.s32 d20, q10, #14\n\t" | ||||
|       "vshrn.s32 d22, q11, #14\n\t" | ||||
|       "vst1.16 d20, [%[ptr_real]]!\n\t" | ||||
|       "vst1.16 d22, [%[ptr_imag]]!\n\t" | ||||
|  | ||||
|       "vld1.16 d18, [%[ptr_real]]\n\t" | ||||
|       "vld1.16 d24, [%[ptr_imag]]\n\t" | ||||
|       "vld1.16 d25, [%[ptr_noiseSupFilter]]!\n\t" | ||||
|       "vmull.s16 q9, d18, d25\n\t" | ||||
|       "vmull.s16 q12, d24, d25\n\t" | ||||
|       "vshrn.s32 d18, q9, #14\n\t" | ||||
|       "vshrn.s32 d24, q12, #14\n\t" | ||||
|       "vst1.16 d18, [%[ptr_real]]!\n\t" | ||||
|       "vst1.16 d24, [%[ptr_imag]]!\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_imag]"+r"(ptr_imag), | ||||
|        [ptr_real]"+r"(ptr_real), | ||||
|        [ptr_noiseSupFilter]"+r"(ptr_noiseSupFilter) | ||||
|       : | ||||
|       // "memory": the block loads and stores through the pointers, which | ||||
|       // the compiler cannot see from the register operands alone. | ||||
|       :"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", | ||||
|        "q9", "q10", "q11", "q12", "memory" | ||||
|       ); | ||||
|   } | ||||
|  | ||||
|   // Filter the last pair of elements in the frequency domain. | ||||
|   *ptr_real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_real, | ||||
|       (int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages) | ||||
|   *ptr_imag = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*ptr_imag, | ||||
|       (int16_t)(*ptr_noiseSupFilter), 14); // Q(normData-stages) | ||||
|  | ||||
|   // (2) Create spectrum. | ||||
|  | ||||
|   // Fixed point C code for the rest of the function is as follows: | ||||
|   // freq_buf[0] = inst->real[0]; | ||||
|   // freq_buf[1] = -inst->imag[0]; | ||||
|   // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { | ||||
|   //   tmp16 = (inst->anaLen << 1) - j; | ||||
|   //   freq_buf[j] = inst->real[i]; | ||||
|   //   freq_buf[j + 1] = -inst->imag[i]; | ||||
|   //   freq_buf[tmp16] = inst->real[i]; | ||||
|   //   freq_buf[tmp16 + 1] = inst->imag[i]; | ||||
|   // } | ||||
|   // freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; | ||||
|   // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; | ||||
|  | ||||
|   freq_buf[0] = inst->real[0]; | ||||
|   freq_buf[1] = -inst->imag[0]; | ||||
|  | ||||
|   // Post-index step, in bytes, for the mirrored writes: each vector store | ||||
|   // covers 8 int16 values and the mirrored half is filled back to front. | ||||
|   int offset = -16; | ||||
|   int16_t* ptr_realImag1 = &freq_buf[2]; | ||||
|   int16_t* ptr_realImag2 = &freq_buf[(inst->anaLen << 1) - 8]; | ||||
|   ptr_real = &inst->real[1]; | ||||
|   ptr_imag = &inst->imag[1]; | ||||
|   for (; ptr_real < &inst->real[inst->anaLen2 - 11]; ) { | ||||
|     // Loop unrolled once. All pointers are incremented twice. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 d22, [%[ptr_real]]!\n\t" | ||||
|       "vld1.16 d23, [%[ptr_imag]]!\n\t" | ||||
|       // Negate and interleave: | ||||
|       "vmov.s16 d20, d22\n\t" | ||||
|       "vneg.s16 d21, d23\n\t" | ||||
|       "vzip.16 d20, d21\n\t" | ||||
|       // Write 8 elements to &freq_buf[j] | ||||
|       "vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t" | ||||
|       // Interleave and reverse elements: | ||||
|       "vzip.16 d22, d23\n\t" | ||||
|       "vrev64.32 d18, d23\n\t" | ||||
|       "vrev64.32 d19, d22\n\t" | ||||
|       // Write 8 elements to &freq_buf[tmp16] | ||||
|       "vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t" | ||||
|  | ||||
|       "vld1.16 d22, [%[ptr_real]]!\n\t" | ||||
|       "vld1.16 d23, [%[ptr_imag]]!\n\t" | ||||
|       // Negate and interleave: | ||||
|       "vmov.s16 d20, d22\n\t" | ||||
|       "vneg.s16 d21, d23\n\t" | ||||
|       "vzip.16 d20, d21\n\t" | ||||
|       // Write 8 elements to &freq_buf[j] | ||||
|       "vst1.16 {d20, d21}, [%[ptr_realImag1]]!\n\t" | ||||
|       // Interleave and reverse elements: | ||||
|       "vzip.16 d22, d23\n\t" | ||||
|       "vrev64.32 d18, d23\n\t" | ||||
|       "vrev64.32 d19, d22\n\t" | ||||
|       // Write 8 elements to &freq_buf[tmp16] | ||||
|       "vst1.16 {d18, d19}, [%[ptr_realImag2]], %[offset]\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_imag]"+r"(ptr_imag), | ||||
|        [ptr_real]"+r"(ptr_real), | ||||
|        [ptr_realImag1]"+r"(ptr_realImag1), | ||||
|        [ptr_realImag2]"+r"(ptr_realImag2) | ||||
|       :[offset]"r"(offset) | ||||
|       :"d18", "d19", "d20", "d21", "d22", "d23", "memory" | ||||
|     ); | ||||
|   } | ||||
|   // Scalar tail. ptr_realImag2 pointed at the start of the next 8-value | ||||
|   // group; advance it by 6 so it addresses that group's last (real, imag) | ||||
|   // pair, then walk it backwards one pair per bin. | ||||
|   for (ptr_realImag2 += 6; | ||||
|        ptr_real <= &inst->real[inst->anaLen2]; | ||||
|        ptr_real += 1, ptr_imag += 1, ptr_realImag1 += 2, ptr_realImag2 -= 2) { | ||||
|     *ptr_realImag1 = *ptr_real; | ||||
|     *(ptr_realImag1 + 1) = -(*ptr_imag); | ||||
|     *ptr_realImag2 = *ptr_real; | ||||
|     *(ptr_realImag2 + 1) = *ptr_imag; | ||||
|   } | ||||
|  | ||||
|   // The tail loop's final pass wrote bin anaLen2 through both pointers; | ||||
|   // store it once more so the (real, -imag) form is what remains. | ||||
|   freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; | ||||
|   freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; | ||||
| } | ||||
|  | ||||
| // Denormalize the input buffer. | ||||
| // | ||||
| // Reads the even-indexed elements of in[] (in[0], in[2], ...), shifts each | ||||
| // one as a 32-bit value by (factor - inst->normData) -- a negative count | ||||
| // shifts right -- saturates the result to 16 bits and stores it in | ||||
| // inst->real[0 .. inst->anaLen - 1]. | ||||
| // | ||||
| //   inst   - instance; normData and anaLen are read, real[] is written. | ||||
| //   in     - interleaved input, 2 * inst->anaLen values are read. | ||||
| //   factor - shift applied on top of -inst->normData. | ||||
| // | ||||
| // NOTE(review): 8 outputs are produced per iteration, so inst->anaLen is | ||||
| // presumably a multiple of 8 -- confirm. | ||||
| inline void WebRtcNsx_Denormalize(NsxInst_t* inst, int16_t* in, int factor) { | ||||
|   int16_t* ptr_real = &inst->real[0]; | ||||
|   int16_t* ptr_in = &in[0]; | ||||
|   const int32_t shift = (int32_t)(factor - inst->normData); | ||||
|  | ||||
|   for (; ptr_real < &inst->real[inst->anaLen]; ) { | ||||
|  | ||||
|     // Loop unrolled once. Both pointers are incremented. | ||||
|     __asm__ __volatile__( | ||||
|       // Broadcast the shift count inside this statement: a register loaded | ||||
|       // by a separate asm statement is not guaranteed to survive until here. | ||||
|       "vdup.32 q10, %[shift]\n\t" | ||||
|  | ||||
|       // tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j], | ||||
|       //                             factor - inst->normData); | ||||
|       "vld2.16 {d24, d25}, [%[ptr_in]]!\n\t" | ||||
|       "vmovl.s16 q12, d24\n\t" | ||||
|       "vshl.s32 q12, q10\n\t" | ||||
|       // inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|       "vqmovn.s32 d24, q12\n\t" | ||||
|       "vst1.16 d24, [%[ptr_real]]!\n\t" | ||||
|  | ||||
|       // tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j], | ||||
|       //                             factor - inst->normData); | ||||
|       "vld2.16 {d22, d23}, [%[ptr_in]]!\n\t" | ||||
|       "vmovl.s16 q11, d22\n\t" | ||||
|       "vshl.s32 q11, q10\n\t" | ||||
|       // inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|       "vqmovn.s32 d22, q11\n\t" | ||||
|       "vst1.16 d22, [%[ptr_real]]!\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_in]"+r"(ptr_in), | ||||
|        [ptr_real]"+r"(ptr_real) | ||||
|       :[shift]"r"(shift) | ||||
|       // d20/d21 form q10; "memory" covers the loads from in[] and the | ||||
|       // stores to inst->real[]. | ||||
|       :"d20", "d21", "d22", "d23", "d24", "d25", "memory" | ||||
|     ); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // For the noise suppression process, synthesis, read out fully processed | ||||
| // segment, and update synthesis buffer. | ||||
| // | ||||
| // Steps, in order: | ||||
| //   1. Window inst->real[] with inst->window[], scale by gain_factor and | ||||
| //      add the result (saturating) into inst->synthesisBuffer[] for | ||||
| //      inst->anaLen samples. | ||||
| //   2. Copy the first inst->blockLen10ms samples of the synthesis buffer to | ||||
| //      out_frame[]. | ||||
| //   3. Move the remaining samples to the front of the synthesis buffer and | ||||
| //      zero the vacated space at its end. | ||||
| // | ||||
| //   inst        - instance; real[], window[], anaLen and blockLen10ms are | ||||
| //                 read, synthesisBuffer[] is updated. | ||||
| //   out_frame   - receives inst->blockLen10ms samples. | ||||
| //   gain_factor - gain applied after windowing (product shifted right by 13 | ||||
| //                 with rounding). | ||||
| // | ||||
| // NOTE(review): the copy and zero loops handle 16 samples per iteration and | ||||
| // the synthesis loop 8, so the lengths involved are presumably multiples of | ||||
| // 16 -- confirm against the init code. | ||||
| void WebRtcNsx_SynthesisUpdate(NsxInst_t* inst, | ||||
|                                int16_t* out_frame, | ||||
|                                int16_t gain_factor) { | ||||
|   int16_t* ptr_real = &inst->real[0]; | ||||
|   int16_t* ptr_syn = &inst->synthesisBuffer[0]; | ||||
|   int16_t* ptr_window = &inst->window[0]; | ||||
|  | ||||
|   // synthesis | ||||
|   // Loop unrolled once. All pointers are incremented in the assembly code. | ||||
|   for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) { | ||||
|     __asm__ __volatile__( | ||||
|       // Broadcast the gain inside this statement: a register loaded by a | ||||
|       // separate asm statement is not guaranteed to survive until here. | ||||
|       "vdup.16 d24, %[gain_factor]\n\t" | ||||
|  | ||||
|       // Load variables. | ||||
|       "vld1.16 d22, [%[ptr_real]]!\n\t" | ||||
|       "vld1.16 d23, [%[ptr_window]]!\n\t" | ||||
|       "vld1.16 d25, [%[ptr_syn]]\n\t" | ||||
|       // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       //           inst->window[i], inst->real[i], 14); // Q0, window in Q14 | ||||
|       "vmull.s16 q11, d22, d23\n\t" | ||||
|       "vrshrn.i32 d22, q11, #14\n\t" | ||||
|       // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); | ||||
|       "vmull.s16 q11, d24, d22\n\t" | ||||
|       // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|       "vqrshrn.s32 d22, q11, #13\n\t" | ||||
|       // inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16( | ||||
|       //     inst->synthesisBuffer[i], tmp16b); // Q0 | ||||
|       "vqadd.s16 d25, d22\n\t" | ||||
|       "vst1.16 d25, [%[ptr_syn]]!\n\t" | ||||
|  | ||||
|       // Load variables. | ||||
|       "vld1.16 d26, [%[ptr_real]]!\n\t" | ||||
|       "vld1.16 d27, [%[ptr_window]]!\n\t" | ||||
|       "vld1.16 d28, [%[ptr_syn]]\n\t" | ||||
|       // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       //           inst->window[i], inst->real[i], 14); // Q0, window in Q14 | ||||
|       "vmull.s16 q13, d26, d27\n\t" | ||||
|       "vrshrn.i32 d26, q13, #14\n\t" | ||||
|       // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); | ||||
|       "vmull.s16 q13, d24, d26\n\t" | ||||
|       // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 | ||||
|       "vqrshrn.s32 d26, q13, #13\n\t" | ||||
|       // inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16( | ||||
|       //     inst->synthesisBuffer[i], tmp16b); // Q0 | ||||
|       "vqadd.s16 d28, d26\n\t" | ||||
|       "vst1.16 d28, [%[ptr_syn]]!\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_real]"+r"(ptr_real), | ||||
|        [ptr_window]"+r"(ptr_window), | ||||
|        [ptr_syn]"+r"(ptr_syn) | ||||
|       :[gain_factor]"r"(gain_factor) | ||||
|       // "memory": the block loads and stores through the pointers. | ||||
|       :"d22", "d23", "d24", "d25", "d26", "d27", "d28", "q11", "q12", "q13", | ||||
|        "memory" | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   int16_t* ptr_out = &out_frame[0]; | ||||
|   ptr_syn = &inst->synthesisBuffer[0]; | ||||
|   // read out fully processed segment | ||||
|   for (; ptr_syn < &inst->synthesisBuffer[inst->blockLen10ms]; ) { | ||||
|     // Loop unrolled once. Both pointers are incremented in the assembly code. | ||||
|     __asm__ __volatile__( | ||||
|       // out_frame[i] = inst->synthesisBuffer[i]; // Q0 | ||||
|       "vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t" | ||||
|       "vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t" | ||||
|       "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       "vst1.16 {d24, d25}, [%[ptr_out]]!\n\t" | ||||
|       :[ptr_syn]"+r"(ptr_syn), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d22", "d23", "d24", "d25", "memory" | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   // Update synthesis buffer. | ||||
|   // C code: | ||||
|   // WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, | ||||
|   //                      inst->synthesisBuffer + inst->blockLen10ms, | ||||
|   //                      inst->anaLen - inst->blockLen10ms); | ||||
|   ptr_out = &inst->synthesisBuffer[0]; | ||||
|   ptr_syn = &inst->synthesisBuffer[inst->blockLen10ms]; | ||||
|   for (; ptr_syn < &inst->synthesisBuffer[inst->anaLen]; ) { | ||||
|     // Loop unrolled once. Both pointers are incremented in the assembly code. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 {d22, d23}, [%[ptr_syn]]!\n\t" | ||||
|       "vld1.16 {d24, d25}, [%[ptr_syn]]!\n\t" | ||||
|       "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       "vst1.16 {d24, d25}, [%[ptr_out]]!\n\t" | ||||
|       :[ptr_syn]"+r"(ptr_syn), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d22", "d23", "d24", "d25", "memory" | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   // C code: | ||||
|   // WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer | ||||
|   //    + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); | ||||
|   // ptr_out continues from where the copy above stopped. | ||||
|   for (; ptr_out < &inst->synthesisBuffer[inst->anaLen]; ) { | ||||
|     // Loop unrolled once. Pointer is incremented in the assembly code. | ||||
|     __asm__ __volatile__( | ||||
|       // Zero q10 (d20/d21) inside this statement rather than relying on a | ||||
|       // value left behind by an earlier asm statement. | ||||
|       "vmov.i16 q10, #0\n\t" | ||||
|       "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" | ||||
|       "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" | ||||
|       :[ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d20", "d21", "memory" | ||||
|     ); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Update analysis buffer for lower band, and window data before FFT. | ||||
| // | ||||
| // Steps, in order: | ||||
| //   1. Move the last inst->anaLen - inst->blockLen10ms samples of | ||||
| //      inst->analysisBuffer[] to its front. | ||||
| //   2. Append inst->blockLen10ms samples from new_speech[]. | ||||
| //   3. Multiply the buffer by inst->window[] (product shifted right by 14 | ||||
| //      with rounding) and store the inst->anaLen results in out[]. | ||||
| // | ||||
| //   inst       - instance; window[], anaLen and blockLen10ms are read, | ||||
| //                analysisBuffer[] is updated. | ||||
| //   out        - receives inst->anaLen windowed samples. | ||||
| //   new_speech - inst->blockLen10ms new input samples. | ||||
| // | ||||
| // NOTE(review): the copy loops handle 16 samples per iteration and the | ||||
| // window loop 8, so the lengths involved are presumably multiples of 16 -- | ||||
| // confirm against the init code. | ||||
| void WebRtcNsx_AnalysisUpdate(NsxInst_t* inst, | ||||
|                               int16_t* out, | ||||
|                               int16_t* new_speech) { | ||||
|  | ||||
|   int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms]; | ||||
|   int16_t* ptr_out = &inst->analysisBuffer[0]; | ||||
|  | ||||
|   // For lower band update analysis buffer. | ||||
|   // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, | ||||
|   //                      inst->analysisBuffer + inst->blockLen10ms, | ||||
|   //                      inst->anaLen - inst->blockLen10ms); | ||||
|   for (; ptr_out < &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms]; ) { | ||||
|     // Loop unrolled once, so both pointers are incremented by 8 twice. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" | ||||
|       "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" | ||||
|       "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" | ||||
|       "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       :[ptr_ana]"+r"(ptr_ana), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       // "memory": the block loads and stores through the pointers, which | ||||
|       // the compiler cannot see from the register operands alone. | ||||
|       :"d20", "d21", "d22", "d23", "memory" | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer | ||||
|   //    + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); | ||||
|   // ptr_out continues from where the copy above stopped. | ||||
|   for (ptr_ana = new_speech; ptr_out < &inst->analysisBuffer[inst->anaLen]; ) { | ||||
|     // Loop unrolled once, so both pointers are incremented by 8 twice. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" | ||||
|       "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" | ||||
|       "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" | ||||
|       "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       :[ptr_ana]"+r"(ptr_ana), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d20", "d21", "d22", "d23", "memory" | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   // Window data before FFT | ||||
|   int16_t* ptr_window = &inst->window[0]; | ||||
|   ptr_out = &out[0]; | ||||
|   ptr_ana = &inst->analysisBuffer[0]; | ||||
|   for (; ptr_out < &out[inst->anaLen]; ) { | ||||
|  | ||||
|     // Loop unrolled once, so all pointers are incremented by 4 twice. | ||||
|     __asm__ __volatile__( | ||||
|       "vld1.16 d20, [%[ptr_ana]]!\n\t" | ||||
|       "vld1.16 d21, [%[ptr_window]]!\n\t" | ||||
|       // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       //           inst->window[i], inst->analysisBuffer[i], 14); // Q0 | ||||
|       "vmull.s16 q10, d20, d21\n\t" | ||||
|       "vrshrn.i32 d20, q10, #14\n\t" | ||||
|       "vst1.16 d20, [%[ptr_out]]!\n\t" | ||||
|  | ||||
|       "vld1.16 d22, [%[ptr_ana]]!\n\t" | ||||
|       "vld1.16 d23, [%[ptr_window]]!\n\t" | ||||
|       // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( | ||||
|       //           inst->window[i], inst->analysisBuffer[i], 14); // Q0 | ||||
|       "vmull.s16 q11, d22, d23\n\t" | ||||
|       "vrshrn.i32 d22, q11, #14\n\t" | ||||
|       "vst1.16 d22, [%[ptr_out]]!\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_ana]"+r"(ptr_ana), | ||||
|        [ptr_window]"+r"(ptr_window), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d20", "d21", "d22", "d23", "q10", "q11", "memory" | ||||
|     ); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Create a complex number buffer (out[]) as the intput (in[]) interleaved with | ||||
| // zeros, and normalize it. | ||||
| inline void WebRtcNsx_CreateComplexBuffer(NsxInst_t* inst, | ||||
|                                           int16_t* in, | ||||
|                                           int16_t* out) { | ||||
|   int16_t* ptr_out = &out[0]; | ||||
|   int16_t* ptr_in = &in[0]; | ||||
|  | ||||
|   __asm__ __volatile__("vdup.16 d25, %0" : : "r"(0) : "d25"); | ||||
|   __asm__ __volatile__("vdup.16 q10, %0" : : "r"(inst->normData) : "q10"); | ||||
|   for (; ptr_in < &in[inst->anaLen]; ) { | ||||
|  | ||||
|     // Loop unrolled once, so ptr_in is incremented by 8 twice,  | ||||
|     // and ptr_out is incremented by 8 four times. | ||||
|     __asm__ __volatile__( | ||||
|       // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) | ||||
|       "vld1.16 {d22, d23}, [%[ptr_in]]!\n\t" | ||||
|       "vshl.s16 q11, q10\n\t" | ||||
|       "vmov d24, d23\n\t" | ||||
|  | ||||
|       // out[j + 1] = 0; // Insert zeros in imaginary part | ||||
|       "vmov d23, d25\n\t" | ||||
|       "vst2.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       "vst2.16 {d24, d25}, [%[ptr_out]]!\n\t" | ||||
|  | ||||
|       // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) | ||||
|       "vld1.16 {d22, d23}, [%[ptr_in]]!\n\t" | ||||
|       "vshl.s16 q11, q10\n\t" | ||||
|       "vmov d24, d23\n\t" | ||||
|  | ||||
|       // out[j + 1] = 0; // Insert zeros in imaginary part | ||||
|       "vmov d23, d25\n\t" | ||||
|       "vst2.16 {d22, d23}, [%[ptr_out]]!\n\t" | ||||
|       "vst2.16 {d24, d25}, [%[ptr_out]]!\n\t" | ||||
|  | ||||
|       // Specify constraints. | ||||
|       :[ptr_in]"+r"(ptr_in), | ||||
|        [ptr_out]"+r"(ptr_out) | ||||
|       : | ||||
|       :"d22", "d23", "d24", "d25", "q10", "q11" | ||||
|     ); | ||||
|   } | ||||
| } | ||||
| #endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 kma@webrtc.org
					kma@webrtc.org