diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index 7fcdec6a3..9227f10b4 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -36,8 +36,9 @@ static const int subCountLen = 4; static const int countLen = 50; // Quantities to control H band scaling for SWB input -static const int flagHbandCn = 1; // flag for adding comfort noise in H band -static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band +static const int flagHbandCn = 1; // flag for adding comfort noise in H band +static const float cnScaleHband = + (float)0.4; // scale for comfort noise in H band // Initial bin for averaging nlp gain in low band static const int freqAvgIc = PART_LEN / 2; @@ -45,78 +46,68 @@ static const int freqAvgIc = PART_LEN / 2; // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); static const float sqrtHanning[65] = { - 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, - 0.07356456359967f, 0.09801714032956f, 0.12241067519922f, - 0.14673047445536f, 0.17096188876030f, 0.19509032201613f, - 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, - 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, - 0.35989503653499f, 0.38268343236509f, 0.40524131400499f, - 0.42755509343028f, 0.44961132965461f, 0.47139673682600f, - 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, - 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, - 0.61523159058063f, 0.63439328416365f, 0.65317284295378f, - 0.67155895484702f, 0.68954054473707f, 0.70710678118655f, - 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, - 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, - 0.81758481315158f, 0.83146961230255f, 0.84485356524971f, - 0.85772861000027f, 0.87008699110871f, 0.88192126434835f, - 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, - 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, - 0.94952818059304f, 0.95694033573221f, 0.96377606579544f, - 0.97003125319454f, 0.97570213003853f, 0.98078528040323f, - 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, - 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, - 0.99969881869620f, 1.00000000000000f -}; + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f}; // Matlab code to produce table: // weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); const float WebRtcAec_weightCurve[65] = { - 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, - 0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f, - 0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f, - 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, - 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, - 0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f, - 0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f, - 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, - 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, - 0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f, - 0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f -}; + 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f, + 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f, + 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, + 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f, + 0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f, + 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, + 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f, + 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f, + 0.4000f}; // Matlab code to produce table: // overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); const float WebRtcAec_overDriveCurve[65] = { - 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, - 1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f, - 1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f, - 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, - 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, - 1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f, - 1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f, - 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, - 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, - 1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f, - 1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f -}; + 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f, + 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f, + 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, + 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f, + 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f, + 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, + 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f, + 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, + 2.0000f}; // Target suppression levels for nlp modes. // log{0.001, 0.00001, 0.00000001} -static const float kTargetSupp[3] = { -6.9f, -11.5f, -18.4f }; +static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; // Two sets of parameters, one for the extended filter mode. -static const float kExtendedMinOverDrive[3] = { 3.0f, 6.0f, 15.0f }; -static const float kNormalMinOverDrive[3] = { 1.0f, 2.0f, 5.0f }; -static const float kExtendedSmoothingCoefficients[2][2] = - { { 0.9f, 0.1f }, { 0.92f, 0.08f } }; -static const float kNormalSmoothingCoefficients[2][2] = - { { 0.9f, 0.1f }, { 0.93f, 0.07f } }; +static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; +static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; +static const float kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.92f, 0.08f}}; +static const float kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.93f, 0.07f}}; // Number of partitions forming the NLP's "preferred" bands. -enum { kPrefBandSize = 24 }; +enum { + kPrefBandSize = 24 +}; #ifdef WEBRTC_AEC_DEBUG_DUMP extern int webrtc_aec_instance_count; @@ -125,14 +116,16 @@ extern int webrtc_aec_instance_count; // "Private" function prototypes. static void ProcessBlock(AecCore* aec); -static void NonLinearProcessing(AecCore* aec, short *output, short *outputH); +static void NonLinearProcessing(AecCore* aec, short* output, short* outputH); -static void GetHighbandGain(const float *lambda, float *nlpGainHband); +static void GetHighbandGain(const float* lambda, float* nlpGainHband); // Comfort_noise also computes noise for H band returned in comfortNoiseHband -static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1], - complex_t *comfortNoiseHband, - const float *noisePow, const float *lambda); +static void ComfortNoise(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda); static void InitLevel(PowerLevel* level); static void InitStats(Stats* stats); @@ -145,148 +138,137 @@ static void TimeToFrequency(float time_data[PART_LEN2], float freq_data[2][PART_LEN1], int window); -__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) -{ - return aRe * bRe - aIm * bIm; +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; } -__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) -{ - return aRe * bIm + aIm * bRe; +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; } -static int CmpFloat(const void *a, const void *b) -{ - const float *da = (const float *)a; - const float *db = (const float *)b; +static int CmpFloat(const void* a, const void* b) { + const float* da = (const float*)a; + const float* db = (const float*)b; - return (*da > *db) - (*da < *db); + return (*da > *db) - (*da < *db); } -int WebRtcAec_CreateAec(AecCore** aecInst) -{ - AecCore* aec = malloc(sizeof(AecCore)); - *aecInst = aec; - if (aec == NULL) { - return -1; - } +int WebRtcAec_CreateAec(AecCore** aecInst) { + AecCore* aec = malloc(sizeof(AecCore)); + *aecInst = aec; + if (aec == NULL) { + return -1; + } - aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, - sizeof(int16_t)); - if (!aec->nearFrBuf) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t)); + if (!aec->nearFrBuf) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } - aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, - sizeof(int16_t)); - if (!aec->outFrBuf) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t)); + if (!aec->outFrBuf) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } - aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, - sizeof(int16_t)); - if (!aec->nearFrBufH) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t)); + if (!aec->nearFrBufH) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } - aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, - sizeof(int16_t)); - if (!aec->outFrBufH) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t)); + if (!aec->outFrBufH) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } - // Create far-end buffers. - aec->far_buf = WebRtc_CreateBuffer(kBufSizePartitions, - sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - aec->far_buf_windowed = WebRtc_CreateBuffer(kBufSizePartitions, - sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf_windowed) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + // Create far-end buffers. + aec->far_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + aec->far_buf_windowed = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf_windowed) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } #ifdef WEBRTC_AEC_DEBUG_DUMP - aec->far_time_buf = WebRtc_CreateBuffer(kBufSizePartitions, - sizeof(int16_t) * PART_LEN); - if (!aec->far_time_buf) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - { - char filename[64]; - sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count); - aec->farFile = fopen(filename, "wb"); - sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count); - aec->nearFile = fopen(filename, "wb"); - sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count); - aec->outFile = fopen(filename, "wb"); - sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count); - aec->outLinearFile = fopen(filename, "wb"); - } + aec->far_time_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(int16_t) * PART_LEN); + if (!aec->far_time_buf) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + { + char filename[64]; + sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count); + aec->farFile = fopen(filename, "wb"); + sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count); + aec->nearFile = fopen(filename, "wb"); + sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count); + aec->outFile = fopen(filename, "wb"); + sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count); + aec->outLinearFile = fopen(filename, "wb"); + } #endif - aec->delay_estimator_farend = - WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); - if (aec->delay_estimator_farend == NULL) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - aec->delay_estimator = - WebRtc_CreateDelayEstimator(aec->delay_estimator_farend, - kLookaheadBlocks); - if (aec->delay_estimator == NULL) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } + aec->delay_estimator_farend = + WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); + if (aec->delay_estimator_farend == NULL) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + aec->delay_estimator = WebRtc_CreateDelayEstimator( + aec->delay_estimator_farend, kLookaheadBlocks); + if (aec->delay_estimator == NULL) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } - return 0; + return 0; } -int WebRtcAec_FreeAec(AecCore* aec) -{ - if (aec == NULL) { - return -1; - } +int WebRtcAec_FreeAec(AecCore* aec) { + if (aec == NULL) { + return -1; + } - WebRtc_FreeBuffer(aec->nearFrBuf); - WebRtc_FreeBuffer(aec->outFrBuf); + WebRtc_FreeBuffer(aec->nearFrBuf); + WebRtc_FreeBuffer(aec->outFrBuf); - WebRtc_FreeBuffer(aec->nearFrBufH); - WebRtc_FreeBuffer(aec->outFrBufH); + WebRtc_FreeBuffer(aec->nearFrBufH); + WebRtc_FreeBuffer(aec->outFrBufH); - WebRtc_FreeBuffer(aec->far_buf); - WebRtc_FreeBuffer(aec->far_buf_windowed); + WebRtc_FreeBuffer(aec->far_buf); + WebRtc_FreeBuffer(aec->far_buf_windowed); #ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_FreeBuffer(aec->far_time_buf); - fclose(aec->farFile); - fclose(aec->nearFile); - fclose(aec->outFile); - fclose(aec->outLinearFile); + WebRtc_FreeBuffer(aec->far_time_buf); + fclose(aec->farFile); + fclose(aec->nearFile); + fclose(aec->outFile); + fclose(aec->outLinearFile); #endif - WebRtc_FreeDelayEstimator(aec->delay_estimator); - WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); + WebRtc_FreeDelayEstimator(aec->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); - free(aec); - return 0; + free(aec); + return 0; } -static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) -{ +static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { int i; for (i = 0; i < aec->num_partitions; i++) { int j; @@ -294,23 +276,27 @@ static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) int pos = i * PART_LEN1; // Check for wrap if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions*(PART_LEN1); + xPos -= aec->num_partitions * (PART_LEN1); } for (j = 0; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], - aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], - aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); } } } -static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) -{ +static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled ? - kExtendedErrorThreshold : aec->normal_error_threshold; + const float error_threshold = aec->extended_filter_enabled + ? kExtendedErrorThreshold + : aec->normal_error_threshold; int i; float abs_ef; for (i = 0; i < (PART_LEN1); i++) { @@ -332,7 +318,7 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) // Time-unconstrined filter adaptation. // TODO(andrew): consider for a low-complexity mode. -//static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, +// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, // float ef[2][PART_LEN1]) { // int i, j; // for (i = 0; i < aec->num_partitions; i++) { @@ -356,10 +342,10 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) // } //} -static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) { +static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { int i, j; for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); int pos; // Check for wrap if (i + aec->xfBufBlockPos >= aec->num_partitions) { @@ -372,14 +358,17 @@ static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) { fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], -aec->xfBuf[1][xPos + j], - ef[0][j], ef[1][j]); + ef[0][j], + ef[1][j]); fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], -aec->xfBuf[1][xPos + j], - ef[0][j], ef[1][j]); + ef[0][j], + ef[1][j]); } fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], ef[1][PART_LEN]); + ef[0][PART_LEN], + ef[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -403,7 +392,8 @@ static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) { } } -static void OverdriveAndSuppress(AecCore* aec, float hNl[PART_LEN1], +static void OverdriveAndSuppress(AecCore* aec, + float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]) { int i; @@ -411,7 +401,7 @@ static void OverdriveAndSuppress(AecCore* aec, float hNl[PART_LEN1], // Weight subbands if (hNl[i] > hNlFb) { hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + - (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); @@ -430,158 +420,153 @@ WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress; -int WebRtcAec_InitAec(AecCore* aec, int sampFreq) -{ - int i; +int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { + int i; - aec->sampFreq = sampFreq; + aec->sampFreq = sampFreq; - if (sampFreq == 8000) { - aec->normal_mu = 0.6f; - aec->normal_error_threshold = 2e-6f; - } - else { - aec->normal_mu = 0.5f; - aec->normal_error_threshold = 1.5e-6f; - } + if (sampFreq == 8000) { + aec->normal_mu = 0.6f; + aec->normal_error_threshold = 2e-6f; + } else { + aec->normal_mu = 0.5f; + aec->normal_error_threshold = 1.5e-6f; + } - if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) { - return -1; - } + if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) { + return -1; + } - if (WebRtc_InitBuffer(aec->outFrBuf) == -1) { - return -1; - } + if (WebRtc_InitBuffer(aec->outFrBuf) == -1) { + return -1; + } - if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) { - return -1; - } + if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) { + return -1; + } - if (WebRtc_InitBuffer(aec->outFrBufH) == -1) { - return -1; - } + if (WebRtc_InitBuffer(aec->outFrBufH) == -1) { + return -1; + } - // Initialize far-end buffers. - if (WebRtc_InitBuffer(aec->far_buf) == -1) { - return -1; - } - if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) { - return -1; - } + // Initialize far-end buffers. + if (WebRtc_InitBuffer(aec->far_buf) == -1) { + return -1; + } + if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) { + return -1; + } #ifdef WEBRTC_AEC_DEBUG_DUMP - if (WebRtc_InitBuffer(aec->far_time_buf) == -1) { - return -1; - } + if (WebRtc_InitBuffer(aec->far_time_buf) == -1) { + return -1; + } #endif - aec->system_delay = 0; + aec->system_delay = 0; - if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { - return -1; - } - if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { - return -1; - } - aec->delay_logging_enabled = 0; - memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); + if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { + return -1; + } + aec->delay_logging_enabled = 0; + memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); - aec->extended_filter_enabled = 0; - aec->num_partitions = kNormalNumPartitions; + aec->extended_filter_enabled = 0; + aec->num_partitions = kNormalNumPartitions; - // Default target suppression mode. - aec->nlp_mode = 1; + // Default target suppression mode. + aec->nlp_mode = 1; - // Sampling frequency multiplier - // SWB is processed as 160 frame size - if (aec->sampFreq == 32000) { - aec->mult = (short)aec->sampFreq / 16000; - } - else { - aec->mult = (short)aec->sampFreq / 8000; - } + // Sampling frequency multiplier + // SWB is processed as 160 frame size + if (aec->sampFreq == 32000) { + aec->mult = (short)aec->sampFreq / 16000; + } else { + aec->mult = (short)aec->sampFreq / 8000; + } - aec->farBufWritePos = 0; - aec->farBufReadPos = 0; + aec->farBufWritePos = 0; + aec->farBufReadPos = 0; - aec->inSamples = 0; - aec->outSamples = 0; - aec->knownDelay = 0; + aec->inSamples = 0; + aec->outSamples = 0; + aec->knownDelay = 0; - // Initialize buffers - memset(aec->dBuf, 0, sizeof(aec->dBuf)); - memset(aec->eBuf, 0, sizeof(aec->eBuf)); - // For H band - memset(aec->dBufH, 0, sizeof(aec->dBufH)); + // Initialize buffers + memset(aec->dBuf, 0, sizeof(aec->dBuf)); + memset(aec->eBuf, 0, sizeof(aec->eBuf)); + // For H band + memset(aec->dBufH, 0, sizeof(aec->dBufH)); - memset(aec->xPow, 0, sizeof(aec->xPow)); - memset(aec->dPow, 0, sizeof(aec->dPow)); - memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); - aec->noisePow = aec->dInitMinPow; - aec->noiseEstCtr = 0; + memset(aec->xPow, 0, sizeof(aec->xPow)); + memset(aec->dPow, 0, sizeof(aec->dPow)); + memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); + aec->noisePow = aec->dInitMinPow; + aec->noiseEstCtr = 0; - // Initial comfort noise power - for (i = 0; i < PART_LEN1; i++) { - aec->dMinPow[i] = 1.0e6f; - } + // Initial comfort noise power + for (i = 0; i < PART_LEN1; i++) { + aec->dMinPow[i] = 1.0e6f; + } - // Holds the last block written to - aec->xfBufBlockPos = 0; - // TODO: Investigate need for these initializations. Deleting them doesn't - // change the output at all and yields 0.4% overall speedup. - memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * - PART_LEN1); - memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * - PART_LEN1); - memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); - memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); - memset(aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * - PART_LEN1); - memset(aec->se, 0, sizeof(float) * PART_LEN1); + // Holds the last block written to + aec->xfBufBlockPos = 0; + // TODO: Investigate need for these initializations. Deleting them doesn't + // change the output at all and yields 0.4% overall speedup. + memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); + memset( + aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->se, 0, sizeof(float) * PART_LEN1); - // To prevent numerical instability in the first block. - for (i = 0; i < PART_LEN1; i++) { - aec->sd[i] = 1; - } - for (i = 0; i < PART_LEN1; i++) { - aec->sx[i] = 1; - } + // To prevent numerical instability in the first block. + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = 1; + } + for (i = 0; i < PART_LEN1; i++) { + aec->sx[i] = 1; + } - memset(aec->hNs, 0, sizeof(aec->hNs)); - memset(aec->outBuf, 0, sizeof(float) * PART_LEN); + memset(aec->hNs, 0, sizeof(aec->hNs)); + memset(aec->outBuf, 0, sizeof(float) * PART_LEN); - aec->hNlFbMin = 1; - aec->hNlFbLocalMin = 1; - aec->hNlXdAvgMin = 1; - aec->hNlNewMin = 0; - aec->hNlMinCtr = 0; - aec->overDrive = 2; - aec->overDriveSm = 2; - aec->delayIdx = 0; - aec->stNearState = 0; - aec->echoState = 0; - aec->divergeState = 0; + aec->hNlFbMin = 1; + aec->hNlFbLocalMin = 1; + aec->hNlXdAvgMin = 1; + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = 2; + aec->overDriveSm = 2; + aec->delayIdx = 0; + aec->stNearState = 0; + aec->echoState = 0; + aec->divergeState = 0; - aec->seed = 777; - aec->delayEstCtr = 0; + aec->seed = 777; + aec->delayEstCtr = 0; - // Metrics disabled by default - aec->metricsMode = 0; - InitMetrics(aec); + // Metrics disabled by default + aec->metricsMode = 0; + InitMetrics(aec); - // Assembly optimization - WebRtcAec_FilterFar = FilterFar; - WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; - WebRtcAec_FilterAdaptation = FilterAdaptation; - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; + // Assembly optimization + WebRtcAec_FilterFar = FilterFar; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; + WebRtcAec_FilterAdaptation = FilterAdaptation; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; #if defined(WEBRTC_ARCH_X86_FAMILY) - if (WebRtc_GetCPUInfo(kSSE2)) { - WebRtcAec_InitAec_SSE2(); - } + if (WebRtc_GetCPUInfo(kSSE2)) { + WebRtcAec_InitAec_SSE2(); + } #endif - aec_rdft_init(); + aec_rdft_init(); - return 0; + return 0; } void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { @@ -619,80 +604,80 @@ void WebRtcAec_ProcessFrame(AecCore* aec, int knownDelay, int16_t* out, int16_t* outH) { - int out_elements = 0; + int out_elements = 0; - // For each frame the process is as follows: - // 1) If the system_delay indicates on being too small for processing a - // frame we stuff the buffer with enough data for 10 ms. - // 2) Adjust the buffer to the system delay, by moving the read pointer. - // 3) TODO(bjornv): Investigate if we need to add this: - // If we can't move read pointer due to buffer size limitations we - // flush/stuff the buffer. - // 4) Process as many partitions as possible. - // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN - // samples. Even though we will have data left to process (we work with - // partitions) we consider updating a whole frame, since that's the - // amount of data we input and output in audio_processing. - // 6) Update the outputs. + // For each frame the process is as follows: + // 1) If the system_delay indicates on being too small for processing a + // frame we stuff the buffer with enough data for 10 ms. + // 2) Adjust the buffer to the system delay, by moving the read pointer. + // 3) TODO(bjornv): Investigate if we need to add this: + // If we can't move read pointer due to buffer size limitations we + // flush/stuff the buffer. + // 4) Process as many partitions as possible. + // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN + // samples. Even though we will have data left to process (we work with + // partitions) we consider updating a whole frame, since that's the + // amount of data we input and output in audio_processing. + // 6) Update the outputs. - // TODO(bjornv): Investigate how we should round the delay difference; right - // now we know that incoming |knownDelay| is underestimated when it's less - // than |aec->knownDelay|. We therefore, round (-32) in that direction. In - // the other direction, we don't have this situation, but might flush one - // partition too little. This can cause non-causality, which should be - // investigated. Maybe, allow for a non-symmetric rounding, like -16. - int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; - int moved_elements = 0; + // TODO(bjornv): Investigate how we should round the delay difference; right + // now we know that incoming |knownDelay| is underestimated when it's less + // than |aec->knownDelay|. We therefore, round (-32) in that direction. In + // the other direction, we don't have this situation, but might flush one + // partition too little. This can cause non-causality, which should be + // investigated. Maybe, allow for a non-symmetric rounding, like -16. + int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; + int moved_elements = 0; - // TODO(bjornv): Change the near-end buffer handling to be the same as for - // far-end, that is, with a near_pre_buf. - // Buffer the near-end frame. - WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN); - // For H band - if (aec->sampFreq == 32000) { - WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN); - } + // TODO(bjornv): Change the near-end buffer handling to be the same as for + // far-end, that is, with a near_pre_buf. + // Buffer the near-end frame. + WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN); + // For H band + if (aec->sampFreq == 32000) { + WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN); + } - // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we - // have enough far-end data for that by stuffing the buffer if the - // |system_delay| indicates others. - if (aec->system_delay < FRAME_LEN) { - // We don't have enough data so we rewind 10 ms. - WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); - } + // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we + // have enough far-end data for that by stuffing the buffer if the + // |system_delay| indicates others. + if (aec->system_delay < FRAME_LEN) { + // We don't have enough data so we rewind 10 ms. + WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); + } - // 2) Compensate for a possible change in the system delay. - WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements); - moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements); - aec->knownDelay -= moved_elements * PART_LEN; + // 2) Compensate for a possible change in the system delay. + WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements); + moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements); + aec->knownDelay -= moved_elements * PART_LEN; #ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); #endif - // 4) Process as many blocks as possible. - while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { - ProcessBlock(aec); - } + // 4) Process as many blocks as possible. + while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { + ProcessBlock(aec); + } - // 5) Update system delay with respect to the entire frame. - aec->system_delay -= FRAME_LEN; + // 5) Update system delay with respect to the entire frame. + aec->system_delay -= FRAME_LEN; - // 6) Update output frame. - // Stuff the out buffer if we have less than a frame to output. - // This should only happen for the first frame. - out_elements = (int) WebRtc_available_read(aec->outFrBuf); - if (out_elements < FRAME_LEN) { - WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); - if (aec->sampFreq == 32000) { - WebRtc_MoveReadPtr(aec->outFrBufH, out_elements - FRAME_LEN); - } - } - // Obtain an output frame. - WebRtc_ReadBuffer(aec->outFrBuf, NULL, out, FRAME_LEN); - // For H band. + // 6) Update output frame. + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + out_elements = (int)WebRtc_available_read(aec->outFrBuf); + if (out_elements < FRAME_LEN) { + WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); if (aec->sampFreq == 32000) { - WebRtc_ReadBuffer(aec->outFrBufH, NULL, outH, FRAME_LEN); + WebRtc_MoveReadPtr(aec->outFrBufH, out_elements - FRAME_LEN); } + } + // Obtain an output frame. + WebRtc_ReadBuffer(aec->outFrBuf, NULL, out, FRAME_LEN); + // For H band. + if (aec->sampFreq == 32000) { + WebRtc_ReadBuffer(aec->outFrBufH, NULL, outH, FRAME_LEN); + } } int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) { @@ -739,9 +724,9 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) { // Calculate the L1 norm, with median value as central moment. for (i = 0; i < kHistorySizeBlocks; i++) { - l1_norm += (float) (fabs(i - my_median) * self->delay_histogram[i]); + l1_norm += (float)(fabs(i - my_median) * self->delay_histogram[i]); } - *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock; + *std = (int)(l1_norm / (float)num_delay_values + 0.5f) * kMsPerBlock; // Reset histogram. memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); @@ -749,11 +734,11 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) { return 0; } -int WebRtcAec_echo_state(AecCore* self) { - return self->echoState; -} +int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } -void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle, +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, Stats* a_nlp) { assert(erl != NULL); assert(erle != NULL); @@ -764,12 +749,12 @@ void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle, } #ifdef WEBRTC_AEC_DEBUG_DUMP -void* WebRtcAec_far_time_buf(AecCore* self) { - return self->far_time_buf; -} +void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } #endif -void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode, +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, int delay_logging) { assert(nlp_mode >= 0 && nlp_mode < 3); self->nlp_mode = nlp_mode; @@ -792,9 +777,7 @@ int WebRtcAec_delay_correction_enabled(AecCore* self) { return self->extended_filter_enabled; } -int WebRtcAec_system_delay(AecCore* self) { - return self->system_delay; -} +int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { assert(delay >= 0); @@ -802,665 +785,666 @@ void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { } static void ProcessBlock(AecCore* aec) { - int i; - float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN]; - float scale; + int i; + float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN]; + float scale; - float fft[PART_LEN2]; - float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; - float df[2][PART_LEN1]; - float far_spectrum = 0.0f; - float near_spectrum = 0.0f; - float abs_far_spectrum[PART_LEN1]; - float abs_near_spectrum[PART_LEN1]; + float fft[PART_LEN2]; + float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; + float df[2][PART_LEN1]; + float far_spectrum = 0.0f; + float near_spectrum = 0.0f; + float abs_far_spectrum[PART_LEN1]; + float abs_near_spectrum[PART_LEN1]; - const float gPow[2] = {0.9f, 0.1f}; + const float gPow[2] = {0.9f, 0.1f}; - // Noise estimate constants. - const int noiseInitBlocks = 500 * aec->mult; - const float step = 0.1f; - const float ramp = 1.0002f; - const float gInitNoise[2] = {0.999f, 0.001f}; + // Noise estimate constants. + const int noiseInitBlocks = 500 * aec->mult; + const float step = 0.1f; + const float ramp = 1.0002f; + const float gInitNoise[2] = {0.999f, 0.001f}; - int16_t nearend[PART_LEN]; - int16_t* nearend_ptr = NULL; - int16_t output[PART_LEN]; - int16_t outputH[PART_LEN]; + int16_t nearend[PART_LEN]; + int16_t* nearend_ptr = NULL; + int16_t output[PART_LEN]; + int16_t outputH[PART_LEN]; - float* xf_ptr = NULL; + float* xf_ptr = NULL; - memset(dH, 0, sizeof(dH)); - if (aec->sampFreq == 32000) { - // Get the upper band first so we can reuse |nearend|. - WebRtc_ReadBuffer(aec->nearFrBufH, - (void**) &nearend_ptr, - nearend, - PART_LEN); - for (i = 0; i < PART_LEN; i++) { - dH[i] = (float) (nearend_ptr[i]); - } - memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN); - } - WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN); - - // ---------- Ooura fft ---------- - // Concatenate old and new nearend blocks. + memset(dH, 0, sizeof(dH)); + if (aec->sampFreq == 32000) { + // Get the upper band first so we can reuse |nearend|. + WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN); for (i = 0; i < PART_LEN; i++) { - d[i] = (float) (nearend_ptr[i]); + dH[i] = (float)(nearend_ptr[i]); } - memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN); + memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN); + } + WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); + + // ---------- Ooura fft ---------- + // Concatenate old and new nearend blocks. + for (i = 0; i < PART_LEN; i++) { + d[i] = (float)(nearend_ptr[i]); + } + memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN); #ifdef WEBRTC_AEC_DEBUG_DUMP - { - int16_t farend[PART_LEN]; - int16_t* farend_ptr = NULL; - WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1); - (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile); - (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile); - } + { + int16_t farend[PART_LEN]; + int16_t* farend_ptr = NULL; + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); + (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile); + (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile); + } #endif - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf) > 0); - WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1); + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_buf) > 0); + WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); - // Near fft - memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, df, 0); + // Near fft + memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, df, 0); - // Power smoothing + // Power smoothing + for (i = 0; i < PART_LEN1; i++) { + far_spectrum = (xf_ptr[i] * xf_ptr[i]) + + (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); + aec->xPow[i] = + gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; + // Calculate absolute spectra + abs_far_spectrum[i] = sqrtf(far_spectrum); + + near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; + aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; + // Calculate absolute spectra + abs_near_spectrum[i] = sqrtf(near_spectrum); + } + + // Estimate noise power. Wait until dPow is more stable. + if (aec->noiseEstCtr > 50) { for (i = 0; i < PART_LEN1; i++) { - far_spectrum = (xf_ptr[i] * xf_ptr[i]) + - (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); - aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * - far_spectrum; - // Calculate absolute spectra - abs_far_spectrum[i] = sqrtf(far_spectrum); - - near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; - aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; - // Calculate absolute spectra - abs_near_spectrum[i] = sqrtf(near_spectrum); - } - - // Estimate noise power. Wait until dPow is more stable. - if (aec->noiseEstCtr > 50) { - for (i = 0; i < PART_LEN1; i++) { - if (aec->dPow[i] < aec->dMinPow[i]) { - aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] - - aec->dPow[i])) * ramp; - } - else { - aec->dMinPow[i] *= ramp; - } - } - } - - // Smooth increasing noise power from zero at the start, - // to avoid a sudden burst of comfort noise. - if (aec->noiseEstCtr < noiseInitBlocks) { - aec->noiseEstCtr++; - for (i = 0; i < PART_LEN1; i++) { - if (aec->dMinPow[i] > aec->dInitMinPow[i]) { - aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + - gInitNoise[1] * aec->dMinPow[i]; - } - else { - aec->dInitMinPow[i] = aec->dMinPow[i]; - } - } - aec->noisePow = aec->dInitMinPow; - } - else { - aec->noisePow = aec->dMinPow; - } - - // Block wise delay estimation used for logging - if (aec->delay_logging_enabled) { - int delay_estimate = 0; - if (WebRtc_AddFarSpectrumFloat(aec->delay_estimator_farend, - abs_far_spectrum, PART_LEN1) == 0) { - delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator, - abs_near_spectrum, - PART_LEN1); - if (delay_estimate >= 0) { - // Update delay estimate buffer. - aec->delay_histogram[delay_estimate]++; - } + if (aec->dPow[i] < aec->dMinPow[i]) { + aec->dMinPow[i] = + (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; + } else { + aec->dMinPow[i] *= ramp; } } + } - // Update the xfBuf block position. - aec->xfBufBlockPos--; - if (aec->xfBufBlockPos == -1) { - aec->xfBufBlockPos = aec->num_partitions - 1; + // Smooth increasing noise power from zero at the start, + // to avoid a sudden burst of comfort noise. + if (aec->noiseEstCtr < noiseInitBlocks) { + aec->noiseEstCtr++; + for (i = 0; i < PART_LEN1; i++) { + if (aec->dMinPow[i] > aec->dInitMinPow[i]) { + aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + + gInitNoise[1] * aec->dMinPow[i]; + } else { + aec->dInitMinPow[i] = aec->dMinPow[i]; + } } + aec->noisePow = aec->dInitMinPow; + } else { + aec->noisePow = aec->dMinPow; + } - // Buffer xf - memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr, - sizeof(float) * PART_LEN1); - memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1], - sizeof(float) * PART_LEN1); - - memset(yf, 0, sizeof(yf)); - - // Filter far - WebRtcAec_FilterFar(aec, yf); - - // Inverse fft to obtain echo estimate and error. - fft[0] = yf[0][0]; - fft[1] = yf[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = yf[0][i]; - fft[2 * i + 1] = yf[1][i]; + // Block wise delay estimation used for logging + if (aec->delay_logging_enabled) { + int delay_estimate = 0; + if (WebRtc_AddFarSpectrumFloat( + aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) { + delay_estimate = WebRtc_DelayEstimatorProcessFloat( + aec->delay_estimator, abs_near_spectrum, PART_LEN1); + if (delay_estimate >= 0) { + // Update delay estimate buffer. + aec->delay_histogram[delay_estimate]++; + } } - aec_rdft_inverse_128(fft); + } - scale = 2.0f / PART_LEN2; - for (i = 0; i < PART_LEN; i++) { - y[i] = fft[PART_LEN + i] * scale; // fft scaling - } + // Update the xfBuf block position. + aec->xfBufBlockPos--; + if (aec->xfBufBlockPos == -1) { + aec->xfBufBlockPos = aec->num_partitions - 1; + } - for (i = 0; i < PART_LEN; i++) { - e[i] = d[i] - y[i]; - } + // Buffer xf + memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, + xf_ptr, + sizeof(float) * PART_LEN1); + memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, + &xf_ptr[PART_LEN1], + sizeof(float) * PART_LEN1); - // Error fft - memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); - memset(fft, 0, sizeof(float) * PART_LEN); - memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - // TODO(bjornv): Change to use TimeToFrequency(). - aec_rdft_forward_128(fft); + memset(yf, 0, sizeof(yf)); - ef[1][0] = 0; - ef[1][PART_LEN] = 0; - ef[0][0] = fft[0]; - ef[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - ef[0][i] = fft[2 * i]; - ef[1][i] = fft[2 * i + 1]; - } + // Filter far + WebRtcAec_FilterFar(aec, yf); - if (aec->metricsMode == 1) { - // Note that the first PART_LEN samples in fft (before transformation) are - // zero. Hence, the scaling by two in UpdateLevel() should not be - // performed. That scaling is taken care of in UpdateMetrics() instead. - UpdateLevel(&aec->linoutlevel, ef); - } + // Inverse fft to obtain echo estimate and error. + fft[0] = yf[0][0]; + fft[1] = yf[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = yf[0][i]; + fft[2 * i + 1] = yf[1][i]; + } + aec_rdft_inverse_128(fft); - // Scale error signal inversely with far power. - WebRtcAec_ScaleErrorSignal(aec, ef); - WebRtcAec_FilterAdaptation(aec, fft, ef); - NonLinearProcessing(aec, output, outputH); + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + y[i] = fft[PART_LEN + i] * scale; // fft scaling + } - if (aec->metricsMode == 1) { - // Update power levels and echo metrics - UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr); - UpdateLevel(&aec->nearlevel, df); - UpdateMetrics(aec); - } + for (i = 0; i < PART_LEN; i++) { + e[i] = d[i] - y[i]; + } - // Store the output block. - WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); - // For H band - if (aec->sampFreq == 32000) { - WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN); - } + // Error fft + memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); + memset(fft, 0, sizeof(float) * PART_LEN); + memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); + // TODO(bjornv): Change to use TimeToFrequency(). + aec_rdft_forward_128(fft); + + ef[1][0] = 0; + ef[1][PART_LEN] = 0; + ef[0][0] = fft[0]; + ef[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + ef[0][i] = fft[2 * i]; + ef[1][i] = fft[2 * i + 1]; + } + + if (aec->metricsMode == 1) { + // Note that the first PART_LEN samples in fft (before transformation) are + // zero. Hence, the scaling by two in UpdateLevel() should not be + // performed. That scaling is taken care of in UpdateMetrics() instead. + UpdateLevel(&aec->linoutlevel, ef); + } + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(aec, ef); + WebRtcAec_FilterAdaptation(aec, fft, ef); + NonLinearProcessing(aec, output, outputH); + + if (aec->metricsMode == 1) { + // Update power levels and echo metrics + UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); + UpdateLevel(&aec->nearlevel, df); + UpdateMetrics(aec); + } + + // Store the output block. + WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); + // For H band + if (aec->sampFreq == 32000) { + WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN); + } #ifdef WEBRTC_AEC_DEBUG_DUMP - { - int16_t eInt16[PART_LEN]; - for (i = 0; i < PART_LEN; i++) { - eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i], - WEBRTC_SPL_WORD16_MIN); - } - - (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile); - (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile); + { + int16_t eInt16[PART_LEN]; + for (i = 0; i < PART_LEN; i++) { + eInt16[i] = (int16_t)WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, e[i], WEBRTC_SPL_WORD16_MIN); } + + (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile); + (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile); + } #endif } -static void NonLinearProcessing(AecCore* aec, short *output, short *outputH) -{ - float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1]; - complex_t comfortNoiseHband[PART_LEN1]; - float fft[PART_LEN2]; - float scale, dtmp; - float nlpGainHband; - int i, j, pos; +static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) { + float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1]; + complex_t comfortNoiseHband[PART_LEN1]; + float fft[PART_LEN2]; + float scale, dtmp; + float nlpGainHband; + int i, j, pos; - // Coherence and non-linear filter - float cohde[PART_LEN1], cohxd[PART_LEN1]; - float hNlDeAvg, hNlXdAvg; - float hNl[PART_LEN1]; - float hNlPref[kPrefBandSize]; - float hNlFb = 0, hNlFbLow = 0; - const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; - const int prefBandSize = kPrefBandSize / aec->mult; - const int minPrefBand = 4 / aec->mult; + // Coherence and non-linear filter + float cohde[PART_LEN1], cohxd[PART_LEN1]; + float hNlDeAvg, hNlXdAvg; + float hNl[PART_LEN1]; + float hNlPref[kPrefBandSize]; + float hNlFb = 0, hNlFbLow = 0; + const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; + const int prefBandSize = kPrefBandSize / aec->mult; + const int minPrefBand = 4 / aec->mult; - // Near and error power sums - float sdSum = 0, seSum = 0; + // Near and error power sums + float sdSum = 0, seSum = 0; - // Power estimate smoothing coefficients. - const float *ptrGCoh = aec->extended_filter_enabled ? - kExtendedSmoothingCoefficients[aec->mult - 1] : - kNormalSmoothingCoefficients[aec->mult - 1]; - const float* min_overdrive = aec->extended_filter_enabled ? - kExtendedMinOverDrive : kNormalMinOverDrive; + // Power estimate smoothing coefficients. + const float* ptrGCoh = aec->extended_filter_enabled + ? kExtendedSmoothingCoefficients[aec->mult - 1] + : kNormalSmoothingCoefficients[aec->mult - 1]; + const float* min_overdrive = aec->extended_filter_enabled + ? kExtendedMinOverDrive + : kNormalMinOverDrive; - // Filter energy - float wfEnMax = 0, wfEn = 0; - const int delayEstInterval = 10 * aec->mult; + // Filter energy + float wfEnMax = 0, wfEn = 0; + const int delayEstInterval = 10 * aec->mult; - float* xfw_ptr = NULL; + float* xfw_ptr = NULL; - aec->delayEstCtr++; - if (aec->delayEstCtr == delayEstInterval) { - aec->delayEstCtr = 0; + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + } + + // initialize comfort noise for H band + memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); + nlpGainHband = (float)0.0; + dtmp = (float)0.0; + + // Measure energy in each filter partition to determine delay. + // TODO: Spread by computing one partition per block? + if (aec->delayEstCtr == 0) { + wfEnMax = 0; + aec->delayIdx = 0; + for (i = 0; i < aec->num_partitions; i++) { + pos = i * PART_LEN1; + wfEn = 0; + for (j = 0; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + aec->delayIdx = i; + } } + } - // initialize comfort noise for H band - memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); - nlpGainHband = (float)0.0; - dtmp = (float)0.0; + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_buf_windowed) > 0); + // NLP + WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); - // Measure energy in each filter partition to determine delay. - // TODO: Spread by computing one partition per block? - if (aec->delayEstCtr == 0) { - wfEnMax = 0; - aec->delayIdx = 0; - for (i = 0; i < aec->num_partitions; i++) { - pos = i * PART_LEN1; - wfEn = 0; - for (j = 0; j < PART_LEN1; j++) { - wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + - aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; - } + // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of + // |xfwBuf|. + // Buffer far. + memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); - if (wfEn > wfEnMax) { - wfEnMax = wfEn; - aec->delayIdx = i; - } - } - } + // Use delayed far. + memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw)); - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf_windowed) > 0); - // NLP - WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1); + // Windowed near fft + for (i = 0; i < PART_LEN; i++) { + fft[i] = aec->dBuf[i] * sqrtHanning[i]; + fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + aec_rdft_forward_128(fft); - // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of - // |xfwBuf|. - // Buffer far. - memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); + dfw[1][0] = 0; + dfw[1][PART_LEN] = 0; + dfw[0][0] = fft[0]; + dfw[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + dfw[0][i] = fft[2 * i]; + dfw[1][i] = fft[2 * i + 1]; + } - // Use delayed far. - memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw)); + // Windowed error fft + for (i = 0; i < PART_LEN; i++) { + fft[i] = aec->eBuf[i] * sqrtHanning[i]; + fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + aec_rdft_forward_128(fft); + efw[1][0] = 0; + efw[1][PART_LEN] = 0; + efw[0][0] = fft[0]; + efw[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + efw[0][i] = fft[2 * i]; + efw[1][i] = fft[2 * i + 1]; + } - // Windowed near fft - for (i = 0; i < PART_LEN; i++) { - fft[i] = aec->dBuf[i] * sqrtHanning[i]; - fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - aec_rdft_forward_128(fft); - - dfw[1][0] = 0; - dfw[1][PART_LEN] = 0; - dfw[0][0] = fft[0]; - dfw[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - dfw[0][i] = fft[2 * i]; - dfw[1][i] = fft[2 * i + 1]; - } - - // Windowed error fft - for (i = 0; i < PART_LEN; i++) { - fft[i] = aec->eBuf[i] * sqrtHanning[i]; - fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - aec_rdft_forward_128(fft); - efw[1][0] = 0; - efw[1][PART_LEN] = 0; - efw[0][0] = fft[0]; - efw[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - efw[0][i] = fft[2 * i]; - efw[1][i] = fft[2 * i + 1]; - } - - // Smoothed PSD - for (i = 0; i < PART_LEN1; i++) { - aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] * - (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); - aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] * - (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); - // We threshold here to protect against the ill-effects of a zero farend. - // The threshold is not arbitrarily chosen, but balances protection and - // adverse interaction with the algorithm's tuning. - // TODO: investigate further why this is so sensitive. - aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] * + // Smoothed PSD + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO: investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15); - aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] * - (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); - aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] * - (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); - aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] * - (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); - aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] * - (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); - sdSum += aec->sd[i]; - seSum += aec->se[i]; + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + if (aec->divergeState == 0) { + if (seSum > sdSum) { + aec->divergeState = 1; + } + } else { + if (seSum * 1.05f < sdSum) { + aec->divergeState = 0; + } + } + + if (aec->divergeState == 1) { + memcpy(efw, dfw, sizeof(efw)); + } + + // Reset if error is significantly larger than nearend (13 dB). + if (seSum > (19.95f * sdSum)) { + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + } + + // Subband coherence + for (i = 0; i < PART_LEN1; i++) { + cohde[i] = + (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / + (aec->sd[i] * aec->se[i] + 1e-10f); + cohxd[i] = + (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / + (aec->sx[i] * aec->sd[i] + 1e-10f); + } + + hNlXdAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlXdAvg += cohxd[i]; + } + hNlXdAvg /= prefBandSize; + hNlXdAvg = 1 - hNlXdAvg; + + hNlDeAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlDeAvg += cohde[i]; + } + hNlDeAvg /= prefBandSize; + + if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { + aec->hNlXdAvgMin = hNlXdAvg; + } + + if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { + aec->stNearState = 1; + } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { + aec->stNearState = 0; + } + + if (aec->hNlXdAvgMin == 1) { + aec->echoState = 0; + aec->overDrive = min_overdrive[aec->nlp_mode]; + + if (aec->stNearState == 1) { + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = 1 - cohxd[i]; + } + hNlFb = hNlXdAvg; + hNlFbLow = hNlXdAvg; + } + } else { + + if (aec->stNearState == 1) { + aec->echoState = 0; + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + aec->echoState = 1; + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); + } + + // Select an order statistic from the preferred bands. + // TODO: Using quicksort now, but a selection algorithm may be preferred. + memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); + qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); + hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; + hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; + } + } + + // Track the local filter minimum to determine suppression overdrive. + if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { + aec->hNlFbLocalMin = hNlFbLow; + aec->hNlFbMin = hNlFbLow; + aec->hNlNewMin = 1; + aec->hNlMinCtr = 0; + } + aec->hNlFbLocalMin = + WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); + aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); + + if (aec->hNlNewMin == 1) { + aec->hNlMinCtr++; + } + if (aec->hNlMinCtr == 2) { + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = + WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / + ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), + min_overdrive[aec->nlp_mode]); + } + + // Smooth the overdrive. + if (aec->overDrive < aec->overDriveSm) { + aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; + } else { + aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; + } + + WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); + + // Add comfort noise. + ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); + + // TODO(bjornv): Investigate how to take the windowing below into account if + // needed. + if (aec->metricsMode == 1) { + // Note that we have a scaling by two in the time domain |eBuf|. + // In addition the time domain signal is windowed before transformation, + // losing half the energy on the average. We take care of the first + // scaling only in UpdateMetrics(). + UpdateLevel(&aec->nlpoutlevel, efw); + } + // Inverse error fft. + fft[0] = efw[0][0]; + fft[1] = efw[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = efw[0][i]; + // Sign change required by Ooura fft. + fft[2 * i + 1] = -efw[1][i]; + } + aec_rdft_inverse_128(fft); + + // Overlap and add to obtain output. + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + fft[i] *= scale; // fft scaling + fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i]; + + // Saturation protection + output[i] = (short)WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); + + fft[PART_LEN + i] *= scale; // fft scaling + aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + + // For H band + if (aec->sampFreq == 32000) { + + // H band gain + // average nlp over low band: average over second half of freq spectrum + // (4->8khz) + GetHighbandGain(hNl, &nlpGainHband); + + // Inverse comfort_noise + if (flagHbandCn == 1) { + fft[0] = comfortNoiseHband[0][0]; + fft[1] = comfortNoiseHband[PART_LEN][0]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = comfortNoiseHband[i][0]; + fft[2 * i + 1] = comfortNoiseHband[i][1]; + } + aec_rdft_inverse_128(fft); + scale = 2.0f / PART_LEN2; } - // Divergent filter safeguard. - if (aec->divergeState == 0) { - if (seSum > sdSum) { - aec->divergeState = 1; - } - } - else { - if (seSum * 1.05f < sdSum) { - aec->divergeState = 0; - } - } - - if (aec->divergeState == 1) { - memcpy(efw, dfw, sizeof(efw)); - } - - // Reset if error is significantly larger than nearend (13 dB). - if (seSum > (19.95f * sdSum)) { - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); - } - - // Subband coherence - for (i = 0; i < PART_LEN1; i++) { - cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / - (aec->sd[i] * aec->se[i] + 1e-10f); - cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / - (aec->sx[i] * aec->sd[i] + 1e-10f); - } - - hNlXdAvg = 0; - for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { - hNlXdAvg += cohxd[i]; - } - hNlXdAvg /= prefBandSize; - hNlXdAvg = 1 - hNlXdAvg; - - hNlDeAvg = 0; - for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { - hNlDeAvg += cohde[i]; - } - hNlDeAvg /= prefBandSize; - - if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { - aec->hNlXdAvgMin = hNlXdAvg; - } - - if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { - aec->stNearState = 1; - } - else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { - aec->stNearState = 0; - } - - if (aec->hNlXdAvgMin == 1) { - aec->echoState = 0; - aec->overDrive = min_overdrive[aec->nlp_mode]; - - if (aec->stNearState == 1) { - memcpy(hNl, cohde, sizeof(hNl)); - hNlFb = hNlDeAvg; - hNlFbLow = hNlDeAvg; - } - else { - for (i = 0; i < PART_LEN1; i++) { - hNl[i] = 1 - cohxd[i]; - } - hNlFb = hNlXdAvg; - hNlFbLow = hNlXdAvg; - } - } - else { - - if (aec->stNearState == 1) { - aec->echoState = 0; - memcpy(hNl, cohde, sizeof(hNl)); - hNlFb = hNlDeAvg; - hNlFbLow = hNlDeAvg; - } - else { - aec->echoState = 1; - for (i = 0; i < PART_LEN1; i++) { - hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); - } - - // Select an order statistic from the preferred bands. - // TODO: Using quicksort now, but a selection algorithm may be preferred. - memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); - qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); - hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; - hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; - } - } - - // Track the local filter minimum to determine suppression overdrive. - if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { - aec->hNlFbLocalMin = hNlFbLow; - aec->hNlFbMin = hNlFbLow; - aec->hNlNewMin = 1; - aec->hNlMinCtr = 0; - } - aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); - aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); - - if (aec->hNlNewMin == 1) { - aec->hNlMinCtr++; - } - if (aec->hNlMinCtr == 2) { - aec->hNlNewMin = 0; - aec->hNlMinCtr = 0; - aec->overDrive = WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / - ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), - min_overdrive[aec->nlp_mode]); - } - - // Smooth the overdrive. - if (aec->overDrive < aec->overDriveSm) { - aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; - } - else { - aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; - } - - WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); - - // Add comfort noise. - ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); - - // TODO(bjornv): Investigate how to take the windowing below into account if - // needed. - if (aec->metricsMode == 1) { - // Note that we have a scaling by two in the time domain |eBuf|. - // In addition the time domain signal is windowed before transformation, - // losing half the energy on the average. We take care of the first - // scaling only in UpdateMetrics(). - UpdateLevel(&aec->nlpoutlevel, efw); - } - // Inverse error fft. - fft[0] = efw[0][0]; - fft[1] = efw[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2*i] = efw[0][i]; - // Sign change required by Ooura fft. - fft[2*i + 1] = -efw[1][i]; - } - aec_rdft_inverse_128(fft); - - // Overlap and add to obtain output. - scale = 2.0f / PART_LEN2; + // compute gain factor for (i = 0; i < PART_LEN; i++) { - fft[i] *= scale; // fft scaling - fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i]; + dtmp = (float)aec->dBufH[i]; + dtmp = (float)dtmp * nlpGainHband; // for variable gain - // Saturation protection - output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i], - WEBRTC_SPL_WORD16_MIN); + // add some comfort noise where Hband is attenuated + if (flagHbandCn == 1) { + fft[i] *= scale; // fft scaling + dtmp += cnScaleHband * fft[i]; + } - fft[PART_LEN + i] *= scale; // fft scaling - aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + // Saturation protection + outputH[i] = (short)WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); } + } - // For H band - if (aec->sampFreq == 32000) { + // Copy the current block to the old position. + memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); + memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); - // H band gain - // average nlp over low band: average over second half of freq spectrum - // (4->8khz) - GetHighbandGain(hNl, &nlpGainHband); + // Copy the current block to the old position for H band + if (aec->sampFreq == 32000) { + memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN); + } - // Inverse comfort_noise - if (flagHbandCn == 1) { - fft[0] = comfortNoiseHband[0][0]; - fft[1] = comfortNoiseHband[PART_LEN][0]; - for (i = 1; i < PART_LEN; i++) { - fft[2*i] = comfortNoiseHband[i][0]; - fft[2*i + 1] = comfortNoiseHband[i][1]; - } - aec_rdft_inverse_128(fft); - scale = 2.0f / PART_LEN2; - } - - // compute gain factor - for (i = 0; i < PART_LEN; i++) { - dtmp = (float)aec->dBufH[i]; - dtmp = (float)dtmp * nlpGainHband; // for variable gain - - // add some comfort noise where Hband is attenuated - if (flagHbandCn == 1) { - fft[i] *= scale; // fft scaling - dtmp += cnScaleHband * fft[i]; - } - - // Saturation protection - outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp, - WEBRTC_SPL_WORD16_MIN); - } - } - - // Copy the current block to the old position. - memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); - memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); - - // Copy the current block to the old position for H band - if (aec->sampFreq == 32000) { - memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN); - } - - memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) - - sizeof(complex_t) * PART_LEN1); + memmove(aec->xfwBuf + PART_LEN1, + aec->xfwBuf, + sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1); } -static void GetHighbandGain(const float *lambda, float *nlpGainHband) -{ - int i; +static void GetHighbandGain(const float* lambda, float* nlpGainHband) { + int i; - nlpGainHband[0] = (float)0.0; - for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { - nlpGainHband[0] += lambda[i]; - } - nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); + nlpGainHband[0] = (float)0.0; + for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { + nlpGainHband[0] += lambda[i]; + } + nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); } -static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1], - complex_t *comfortNoiseHband, const float *noisePow, const float *lambda) -{ - int i, num; - float rand[PART_LEN]; - float noise, noiseAvg, tmp, tmpAvg; - int16_t randW16[PART_LEN]; - complex_t u[PART_LEN1]; +static void ComfortNoise(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda) { + int i, num; + float rand[PART_LEN]; + float noise, noiseAvg, tmp, tmpAvg; + int16_t randW16[PART_LEN]; + complex_t u[PART_LEN1]; - const float pi2 = 6.28318530717959f; + const float pi2 = 6.28318530717959f; - // Generate a uniform random array on [0 1] - WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); - for (i = 0; i < PART_LEN; i++) { - rand[i] = ((float)randW16[i]) / 32768; + // Generate a uniform random array on [0 1] + WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); + for (i = 0; i < PART_LEN; i++) { + rand[i] = ((float)randW16[i]) / 32768; + } + + // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + noise = sqrtf(noisePow[i]); + u[i][0] = noise * cosf(tmp); + u[i][1] = -noise * sinf(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // This is the proper weighting to match the background noise power + tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + // tmp = 1 - lambda[i]; + efw[0][i] += tmp * u[i][0]; + efw[1][i] += tmp * u[i][1]; + } + + // For H band comfort noise + // TODO: don't compute noise and "tmp" twice. Use the previous results. + noiseAvg = 0.0; + tmpAvg = 0.0; + num = 0; + if (aec->sampFreq == 32000 && flagHbandCn == 1) { + + // average noise scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + noiseAvg += sqrtf(noisePow[i]); } + noiseAvg /= (float)num; + // average nlp scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + num = 0; + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + } + tmpAvg /= (float)num; + + // Use average noise for H band + // TODO: we should probably have a new random vector here. // Reject LF noise u[0][0] = 0; u[0][1] = 0; for (i = 1; i < PART_LEN1; i++) { - tmp = pi2 * rand[i - 1]; + tmp = pi2 * rand[i - 1]; - noise = sqrtf(noisePow[i]); - u[i][0] = noise * cosf(tmp); - u[i][1] = -noise * sinf(tmp); + // Use average noise for H band + u[i][0] = noiseAvg * (float)cos(tmp); + u[i][1] = -noiseAvg * (float)sin(tmp); } u[PART_LEN][1] = 0; for (i = 0; i < PART_LEN1; i++) { - // This is the proper weighting to match the background noise power - tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); - //tmp = 1 - lambda[i]; - efw[0][i] += tmp * u[i][0]; - efw[1][i] += tmp * u[i][1]; - } - - // For H band comfort noise - // TODO: don't compute noise and "tmp" twice. Use the previous results. - noiseAvg = 0.0; - tmpAvg = 0.0; - num = 0; - if (aec->sampFreq == 32000 && flagHbandCn == 1) { - - // average noise scale - // average over second half of freq spectrum (i.e., 4->8khz) - // TODO: we shouldn't need num. We know how many elements we're summing. - for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { - num++; - noiseAvg += sqrtf(noisePow[i]); - } - noiseAvg /= (float)num; - - // average nlp scale - // average over second half of freq spectrum (i.e., 4->8khz) - // TODO: we shouldn't need num. We know how many elements we're summing. - num = 0; - for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { - num++; - tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); - } - tmpAvg /= (float)num; - - // Use average noise for H band - // TODO: we should probably have a new random vector here. - // Reject LF noise - u[0][0] = 0; - u[0][1] = 0; - for (i = 1; i < PART_LEN1; i++) { - tmp = pi2 * rand[i - 1]; - - // Use average noise for H band - u[i][0] = noiseAvg * (float)cos(tmp); - u[i][1] = -noiseAvg * (float)sin(tmp); - } - u[PART_LEN][1] = 0; - - for (i = 0; i < PART_LEN1; i++) { - // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * u[i][1]; - } + // Use average NLP weight for H band + comfortNoiseHband[i][0] = tmpAvg * u[i][0]; + comfortNoiseHband[i][1] = tmpAvg * u[i][1]; } + } } static void InitLevel(PowerLevel* level) { @@ -1559,130 +1543,132 @@ static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { } } -static void UpdateMetrics(AecCore* aec) -{ - float dtmp, dtmp2; +static void UpdateMetrics(AecCore* aec) { + float dtmp, dtmp2; - const float actThresholdNoisy = 8.0f; - const float actThresholdClean = 40.0f; - const float safety = 0.99995f; - const float noisyPower = 300000.0f; + const float actThresholdNoisy = 8.0f; + const float actThresholdClean = 40.0f; + const float safety = 0.99995f; + const float noisyPower = 300000.0f; - float actThreshold; - float echo, suppressedEcho; + float actThreshold; + float echo, suppressedEcho; - if (aec->echoState) { // Check if echo is likely present - aec->stateCounter++; + if (aec->echoState) { // Check if echo is likely present + aec->stateCounter++; + } + + if (aec->farlevel.frcounter == 0) { + + if (aec->farlevel.minlevel < noisyPower) { + actThreshold = actThresholdClean; + } else { + actThreshold = actThresholdNoisy; } - if (aec->farlevel.frcounter == 0) { + if ((aec->stateCounter > (0.5f * countLen * subCountLen)) && + (aec->farlevel.sfrcounter == 0) - if (aec->farlevel.minlevel < noisyPower) { - actThreshold = actThresholdClean; - } - else { - actThreshold = actThresholdNoisy; - } + // Estimate in active far-end segments only + && + (aec->farlevel.averagelevel > + (actThreshold * aec->farlevel.minlevel))) { - if ((aec->stateCounter > (0.5f * countLen * subCountLen)) - && (aec->farlevel.sfrcounter == 0) + // Subtract noise power + echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; - // Estimate in active far-end segments only - && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel)) - ) { + // ERL + dtmp = 10 * (float)log10(aec->farlevel.averagelevel / + aec->nearlevel.averagelevel + + 1e-10f); + dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); - // Subtract noise power - echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; + aec->erl.instant = dtmp; + if (dtmp > aec->erl.max) { + aec->erl.max = dtmp; + } - // ERL - dtmp = 10 * (float)log10(aec->farlevel.averagelevel / - aec->nearlevel.averagelevel + 1e-10f); - dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); + if (dtmp < aec->erl.min) { + aec->erl.min = dtmp; + } - aec->erl.instant = dtmp; - if (dtmp > aec->erl.max) { - aec->erl.max = dtmp; - } + aec->erl.counter++; + aec->erl.sum += dtmp; + aec->erl.average = aec->erl.sum / aec->erl.counter; - if (dtmp < aec->erl.min) { - aec->erl.min = dtmp; - } + // Upper mean + if (dtmp > aec->erl.average) { + aec->erl.hicounter++; + aec->erl.hisum += dtmp; + aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; + } - aec->erl.counter++; - aec->erl.sum += dtmp; - aec->erl.average = aec->erl.sum / aec->erl.counter; + // A_NLP + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + (2 * aec->linoutlevel.averagelevel) + + 1e-10f); - // Upper mean - if (dtmp > aec->erl.average) { - aec->erl.hicounter++; - aec->erl.hisum += dtmp; - aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; - } + // subtract noise power + suppressedEcho = 2 * (aec->linoutlevel.averagelevel - + safety * aec->linoutlevel.minlevel); - // A_NLP - dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / - (2 * aec->linoutlevel.averagelevel) + 1e-10f); + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); - // subtract noise power - suppressedEcho = 2 * (aec->linoutlevel.averagelevel - - safety * aec->linoutlevel.minlevel); + aec->aNlp.instant = dtmp2; + if (dtmp > aec->aNlp.max) { + aec->aNlp.max = dtmp; + } - dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + if (dtmp < aec->aNlp.min) { + aec->aNlp.min = dtmp; + } - aec->aNlp.instant = dtmp2; - if (dtmp > aec->aNlp.max) { - aec->aNlp.max = dtmp; - } + aec->aNlp.counter++; + aec->aNlp.sum += dtmp; + aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; - if (dtmp < aec->aNlp.min) { - aec->aNlp.min = dtmp; - } + // Upper mean + if (dtmp > aec->aNlp.average) { + aec->aNlp.hicounter++; + aec->aNlp.hisum += dtmp; + aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; + } - aec->aNlp.counter++; - aec->aNlp.sum += dtmp; - aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; + // ERLE - // Upper mean - if (dtmp > aec->aNlp.average) { - aec->aNlp.hicounter++; - aec->aNlp.hisum += dtmp; - aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; - } + // subtract noise power + suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - + safety * aec->nlpoutlevel.minlevel); - // ERLE + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + (2 * aec->nlpoutlevel.averagelevel) + + 1e-10f); + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); - // subtract noise power - suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - - safety * aec->nlpoutlevel.minlevel); + dtmp = dtmp2; + aec->erle.instant = dtmp; + if (dtmp > aec->erle.max) { + aec->erle.max = dtmp; + } - dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / - (2 * aec->nlpoutlevel.averagelevel) + 1e-10f); - dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + if (dtmp < aec->erle.min) { + aec->erle.min = dtmp; + } - dtmp = dtmp2; - aec->erle.instant = dtmp; - if (dtmp > aec->erle.max) { - aec->erle.max = dtmp; - } + aec->erle.counter++; + aec->erle.sum += dtmp; + aec->erle.average = aec->erle.sum / aec->erle.counter; - if (dtmp < aec->erle.min) { - aec->erle.min = dtmp; - } - - aec->erle.counter++; - aec->erle.sum += dtmp; - aec->erle.average = aec->erle.sum / aec->erle.counter; - - // Upper mean - if (dtmp > aec->erle.average) { - aec->erle.hicounter++; - aec->erle.hisum += dtmp; - aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; - } - } - - aec->stateCounter = 0; + // Upper mean + if (dtmp > aec->erle.average) { + aec->erle.hicounter++; + aec->erle.hisum += dtmp; + aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; + } } + + aec->stateCounter = 0; + } } static void TimeToFrequency(float time_data[PART_LEN2], @@ -1709,4 +1695,3 @@ static void TimeToFrequency(float time_data[PART_LEN2], freq_data[1][i] = time_data[2 * i + 1]; } } - diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h index f83c37c8c..d3c6d7e2b 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.h +++ b/webrtc/modules/audio_processing/aec/aec_core.h @@ -18,14 +18,20 @@ #include "webrtc/typedefs.h" #define FRAME_LEN 80 -#define PART_LEN 64 // Length of partition +#define PART_LEN 64 // Length of partition #define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients #define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 // Delay estimator constants, used for logging. -enum { kMaxDelayBlocks = 60 }; -enum { kLookaheadBlocks = 15 }; -enum { kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks }; +enum { + kMaxDelayBlocks = 60 +}; +enum { + kLookaheadBlocks = 15 +}; +enum { + kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks +}; typedef float complex_t[2]; // For performance reasons, some arrays of complex numbers are replaced by twice @@ -37,7 +43,9 @@ typedef float complex_t[2]; // compile time. // Metrics -enum { kOffsetLevel = -100 }; +enum { + kOffsetLevel = -100 +}; typedef struct Stats { float instant; @@ -79,14 +87,18 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std); int WebRtcAec_echo_state(AecCore* self); // Gets statistics of the echo metrics ERL, ERLE, A_NLP. -void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle, +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, Stats* a_nlp); #ifdef WEBRTC_AEC_DEBUG_DUMP void* WebRtcAec_far_time_buf(AecCore* self); #endif // Sets local configuration modes. -void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode, +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, int delay_logging); // We now interpret delay correction to mean an extended filter length feature. diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 4480101f4..193369382 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -21,7 +21,9 @@ // Number of partitions for the extended filter mode. The first one is an enum // to be used in array declarations, as it represents the maximum filter length. -enum { kExtendedNumPartitions = 32 }; +enum { + kExtendedNumPartitions = 32 +}; static const int kNormalNumPartitions = 12; // Extended filter adaptation parameters. @@ -61,7 +63,7 @@ struct AecCore { float dPow[PART_LEN1]; float dMinPow[PART_LEN1]; float dInitMinPow[PART_LEN1]; - float *noisePow; + float* noisePow; float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft @@ -93,7 +95,7 @@ struct AecCore { int sampFreq; uint32_t seed; - float normal_mu; // stepsize + float normal_mu; // stepsize float normal_error_threshold; // error threshold int noiseEstCtr; @@ -111,8 +113,8 @@ struct AecCore { Stats rerl; // Quantities to control H band scaling for SWB input - int freq_avg_ic; // initial bin for averaging nlp gain - int flag_Hband_cn; // for comfort noise + int freq_avg_ic; // initial bin for averaging nlp gain + int flag_Hband_cn; // for comfort noise float cn_scale_Hband; // scale for comfort noise in H band int delay_histogram[kHistorySizeBlocks]; @@ -127,24 +129,26 @@ struct AecCore { #ifdef WEBRTC_AEC_DEBUG_DUMP RingBuffer* far_time_buf; - FILE *farFile; - FILE *nearFile; - FILE *outFile; - FILE *outLinearFile; + FILE* farFile; + FILE* nearFile; + FILE* outFile; + FILE* outLinearFile; #endif }; typedef void (*WebRtcAec_FilterFar_t)(AecCore* aec, float yf[2][PART_LEN1]); extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar; -typedef void (*WebRtcAec_ScaleErrorSignal_t) - (AecCore* aec, float ef[2][PART_LEN1]); +typedef void (*WebRtcAec_ScaleErrorSignal_t)(AecCore* aec, + float ef[2][PART_LEN1]); extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; -typedef void (*WebRtcAec_FilterAdaptation_t) - (AecCore* aec, float *fft, float ef[2][PART_LEN1]); +typedef void (*WebRtcAec_FilterAdaptation_t)(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]); extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; -typedef void (*WebRtcAec_OverdriveAndSuppress_t) - (AecCore* aec, float hNl[PART_LEN1], const float hNlFb, - float efw[2][PART_LEN1]); +typedef void (*WebRtcAec_OverdriveAndSuppress_t)(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]); extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress; #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index 61602a823..8d4afdbaa 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -21,18 +21,15 @@ #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" #include "webrtc/modules/audio_processing/aec/aec_rdft.h" -__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) -{ +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { return aRe * bRe - aIm * bIm; } -__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) -{ +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) -{ +static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { int i; const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { @@ -41,7 +38,7 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) int pos = i * PART_LEN1; // Check for wrap if (i + aec->xfBufBlockPos >= num_partitions) { - xPos -= num_partitions*(PART_LEN1); + xPos -= num_partitions * (PART_LEN1); } // vectorized code (four at once) @@ -65,22 +62,25 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], - aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], - aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); } } } -static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) -{ +static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { const __m128 k1e_10f = _mm_set1_ps(1e-10f); - const __m128 kMu = aec->extended_filter_enabled ? - _mm_set1_ps(kExtendedMu) : _mm_set1_ps(aec->normal_mu); - const __m128 kThresh = aec->extended_filter_enabled ? - _mm_set1_ps(kExtendedErrorThreshold) : - _mm_set1_ps(aec->normal_error_threshold); + const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) + : _mm_set1_ps(aec->normal_mu); + const __m128 kThresh = aec->extended_filter_enabled + ? _mm_set1_ps(kExtendedErrorThreshold) + : _mm_set1_ps(aec->normal_error_threshold); int i; // vectorized code (four at once) @@ -115,12 +115,13 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) } // scalar code for the remaining items. { - const float mu = aec->extended_filter_enabled ? - kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled ? - kExtendedErrorThreshold : aec->normal_error_threshold; + const float mu = + aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? kExtendedErrorThreshold + : aec->normal_error_threshold; for (; i < (PART_LEN1); i++) { - float abs_ef; + float abs_ef; ef[0][i] /= (aec->xPow[i] + 1e-10f); ef[1][i] /= (aec->xPow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); @@ -138,11 +139,13 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) } } -static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1]) { +static void FilterAdaptationSSE2(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { int i, j; const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); int pos = i * PART_LEN1; // Check for wrap if (i + aec->xfBufBlockPos >= num_partitions) { @@ -150,7 +153,7 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1 } // Process the whole array... - for (j = 0; j < PART_LEN; j+= 4) { + for (j = 0; j < PART_LEN; j += 4) { // Load xfBuf and ef. const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); @@ -169,22 +172,23 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1 const __m128 g = _mm_unpacklo_ps(e, f); const __m128 h = _mm_unpackhi_ps(e, f); // Store - _mm_storeu_ps(&fft[2*j + 0], g); - _mm_storeu_ps(&fft[2*j + 4], h); + _mm_storeu_ps(&fft[2 * j + 0], g); + _mm_storeu_ps(&fft[2 * j + 4], h); } // ... and fixup the first imaginary entry. fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], ef[1][PART_LEN]); + ef[0][PART_LEN], + ef[1][PART_LEN]); aec_rdft_inverse_128(fft); - memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling { float scale = 2.0f / PART_LEN2; const __m128 scale_ps = _mm_load_ps1(&scale); - for (j = 0; j < PART_LEN; j+=4) { + for (j = 0; j < PART_LEN; j += 4) { const __m128 fft_ps = _mm_loadu_ps(&fft[j]); const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); _mm_storeu_ps(&fft[j], fft_scale); @@ -195,13 +199,15 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1 { float wt1 = aec->wfBuf[1][pos]; aec->wfBuf[0][pos + PART_LEN] += fft[1]; - for (j = 0; j < PART_LEN; j+= 4) { + for (j = 0; j < PART_LEN; j += 4) { __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); - const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0)); - const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1)); + const __m128 fft_re = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 fft_im = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); @@ -212,8 +218,7 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1 } } -static __m128 mm_pow_ps(__m128 a, __m128 b) -{ +static __m128 mm_pow_ps(__m128 a, __m128 b) { // a^b = exp2(b * log2(a)) // exp2(x) and log2(x) are calculated using polynomial approximations. __m128 log2_a, b_log2_a, a_exp_b; @@ -238,55 +243,55 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) // compensate the fact that the exponent has been shifted in the top/ // fractional part and finally getting rid of the implicit leading one // from the mantissa by substracting it out. - static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = - {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; - static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = - {0x43800000, 0x43800000, 0x43800000, 0x43800000}; - static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = - {0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; + static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = { + 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; + static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = { + 0x43800000, 0x43800000, 0x43800000, 0x43800000}; + static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = { + 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; static const int shift_exponent_into_top_mantissa = 8; - const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask)); - const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n), - shift_exponent_into_top_mantissa)); - const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent)); - const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one)); + const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask)); + const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32( + _mm_castps_si128(two_n), shift_exponent_into_top_mantissa)); + const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent)); + const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one)); // Compute y. - static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = - {0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; - static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = - {0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; - const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask)); - const __m128 y = _mm_or_ps( - mantissa, *((__m128 *)zero_biased_exponent_is_one)); + static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = { + 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; + static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = { + 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; + const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask)); + const __m128 y = + _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one)); // Approximate log2(y) ~= (y - 1) * pol5(y). // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 - static const ALIGN16_BEG float ALIGN16_END C5[4] = - {-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; - static const ALIGN16_BEG float ALIGN16_END C4[4] = - {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; - static const ALIGN16_BEG float ALIGN16_END C3[4] = - {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; - static const ALIGN16_BEG float ALIGN16_END C2[4] = - {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; - static const ALIGN16_BEG float ALIGN16_END C1[4] = - {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; - static const ALIGN16_BEG float ALIGN16_END C0[4] = - {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; - const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5)); - const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4)); + static const ALIGN16_BEG float ALIGN16_END C5[4] = { + -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; + static const ALIGN16_BEG float ALIGN16_END + C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; + static const ALIGN16_BEG float ALIGN16_END + C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; + static const ALIGN16_BEG float ALIGN16_END + C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; + static const ALIGN16_BEG float ALIGN16_END + C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; + static const ALIGN16_BEG float ALIGN16_END + C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; + const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5)); + const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4)); const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y); - const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3)); + const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3)); const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y); - const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2)); + const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2)); const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y); - const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1)); + const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1)); const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y); - const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0)); - const __m128 y_minus_one = _mm_sub_ps( - y, *((__m128 *)zero_biased_exponent_is_one)); - const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y); + const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0)); + const __m128 y_minus_one = + _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one)); + const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y); // Combine parts. log2_a = _mm_add_ps(n, log2_y); @@ -310,38 +315,38 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) // maximum relative error of 0.17%. // To avoid over/underflow, we reduce the range of input to ]-127, 129]. - static const ALIGN16_BEG float max_input[4] ALIGN16_END = - {129.f, 129.f, 129.f, 129.f}; - static const ALIGN16_BEG float min_input[4] ALIGN16_END = - {-126.99999f, -126.99999f, -126.99999f, -126.99999f}; - const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input)); - const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input)); + static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f, + 129.f, 129.f}; + static const ALIGN16_BEG float min_input[4] ALIGN16_END = { + -126.99999f, -126.99999f, -126.99999f, -126.99999f}; + const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input)); + const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input)); // Compute n. - static const ALIGN16_BEG float half[4] ALIGN16_END = - {0.5f, 0.5f, 0.5f, 0.5f}; - const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half)); + static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, + 0.5f, 0.5f}; + const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half)); const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half); // Compute 2^n. - static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = - {127, 127, 127, 127}; + static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = { + 127, 127, 127, 127}; static const int float_exponent_shift = 23; - const __m128i two_n_exponent = _mm_add_epi32( - x_minus_half_floor, *((__m128i *)float_exponent_bias)); - const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32( - two_n_exponent, float_exponent_shift)); + const __m128i two_n_exponent = + _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias)); + const __m128 two_n = + _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift)); // Compute y. const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor)); // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. - static const ALIGN16_BEG float C2[4] ALIGN16_END = - {3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; - static const ALIGN16_BEG float C1[4] ALIGN16_END = - {6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; - static const ALIGN16_BEG float C0[4] ALIGN16_END = - {1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f}; - const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2)); - const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1)); + static const ALIGN16_BEG float C2[4] ALIGN16_END = { + 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; + static const ALIGN16_BEG float C1[4] ALIGN16_END = { + 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; + static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f, + 1.0017247f, 1.0017247f}; + const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2)); + const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1)); const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); - const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0)); + const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0)); // Combine parts. a_exp_b = _mm_mul_ps(exp2_y, two_n); @@ -352,7 +357,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) extern const float WebRtcAec_weightCurve[65]; extern const float WebRtcAec_overDriveCurve[65]; -static void OverdriveAndSuppressSSE2(AecCore* aec, float hNl[PART_LEN1], +static void OverdriveAndSuppressSSE2(AecCore* aec, + float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]) { int i; @@ -361,26 +367,25 @@ static void OverdriveAndSuppressSSE2(AecCore* aec, float hNl[PART_LEN1], const __m128 vec_minus_one = _mm_set1_ps(-1.0f); const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); // vectorized code (four at once) - for (i = 0; i + 3 < PART_LEN1; i+=4) { + for (i = 0; i + 3 < PART_LEN1; i += 4) { // Weight subbands __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); - const __m128 vec_weightCurve_hNlFb = _mm_mul_ps( - vec_weightCurve, vec_hNlFb); + const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); - const __m128 vec_one_weightCurve_hNl = _mm_mul_ps( - vec_one_weightCurve, vec_hNl); + const __m128 vec_one_weightCurve_hNl = + _mm_mul_ps(vec_one_weightCurve, vec_hNl); const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); const __m128 vec_if1 = _mm_and_ps( bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); vec_hNl = _mm_or_ps(vec_if0, vec_if1); { - const __m128 vec_overDriveCurve = _mm_loadu_ps( - &WebRtcAec_overDriveCurve[i]); - const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps( - vec_overDriveSm, vec_overDriveCurve); + const __m128 vec_overDriveCurve = + _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); + const __m128 vec_overDriveSm_overDriveCurve = + _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); _mm_storeu_ps(&hNl[i], vec_hNl); } @@ -404,7 +409,7 @@ static void OverdriveAndSuppressSSE2(AecCore* aec, float hNl[PART_LEN1], // Weight subbands if (hNl[i] > hNlFb) { hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + - (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); @@ -424,4 +429,3 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; } - diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.c b/webrtc/modules/audio_processing/aec/aec_rdft.c index e63f36719..a19e8877b 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft.c +++ b/webrtc/modules/audio_processing/aec/aec_rdft.c @@ -42,7 +42,7 @@ ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4]; static int ip[16]; -static void bitrv2_32(int *ip, float *a) { +static void bitrv2_32(int* ip, float* a) { const int n = 32; int j, j1, k, k1, m, m2; float xr, xi, yr, yi; @@ -116,7 +116,7 @@ static void bitrv2_32(int *ip, float *a) { } } -static void bitrv2_128(float *a) { +static void bitrv2_128(float* a) { /* Following things have been attempted but are no faster: (a) Storing the swap indexes in a LUT (index calculations are done @@ -146,7 +146,7 @@ static void bitrv2_128(float *a) { a[j1 + 1] = yi; a[k1 + 0] = xr; a[k1 + 1] = xi; - j1 += 8; + j1 += 8; k1 += 16; xr = a[j1 + 0]; xi = a[j1 + 1]; @@ -166,7 +166,7 @@ static void bitrv2_128(float *a) { a[j1 + 1] = yi; a[k1 + 0] = xr; a[k1 + 1] = xi; - j1 += 8; + j1 += 8; k1 += 16; xr = a[j1 + 0]; xi = a[j1 + 1]; @@ -265,7 +265,7 @@ static void makewt_32(void) { } static void makect_32(void) { - float *c = rdft_w + 32; + float* c = rdft_w + 32; const int nc = 32; int j, nch; float delta; @@ -281,7 +281,7 @@ static void makect_32(void) { } } -static void cft1st_128_C(float *a) { +static void cft1st_128_C(float* a) { const int n = 128; int j, k1, k2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; @@ -385,7 +385,7 @@ static void cft1st_128_C(float *a) { } } -static void cftmdl_128_C(float *a) { +static void cftmdl_128_C(float* a) { const int l = 8; const int n = 128; const int m = 32; @@ -394,7 +394,7 @@ static void cftmdl_128_C(float *a) { float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; for (j0 = 0; j0 < l; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -416,7 +416,7 @@ static void cftmdl_128_C(float *a) { } wk1r = rdft_w[2]; for (j0 = m; j0 < l + m; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -452,7 +452,7 @@ static void cftmdl_128_C(float *a) { wk3r = rdft_wk3ri_first[k1 + 0]; wk3i = rdft_wk3ri_first[k1 + 1]; for (j0 = k; j0 < l + k; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -483,7 +483,7 @@ static void cftmdl_128_C(float *a) { wk3r = rdft_wk3ri_second[k1 + 0]; wk3i = rdft_wk3ri_second[k1 + 1]; for (j0 = k + m; j0 < l + (k + m); j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -512,7 +512,7 @@ static void cftmdl_128_C(float *a) { } } -static void cftfsub_128(float *a) { +static void cftfsub_128(float* a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -542,7 +542,7 @@ static void cftfsub_128(float *a) { } } -static void cftbsub_128(float *a) { +static void cftbsub_128(float* a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -573,14 +573,14 @@ static void cftbsub_128(float *a) { } } -static void rftfsub_128_C(float *a) { - const float *c = rdft_w + 32; +static void rftfsub_128_C(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -594,15 +594,15 @@ static void rftfsub_128_C(float *a) { } } -static void rftbsub_128_C(float *a) { - const float *c = rdft_w + 32; +static void rftbsub_128_C(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -617,7 +617,7 @@ static void rftbsub_128_C(float *a) { a[65] = -a[65]; } -void aec_rdft_forward_128(float *a) { +void aec_rdft_forward_128(float* a) { float xi; bitrv2_128(a); cftfsub_128(a); @@ -627,7 +627,7 @@ void aec_rdft_forward_128(float *a) { a[1] = xi; } -void aec_rdft_inverse_128(float *a) { +void aec_rdft_inverse_128(float* a) { a[1] = 0.5f * (a[0] - a[1]); a[0] -= a[1]; rftbsub_128(a); diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.h b/webrtc/modules/audio_processing/aec/aec_rdft.h index 91bedc9fc..3380633ce 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft.h +++ b/webrtc/modules/audio_processing/aec/aec_rdft.h @@ -20,11 +20,11 @@ static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; } #endif #ifdef _MSC_VER /* visual c++ */ -# define ALIGN16_BEG __declspec(align(16)) -# define ALIGN16_END +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END #else /* gcc or icc */ -# define ALIGN16_BEG -# define ALIGN16_END __attribute__((aligned(16))) +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) #endif // constants shared by all paths (C, SSE2). @@ -42,7 +42,7 @@ extern float rdft_wk3i[32]; extern float cftmdl_wk1r[4]; // code path selection function pointers -typedef void (*rft_sub_128_t)(float *a); +typedef void (*rft_sub_128_t)(float* a); extern rft_sub_128_t rftfsub_128; extern rft_sub_128_t rftbsub_128; extern rft_sub_128_t cft1st_128; @@ -51,7 +51,7 @@ extern rft_sub_128_t cftmdl_128; // entry points void aec_rdft_init(void); void aec_rdft_init_sse2(void); -void aec_rdft_forward_128(float *a); -void aec_rdft_inverse_128(float *a); +void aec_rdft_forward_128(float* a); +void aec_rdft_inverse_128(float* a); #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c b/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c index 49a407263..b4e453ff5 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c @@ -12,165 +12,164 @@ #include -static const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = - {-1.f, 1.f, -1.f, 1.f}; +static const ALIGN16_BEG float ALIGN16_END + k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; -static void cft1st_128_SSE2(float *a) { +static void cft1st_128_SSE2(float* a) { const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j, k2; for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { - __m128 a00v = _mm_loadu_ps(&a[j + 0]); - __m128 a04v = _mm_loadu_ps(&a[j + 4]); - __m128 a08v = _mm_loadu_ps(&a[j + 8]); - __m128 a12v = _mm_loadu_ps(&a[j + 12]); - __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3 ,2)); - __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3 ,2)); + __m128 a00v = _mm_loadu_ps(&a[j + 0]); + __m128 a04v = _mm_loadu_ps(&a[j + 4]); + __m128 a08v = _mm_loadu_ps(&a[j + 8]); + __m128 a12v = _mm_loadu_ps(&a[j + 12]); + __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); + __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); - const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); - const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); - const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); - const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); - const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); - const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); - __m128 x0v = _mm_add_ps(a01v, a23v); - const __m128 x1v = _mm_sub_ps(a01v, a23v); - const __m128 x2v = _mm_add_ps(a45v, a67v); - const __m128 x3v = _mm_sub_ps(a45v, a67v); - __m128 x0w; - a01v = _mm_add_ps(x0v, x2v); - x0v = _mm_sub_ps(x0v, x2v); - x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); + const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); + __m128 x0v = _mm_add_ps(a01v, a23v); + const __m128 x1v = _mm_sub_ps(a01v, a23v); + const __m128 x2v = _mm_add_ps(a45v, a67v); + const __m128 x3v = _mm_sub_ps(a45v, a67v); + __m128 x0w; + a01v = _mm_add_ps(x0v, x2v); + x0v = _mm_sub_ps(x0v, x2v); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); { const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v); const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w); - a45v = _mm_add_ps(a45_0v, a45_1v); + a45v = _mm_add_ps(a45_0v, a45_1v); } { - __m128 a23_0v, a23_1v; - const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1)); - const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); - x0v = _mm_add_ps(x1v, x3s); - x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); - a23_0v = _mm_mul_ps(wk1rv, x0v); - a23_1v = _mm_mul_ps(wk1iv, x0w); - a23v = _mm_add_ps(a23_0v, a23_1v); + __m128 a23_0v, a23_1v; + const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); + x0v = _mm_add_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + a23_0v = _mm_mul_ps(wk1rv, x0v); + a23_1v = _mm_mul_ps(wk1iv, x0w); + a23v = _mm_add_ps(a23_0v, a23_1v); - x0v = _mm_sub_ps(x1v, x3s); - x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); + x0v = _mm_sub_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); } { const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v); const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w); - a67v = _mm_add_ps(a67_0v, a67_1v); + a67v = _mm_add_ps(a67_0v, a67_1v); } - a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1 ,0)); - a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1 ,0)); - a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3 ,2)); - a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3 ,2)); - _mm_storeu_ps(&a[j + 0], a00v); - _mm_storeu_ps(&a[j + 4], a04v); - _mm_storeu_ps(&a[j + 8], a08v); + a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); + a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0)); + a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2)); + a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2)); + _mm_storeu_ps(&a[j + 0], a00v); + _mm_storeu_ps(&a[j + 4], a04v); + _mm_storeu_ps(&a[j + 8], a08v); _mm_storeu_ps(&a[j + 12], a12v); } } -static void cftmdl_128_SSE2(float *a) { +static void cftmdl_128_SSE2(float* a) { const int l = 8; const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j0; __m128 wk1rv = _mm_load_ps(cftmdl_wk1r); for (j0 = 0; j0 < l; j0 += 2) { - const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); - const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); - const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), - _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), - _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); - const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), + _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), + _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); - const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), - _mm_castsi128_ps(a_48), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), - _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); - const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), + _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), + _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); - const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1), - _MM_SHUFFLE(2, 3, 0, 1))); - const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); - const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 yy0 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub, - _MM_SHUFFLE(2, 2, 2 ,2)); - const __m128 yy1 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub, - _MM_SHUFFLE(3, 3, 3 ,3)); + const __m128 yy0 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); + const __m128 yy1 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1); const __m128 yy3 = _mm_add_ps(yy0, yy2); const __m128 yy4 = _mm_mul_ps(wk1rv, yy3); - _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); - _mm_storel_epi64((__m128i*)&a[j0 + 32], - _mm_shuffle_epi32(_mm_castps_si128(xx0), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); - _mm_storel_epi64((__m128i*)&a[j0 + 48], - _mm_shuffle_epi32(_mm_castps_si128(xx1), - _MM_SHUFFLE(2, 3, 2, 3))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); a[j0 + 48] = -a[j0 + 48]; - _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); - _mm_storel_epi64((__m128i*)&a[j0 + 56], - _mm_shuffle_epi32(_mm_castps_si128(yy4), - _MM_SHUFFLE(2, 3, 2, 3))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); } { int k = 64; int k1 = 2; int k2 = 2 * k1; - const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]); - const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]); - const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]); - const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]); - const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]); - wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]); + wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]); for (j0 = k; j0 < l + k; j0 += 2) { - const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); - const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); @@ -179,100 +178,102 @@ static void cftmdl_128_SSE2(float *a) { const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv); - const __m128 xx3 = _mm_mul_ps(wk2iv, - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1), - _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); + const __m128 xx3 = + _mm_mul_ps(wk2iv, + _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx4 = _mm_add_ps(xx2, xx3); - const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1), - _MM_SHUFFLE(2, 3, 0, 1))); - const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); - const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); - const __m128 xx11 = _mm_mul_ps(wk1iv, + const __m128 xx11 = _mm_mul_ps( + wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx12 = _mm_add_ps(xx10, xx11); const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); - const __m128 xx21 = _mm_mul_ps(wk3iv, + const __m128 xx21 = _mm_mul_ps( + wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), - _MM_SHUFFLE(2, 3, 0, 1)))); + _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx22 = _mm_add_ps(xx20, xx21); - _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); - _mm_storel_epi64((__m128i*)&a[j0 + 32], - _mm_shuffle_epi32(_mm_castps_si128(xx), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4)); - _mm_storel_epi64((__m128i*)&a[j0 + 48], - _mm_shuffle_epi32(_mm_castps_si128(xx4), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2))); - _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); - _mm_storel_epi64((__m128i*)&a[j0 + 40], - _mm_shuffle_epi32(_mm_castps_si128(xx12), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 40], + _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22)); - _mm_storel_epi64((__m128i*)&a[j0 + 56], - _mm_shuffle_epi32(_mm_castps_si128(xx22), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2))); } } } -static void rftfsub_128_SSE2(float *a) { - const float *c = rdft_w + 32; +static void rftfsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END k_half[4] = - {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); // Vectorized code (four at once). // Note: commented number are indexes for the first iteration of the loop. for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // Load 'wk'. - const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4, - const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, - const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, const __m128 wkr_ = - _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, - const __m128 wki_ = c_j1; // 1, 2, 3, 4, + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, // Load and shuffle 'a'. - const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, - const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, - const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8, - const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9, - const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120, - const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, // Calculate 'x'. const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); - // 2-126, 4-124, 6-122, 8-120, + // 2-126, 4-124, 6-122, 8-120, const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); - // 3-127, 5-125, 7-123, 9-121, + // 3-127, 5-125, 7-123, 9-121, // Calculate product into 'y'. // yr = wkr * xr - wki * xi; // yi = wkr * xi + wki * xr; @@ -280,12 +281,12 @@ static void rftfsub_128_SSE2(float *a) { const __m128 b_ = _mm_mul_ps(wki_, xi_); const __m128 c_ = _mm_mul_ps(wkr_, xi_); const __m128 d_ = _mm_mul_ps(wki_, xr_); - const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, - const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, - // Update 'a'. - // a[j2 + 0] -= yr; - // a[j2 + 1] -= yi; - // a[k2 + 0] += yr; + const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; // a[k2 + 1] -= yi; const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9, @@ -293,26 +294,26 @@ static void rftfsub_128_SSE2(float *a) { const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121, // Shuffle in right order and store. const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); - // 2, 3, 4, 5, + // 2, 3, 4, 5, const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); - // 6, 7, 8, 9, + // 6, 7, 8, 9, const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); - // 122, 123, 120, 121, + // 122, 123, 120, 121, const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); - // 126, 127, 124, 125, - const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123, - const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127, - _mm_storeu_ps(&a[0 + j2], a_j2_0n); - _mm_storeu_ps(&a[4 + j2], a_j2_4n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); _mm_storeu_ps(&a[122 - j2], a_k2_0n); _mm_storeu_ps(&a[126 - j2], a_k2_4n); } // Scalar code for the remaining items. for (; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -326,13 +327,13 @@ static void rftfsub_128_SSE2(float *a) { } } -static void rftbsub_128_SSE2(float *a) { - const float *c = rdft_w + 32; +static void rftbsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END k_half[4] = - {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); a[1] = -a[1]; @@ -340,30 +341,30 @@ static void rftbsub_128_SSE2(float *a) { // Note: commented number are indexes for the first iteration of the loop. for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // Load 'wk'. - const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4, - const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, - const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, const __m128 wkr_ = - _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, - const __m128 wki_ = c_j1; // 1, 2, 3, 4, + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, // Load and shuffle 'a'. - const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, - const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, - const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8, - const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9, - const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120, - const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, // Calculate 'x'. const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); - // 2-126, 4-124, 6-122, 8-120, + // 2-126, 4-124, 6-122, 8-120, const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); - // 3-127, 5-125, 7-123, 9-121, + // 3-127, 5-125, 7-123, 9-121, // Calculate product into 'y'. // yr = wkr * xr + wki * xi; // yi = wkr * xi - wki * xr; @@ -371,12 +372,12 @@ static void rftbsub_128_SSE2(float *a) { const __m128 b_ = _mm_mul_ps(wki_, xi_); const __m128 c_ = _mm_mul_ps(wkr_, xi_); const __m128 d_ = _mm_mul_ps(wki_, xr_); - const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, - const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, - // Update 'a'. - // a[j2 + 0] = a[j2 + 0] - yr; - // a[j2 + 1] = yi - a[j2 + 1]; - // a[k2 + 0] = yr + a[k2 + 0]; + const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] = a[j2 + 0] - yr; + // a[j2 + 1] = yi - a[j2 + 1]; + // a[k2 + 0] = yr + a[k2 + 0]; // a[k2 + 1] = yi - a[k2 + 1]; const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9, @@ -384,26 +385,26 @@ static void rftbsub_128_SSE2(float *a) { const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121, // Shuffle in right order and store. const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); - // 2, 3, 4, 5, + // 2, 3, 4, 5, const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); - // 6, 7, 8, 9, + // 6, 7, 8, 9, const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); - // 122, 123, 120, 121, + // 122, 123, 120, 121, const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); - // 126, 127, 124, 125, - const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123, - const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127, - _mm_storeu_ps(&a[0 + j2], a_j2_0n); - _mm_storeu_ps(&a[4 + j2], a_j2_4n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); _mm_storeu_ps(&a[122 - j2], a_k2_0n); _mm_storeu_ps(&a[126 - j2], a_k2_4n); } // Scalar code for the remaining items. for (; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; diff --git a/webrtc/modules/audio_processing/aec/aec_resampler.c b/webrtc/modules/audio_processing/aec/aec_resampler.c index 13521ec78..ebd052f80 100644 --- a/webrtc/modules/audio_processing/aec/aec_resampler.c +++ b/webrtc/modules/audio_processing/aec/aec_resampler.c @@ -8,7 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock +/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for + * clock * skew by resampling the farend signal. */ @@ -21,214 +22,205 @@ #include "webrtc/modules/audio_processing/aec/aec_core.h" -enum { kEstimateLengthFrames = 400 }; +enum { + kEstimateLengthFrames = 400 +}; typedef struct { - short buffer[kResamplerBufferSize]; - float position; + short buffer[kResamplerBufferSize]; + float position; - int deviceSampleRateHz; - int skewData[kEstimateLengthFrames]; - int skewDataIndex; - float skewEstimate; + int deviceSampleRateHz; + int skewData[kEstimateLengthFrames]; + int skewDataIndex; + float skewEstimate; } resampler_t; static int EstimateSkew(const int* rawSkew, int size, int absLimit, - float *skewEst); + float* skewEst); -int WebRtcAec_CreateResampler(void **resampInst) -{ - resampler_t *obj = malloc(sizeof(resampler_t)); - *resampInst = obj; - if (obj == NULL) { - return -1; - } +int WebRtcAec_CreateResampler(void** resampInst) { + resampler_t* obj = malloc(sizeof(resampler_t)); + *resampInst = obj; + if (obj == NULL) { + return -1; + } - return 0; + return 0; } -int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz) -{ - resampler_t *obj = (resampler_t*) resampInst; - memset(obj->buffer, 0, sizeof(obj->buffer)); - obj->position = 0.0; +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) { + resampler_t* obj = (resampler_t*)resampInst; + memset(obj->buffer, 0, sizeof(obj->buffer)); + obj->position = 0.0; - obj->deviceSampleRateHz = deviceSampleRateHz; - memset(obj->skewData, 0, sizeof(obj->skewData)); - obj->skewDataIndex = 0; - obj->skewEstimate = 0.0; + obj->deviceSampleRateHz = deviceSampleRateHz; + memset(obj->skewData, 0, sizeof(obj->skewData)); + obj->skewDataIndex = 0; + obj->skewEstimate = 0.0; - return 0; + return 0; } -int WebRtcAec_FreeResampler(void *resampInst) -{ - resampler_t *obj = (resampler_t*) resampInst; - free(obj); +int WebRtcAec_FreeResampler(void* resampInst) { + resampler_t* obj = (resampler_t*)resampInst; + free(obj); - return 0; + return 0; } -void WebRtcAec_ResampleLinear(void *resampInst, - const short *inspeech, +void WebRtcAec_ResampleLinear(void* resampInst, + const short* inspeech, int size, float skew, - short *outspeech, - int *size_out) -{ - resampler_t *obj = (resampler_t*) resampInst; + short* outspeech, + int* size_out) { + resampler_t* obj = (resampler_t*)resampInst; - short *y; - float be, tnew, interp; - int tn, mm; + short* y; + float be, tnew, interp; + int tn, mm; - assert(!(size < 0 || size > 2 * FRAME_LEN)); - assert(resampInst != NULL); - assert(inspeech != NULL); - assert(outspeech != NULL); - assert(size_out != NULL); + assert(!(size < 0 || size > 2 * FRAME_LEN)); + assert(resampInst != NULL); + assert(inspeech != NULL); + assert(outspeech != NULL); + assert(size_out != NULL); - // Add new frame data in lookahead - memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], - inspeech, - size * sizeof(short)); + // Add new frame data in lookahead + memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], + inspeech, + size * sizeof(short)); - // Sample rate ratio - be = 1 + skew; + // Sample rate ratio + be = 1 + skew; - // Loop over input frame - mm = 0; - y = &obj->buffer[FRAME_LEN]; // Point at current frame + // Loop over input frame + mm = 0; + y = &obj->buffer[FRAME_LEN]; // Point at current frame + + tnew = be * mm + obj->position; + tn = (int)tnew; + + while (tn < size) { + + // Interpolation + interp = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]); + + if (interp > 32767) { + interp = 32767; + } else if (interp < -32768) { + interp = -32768; + } + + outspeech[mm] = (short)interp; + mm++; tnew = be * mm + obj->position; - tn = (int) tnew; + tn = (int)tnew; + } - while (tn < size) { + *size_out = mm; + obj->position += (*size_out) * be - size; - // Interpolation - interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]); - - if (interp > 32767) { - interp = 32767; - } - else if (interp < -32768) { - interp = -32768; - } - - outspeech[mm] = (short) interp; - mm++; - - tnew = be * mm + obj->position; - tn = (int) tnew; - } - - *size_out = mm; - obj->position += (*size_out) * be - size; - - // Shift buffer - memmove(obj->buffer, - &obj->buffer[size], - (kResamplerBufferSize - size) * sizeof(short)); + // Shift buffer + memmove(obj->buffer, + &obj->buffer[size], + (kResamplerBufferSize - size) * sizeof(short)); } -int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst) -{ - resampler_t *obj = (resampler_t*)resampInst; - int err = 0; +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) { + resampler_t* obj = (resampler_t*)resampInst; + int err = 0; - if (obj->skewDataIndex < kEstimateLengthFrames) { - obj->skewData[obj->skewDataIndex] = rawSkew; - obj->skewDataIndex++; - } - else if (obj->skewDataIndex == kEstimateLengthFrames) { - err = EstimateSkew(obj->skewData, - kEstimateLengthFrames, - obj->deviceSampleRateHz, - skewEst); - obj->skewEstimate = *skewEst; - obj->skewDataIndex++; - } - else { - *skewEst = obj->skewEstimate; - } + if (obj->skewDataIndex < kEstimateLengthFrames) { + obj->skewData[obj->skewDataIndex] = rawSkew; + obj->skewDataIndex++; + } else if (obj->skewDataIndex == kEstimateLengthFrames) { + err = EstimateSkew( + obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst); + obj->skewEstimate = *skewEst; + obj->skewDataIndex++; + } else { + *skewEst = obj->skewEstimate; + } - return err; + return err; } int EstimateSkew(const int* rawSkew, int size, int deviceSampleRateHz, - float *skewEst) -{ - const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); - const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); - int i = 0; - int n = 0; - float rawAvg = 0; - float err = 0; - float rawAbsDev = 0; - int upperLimit = 0; - int lowerLimit = 0; - float cumSum = 0; - float x = 0; - float x2 = 0; - float y = 0; - float xy = 0; - float xAvg = 0; - float denom = 0; - float skew = 0; + float* skewEst) { + const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); + const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); + int i = 0; + int n = 0; + float rawAvg = 0; + float err = 0; + float rawAbsDev = 0; + int upperLimit = 0; + int lowerLimit = 0; + float cumSum = 0; + float x = 0; + float x2 = 0; + float y = 0; + float xy = 0; + float xAvg = 0; + float denom = 0; + float skew = 0; - *skewEst = 0; // Set in case of error below. - for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { - n++; - rawAvg += rawSkew[i]; - } + *skewEst = 0; // Set in case of error below. + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + n++; + rawAvg += rawSkew[i]; } + } - if (n == 0) { - return -1; + if (n == 0) { + return -1; + } + assert(n > 0); + rawAvg /= n; + + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + err = rawSkew[i] - rawAvg; + rawAbsDev += err >= 0 ? err : -err; } - assert(n > 0); - rawAvg /= n; + } + assert(n > 0); + rawAbsDev /= n; + upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. + lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. - for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { - err = rawSkew[i] - rawAvg; - rawAbsDev += err >= 0 ? err : -err; - } + n = 0; + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || + (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { + n++; + cumSum += rawSkew[i]; + x += n; + x2 += n * n; + y += cumSum; + xy += n * cumSum; } - assert(n > 0); - rawAbsDev /= n; - upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. - lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. + } - n = 0; - for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || - (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { - n++; - cumSum += rawSkew[i]; - x += n; - x2 += n*n; - y += cumSum; - xy += n * cumSum; - } - } + if (n == 0) { + return -1; + } + assert(n > 0); + xAvg = x / n; + denom = x2 - xAvg * x; - if (n == 0) { - return -1; - } - assert(n > 0); - xAvg = x / n; - denom = x2 - xAvg*x; + if (denom != 0) { + skew = (xy - xAvg * y) / denom; + } - if (denom != 0) { - skew = (xy - xAvg*y) / denom; - } - - *skewEst = skew; - return 0; + *skewEst = skew; + return 0; } diff --git a/webrtc/modules/audio_processing/aec/aec_resampler.h b/webrtc/modules/audio_processing/aec/aec_resampler.h index 3cd0691e0..e42c056f6 100644 --- a/webrtc/modules/audio_processing/aec/aec_resampler.h +++ b/webrtc/modules/audio_processing/aec/aec_resampler.h @@ -13,23 +13,27 @@ #include "webrtc/modules/audio_processing/aec/aec_core.h" -enum { kResamplingDelay = 1 }; -enum { kResamplerBufferSize = FRAME_LEN * 4 }; +enum { + kResamplingDelay = 1 +}; +enum { + kResamplerBufferSize = FRAME_LEN * 4 +}; // Unless otherwise specified, functions return 0 on success and -1 on error -int WebRtcAec_CreateResampler(void **resampInst); -int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz); -int WebRtcAec_FreeResampler(void *resampInst); +int WebRtcAec_CreateResampler(void** resampInst); +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz); +int WebRtcAec_FreeResampler(void* resampInst); // Estimates skew from raw measurement. -int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst); +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst); // Resamples input using linear interpolation. -void WebRtcAec_ResampleLinear(void *resampInst, - const short *inspeech, +void WebRtcAec_ResampleLinear(void* resampInst, + const short* inspeech, int size, float skew, - short *outspeech, - int *size_out); + short* outspeech, + int* size_out); #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c index 57208cd90..cf95e43b2 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation.c +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -90,7 +90,7 @@ static const int kMaxTrustedDelayMs = 500; #define MAX_RESAMP_LEN (5 * FRAME_LEN) static const int kMaxBufSizeStart = 62; // In partitions -static const int sampMsNb = 8; // samples per ms in nb +static const int sampMsNb = 8; // samples per ms in nb static const int initCheck = 42; #ifdef WEBRTC_AEC_DEBUG_DUMP @@ -99,334 +99,351 @@ int webrtc_aec_instance_count = 0; // Estimates delay to set the position of the far-end buffer read pointer // (controlled by knownDelay) -static void EstBufDelayNormal(aecpc_t *aecInst); -static void EstBufDelayExtended(aecpc_t *aecInst); -static int ProcessNormal(aecpc_t* self, const int16_t* near, - const int16_t* near_high, int16_t* out, int16_t* out_high, - int16_t num_samples, int16_t reported_delay_ms, int32_t skew); -static void ProcessExtended(aecpc_t* self, const int16_t* near, - const int16_t* near_high, int16_t* out, int16_t* out_high, - int16_t num_samples, int16_t reported_delay_ms, int32_t skew); +static void EstBufDelayNormal(aecpc_t* aecInst); +static void EstBufDelayExtended(aecpc_t* aecInst); +static int ProcessNormal(aecpc_t* self, + const int16_t* near, + const int16_t* near_high, + int16_t* out, + int16_t* out_high, + int16_t num_samples, + int16_t reported_delay_ms, + int32_t skew); +static void ProcessExtended(aecpc_t* self, + const int16_t* near, + const int16_t* near_high, + int16_t* out, + int16_t* out_high, + int16_t num_samples, + int16_t reported_delay_ms, + int32_t skew); -int32_t WebRtcAec_Create(void **aecInst) -{ - aecpc_t *aecpc; - if (aecInst == NULL) { - return -1; - } +int32_t WebRtcAec_Create(void** aecInst) { + aecpc_t* aecpc; + if (aecInst == NULL) { + return -1; + } - aecpc = malloc(sizeof(aecpc_t)); - *aecInst = aecpc; - if (aecpc == NULL) { - return -1; - } + aecpc = malloc(sizeof(aecpc_t)); + *aecInst = aecpc; + if (aecpc == NULL) { + return -1; + } - if (WebRtcAec_CreateAec(&aecpc->aec) == -1) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } + if (WebRtcAec_CreateAec(&aecpc->aec) == -1) { + WebRtcAec_Free(aecpc); + aecpc = NULL; + return -1; + } - if (WebRtcAec_CreateResampler(&aecpc->resampler) == -1) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } - // Create far-end pre-buffer. The buffer size has to be large enough for - // largest possible drift compensation (kResamplerBufferSize) + "almost" an - // FFT buffer (PART_LEN2 - 1). - aecpc->far_pre_buf = WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, - sizeof(float)); - if (!aecpc->far_pre_buf) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } + if (WebRtcAec_CreateResampler(&aecpc->resampler) == -1) { + WebRtcAec_Free(aecpc); + aecpc = NULL; + return -1; + } + // Create far-end pre-buffer. The buffer size has to be large enough for + // largest possible drift compensation (kResamplerBufferSize) + "almost" an + // FFT buffer (PART_LEN2 - 1). + aecpc->far_pre_buf = + WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float)); + if (!aecpc->far_pre_buf) { + WebRtcAec_Free(aecpc); + aecpc = NULL; + return -1; + } - aecpc->initFlag = 0; - aecpc->lastError = 0; + aecpc->initFlag = 0; + aecpc->lastError = 0; #ifdef WEBRTC_AEC_DEBUG_DUMP - aecpc->far_pre_buf_s16 = WebRtc_CreateBuffer( - PART_LEN2 + kResamplerBufferSize, sizeof(int16_t)); - if (!aecpc->far_pre_buf_s16) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } - { - char filename[64]; - sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count); - aecpc->bufFile = fopen(filename, "wb"); - sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count); - aecpc->skewFile = fopen(filename, "wb"); - sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count); - aecpc->delayFile = fopen(filename, "wb"); - webrtc_aec_instance_count++; - } + aecpc->far_pre_buf_s16 = + WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(int16_t)); + if (!aecpc->far_pre_buf_s16) { + WebRtcAec_Free(aecpc); + aecpc = NULL; + return -1; + } + { + char filename[64]; + sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count); + aecpc->bufFile = fopen(filename, "wb"); + sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count); + aecpc->skewFile = fopen(filename, "wb"); + sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count); + aecpc->delayFile = fopen(filename, "wb"); + webrtc_aec_instance_count++; + } #endif - return 0; + return 0; } -int32_t WebRtcAec_Free(void *aecInst) -{ - aecpc_t *aecpc = aecInst; +int32_t WebRtcAec_Free(void* aecInst) { + aecpc_t* aecpc = aecInst; - if (aecpc == NULL) { - return -1; - } + if (aecpc == NULL) { + return -1; + } - WebRtc_FreeBuffer(aecpc->far_pre_buf); + WebRtc_FreeBuffer(aecpc->far_pre_buf); #ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_FreeBuffer(aecpc->far_pre_buf_s16); - fclose(aecpc->bufFile); - fclose(aecpc->skewFile); - fclose(aecpc->delayFile); + WebRtc_FreeBuffer(aecpc->far_pre_buf_s16); + fclose(aecpc->bufFile); + fclose(aecpc->skewFile); + fclose(aecpc->delayFile); #endif - WebRtcAec_FreeAec(aecpc->aec); - WebRtcAec_FreeResampler(aecpc->resampler); - free(aecpc); + WebRtcAec_FreeAec(aecpc->aec); + WebRtcAec_FreeResampler(aecpc->resampler); + free(aecpc); - return 0; + return 0; } -int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq) -{ - aecpc_t *aecpc = aecInst; - AecConfig aecConfig; +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { + aecpc_t* aecpc = aecInst; + AecConfig aecConfig; - if (sampFreq != 8000 && sampFreq != 16000 && sampFreq != 32000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->sampFreq = sampFreq; + if (sampFreq != 8000 && sampFreq != 16000 && sampFreq != 32000) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->sampFreq = sampFreq; - if (scSampFreq < 1 || scSampFreq > 96000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->scSampFreq = scSampFreq; + if (scSampFreq < 1 || scSampFreq > 96000) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->scSampFreq = scSampFreq; - // Initialize echo canceller core - if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } + // Initialize echo canceller core + if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } - if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } + if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } - if (WebRtc_InitBuffer(aecpc->far_pre_buf) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap. + if (WebRtc_InitBuffer(aecpc->far_pre_buf) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap. - aecpc->initFlag = initCheck; // indicates that initialization has been done + aecpc->initFlag = initCheck; // indicates that initialization has been done - if (aecpc->sampFreq == 32000) { - aecpc->splitSampFreq = 16000; - } - else { - aecpc->splitSampFreq = sampFreq; - } + if (aecpc->sampFreq == 32000) { + aecpc->splitSampFreq = 16000; + } else { + aecpc->splitSampFreq = sampFreq; + } - aecpc->delayCtr = 0; - aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; - // Sampling frequency multiplier (SWB is processed as 160 frame size). - aecpc->rate_factor = aecpc->splitSampFreq / 8000; + aecpc->delayCtr = 0; + aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; + // Sampling frequency multiplier (SWB is processed as 160 frame size). + aecpc->rate_factor = aecpc->splitSampFreq / 8000; - aecpc->sum = 0; - aecpc->counter = 0; - aecpc->checkBuffSize = 1; - aecpc->firstVal = 0; + aecpc->sum = 0; + aecpc->counter = 0; + aecpc->checkBuffSize = 1; + aecpc->firstVal = 0; - aecpc->startup_phase = 1; - aecpc->bufSizeStart = 0; - aecpc->checkBufSizeCtr = 0; - aecpc->msInSndCardBuf = 0; - aecpc->filtDelay = -1; // -1 indicates an initialized state. - aecpc->timeForDelayChange = 0; - aecpc->knownDelay = 0; - aecpc->lastDelayDiff = 0; + aecpc->startup_phase = 1; + aecpc->bufSizeStart = 0; + aecpc->checkBufSizeCtr = 0; + aecpc->msInSndCardBuf = 0; + aecpc->filtDelay = -1; // -1 indicates an initialized state. + aecpc->timeForDelayChange = 0; + aecpc->knownDelay = 0; + aecpc->lastDelayDiff = 0; - aecpc->skewFrCtr = 0; - aecpc->resample = kAecFalse; - aecpc->highSkewCtr = 0; - aecpc->skew = 0; + aecpc->skewFrCtr = 0; + aecpc->resample = kAecFalse; + aecpc->highSkewCtr = 0; + aecpc->skew = 0; - aecpc->farend_started = 0; + aecpc->farend_started = 0; - // Default settings. - aecConfig.nlpMode = kAecNlpModerate; - aecConfig.skewMode = kAecFalse; - aecConfig.metricsMode = kAecFalse; - aecConfig.delay_logging = kAecFalse; + // Default settings. + aecConfig.nlpMode = kAecNlpModerate; + aecConfig.skewMode = kAecFalse; + aecConfig.metricsMode = kAecFalse; + aecConfig.delay_logging = kAecFalse; - if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } + if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } #ifdef WEBRTC_AEC_DEBUG_DUMP - if (WebRtc_InitBuffer(aecpc->far_pre_buf_s16) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - WebRtc_MoveReadPtr(aecpc->far_pre_buf_s16, -PART_LEN); // Start overlap. + if (WebRtc_InitBuffer(aecpc->far_pre_buf_s16) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + WebRtc_MoveReadPtr(aecpc->far_pre_buf_s16, -PART_LEN); // Start overlap. #endif - return 0; + return 0; } // only buffer L band for farend -int32_t WebRtcAec_BufferFarend(void *aecInst, const int16_t *farend, - int16_t nrOfSamples) -{ - aecpc_t *aecpc = aecInst; - int32_t retVal = 0; - int newNrOfSamples = (int) nrOfSamples; - short newFarend[MAX_RESAMP_LEN]; - const int16_t* farend_ptr = farend; - float tmp_farend[MAX_RESAMP_LEN]; - const float* farend_float = tmp_farend; - float skew; - int i = 0; +int32_t WebRtcAec_BufferFarend(void* aecInst, + const int16_t* farend, + int16_t nrOfSamples) { + aecpc_t* aecpc = aecInst; + int32_t retVal = 0; + int newNrOfSamples = (int)nrOfSamples; + short newFarend[MAX_RESAMP_LEN]; + const int16_t* farend_ptr = farend; + float tmp_farend[MAX_RESAMP_LEN]; + const float* farend_float = tmp_farend; + float skew; + int i = 0; - if (farend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } + if (farend == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } - skew = aecpc->skew; + skew = aecpc->skew; - if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { - // Resample and get a new number of samples - WebRtcAec_ResampleLinear(aecpc->resampler, farend, nrOfSamples, skew, - newFarend, &newNrOfSamples); - farend_ptr = (const int16_t*) newFarend; - } + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + // Resample and get a new number of samples + WebRtcAec_ResampleLinear(aecpc->resampler, + farend, + nrOfSamples, + skew, + newFarend, + &newNrOfSamples); + farend_ptr = (const int16_t*)newFarend; + } - aecpc->farend_started = 1; - WebRtcAec_SetSystemDelay(aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + - newNrOfSamples); + aecpc->farend_started = 1; + WebRtcAec_SetSystemDelay(aecpc->aec, + WebRtcAec_system_delay(aecpc->aec) + newNrOfSamples); #ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_WriteBuffer(aecpc->far_pre_buf_s16, farend_ptr, - (size_t) newNrOfSamples); + WebRtc_WriteBuffer( + aecpc->far_pre_buf_s16, farend_ptr, (size_t)newNrOfSamples); #endif - // Cast to float and write the time-domain data to |far_pre_buf|. - for (i = 0; i < newNrOfSamples; i++) { - tmp_farend[i] = (float) farend_ptr[i]; - } - WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_float, - (size_t) newNrOfSamples); + // Cast to float and write the time-domain data to |far_pre_buf|. + for (i = 0; i < newNrOfSamples; i++) { + tmp_farend[i] = (float)farend_ptr[i]; + } + WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_float, (size_t)newNrOfSamples); - // Transform to frequency domain if we have enough data. - while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { - // We have enough data to pass to the FFT, hence read PART_LEN2 samples. - WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**) &farend_float, tmp_farend, - PART_LEN2); + // Transform to frequency domain if we have enough data. + while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { + // We have enough data to pass to the FFT, hence read PART_LEN2 samples. + WebRtc_ReadBuffer( + aecpc->far_pre_buf, (void**)&farend_float, tmp_farend, PART_LEN2); - WebRtcAec_BufferFarendPartition(aecpc->aec, farend_float); + WebRtcAec_BufferFarendPartition(aecpc->aec, farend_float); - // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. - WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); + // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); #ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_ReadBuffer(aecpc->far_pre_buf_s16, (void**) &farend_ptr, newFarend, - PART_LEN2); - WebRtc_WriteBuffer(WebRtcAec_far_time_buf(aecpc->aec), - &farend_ptr[PART_LEN], 1); - WebRtc_MoveReadPtr(aecpc->far_pre_buf_s16, -PART_LEN); + WebRtc_ReadBuffer( + aecpc->far_pre_buf_s16, (void**)&farend_ptr, newFarend, PART_LEN2); + WebRtc_WriteBuffer( + WebRtcAec_far_time_buf(aecpc->aec), &farend_ptr[PART_LEN], 1); + WebRtc_MoveReadPtr(aecpc->far_pre_buf_s16, -PART_LEN); #endif - } + } - return retVal; + return retVal; } -int32_t WebRtcAec_Process(void *aecInst, const int16_t *nearend, - const int16_t *nearendH, int16_t *out, int16_t *outH, - int16_t nrOfSamples, int16_t msInSndCardBuf, - int32_t skew) -{ - aecpc_t *aecpc = aecInst; - int32_t retVal = 0; - if (nearend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } +int32_t WebRtcAec_Process(void* aecInst, + const int16_t* nearend, + const int16_t* nearendH, + int16_t* out, + int16_t* outH, + int16_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + aecpc_t* aecpc = aecInst; + int32_t retVal = 0; + if (nearend == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } - if (out == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } + if (out == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } - // Check for valid pointers based on sampling rate - if (aecpc->sampFreq == 32000 && nearendH == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } + // Check for valid pointers based on sampling rate + if (aecpc->sampFreq == 32000 && nearendH == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } - if (msInSndCardBuf < 0) { - msInSndCardBuf = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; - } - else if (msInSndCardBuf > kMaxTrustedDelayMs) { - // The clamping is now done in ProcessExtended/Normal(). - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; - } + if (msInSndCardBuf < 0) { + msInSndCardBuf = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } else if (msInSndCardBuf > kMaxTrustedDelayMs) { + // The clamping is now done in ProcessExtended/Normal(). + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } - // This returns the value of aec->extended_filter_enabled. - if (WebRtcAec_delay_correction_enabled(aecpc->aec)) { - ProcessExtended(aecpc, nearend, nearendH, out, outH, nrOfSamples, - msInSndCardBuf, skew); - } else { - if (ProcessNormal(aecpc, nearend, nearendH, out, outH, nrOfSamples, - msInSndCardBuf, skew) != 0) { - retVal = -1; - } + // This returns the value of aec->extended_filter_enabled. + if (WebRtcAec_delay_correction_enabled(aecpc->aec)) { + ProcessExtended( + aecpc, nearend, nearendH, out, outH, nrOfSamples, msInSndCardBuf, skew); + } else { + if (ProcessNormal(aecpc, + nearend, + nearendH, + out, + outH, + nrOfSamples, + msInSndCardBuf, + skew) != 0) { + retVal = -1; } + } #ifdef WEBRTC_AEC_DEBUG_DUMP - { - int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) / - (sampMsNb * aecpc->rate_factor)); - (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile); - (void)fwrite(&aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, - aecpc->delayFile); - } + { + int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) / + (sampMsNb * aecpc->rate_factor)); + (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile); + (void)fwrite( + &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile); + } #endif - return retVal; + return retVal; } int WebRtcAec_set_config(void* handle, AecConfig config) { @@ -442,8 +459,9 @@ int WebRtcAec_set_config(void* handle, AecConfig config) { } self->skewMode = config.skewMode; - if (config.nlpMode != kAecNlpConservative && config.nlpMode != kAecNlpModerate - && config.nlpMode != kAecNlpAggressive) { + if (config.nlpMode != kAecNlpConservative && + config.nlpMode != kAecNlpModerate && + config.nlpMode != kAecNlpAggressive) { self->lastError = AEC_BAD_PARAMETER_ERROR; return -1; } @@ -458,14 +476,14 @@ int WebRtcAec_set_config(void* handle, AecConfig config) { return -1; } - WebRtcAec_SetConfigCore(self->aec, config.nlpMode, config.metricsMode, - config.delay_logging); + WebRtcAec_SetConfigCore( + self->aec, config.nlpMode, config.metricsMode, config.delay_logging); return 0; } int WebRtcAec_get_echo_status(void* handle, int* status) { aecpc_t* self = (aecpc_t*)handle; - if (status == NULL ) { + if (status == NULL) { self->lastError = AEC_NULL_POINTER_ERROR; return -1; } @@ -488,10 +506,10 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { Stats erle; Stats a_nlp; - if (handle == NULL ) { + if (handle == NULL) { return -1; } - if (metrics == NULL ) { + if (metrics == NULL) { self->lastError = AEC_NULL_POINTER_ERROR; return -1; } @@ -503,46 +521,46 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp); // ERL - metrics->erl.instant = (int) erl.instant; + metrics->erl.instant = (int)erl.instant; if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) { // Use a mix between regular average and upper part average. dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average; - metrics->erl.average = (int) dtmp; + metrics->erl.average = (int)dtmp; } else { metrics->erl.average = kOffsetLevel; } - metrics->erl.max = (int) erl.max; + metrics->erl.max = (int)erl.max; if (erl.min < (kOffsetLevel * (-1))) { - metrics->erl.min = (int) erl.min; + metrics->erl.min = (int)erl.min; } else { metrics->erl.min = kOffsetLevel; } // ERLE - metrics->erle.instant = (int) erle.instant; + metrics->erle.instant = (int)erle.instant; if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) { // Use a mix between regular average and upper part average. dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average; - metrics->erle.average = (int) dtmp; + metrics->erle.average = (int)dtmp; } else { metrics->erle.average = kOffsetLevel; } - metrics->erle.max = (int) erle.max; + metrics->erle.max = (int)erle.max; if (erle.min < (kOffsetLevel * (-1))) { - metrics->erle.min = (int) erle.min; + metrics->erle.min = (int)erle.min; } else { metrics->erle.min = kOffsetLevel; } // RERL - if ((metrics->erl.average > kOffsetLevel) - && (metrics->erle.average > kOffsetLevel)) { + if ((metrics->erl.average > kOffsetLevel) && + (metrics->erle.average > kOffsetLevel)) { stmp = metrics->erl.average + metrics->erle.average; } else { stmp = kOffsetLevel; @@ -555,20 +573,20 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { metrics->rerl.min = stmp; // A_NLP - metrics->aNlp.instant = (int) a_nlp.instant; + metrics->aNlp.instant = (int)a_nlp.instant; if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) { // Use a mix between regular average and upper part average. dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average; - metrics->aNlp.average = (int) dtmp; + metrics->aNlp.average = (int)dtmp; } else { metrics->aNlp.average = kOffsetLevel; } - metrics->aNlp.max = (int) a_nlp.max; + metrics->aNlp.max = (int)a_nlp.max; if (a_nlp.min < (kOffsetLevel * (-1))) { - metrics->aNlp.min = (int) a_nlp.min; + metrics->aNlp.min = (int)a_nlp.min; } else { metrics->aNlp.min = kOffsetLevel; } @@ -599,22 +617,25 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) { return 0; } -int32_t WebRtcAec_get_error_code(void *aecInst) -{ - aecpc_t *aecpc = aecInst; - return aecpc->lastError; +int32_t WebRtcAec_get_error_code(void* aecInst) { + aecpc_t* aecpc = aecInst; + return aecpc->lastError; } AecCore* WebRtcAec_aec_core(void* handle) { if (!handle) { return NULL; } - return ((aecpc_t*) handle)->aec; + return ((aecpc_t*)handle)->aec; } -static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, - const int16_t *nearendH, int16_t *out, int16_t *outH, - int16_t nrOfSamples, int16_t msInSndCardBuf, +static int ProcessNormal(aecpc_t* aecpc, + const int16_t* nearend, + const int16_t* nearendH, + int16_t* out, + int16_t* outH, + int16_t nrOfSamples, + int16_t msInSndCardBuf, int32_t skew) { int retVal = 0; short i; @@ -624,8 +645,8 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, const float minSkewEst = -0.5f; const float maxSkewEst = 1.0f; - msInSndCardBuf = msInSndCardBuf > kMaxTrustedDelayMs ? - kMaxTrustedDelayMs : msInSndCardBuf; + msInSndCardBuf = + msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf; // TODO(andrew): we need to investigate if this +10 is really wanted. msInSndCardBuf += 10; aecpc->msInSndCardBuf = msInSndCardBuf; @@ -633,27 +654,24 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, if (aecpc->skewMode == kAecTrue) { if (aecpc->skewFrCtr < 25) { aecpc->skewFrCtr++; - } - else { + } else { retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); if (retVal == -1) { aecpc->skew = 0; aecpc->lastError = AEC_BAD_PARAMETER_WARNING; } - aecpc->skew /= aecpc->sampFactor*nrOfSamples; + aecpc->skew /= aecpc->sampFactor * nrOfSamples; if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) { aecpc->resample = kAecFalse; - } - else { + } else { aecpc->resample = kAecTrue; } if (aecpc->skew < minSkewEst) { aecpc->skew = minSkewEst; - } - else if (aecpc->skew > maxSkewEst) { + } else if (aecpc->skew > maxSkewEst) { aecpc->skew = maxSkewEst; } @@ -692,11 +710,10 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, } if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) < - WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { + WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { aecpc->sum += aecpc->msInSndCardBuf; aecpc->counter++; - } - else { + } else { aecpc->counter = 0; } @@ -704,9 +721,10 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, // The far-end buffer size is determined in partitions of // PART_LEN samples. Use 75% of the average value of the system // delay as buffer size to start with. - aecpc->bufSizeStart = WEBRTC_SPL_MIN((3 * aecpc->sum * - aecpc->rate_factor * 8) / (4 * aecpc->counter * PART_LEN), - kMaxBufSizeStart); + aecpc->bufSizeStart = + WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) / + (4 * aecpc->counter * PART_LEN), + kMaxBufSizeStart); // Buffer size has now been determined. aecpc->checkBuffSize = 0; } @@ -714,8 +732,9 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) { // For really bad systems, don't disable the echo canceller for // more than 0.5 sec. - aecpc->bufSizeStart = WEBRTC_SPL_MIN((aecpc->msInSndCardBuf * - aecpc->rate_factor * 3) / 40, kMaxBufSizeStart); + aecpc->bufSizeStart = WEBRTC_SPL_MIN( + (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40, + kMaxBufSizeStart); aecpc->checkBuffSize = 0; } } @@ -765,9 +784,14 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend, return retVal; } -static void ProcessExtended(aecpc_t* self, const int16_t* near, - const int16_t* near_high, int16_t* out, int16_t* out_high, - int16_t num_samples, int16_t reported_delay_ms, int32_t skew) { +static void ProcessExtended(aecpc_t* self, + const int16_t* near, + const int16_t* near_high, + int16_t* out, + int16_t* out_high, + int16_t num_samples, + int16_t reported_delay_ms, + int32_t skew) { int i; const int num_frames = num_samples / FRAME_LEN; #if defined(WEBRTC_UNTRUSTED_DELAY) @@ -779,14 +803,16 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near, // Due to the longer filter, we no longer add 10 ms to the reported delay // to reduce chance of non-causality. Instead we apply a minimum here to avoid // issues with the read pointer jumping around needlessly. - reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs ? - kMinTrustedDelayMs : reported_delay_ms; + reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs + ? kMinTrustedDelayMs + : reported_delay_ms; // If the reported delay appears to be bogus, we attempt to recover by using // the measured fixed delay values. We use >= here because higher layers // may already clamp to this maximum value, and we would otherwise not // detect it here. - reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs ? - kFixedDelayMs : reported_delay_ms; + reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs + ? kFixedDelayMs + : reported_delay_ms; #endif self->msInSndCardBuf = reported_delay_ms; @@ -805,10 +831,11 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near, // action on the first frame. In the trusted delay case, we'll take the // current reported delay, unless it's less then our conservative // measurement. - int startup_size_ms = reported_delay_ms < kFixedDelayMs ? - kFixedDelayMs : reported_delay_ms; + int startup_size_ms = + reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms; int overhead_elements = (WebRtcAec_system_delay(self->aec) - - startup_size_ms / 2 * self->rate_factor * 8) / PART_LEN; + startup_size_ms / 2 * self->rate_factor * 8) / + PART_LEN; WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements); self->startup_phase = 0; } @@ -823,9 +850,12 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near, WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset); for (i = 0; i < num_frames; ++i) { - WebRtcAec_ProcessFrame(self->aec, &near[FRAME_LEN * i], - &near_high[FRAME_LEN * i], adjusted_known_delay, - &out[FRAME_LEN * i], &out_high[FRAME_LEN * i]); + WebRtcAec_ProcessFrame(self->aec, + &near[FRAME_LEN * i], + &near_high[FRAME_LEN * i], + adjusted_known_delay, + &out[FRAME_LEN * i], + &out_high[FRAME_LEN * i]); } } } @@ -857,8 +887,8 @@ static void EstBufDelayNormal(aecpc_t* aecpc) { // We use -1 to signal an initialized state in the "extended" implementation; // compensate for that. aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay; - aecpc->filtDelay = WEBRTC_SPL_MAX(0, (short) (0.8 * aecpc->filtDelay + - 0.2 * current_delay)); + aecpc->filtDelay = + WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay)); delay_difference = aecpc->filtDelay - aecpc->knownDelay; if (delay_difference > 224) { @@ -879,7 +909,7 @@ static void EstBufDelayNormal(aecpc_t* aecpc) { aecpc->lastDelayDiff = delay_difference; if (aecpc->timeForDelayChange > 25) { - aecpc->knownDelay = WEBRTC_SPL_MAX((int) aecpc->filtDelay - 160, 0); + aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0); } } @@ -910,8 +940,8 @@ static void EstBufDelayExtended(aecpc_t* self) { if (self->filtDelay == -1) { self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay); } else { - self->filtDelay = WEBRTC_SPL_MAX(0, (short) (0.95 * self->filtDelay + - 0.05 * current_delay)); + self->filtDelay = WEBRTC_SPL_MAX( + 0, (short)(0.95 * self->filtDelay + 0.05 * current_delay)); } delay_difference = self->filtDelay - self->knownDelay; @@ -933,6 +963,6 @@ static void EstBufDelayExtended(aecpc_t* self) { self->lastDelayDiff = delay_difference; if (self->timeForDelayChange > 25) { - self->knownDelay = WEBRTC_SPL_MAX((int) self->filtDelay - 256, 0); + self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0); } } diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h index c362a6766..4c852cf64 100644 --- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h +++ b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h @@ -14,32 +14,32 @@ #include "webrtc/typedefs.h" // Errors -#define AEC_UNSPECIFIED_ERROR 12000 -#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 -#define AEC_UNINITIALIZED_ERROR 12002 -#define AEC_NULL_POINTER_ERROR 12003 -#define AEC_BAD_PARAMETER_ERROR 12004 +#define AEC_UNSPECIFIED_ERROR 12000 +#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AEC_UNINITIALIZED_ERROR 12002 +#define AEC_NULL_POINTER_ERROR 12003 +#define AEC_BAD_PARAMETER_ERROR 12004 // Warnings -#define AEC_BAD_PARAMETER_WARNING 12050 +#define AEC_BAD_PARAMETER_WARNING 12050 enum { - kAecNlpConservative = 0, - kAecNlpModerate, - kAecNlpAggressive + kAecNlpConservative = 0, + kAecNlpModerate, + kAecNlpAggressive }; enum { - kAecFalse = 0, - kAecTrue + kAecFalse = 0, + kAecTrue }; typedef struct { - int16_t nlpMode; // default kAecNlpModerate - int16_t skewMode; // default kAecFalse - int16_t metricsMode; // default kAecFalse - int delay_logging; // default kAecFalse - //float realSkew; + int16_t nlpMode; // default kAecNlpModerate + int16_t skewMode; // default kAecFalse + int16_t metricsMode; // default kAecFalse + int delay_logging; // default kAecFalse + // float realSkew; } AecConfig; typedef struct { @@ -50,10 +50,10 @@ typedef struct { } AecLevel; typedef struct { - AecLevel rerl; - AecLevel erl; - AecLevel erle; - AecLevel aNlp; + AecLevel rerl; + AecLevel erl; + AecLevel erle; + AecLevel aNlp; } AecMetrics; struct AecCore; @@ -76,7 +76,7 @@ extern "C" { * int32_t return 0: OK * -1: error */ -int32_t WebRtcAec_Create(void **aecInst); +int32_t WebRtcAec_Create(void** aecInst); /* * This function releases the memory allocated by WebRtcAec_Create(). @@ -90,7 +90,7 @@ int32_t WebRtcAec_Create(void **aecInst); * int32_t return 0: OK * -1: error */ -int32_t WebRtcAec_Free(void *aecInst); +int32_t WebRtcAec_Free(void* aecInst); /* * Initializes an AEC instance. @@ -106,7 +106,7 @@ int32_t WebRtcAec_Free(void *aecInst); * int32_t return 0: OK * -1: error */ -int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq); +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq); /* * Inserts an 80 or 160 sample block of data into the farend buffer. @@ -123,8 +123,8 @@ int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq); * int32_t return 0: OK * -1: error */ -int32_t WebRtcAec_BufferFarend(void *aecInst, - const int16_t *farend, +int32_t WebRtcAec_BufferFarend(void* aecInst, + const int16_t* farend, int16_t nrOfSamples); /* @@ -153,11 +153,11 @@ int32_t WebRtcAec_BufferFarend(void *aecInst, * int32_t return 0: OK * -1: error */ -int32_t WebRtcAec_Process(void *aecInst, - const int16_t *nearend, - const int16_t *nearendH, - int16_t *out, - int16_t *outH, +int32_t WebRtcAec_Process(void* aecInst, + const int16_t* nearend, + const int16_t* nearendH, + int16_t* out, + int16_t* outH, int16_t nrOfSamples, int16_t msInSndCardBuf, int32_t skew); @@ -238,7 +238,7 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std); * ------------------------------------------------------------------- * int32_t return 11000-11100: error code */ -int32_t WebRtcAec_get_error_code(void *aecInst); +int32_t WebRtcAec_get_error_code(void* aecInst); // Returns a pointer to the low level AEC handle. // diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc index db37f0e83..a19030ae3 100644 --- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc +++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc @@ -52,9 +52,7 @@ class SystemDelayTest : public ::testing::Test { }; SystemDelayTest::SystemDelayTest() - : handle_(NULL), - self_(NULL), - samples_per_frame_(0) { + : handle_(NULL), self_(NULL), samples_per_frame_(0) { // Dummy input data are set with more or less arbitrary non-zero values. memset(far_, 1, sizeof(far_)); memset(near_, 2, sizeof(near_)); @@ -74,7 +72,7 @@ void SystemDelayTest::TearDown() { // In SWB mode nothing is added to the buffer handling with respect to // functionality compared to WB. We therefore only verify behavior in NB and WB. -static const int kSampleRateHz[] = { 8000, 16000 }; +static const int kSampleRateHz[] = {8000, 16000}; static const size_t kNumSampleRates = sizeof(kSampleRateHz) / sizeof(*kSampleRateHz); @@ -100,8 +98,15 @@ void SystemDelayTest::Init(int sample_rate_hz) { void SystemDelayTest::RenderAndCapture(int device_buffer_ms) { EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); - EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL, - samples_per_frame_, device_buffer_ms, 0)); + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + near_, + NULL, + out_, + NULL, + samples_per_frame_, + device_buffer_ms, + 0)); } int SystemDelayTest::BufferFillUp() { @@ -254,8 +259,15 @@ TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) { // can make that assumption since we have a separate stability test. int process_time_ms = 0; for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) { - EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL, - samples_per_frame_, kDeviceBufMs, 0)); + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + near_, + NULL, + out_, + NULL, + samples_per_frame_, + kDeviceBufMs, + 0)); } // Verify that a buffer size has been established. EXPECT_EQ(0, self_->checkBuffSize); @@ -301,8 +313,15 @@ TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) { // |kStableConvergenceMs| in the buffer. Keep on calling Process() until // we run out of data and verify that the system delay is non-negative. for (int j = 0; j <= kStableConvergenceMs; j += 10) { - EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL, - samples_per_frame_, kDeviceBufMs, 0)); + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + near_, + NULL, + out_, + NULL, + samples_per_frame_, + kDeviceBufMs, + 0)); EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); } }