clang-format audio_processing/aec/*
TBR=bjornv TESTED=trybots Review URL: https://webrtc-codereview.appspot.com/2373004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4944 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
d241718e17
commit
13b2d46593
@ -37,7 +37,8 @@ static const int countLen = 50;
|
||||
|
||||
// Quantities to control H band scaling for SWB input
|
||||
static const int flagHbandCn = 1; // flag for adding comfort noise in H band
|
||||
static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
|
||||
static const float cnScaleHband =
|
||||
(float)0.4; // scale for comfort noise in H band
|
||||
// Initial bin for averaging nlp gain in low band
|
||||
static const int freqAvgIc = PART_LEN / 2;
|
||||
|
||||
@ -45,78 +46,68 @@ static const int freqAvgIc = PART_LEN / 2;
|
||||
// win = sqrt(hanning(127)); win = [0 ; win(1:64)];
|
||||
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
|
||||
static const float sqrtHanning[65] = {
|
||||
0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
|
||||
0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
|
||||
0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
|
||||
0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
|
||||
0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
|
||||
0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
|
||||
0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
|
||||
0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
|
||||
0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
|
||||
0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
|
||||
0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
|
||||
0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
|
||||
0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
|
||||
0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
|
||||
0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
|
||||
0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
|
||||
0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
|
||||
0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
|
||||
0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
|
||||
0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
|
||||
0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
|
||||
0.99969881869620f, 1.00000000000000f
|
||||
};
|
||||
0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
|
||||
0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
|
||||
0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
|
||||
0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
|
||||
0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
|
||||
0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
|
||||
0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
|
||||
0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
|
||||
0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
|
||||
0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
|
||||
0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
|
||||
0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
|
||||
0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
|
||||
0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
|
||||
0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
|
||||
0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
|
||||
1.00000000000000f};
|
||||
|
||||
// Matlab code to produce table:
|
||||
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
|
||||
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
|
||||
const float WebRtcAec_weightCurve[65] = {
|
||||
0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
|
||||
0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
|
||||
0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
|
||||
0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
|
||||
0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
|
||||
0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
|
||||
0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
|
||||
0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
|
||||
0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
|
||||
0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
|
||||
0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
|
||||
};
|
||||
0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f,
|
||||
0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f,
|
||||
0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
|
||||
0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f,
|
||||
0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f,
|
||||
0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
|
||||
0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f,
|
||||
0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f,
|
||||
0.4000f};
|
||||
|
||||
// Matlab code to produce table:
|
||||
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
|
||||
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
|
||||
const float WebRtcAec_overDriveCurve[65] = {
|
||||
1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
|
||||
1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
|
||||
1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
|
||||
1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
|
||||
1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
|
||||
1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
|
||||
1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
|
||||
1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
|
||||
1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
|
||||
1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
|
||||
1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
|
||||
};
|
||||
1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f,
|
||||
1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f,
|
||||
1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
|
||||
1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f,
|
||||
1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f,
|
||||
1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
|
||||
1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f,
|
||||
1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
|
||||
2.0000f};
|
||||
|
||||
// Target suppression levels for nlp modes.
|
||||
// log{0.001, 0.00001, 0.00000001}
|
||||
static const float kTargetSupp[3] = { -6.9f, -11.5f, -18.4f };
|
||||
static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f};
|
||||
|
||||
// Two sets of parameters, one for the extended filter mode.
|
||||
static const float kExtendedMinOverDrive[3] = { 3.0f, 6.0f, 15.0f };
|
||||
static const float kNormalMinOverDrive[3] = { 1.0f, 2.0f, 5.0f };
|
||||
static const float kExtendedSmoothingCoefficients[2][2] =
|
||||
{ { 0.9f, 0.1f }, { 0.92f, 0.08f } };
|
||||
static const float kNormalSmoothingCoefficients[2][2] =
|
||||
{ { 0.9f, 0.1f }, { 0.93f, 0.07f } };
|
||||
static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f};
|
||||
static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f};
|
||||
static const float kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
|
||||
{0.92f, 0.08f}};
|
||||
static const float kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
|
||||
{0.93f, 0.07f}};
|
||||
|
||||
// Number of partitions forming the NLP's "preferred" bands.
|
||||
enum { kPrefBandSize = 24 };
|
||||
enum {
|
||||
kPrefBandSize = 24
|
||||
};
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
extern int webrtc_aec_instance_count;
|
||||
@ -125,14 +116,16 @@ extern int webrtc_aec_instance_count;
|
||||
// "Private" function prototypes.
|
||||
static void ProcessBlock(AecCore* aec);
|
||||
|
||||
static void NonLinearProcessing(AecCore* aec, short *output, short *outputH);
|
||||
static void NonLinearProcessing(AecCore* aec, short* output, short* outputH);
|
||||
|
||||
static void GetHighbandGain(const float *lambda, float *nlpGainHband);
|
||||
static void GetHighbandGain(const float* lambda, float* nlpGainHband);
|
||||
|
||||
// Comfort_noise also computes noise for H band returned in comfortNoiseHband
|
||||
static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1],
|
||||
complex_t *comfortNoiseHband,
|
||||
const float *noisePow, const float *lambda);
|
||||
static void ComfortNoise(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
complex_t* comfortNoiseHband,
|
||||
const float* noisePow,
|
||||
const float* lambda);
|
||||
|
||||
static void InitLevel(PowerLevel* level);
|
||||
static void InitStats(Stats* stats);
|
||||
@ -145,58 +138,50 @@ static void TimeToFrequency(float time_data[PART_LEN2],
|
||||
float freq_data[2][PART_LEN1],
|
||||
int window);
|
||||
|
||||
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
|
||||
{
|
||||
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bRe - aIm * bIm;
|
||||
}
|
||||
|
||||
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
|
||||
{
|
||||
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bIm + aIm * bRe;
|
||||
}
|
||||
|
||||
static int CmpFloat(const void *a, const void *b)
|
||||
{
|
||||
const float *da = (const float *)a;
|
||||
const float *db = (const float *)b;
|
||||
static int CmpFloat(const void* a, const void* b) {
|
||||
const float* da = (const float*)a;
|
||||
const float* db = (const float*)b;
|
||||
|
||||
return (*da > *db) - (*da < *db);
|
||||
}
|
||||
|
||||
int WebRtcAec_CreateAec(AecCore** aecInst)
|
||||
{
|
||||
int WebRtcAec_CreateAec(AecCore** aecInst) {
|
||||
AecCore* aec = malloc(sizeof(AecCore));
|
||||
*aecInst = aec;
|
||||
if (aec == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
|
||||
sizeof(int16_t));
|
||||
aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
|
||||
if (!aec->nearFrBuf) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
|
||||
sizeof(int16_t));
|
||||
aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
|
||||
if (!aec->outFrBuf) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
|
||||
sizeof(int16_t));
|
||||
aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
|
||||
if (!aec->nearFrBufH) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
|
||||
sizeof(int16_t));
|
||||
aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
|
||||
if (!aec->outFrBufH) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
@ -204,23 +189,23 @@ int WebRtcAec_CreateAec(AecCore** aecInst)
|
||||
}
|
||||
|
||||
// Create far-end buffers.
|
||||
aec->far_buf = WebRtc_CreateBuffer(kBufSizePartitions,
|
||||
sizeof(float) * 2 * PART_LEN1);
|
||||
aec->far_buf =
|
||||
WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
|
||||
if (!aec->far_buf) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
aec->far_buf_windowed = WebRtc_CreateBuffer(kBufSizePartitions,
|
||||
sizeof(float) * 2 * PART_LEN1);
|
||||
aec->far_buf_windowed =
|
||||
WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
|
||||
if (!aec->far_buf_windowed) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
aec->far_time_buf = WebRtc_CreateBuffer(kBufSizePartitions,
|
||||
sizeof(int16_t) * PART_LEN);
|
||||
aec->far_time_buf =
|
||||
WebRtc_CreateBuffer(kBufSizePartitions, sizeof(int16_t) * PART_LEN);
|
||||
if (!aec->far_time_buf) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
@ -245,9 +230,8 @@ int WebRtcAec_CreateAec(AecCore** aecInst)
|
||||
aec = NULL;
|
||||
return -1;
|
||||
}
|
||||
aec->delay_estimator =
|
||||
WebRtc_CreateDelayEstimator(aec->delay_estimator_farend,
|
||||
kLookaheadBlocks);
|
||||
aec->delay_estimator = WebRtc_CreateDelayEstimator(
|
||||
aec->delay_estimator_farend, kLookaheadBlocks);
|
||||
if (aec->delay_estimator == NULL) {
|
||||
WebRtcAec_FreeAec(aec);
|
||||
aec = NULL;
|
||||
@ -257,8 +241,7 @@ int WebRtcAec_CreateAec(AecCore** aecInst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_FreeAec(AecCore* aec)
|
||||
{
|
||||
int WebRtcAec_FreeAec(AecCore* aec) {
|
||||
if (aec == NULL) {
|
||||
return -1;
|
||||
}
|
||||
@ -285,8 +268,7 @@ int WebRtcAec_FreeAec(AecCore* aec)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void FilterFar(AecCore* aec, float yf[2][PART_LEN1])
|
||||
{
|
||||
static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
|
||||
int i;
|
||||
for (i = 0; i < aec->num_partitions; i++) {
|
||||
int j;
|
||||
@ -294,23 +276,27 @@ static void FilterFar(AecCore* aec, float yf[2][PART_LEN1])
|
||||
int pos = i * PART_LEN1;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
|
||||
xPos -= aec->num_partitions*(PART_LEN1);
|
||||
xPos -= aec->num_partitions * (PART_LEN1);
|
||||
}
|
||||
|
||||
for (j = 0; j < PART_LEN1; j++) {
|
||||
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
|
||||
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
|
||||
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
|
||||
aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][pos + j],
|
||||
aec->wfBuf[1][pos + j]);
|
||||
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
|
||||
aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][pos + j],
|
||||
aec->wfBuf[1][pos + j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1])
|
||||
{
|
||||
static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled ?
|
||||
kExtendedErrorThreshold : aec->normal_error_threshold;
|
||||
const float error_threshold = aec->extended_filter_enabled
|
||||
? kExtendedErrorThreshold
|
||||
: aec->normal_error_threshold;
|
||||
int i;
|
||||
float abs_ef;
|
||||
for (i = 0; i < (PART_LEN1); i++) {
|
||||
@ -332,7 +318,7 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1])
|
||||
|
||||
// Time-unconstrained filter adaptation.
|
||||
// TODO(andrew): consider for a low-complexity mode.
|
||||
//static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
|
||||
// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
|
||||
// float ef[2][PART_LEN1]) {
|
||||
// int i, j;
|
||||
// for (i = 0; i < aec->num_partitions; i++) {
|
||||
@ -356,10 +342,10 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1])
|
||||
// }
|
||||
//}
|
||||
|
||||
static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) {
|
||||
static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
|
||||
int i, j;
|
||||
for (i = 0; i < aec->num_partitions; i++) {
|
||||
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
|
||||
int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
|
||||
int pos;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
|
||||
@ -372,14 +358,17 @@ static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) {
|
||||
|
||||
fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
|
||||
-aec->xfBuf[1][xPos + j],
|
||||
ef[0][j], ef[1][j]);
|
||||
ef[0][j],
|
||||
ef[1][j]);
|
||||
fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
|
||||
-aec->xfBuf[1][xPos + j],
|
||||
ef[0][j], ef[1][j]);
|
||||
ef[0][j],
|
||||
ef[1][j]);
|
||||
}
|
||||
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
|
||||
-aec->xfBuf[1][xPos + PART_LEN],
|
||||
ef[0][PART_LEN], ef[1][PART_LEN]);
|
||||
ef[0][PART_LEN],
|
||||
ef[1][PART_LEN]);
|
||||
|
||||
aec_rdft_inverse_128(fft);
|
||||
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
|
||||
@ -403,7 +392,8 @@ static void FilterAdaptation(AecCore* aec, float *fft, float ef[2][PART_LEN1]) {
|
||||
}
|
||||
}
|
||||
|
||||
static void OverdriveAndSuppress(AecCore* aec, float hNl[PART_LEN1],
|
||||
static void OverdriveAndSuppress(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]) {
|
||||
int i;
|
||||
@ -430,8 +420,7 @@ WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
|
||||
WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
|
||||
WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
|
||||
|
||||
int WebRtcAec_InitAec(AecCore* aec, int sampFreq)
|
||||
{
|
||||
int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
|
||||
int i;
|
||||
|
||||
aec->sampFreq = sampFreq;
|
||||
@ -439,8 +428,7 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq)
|
||||
if (sampFreq == 8000) {
|
||||
aec->normal_mu = 0.6f;
|
||||
aec->normal_error_threshold = 2e-6f;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->normal_mu = 0.5f;
|
||||
aec->normal_error_threshold = 1.5e-6f;
|
||||
}
|
||||
@ -494,8 +482,7 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq)
|
||||
// SWB is processed as 160 frame size
|
||||
if (aec->sampFreq == 32000) {
|
||||
aec->mult = (short)aec->sampFreq / 16000;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->mult = (short)aec->sampFreq / 8000;
|
||||
}
|
||||
|
||||
@ -527,14 +514,12 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq)
|
||||
aec->xfBufBlockPos = 0;
|
||||
// TODO: Investigate need for these initializations. Deleting them doesn't
|
||||
// change the output at all and yields 0.4% overall speedup.
|
||||
memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions *
|
||||
PART_LEN1);
|
||||
memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions *
|
||||
PART_LEN1);
|
||||
memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions *
|
||||
PART_LEN1);
|
||||
memset(
|
||||
aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->se, 0, sizeof(float) * PART_LEN1);
|
||||
|
||||
// To prevent numerical instability in the first block.
|
||||
@ -680,7 +665,7 @@ void WebRtcAec_ProcessFrame(AecCore* aec,
|
||||
// 6) Update output frame.
|
||||
// Stuff the out buffer if we have less than a frame to output.
|
||||
// This should only happen for the first frame.
|
||||
out_elements = (int) WebRtc_available_read(aec->outFrBuf);
|
||||
out_elements = (int)WebRtc_available_read(aec->outFrBuf);
|
||||
if (out_elements < FRAME_LEN) {
|
||||
WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
|
||||
if (aec->sampFreq == 32000) {
|
||||
@ -739,9 +724,9 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) {
|
||||
|
||||
// Calculate the L1 norm, with median value as central moment.
|
||||
for (i = 0; i < kHistorySizeBlocks; i++) {
|
||||
l1_norm += (float) (fabs(i - my_median) * self->delay_histogram[i]);
|
||||
l1_norm += (float)(fabs(i - my_median) * self->delay_histogram[i]);
|
||||
}
|
||||
*std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock;
|
||||
*std = (int)(l1_norm / (float)num_delay_values + 0.5f) * kMsPerBlock;
|
||||
|
||||
// Reset histogram.
|
||||
memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
|
||||
@ -749,11 +734,11 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_echo_state(AecCore* self) {
|
||||
return self->echoState;
|
||||
}
|
||||
int WebRtcAec_echo_state(AecCore* self) { return self->echoState; }
|
||||
|
||||
void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle,
|
||||
void WebRtcAec_GetEchoStats(AecCore* self,
|
||||
Stats* erl,
|
||||
Stats* erle,
|
||||
Stats* a_nlp) {
|
||||
assert(erl != NULL);
|
||||
assert(erle != NULL);
|
||||
@ -764,12 +749,12 @@ void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle,
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
void* WebRtcAec_far_time_buf(AecCore* self) {
|
||||
return self->far_time_buf;
|
||||
}
|
||||
void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
|
||||
#endif
|
||||
|
||||
void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode,
|
||||
void WebRtcAec_SetConfigCore(AecCore* self,
|
||||
int nlp_mode,
|
||||
int metrics_mode,
|
||||
int delay_logging) {
|
||||
assert(nlp_mode >= 0 && nlp_mode < 3);
|
||||
self->nlp_mode = nlp_mode;
|
||||
@ -792,9 +777,7 @@ int WebRtcAec_delay_correction_enabled(AecCore* self) {
|
||||
return self->extended_filter_enabled;
|
||||
}
|
||||
|
||||
int WebRtcAec_system_delay(AecCore* self) {
|
||||
return self->system_delay;
|
||||
}
|
||||
int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
|
||||
|
||||
void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
|
||||
assert(delay >= 0);
|
||||
@ -832,21 +815,18 @@ static void ProcessBlock(AecCore* aec) {
|
||||
memset(dH, 0, sizeof(dH));
|
||||
if (aec->sampFreq == 32000) {
|
||||
// Get the upper band first so we can reuse |nearend|.
|
||||
WebRtc_ReadBuffer(aec->nearFrBufH,
|
||||
(void**) &nearend_ptr,
|
||||
nearend,
|
||||
PART_LEN);
|
||||
WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN);
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
dH[i] = (float) (nearend_ptr[i]);
|
||||
dH[i] = (float)(nearend_ptr[i]);
|
||||
}
|
||||
memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
|
||||
}
|
||||
WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);
|
||||
WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
|
||||
|
||||
// ---------- Ooura fft ----------
|
||||
// Concatenate old and new nearend blocks.
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
d[i] = (float) (nearend_ptr[i]);
|
||||
d[i] = (float)(nearend_ptr[i]);
|
||||
}
|
||||
memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
|
||||
|
||||
@ -854,7 +834,7 @@ static void ProcessBlock(AecCore* aec) {
|
||||
{
|
||||
int16_t farend[PART_LEN];
|
||||
int16_t* farend_ptr = NULL;
|
||||
WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
|
||||
WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
|
||||
(void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
|
||||
(void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
|
||||
}
|
||||
@ -862,7 +842,7 @@ static void ProcessBlock(AecCore* aec) {
|
||||
|
||||
// We should always have at least one element stored in |far_buf|.
|
||||
assert(WebRtc_available_read(aec->far_buf) > 0);
|
||||
WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);
|
||||
WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
|
||||
|
||||
// Near fft
|
||||
memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
|
||||
@ -872,8 +852,8 @@ static void ProcessBlock(AecCore* aec) {
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
|
||||
(xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
|
||||
aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions *
|
||||
far_spectrum;
|
||||
aec->xPow[i] =
|
||||
gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
|
||||
// Calculate absolute spectra
|
||||
abs_far_spectrum[i] = sqrtf(far_spectrum);
|
||||
|
||||
@ -887,10 +867,9 @@ static void ProcessBlock(AecCore* aec) {
|
||||
if (aec->noiseEstCtr > 50) {
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
if (aec->dPow[i] < aec->dMinPow[i]) {
|
||||
aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
|
||||
aec->dPow[i])) * ramp;
|
||||
}
|
||||
else {
|
||||
aec->dMinPow[i] =
|
||||
(aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
|
||||
} else {
|
||||
aec->dMinPow[i] *= ramp;
|
||||
}
|
||||
}
|
||||
@ -904,25 +883,22 @@ static void ProcessBlock(AecCore* aec) {
|
||||
if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
|
||||
aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
|
||||
gInitNoise[1] * aec->dMinPow[i];
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->dInitMinPow[i] = aec->dMinPow[i];
|
||||
}
|
||||
}
|
||||
aec->noisePow = aec->dInitMinPow;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->noisePow = aec->dMinPow;
|
||||
}
|
||||
|
||||
// Block wise delay estimation used for logging
|
||||
if (aec->delay_logging_enabled) {
|
||||
int delay_estimate = 0;
|
||||
if (WebRtc_AddFarSpectrumFloat(aec->delay_estimator_farend,
|
||||
abs_far_spectrum, PART_LEN1) == 0) {
|
||||
delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
|
||||
abs_near_spectrum,
|
||||
PART_LEN1);
|
||||
if (WebRtc_AddFarSpectrumFloat(
|
||||
aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
|
||||
delay_estimate = WebRtc_DelayEstimatorProcessFloat(
|
||||
aec->delay_estimator, abs_near_spectrum, PART_LEN1);
|
||||
if (delay_estimate >= 0) {
|
||||
// Update delay estimate buffer.
|
||||
aec->delay_histogram[delay_estimate]++;
|
||||
@ -937,9 +913,11 @@ static void ProcessBlock(AecCore* aec) {
|
||||
}
|
||||
|
||||
// Buffer xf
|
||||
memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
|
||||
memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
|
||||
xf_ptr,
|
||||
sizeof(float) * PART_LEN1);
|
||||
memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
|
||||
memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
|
||||
&xf_ptr[PART_LEN1],
|
||||
sizeof(float) * PART_LEN1);
|
||||
|
||||
memset(yf, 0, sizeof(yf));
|
||||
@ -995,7 +973,7 @@ static void ProcessBlock(AecCore* aec) {
|
||||
|
||||
if (aec->metricsMode == 1) {
|
||||
// Update power levels and echo metrics
|
||||
UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
|
||||
UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
|
||||
UpdateLevel(&aec->nearlevel, df);
|
||||
UpdateMetrics(aec);
|
||||
}
|
||||
@ -1011,8 +989,8 @@ static void ProcessBlock(AecCore* aec) {
|
||||
{
|
||||
int16_t eInt16[PART_LEN];
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
eInt16[i] = (int16_t)WEBRTC_SPL_SAT(
|
||||
WEBRTC_SPL_WORD16_MAX, e[i], WEBRTC_SPL_WORD16_MIN);
|
||||
}
|
||||
|
||||
(void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
|
||||
@ -1021,8 +999,7 @@ static void ProcessBlock(AecCore* aec) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
{
|
||||
static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
|
||||
float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
|
||||
complex_t comfortNoiseHband[PART_LEN1];
|
||||
float fft[PART_LEN2];
|
||||
@ -1044,11 +1021,12 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
float sdSum = 0, seSum = 0;
|
||||
|
||||
// Power estimate smoothing coefficients.
|
||||
const float *ptrGCoh = aec->extended_filter_enabled ?
|
||||
kExtendedSmoothingCoefficients[aec->mult - 1] :
|
||||
kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
const float* min_overdrive = aec->extended_filter_enabled ?
|
||||
kExtendedMinOverDrive : kNormalMinOverDrive;
|
||||
const float* ptrGCoh = aec->extended_filter_enabled
|
||||
? kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
const float* min_overdrive = aec->extended_filter_enabled
|
||||
? kExtendedMinOverDrive
|
||||
: kNormalMinOverDrive;
|
||||
|
||||
// Filter energy
|
||||
float wfEnMax = 0, wfEn = 0;
|
||||
@ -1089,7 +1067,7 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
// We should always have at least one element stored in |far_buf|.
|
||||
assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
|
||||
// NLP
|
||||
WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);
|
||||
WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);
|
||||
|
||||
// TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
|
||||
// |xfwBuf|.
|
||||
@ -1132,26 +1110,32 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
|
||||
// Smoothed PSD
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
|
||||
(dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
|
||||
(efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO: investigate further why this is so sensitive.
|
||||
aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
|
||||
aec->sx[i] =
|
||||
ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] *
|
||||
WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
|
||||
|
||||
aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
|
||||
(dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
|
||||
(dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
|
||||
(dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
|
||||
(dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
@ -1162,8 +1146,7 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
if (seSum > sdSum) {
|
||||
aec->divergeState = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if (seSum * 1.05f < sdSum) {
|
||||
aec->divergeState = 0;
|
||||
}
|
||||
@ -1180,9 +1163,11 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
|
||||
// Subband coherence
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
cohde[i] =
|
||||
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
(aec->sd[i] * aec->se[i] + 1e-10f);
|
||||
cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
cohxd[i] =
|
||||
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
(aec->sx[i] * aec->sd[i] + 1e-10f);
|
||||
}
|
||||
|
||||
@ -1205,8 +1190,7 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
|
||||
if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
|
||||
aec->stNearState = 1;
|
||||
}
|
||||
else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
|
||||
} else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
|
||||
aec->stNearState = 0;
|
||||
}
|
||||
|
||||
@ -1218,24 +1202,21 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
memcpy(hNl, cohde, sizeof(hNl));
|
||||
hNlFb = hNlDeAvg;
|
||||
hNlFbLow = hNlDeAvg;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
hNl[i] = 1 - cohxd[i];
|
||||
}
|
||||
hNlFb = hNlXdAvg;
|
||||
hNlFbLow = hNlXdAvg;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
|
||||
if (aec->stNearState == 1) {
|
||||
aec->echoState = 0;
|
||||
memcpy(hNl, cohde, sizeof(hNl));
|
||||
hNlFb = hNlDeAvg;
|
||||
hNlFbLow = hNlDeAvg;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->echoState = 1;
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
|
||||
@ -1257,7 +1238,8 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
aec->hNlNewMin = 1;
|
||||
aec->hNlMinCtr = 0;
|
||||
}
|
||||
aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
|
||||
aec->hNlFbLocalMin =
|
||||
WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
|
||||
aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
|
||||
|
||||
if (aec->hNlNewMin == 1) {
|
||||
@ -1266,7 +1248,8 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
if (aec->hNlMinCtr == 2) {
|
||||
aec->hNlNewMin = 0;
|
||||
aec->hNlMinCtr = 0;
|
||||
aec->overDrive = WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
|
||||
aec->overDrive =
|
||||
WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
|
||||
((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
|
||||
min_overdrive[aec->nlp_mode]);
|
||||
}
|
||||
@ -1274,8 +1257,7 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
// Smooth the overdrive.
|
||||
if (aec->overDrive < aec->overDriveSm) {
|
||||
aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
|
||||
}
|
||||
|
||||
@ -1297,9 +1279,9 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
fft[0] = efw[0][0];
|
||||
fft[1] = efw[0][PART_LEN];
|
||||
for (i = 1; i < PART_LEN; i++) {
|
||||
fft[2*i] = efw[0][i];
|
||||
fft[2 * i] = efw[0][i];
|
||||
// Sign change required by Ooura fft.
|
||||
fft[2*i + 1] = -efw[1][i];
|
||||
fft[2 * i + 1] = -efw[1][i];
|
||||
}
|
||||
aec_rdft_inverse_128(fft);
|
||||
|
||||
@ -1307,11 +1289,11 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
scale = 2.0f / PART_LEN2;
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
fft[i] *= scale; // fft scaling
|
||||
fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i];
|
||||
fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];
|
||||
|
||||
// Saturation protection
|
||||
output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
output[i] = (short)WEBRTC_SPL_SAT(
|
||||
WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
|
||||
|
||||
fft[PART_LEN + i] *= scale; // fft scaling
|
||||
aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
|
||||
@ -1330,8 +1312,8 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
fft[0] = comfortNoiseHband[0][0];
|
||||
fft[1] = comfortNoiseHband[PART_LEN][0];
|
||||
for (i = 1; i < PART_LEN; i++) {
|
||||
fft[2*i] = comfortNoiseHband[i][0];
|
||||
fft[2*i + 1] = comfortNoiseHband[i][1];
|
||||
fft[2 * i] = comfortNoiseHband[i][0];
|
||||
fft[2 * i + 1] = comfortNoiseHband[i][1];
|
||||
}
|
||||
aec_rdft_inverse_128(fft);
|
||||
scale = 2.0f / PART_LEN2;
|
||||
@ -1349,8 +1331,8 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
}
|
||||
|
||||
// Saturation protection
|
||||
outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
outputH[i] = (short)WEBRTC_SPL_SAT(
|
||||
WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1363,12 +1345,12 @@ static void NonLinearProcessing(AecCore* aec, short *output, short *outputH)
|
||||
memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
|
||||
}
|
||||
|
||||
memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
|
||||
sizeof(complex_t) * PART_LEN1);
|
||||
memmove(aec->xfwBuf + PART_LEN1,
|
||||
aec->xfwBuf,
|
||||
sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
|
||||
}
|
||||
|
||||
static void GetHighbandGain(const float *lambda, float *nlpGainHband)
|
||||
{
|
||||
static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
|
||||
int i;
|
||||
|
||||
nlpGainHband[0] = (float)0.0;
|
||||
@ -1378,9 +1360,11 @@ static void GetHighbandGain(const float *lambda, float *nlpGainHband)
|
||||
nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
|
||||
}
|
||||
|
||||
static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1],
|
||||
complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
|
||||
{
|
||||
static void ComfortNoise(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
complex_t* comfortNoiseHband,
|
||||
const float* noisePow,
|
||||
const float* lambda) {
|
||||
int i, num;
|
||||
float rand[PART_LEN];
|
||||
float noise, noiseAvg, tmp, tmpAvg;
|
||||
@ -1410,7 +1394,7 @@ static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1],
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
// This is the proper weighting to match the background noise power
|
||||
tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
|
||||
//tmp = 1 - lambda[i];
|
||||
// tmp = 1 - lambda[i];
|
||||
efw[0][i] += tmp * u[i][0];
|
||||
efw[1][i] += tmp * u[i][1];
|
||||
}
|
||||
@ -1559,8 +1543,7 @@ static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateMetrics(AecCore* aec)
|
||||
{
|
||||
static void UpdateMetrics(AecCore* aec) {
|
||||
float dtmp, dtmp2;
|
||||
|
||||
const float actThresholdNoisy = 8.0f;
|
||||
@ -1579,24 +1562,25 @@ static void UpdateMetrics(AecCore* aec)
|
||||
|
||||
if (aec->farlevel.minlevel < noisyPower) {
|
||||
actThreshold = actThresholdClean;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
actThreshold = actThresholdNoisy;
|
||||
}
|
||||
|
||||
if ((aec->stateCounter > (0.5f * countLen * subCountLen))
|
||||
&& (aec->farlevel.sfrcounter == 0)
|
||||
if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
|
||||
(aec->farlevel.sfrcounter == 0)
|
||||
|
||||
// Estimate in active far-end segments only
|
||||
&& (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel))
|
||||
) {
|
||||
&&
|
||||
(aec->farlevel.averagelevel >
|
||||
(actThreshold * aec->farlevel.minlevel))) {
|
||||
|
||||
// Subtract noise power
|
||||
echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
|
||||
|
||||
// ERL
|
||||
dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
|
||||
aec->nearlevel.averagelevel + 1e-10f);
|
||||
aec->nearlevel.averagelevel +
|
||||
1e-10f);
|
||||
dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
|
||||
|
||||
aec->erl.instant = dtmp;
|
||||
@ -1621,7 +1605,8 @@ static void UpdateMetrics(AecCore* aec)
|
||||
|
||||
// A_NLP
|
||||
dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
|
||||
(2 * aec->linoutlevel.averagelevel) + 1e-10f);
|
||||
(2 * aec->linoutlevel.averagelevel) +
|
||||
1e-10f);
|
||||
|
||||
// subtract noise power
|
||||
suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
|
||||
@ -1656,7 +1641,8 @@ static void UpdateMetrics(AecCore* aec)
|
||||
safety * aec->nlpoutlevel.minlevel);
|
||||
|
||||
dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
|
||||
(2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
|
||||
(2 * aec->nlpoutlevel.averagelevel) +
|
||||
1e-10f);
|
||||
dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
|
||||
|
||||
dtmp = dtmp2;
|
||||
@ -1709,4 +1695,3 @@ static void TimeToFrequency(float time_data[PART_LEN2],
|
||||
freq_data[1][i] = time_data[2 * i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,9 +23,15 @@
|
||||
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
|
||||
|
||||
// Delay estimator constants, used for logging.
|
||||
enum { kMaxDelayBlocks = 60 };
|
||||
enum { kLookaheadBlocks = 15 };
|
||||
enum { kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks };
|
||||
enum {
|
||||
kMaxDelayBlocks = 60
|
||||
};
|
||||
enum {
|
||||
kLookaheadBlocks = 15
|
||||
};
|
||||
enum {
|
||||
kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks
|
||||
};
|
||||
|
||||
typedef float complex_t[2];
|
||||
// For performance reasons, some arrays of complex numbers are replaced by twice
|
||||
@ -37,7 +43,9 @@ typedef float complex_t[2];
|
||||
// compile time.
|
||||
|
||||
// Metrics
|
||||
enum { kOffsetLevel = -100 };
|
||||
enum {
|
||||
kOffsetLevel = -100
|
||||
};
|
||||
|
||||
typedef struct Stats {
|
||||
float instant;
|
||||
@ -79,14 +87,18 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std);
|
||||
int WebRtcAec_echo_state(AecCore* self);
|
||||
|
||||
// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
|
||||
void WebRtcAec_GetEchoStats(AecCore* self, Stats* erl, Stats* erle,
|
||||
void WebRtcAec_GetEchoStats(AecCore* self,
|
||||
Stats* erl,
|
||||
Stats* erle,
|
||||
Stats* a_nlp);
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
void* WebRtcAec_far_time_buf(AecCore* self);
|
||||
#endif
|
||||
|
||||
// Sets local configuration modes.
|
||||
void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode,
|
||||
void WebRtcAec_SetConfigCore(AecCore* self,
|
||||
int nlp_mode,
|
||||
int metrics_mode,
|
||||
int delay_logging);
|
||||
|
||||
// We now interpret delay correction to mean an extended filter length feature.
|
||||
|
@ -21,7 +21,9 @@
|
||||
|
||||
// Number of partitions for the extended filter mode. The first one is an enum
|
||||
// to be used in array declarations, as it represents the maximum filter length.
|
||||
enum { kExtendedNumPartitions = 32 };
|
||||
enum {
|
||||
kExtendedNumPartitions = 32
|
||||
};
|
||||
static const int kNormalNumPartitions = 12;
|
||||
|
||||
// Extended filter adaptation parameters.
|
||||
@ -61,7 +63,7 @@ struct AecCore {
|
||||
float dPow[PART_LEN1];
|
||||
float dMinPow[PART_LEN1];
|
||||
float dInitMinPow[PART_LEN1];
|
||||
float *noisePow;
|
||||
float* noisePow;
|
||||
|
||||
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
|
||||
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
|
||||
@ -127,23 +129,25 @@ struct AecCore {
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
RingBuffer* far_time_buf;
|
||||
FILE *farFile;
|
||||
FILE *nearFile;
|
||||
FILE *outFile;
|
||||
FILE *outLinearFile;
|
||||
FILE* farFile;
|
||||
FILE* nearFile;
|
||||
FILE* outFile;
|
||||
FILE* outLinearFile;
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef void (*WebRtcAec_FilterFar_t)(AecCore* aec, float yf[2][PART_LEN1]);
|
||||
extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
|
||||
typedef void (*WebRtcAec_ScaleErrorSignal_t)
|
||||
(AecCore* aec, float ef[2][PART_LEN1]);
|
||||
typedef void (*WebRtcAec_ScaleErrorSignal_t)(AecCore* aec,
|
||||
float ef[2][PART_LEN1]);
|
||||
extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
|
||||
typedef void (*WebRtcAec_FilterAdaptation_t)
|
||||
(AecCore* aec, float *fft, float ef[2][PART_LEN1]);
|
||||
typedef void (*WebRtcAec_FilterAdaptation_t)(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]);
|
||||
extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
|
||||
typedef void (*WebRtcAec_OverdriveAndSuppress_t)
|
||||
(AecCore* aec, float hNl[PART_LEN1], const float hNlFb,
|
||||
typedef void (*WebRtcAec_OverdriveAndSuppress_t)(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]);
|
||||
extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
|
||||
|
||||
|
@ -21,18 +21,15 @@
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
|
||||
{
|
||||
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bRe - aIm * bIm;
|
||||
}
|
||||
|
||||
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
|
||||
{
|
||||
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bIm + aIm * bRe;
|
||||
}
|
||||
|
||||
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1])
|
||||
{
|
||||
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
|
||||
int i;
|
||||
const int num_partitions = aec->num_partitions;
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
@ -41,7 +38,7 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1])
|
||||
int pos = i * PART_LEN1;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= num_partitions) {
|
||||
xPos -= num_partitions*(PART_LEN1);
|
||||
xPos -= num_partitions * (PART_LEN1);
|
||||
}
|
||||
|
||||
// vectorized code (four at once)
|
||||
@ -65,22 +62,25 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1])
|
||||
}
|
||||
// scalar code for the remaining items.
|
||||
for (; j < PART_LEN1; j++) {
|
||||
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
|
||||
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
|
||||
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
|
||||
aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][pos + j],
|
||||
aec->wfBuf[1][pos + j]);
|
||||
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
|
||||
aec->xfBuf[1][xPos + j],
|
||||
aec->wfBuf[0][pos + j],
|
||||
aec->wfBuf[1][pos + j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1])
|
||||
{
|
||||
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
const __m128 k1e_10f = _mm_set1_ps(1e-10f);
|
||||
const __m128 kMu = aec->extended_filter_enabled ?
|
||||
_mm_set1_ps(kExtendedMu) : _mm_set1_ps(aec->normal_mu);
|
||||
const __m128 kThresh = aec->extended_filter_enabled ?
|
||||
_mm_set1_ps(kExtendedErrorThreshold) :
|
||||
_mm_set1_ps(aec->normal_error_threshold);
|
||||
const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
|
||||
: _mm_set1_ps(aec->normal_mu);
|
||||
const __m128 kThresh = aec->extended_filter_enabled
|
||||
? _mm_set1_ps(kExtendedErrorThreshold)
|
||||
: _mm_set1_ps(aec->normal_error_threshold);
|
||||
|
||||
int i;
|
||||
// vectorized code (four at once)
|
||||
@ -115,10 +115,11 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1])
|
||||
}
|
||||
// scalar code for the remaining items.
|
||||
{
|
||||
const float mu = aec->extended_filter_enabled ?
|
||||
kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled ?
|
||||
kExtendedErrorThreshold : aec->normal_error_threshold;
|
||||
const float mu =
|
||||
aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled
|
||||
? kExtendedErrorThreshold
|
||||
: aec->normal_error_threshold;
|
||||
for (; i < (PART_LEN1); i++) {
|
||||
float abs_ef;
|
||||
ef[0][i] /= (aec->xPow[i] + 1e-10f);
|
||||
@ -138,11 +139,13 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1])
|
||||
}
|
||||
}
|
||||
|
||||
static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1]) {
|
||||
static void FilterAdaptationSSE2(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]) {
|
||||
int i, j;
|
||||
const int num_partitions = aec->num_partitions;
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
|
||||
int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
|
||||
int pos = i * PART_LEN1;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= num_partitions) {
|
||||
@ -150,7 +153,7 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1
|
||||
}
|
||||
|
||||
// Process the whole array...
|
||||
for (j = 0; j < PART_LEN; j+= 4) {
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
// Load xfBuf and ef.
|
||||
const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
|
||||
const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
|
||||
@ -169,22 +172,23 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1
|
||||
const __m128 g = _mm_unpacklo_ps(e, f);
|
||||
const __m128 h = _mm_unpackhi_ps(e, f);
|
||||
// Store
|
||||
_mm_storeu_ps(&fft[2*j + 0], g);
|
||||
_mm_storeu_ps(&fft[2*j + 4], h);
|
||||
_mm_storeu_ps(&fft[2 * j + 0], g);
|
||||
_mm_storeu_ps(&fft[2 * j + 4], h);
|
||||
}
|
||||
// ... and fix up the first imaginary entry.
|
||||
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
|
||||
-aec->xfBuf[1][xPos + PART_LEN],
|
||||
ef[0][PART_LEN], ef[1][PART_LEN]);
|
||||
ef[0][PART_LEN],
|
||||
ef[1][PART_LEN]);
|
||||
|
||||
aec_rdft_inverse_128(fft);
|
||||
memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN);
|
||||
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
|
||||
|
||||
// fft scaling
|
||||
{
|
||||
float scale = 2.0f / PART_LEN2;
|
||||
const __m128 scale_ps = _mm_load_ps1(&scale);
|
||||
for (j = 0; j < PART_LEN; j+=4) {
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
|
||||
const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
|
||||
_mm_storeu_ps(&fft[j], fft_scale);
|
||||
@ -195,13 +199,15 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1
|
||||
{
|
||||
float wt1 = aec->wfBuf[1][pos];
|
||||
aec->wfBuf[0][pos + PART_LEN] += fft[1];
|
||||
for (j = 0; j < PART_LEN; j+= 4) {
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
__m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
|
||||
__m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
|
||||
const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
|
||||
const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
|
||||
const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0));
|
||||
const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1));
|
||||
const __m128 fft_re =
|
||||
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 fft_im =
|
||||
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
|
||||
wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
|
||||
_mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
|
||||
@ -212,8 +218,7 @@ static void FilterAdaptationSSE2(AecCore* aec, float *fft, float ef[2][PART_LEN1
|
||||
}
|
||||
}
|
||||
|
||||
static __m128 mm_pow_ps(__m128 a, __m128 b)
|
||||
{
|
||||
static __m128 mm_pow_ps(__m128 a, __m128 b) {
|
||||
// a^b = exp2(b * log2(a))
|
||||
// exp2(x) and log2(x) are calculated using polynomial approximations.
|
||||
__m128 log2_a, b_log2_a, a_exp_b;
|
||||
@ -238,55 +243,55 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
|
||||
// compensate the fact that the exponent has been shifted in the top/
|
||||
// fractional part and finally getting rid of the implicit leading one
|
||||
// from the mantissa by subtracting it out.
|
||||
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END =
|
||||
{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
|
||||
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END =
|
||||
{0x43800000, 0x43800000, 0x43800000, 0x43800000};
|
||||
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END =
|
||||
{0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
|
||||
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
|
||||
0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
|
||||
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
|
||||
0x43800000, 0x43800000, 0x43800000, 0x43800000};
|
||||
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
|
||||
0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
|
||||
static const int shift_exponent_into_top_mantissa = 8;
|
||||
const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask));
|
||||
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n),
|
||||
shift_exponent_into_top_mantissa));
|
||||
const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent));
|
||||
const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one));
|
||||
const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
|
||||
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
|
||||
_mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
|
||||
const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
|
||||
const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));
|
||||
|
||||
// Compute y.
|
||||
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END =
|
||||
{0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
|
||||
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END =
|
||||
{0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
|
||||
const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask));
|
||||
const __m128 y = _mm_or_ps(
|
||||
mantissa, *((__m128 *)zero_biased_exponent_is_one));
|
||||
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
|
||||
0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
|
||||
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
|
||||
0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
|
||||
const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
|
||||
const __m128 y =
|
||||
_mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
|
||||
|
||||
// Approximate log2(y) ~= (y - 1) * pol5(y).
|
||||
// pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
|
||||
static const ALIGN16_BEG float ALIGN16_END C5[4] =
|
||||
{-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
|
||||
static const ALIGN16_BEG float ALIGN16_END C4[4] =
|
||||
{3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
|
||||
static const ALIGN16_BEG float ALIGN16_END C3[4] =
|
||||
{-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
|
||||
static const ALIGN16_BEG float ALIGN16_END C2[4] =
|
||||
{2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
|
||||
static const ALIGN16_BEG float ALIGN16_END C1[4] =
|
||||
{-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
|
||||
static const ALIGN16_BEG float ALIGN16_END C0[4] =
|
||||
{3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
|
||||
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5));
|
||||
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4));
|
||||
static const ALIGN16_BEG float ALIGN16_END C5[4] = {
|
||||
-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
|
||||
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
|
||||
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
|
||||
const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
|
||||
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3));
|
||||
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
|
||||
const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
|
||||
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2));
|
||||
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
|
||||
const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
|
||||
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1));
|
||||
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
|
||||
const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
|
||||
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0));
|
||||
const __m128 y_minus_one = _mm_sub_ps(
|
||||
y, *((__m128 *)zero_biased_exponent_is_one));
|
||||
const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y);
|
||||
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
|
||||
const __m128 y_minus_one =
|
||||
_mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
|
||||
const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
|
||||
|
||||
// Combine parts.
|
||||
log2_a = _mm_add_ps(n, log2_y);
|
||||
@ -310,38 +315,38 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
|
||||
// maximum relative error of 0.17%.
|
||||
|
||||
// To avoid over/underflow, we reduce the range of input to ]-127, 129].
|
||||
static const ALIGN16_BEG float max_input[4] ALIGN16_END =
|
||||
{129.f, 129.f, 129.f, 129.f};
|
||||
static const ALIGN16_BEG float min_input[4] ALIGN16_END =
|
||||
{-126.99999f, -126.99999f, -126.99999f, -126.99999f};
|
||||
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input));
|
||||
const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input));
|
||||
static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
|
||||
129.f, 129.f};
|
||||
static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
|
||||
-126.99999f, -126.99999f, -126.99999f, -126.99999f};
|
||||
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
|
||||
const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
|
||||
// Compute n.
|
||||
static const ALIGN16_BEG float half[4] ALIGN16_END =
|
||||
{0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half));
|
||||
static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
|
||||
0.5f, 0.5f};
|
||||
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
|
||||
const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
|
||||
// Compute 2^n.
|
||||
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END =
|
||||
{127, 127, 127, 127};
|
||||
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
|
||||
127, 127, 127, 127};
|
||||
static const int float_exponent_shift = 23;
|
||||
const __m128i two_n_exponent = _mm_add_epi32(
|
||||
x_minus_half_floor, *((__m128i *)float_exponent_bias));
|
||||
const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32(
|
||||
two_n_exponent, float_exponent_shift));
|
||||
const __m128i two_n_exponent =
|
||||
_mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
|
||||
const __m128 two_n =
|
||||
_mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
|
||||
// Compute y.
|
||||
const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
|
||||
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
|
||||
static const ALIGN16_BEG float C2[4] ALIGN16_END =
|
||||
{3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
|
||||
static const ALIGN16_BEG float C1[4] ALIGN16_END =
|
||||
{6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
|
||||
static const ALIGN16_BEG float C0[4] ALIGN16_END =
|
||||
{1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f};
|
||||
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2));
|
||||
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1));
|
||||
static const ALIGN16_BEG float C2[4] ALIGN16_END = {
|
||||
3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
|
||||
static const ALIGN16_BEG float C1[4] ALIGN16_END = {
|
||||
6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
|
||||
static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
|
||||
1.0017247f, 1.0017247f};
|
||||
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
|
||||
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
|
||||
const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
|
||||
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0));
|
||||
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
|
||||
|
||||
// Combine parts.
|
||||
a_exp_b = _mm_mul_ps(exp2_y, two_n);
|
||||
@ -352,7 +357,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
|
||||
extern const float WebRtcAec_weightCurve[65];
|
||||
extern const float WebRtcAec_overDriveCurve[65];
|
||||
|
||||
static void OverdriveAndSuppressSSE2(AecCore* aec, float hNl[PART_LEN1],
|
||||
static void OverdriveAndSuppressSSE2(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]) {
|
||||
int i;
|
||||
@ -361,26 +367,25 @@ static void OverdriveAndSuppressSSE2(AecCore* aec, float hNl[PART_LEN1],
|
||||
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
|
||||
const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
|
||||
// vectorized code (four at once)
|
||||
for (i = 0; i + 3 < PART_LEN1; i+=4) {
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
// Weight subbands
|
||||
__m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
|
||||
const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
|
||||
const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
|
||||
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(
|
||||
vec_weightCurve, vec_hNlFb);
|
||||
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
|
||||
const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
|
||||
const __m128 vec_one_weightCurve_hNl = _mm_mul_ps(
|
||||
vec_one_weightCurve, vec_hNl);
|
||||
const __m128 vec_one_weightCurve_hNl =
|
||||
_mm_mul_ps(vec_one_weightCurve, vec_hNl);
|
||||
const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
|
||||
const __m128 vec_if1 = _mm_and_ps(
|
||||
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
|
||||
vec_hNl = _mm_or_ps(vec_if0, vec_if1);
|
||||
|
||||
{
|
||||
const __m128 vec_overDriveCurve = _mm_loadu_ps(
|
||||
&WebRtcAec_overDriveCurve[i]);
|
||||
const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps(
|
||||
vec_overDriveSm, vec_overDriveCurve);
|
||||
const __m128 vec_overDriveCurve =
|
||||
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
|
||||
const __m128 vec_overDriveSm_overDriveCurve =
|
||||
_mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
|
||||
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
|
||||
_mm_storeu_ps(&hNl[i], vec_hNl);
|
||||
}
|
||||
@ -424,4 +429,3 @@ void WebRtcAec_InitAec_SSE2(void) {
|
||||
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
|
||||
|
||||
static int ip[16];
|
||||
|
||||
static void bitrv2_32(int *ip, float *a) {
|
||||
static void bitrv2_32(int* ip, float* a) {
|
||||
const int n = 32;
|
||||
int j, j1, k, k1, m, m2;
|
||||
float xr, xi, yr, yi;
|
||||
@ -116,7 +116,7 @@ static void bitrv2_32(int *ip, float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void bitrv2_128(float *a) {
|
||||
static void bitrv2_128(float* a) {
|
||||
/*
|
||||
Following things have been attempted but are no faster:
|
||||
(a) Storing the swap indexes in a LUT (index calculations are done
|
||||
@ -265,7 +265,7 @@ static void makewt_32(void) {
|
||||
}
|
||||
|
||||
static void makect_32(void) {
|
||||
float *c = rdft_w + 32;
|
||||
float* c = rdft_w + 32;
|
||||
const int nc = 32;
|
||||
int j, nch;
|
||||
float delta;
|
||||
@ -281,7 +281,7 @@ static void makect_32(void) {
|
||||
}
|
||||
}
|
||||
|
||||
static void cft1st_128_C(float *a) {
|
||||
static void cft1st_128_C(float* a) {
|
||||
const int n = 128;
|
||||
int j, k1, k2;
|
||||
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
||||
@ -385,7 +385,7 @@ static void cft1st_128_C(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void cftmdl_128_C(float *a) {
|
||||
static void cftmdl_128_C(float* a) {
|
||||
const int l = 8;
|
||||
const int n = 128;
|
||||
const int m = 32;
|
||||
@ -512,7 +512,7 @@ static void cftmdl_128_C(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void cftfsub_128(float *a) {
|
||||
static void cftfsub_128(float* a) {
|
||||
int j, j1, j2, j3, l;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
@ -542,7 +542,7 @@ static void cftfsub_128(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void cftbsub_128(float *a) {
|
||||
static void cftbsub_128(float* a) {
|
||||
int j, j1, j2, j3, l;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
@ -573,8 +573,8 @@ static void cftbsub_128(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void rftfsub_128_C(float *a) {
|
||||
const float *c = rdft_w + 32;
|
||||
static void rftfsub_128_C(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
@ -594,8 +594,8 @@ static void rftfsub_128_C(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void rftbsub_128_C(float *a) {
|
||||
const float *c = rdft_w + 32;
|
||||
static void rftbsub_128_C(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
@ -617,7 +617,7 @@ static void rftbsub_128_C(float *a) {
|
||||
a[65] = -a[65];
|
||||
}
|
||||
|
||||
void aec_rdft_forward_128(float *a) {
|
||||
void aec_rdft_forward_128(float* a) {
|
||||
float xi;
|
||||
bitrv2_128(a);
|
||||
cftfsub_128(a);
|
||||
@ -627,7 +627,7 @@ void aec_rdft_forward_128(float *a) {
|
||||
a[1] = xi;
|
||||
}
|
||||
|
||||
void aec_rdft_inverse_128(float *a) {
|
||||
void aec_rdft_inverse_128(float* a) {
|
||||
a[1] = 0.5f * (a[0] - a[1]);
|
||||
a[0] -= a[1];
|
||||
rftbsub_128(a);
|
||||
|
@ -20,11 +20,11 @@ static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER /* visual c++ */
|
||||
# define ALIGN16_BEG __declspec(align(16))
|
||||
# define ALIGN16_END
|
||||
#define ALIGN16_BEG __declspec(align(16))
|
||||
#define ALIGN16_END
|
||||
#else /* gcc or icc */
|
||||
# define ALIGN16_BEG
|
||||
# define ALIGN16_END __attribute__((aligned(16)))
|
||||
#define ALIGN16_BEG
|
||||
#define ALIGN16_END __attribute__((aligned(16)))
|
||||
#endif
|
||||
|
||||
// constants shared by all paths (C, SSE2).
|
||||
@ -42,7 +42,7 @@ extern float rdft_wk3i[32];
|
||||
extern float cftmdl_wk1r[4];
|
||||
|
||||
// code path selection function pointers
|
||||
typedef void (*rft_sub_128_t)(float *a);
|
||||
typedef void (*rft_sub_128_t)(float* a);
|
||||
extern rft_sub_128_t rftfsub_128;
|
||||
extern rft_sub_128_t rftbsub_128;
|
||||
extern rft_sub_128_t cft1st_128;
|
||||
@ -51,7 +51,7 @@ extern rft_sub_128_t cftmdl_128;
|
||||
// entry points
|
||||
void aec_rdft_init(void);
|
||||
void aec_rdft_init_sse2(void);
|
||||
void aec_rdft_forward_128(float *a);
|
||||
void aec_rdft_inverse_128(float *a);
|
||||
void aec_rdft_forward_128(float* a);
|
||||
void aec_rdft_inverse_128(float* a);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
|
||||
|
@ -12,10 +12,10 @@
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] =
|
||||
{-1.f, 1.f, -1.f, 1.f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
|
||||
|
||||
static void cft1st_128_SSE2(float *a) {
|
||||
static void cft1st_128_SSE2(float* a) {
|
||||
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
|
||||
int j, k2;
|
||||
|
||||
@ -24,10 +24,10 @@ static void cft1st_128_SSE2(float *a) {
|
||||
__m128 a04v = _mm_loadu_ps(&a[j + 4]);
|
||||
__m128 a08v = _mm_loadu_ps(&a[j + 8]);
|
||||
__m128 a12v = _mm_loadu_ps(&a[j + 12]);
|
||||
__m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1 ,0));
|
||||
__m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3 ,2));
|
||||
__m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1 ,0));
|
||||
__m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3 ,2));
|
||||
__m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
__m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
|
||||
const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
|
||||
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
|
||||
@ -42,7 +42,7 @@ static void cft1st_128_SSE2(float *a) {
|
||||
__m128 x0w;
|
||||
a01v = _mm_add_ps(x0v, x2v);
|
||||
x0v = _mm_sub_ps(x0v, x2v);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
{
|
||||
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
|
||||
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
|
||||
@ -50,16 +50,16 @@ static void cft1st_128_SSE2(float *a) {
|
||||
}
|
||||
{
|
||||
__m128 a23_0v, a23_1v;
|
||||
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1));
|
||||
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
|
||||
x0v = _mm_add_ps(x1v, x3s);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
a23_0v = _mm_mul_ps(wk1rv, x0v);
|
||||
a23_1v = _mm_mul_ps(wk1iv, x0w);
|
||||
a23v = _mm_add_ps(a23_0v, a23_1v);
|
||||
|
||||
x0v = _mm_sub_ps(x1v, x3s);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
{
|
||||
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
|
||||
@ -67,10 +67,10 @@ static void cft1st_128_SSE2(float *a) {
|
||||
a67v = _mm_add_ps(a67_0v, a67_1v);
|
||||
}
|
||||
|
||||
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1 ,0));
|
||||
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1 ,0));
|
||||
a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3 ,2));
|
||||
a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3 ,2));
|
||||
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
_mm_storeu_ps(&a[j + 0], a00v);
|
||||
_mm_storeu_ps(&a[j + 4], a04v);
|
||||
_mm_storeu_ps(&a[j + 8], a08v);
|
||||
@ -78,7 +78,7 @@ static void cft1st_128_SSE2(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void cftmdl_128_SSE2(float *a) {
|
||||
static void cftmdl_128_SSE2(float* a) {
|
||||
const int l = 8;
|
||||
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
|
||||
int j0;
|
||||
@ -91,10 +91,10 @@ static void cftmdl_128_SSE2(float *a) {
|
||||
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
|
||||
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
|
||||
_mm_castsi128_ps(a_32),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
|
||||
_mm_castsi128_ps(a_40),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
|
||||
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
|
||||
|
||||
@ -104,61 +104,60 @@ static void cftmdl_128_SSE2(float *a) {
|
||||
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
|
||||
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
|
||||
_mm_castsi128_ps(a_48),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
|
||||
_mm_castsi128_ps(a_56),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
|
||||
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
|
||||
|
||||
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
|
||||
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
|
||||
_MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
|
||||
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
|
||||
const __m128 yy0 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub,
|
||||
_MM_SHUFFLE(2, 2, 2 ,2));
|
||||
const __m128 yy1 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub,
|
||||
_MM_SHUFFLE(3, 3, 3 ,3));
|
||||
const __m128 yy0 =
|
||||
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
const __m128 yy1 =
|
||||
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
|
||||
const __m128 yy3 = _mm_add_ps(yy0, yy2);
|
||||
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx0),
|
||||
_MM_SHUFFLE(3, 2, 3, 2)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx1),
|
||||
_MM_SHUFFLE(2, 3, 2, 3)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
|
||||
a[j0 + 48] = -a[j0 + 48];
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(yy4),
|
||||
_MM_SHUFFLE(2, 3, 2, 3)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
|
||||
}
|
||||
|
||||
{
|
||||
int k = 64;
|
||||
int k1 = 2;
|
||||
int k2 = 2 * k1;
|
||||
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]);
|
||||
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]);
|
||||
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]);
|
||||
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]);
|
||||
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]);
|
||||
wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]);
|
||||
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
|
||||
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
|
||||
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
|
||||
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
|
||||
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
|
||||
wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
|
||||
for (j0 = k; j0 < l + k; j0 += 2) {
|
||||
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
|
||||
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
|
||||
@ -166,10 +165,10 @@ static void cftmdl_128_SSE2(float *a) {
|
||||
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
|
||||
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
|
||||
_mm_castsi128_ps(a_32),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
|
||||
_mm_castsi128_ps(a_40),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
|
||||
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
|
||||
|
||||
@ -179,77 +178,79 @@ static void cftmdl_128_SSE2(float *a) {
|
||||
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
|
||||
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
|
||||
_mm_castsi128_ps(a_48),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
|
||||
_mm_castsi128_ps(a_56),
|
||||
_MM_SHUFFLE(1, 0, 1 ,0));
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
|
||||
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
|
||||
|
||||
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv);
|
||||
const __m128 xx3 = _mm_mul_ps(wk2iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
|
||||
_MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
|
||||
const __m128 xx3 =
|
||||
_mm_mul_ps(wk2iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx4 = _mm_add_ps(xx2, xx3);
|
||||
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
|
||||
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
|
||||
_MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
|
||||
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
|
||||
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
|
||||
const __m128 xx11 = _mm_mul_ps(wk1iv,
|
||||
const __m128 xx11 = _mm_mul_ps(
|
||||
wk1iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
|
||||
_MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx12 = _mm_add_ps(xx10, xx11);
|
||||
|
||||
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
|
||||
const __m128 xx21 = _mm_mul_ps(wk3iv,
|
||||
const __m128 xx21 = _mm_mul_ps(
|
||||
wk3iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
|
||||
_MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx22 = _mm_add_ps(xx20, xx21);
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx),
|
||||
_MM_SHUFFLE(3, 2, 3, 2)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx4),
|
||||
_MM_SHUFFLE(3, 2, 3, 2)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 40],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx12),
|
||||
_MM_SHUFFLE(3, 2, 3, 2)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 40],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx22),
|
||||
_MM_SHUFFLE(3, 2, 3, 2)));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void rftfsub_128_SSE2(float *a) {
|
||||
const float *c = rdft_w + 32;
|
||||
static void rftfsub_128_SSE2(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END k_half[4] =
|
||||
{0.5f, 0.5f, 0.5f, 0.5f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 mm_half = _mm_load_ps(k_half);
|
||||
|
||||
// Vectorized code (four at once).
|
||||
// Note: commented number are indexes for the first iteration of the loop.
|
||||
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
|
||||
// Load 'wk'.
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
|
||||
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
|
||||
const __m128 wkr_ =
|
||||
@ -260,14 +261,14 @@ static void rftfsub_128_SSE2(float *a) {
|
||||
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
|
||||
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4,
|
||||
_MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4,
|
||||
_MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0,
|
||||
_MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0,
|
||||
_MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
|
||||
// Calculate 'x'.
|
||||
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
|
||||
// 2-126, 4-124, 6-122, 8-120,
|
||||
@ -300,10 +301,10 @@ static void rftfsub_128_SSE2(float *a) {
|
||||
// 122, 123, 120, 121,
|
||||
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 126, 127, 124, 125,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt,
|
||||
_MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt,
|
||||
_MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(
|
||||
a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(
|
||||
a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
|
||||
_mm_storeu_ps(&a[0 + j2], a_j2_0n);
|
||||
_mm_storeu_ps(&a[4 + j2], a_j2_4n);
|
||||
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
|
||||
@ -326,13 +327,13 @@ static void rftfsub_128_SSE2(float *a) {
|
||||
}
|
||||
}
|
||||
|
||||
static void rftbsub_128_SSE2(float *a) {
|
||||
const float *c = rdft_w + 32;
|
||||
static void rftbsub_128_SSE2(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END k_half[4] =
|
||||
{0.5f, 0.5f, 0.5f, 0.5f};
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 mm_half = _mm_load_ps(k_half);
|
||||
|
||||
a[1] = -a[1];
|
||||
@ -340,7 +341,7 @@ static void rftbsub_128_SSE2(float *a) {
|
||||
// Note: commented number are indexes for the first iteration of the loop.
|
||||
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
|
||||
// Load 'wk'.
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
|
||||
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
|
||||
const __m128 wkr_ =
|
||||
@ -351,14 +352,14 @@ static void rftbsub_128_SSE2(float *a) {
|
||||
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
|
||||
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4,
|
||||
_MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4,
|
||||
_MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0,
|
||||
_MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0,
|
||||
_MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
|
||||
// Calculate 'x'.
|
||||
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
|
||||
// 2-126, 4-124, 6-122, 8-120,
|
||||
@ -391,10 +392,10 @@ static void rftbsub_128_SSE2(float *a) {
|
||||
// 122, 123, 120, 121,
|
||||
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 126, 127, 124, 125,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt,
|
||||
_MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt,
|
||||
_MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(
|
||||
a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(
|
||||
a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
|
||||
_mm_storeu_ps(&a[0 + j2], a_j2_0n);
|
||||
_mm_storeu_ps(&a[4 + j2], a_j2_4n);
|
||||
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
|
||||
|
@ -8,7 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock
|
||||
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
|
||||
* clock
|
||||
* skew by resampling the farend signal.
|
||||
*/
|
||||
|
||||
@ -21,7 +22,9 @@
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
enum { kEstimateLengthFrames = 400 };
|
||||
enum {
|
||||
kEstimateLengthFrames = 400
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
short buffer[kResamplerBufferSize];
|
||||
@ -36,11 +39,10 @@ typedef struct {
|
||||
static int EstimateSkew(const int* rawSkew,
|
||||
int size,
|
||||
int absLimit,
|
||||
float *skewEst);
|
||||
float* skewEst);
|
||||
|
||||
int WebRtcAec_CreateResampler(void **resampInst)
|
||||
{
|
||||
resampler_t *obj = malloc(sizeof(resampler_t));
|
||||
int WebRtcAec_CreateResampler(void** resampInst) {
|
||||
resampler_t* obj = malloc(sizeof(resampler_t));
|
||||
*resampInst = obj;
|
||||
if (obj == NULL) {
|
||||
return -1;
|
||||
@ -49,9 +51,8 @@ int WebRtcAec_CreateResampler(void **resampInst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz)
|
||||
{
|
||||
resampler_t *obj = (resampler_t*) resampInst;
|
||||
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
|
||||
resampler_t* obj = (resampler_t*)resampInst;
|
||||
memset(obj->buffer, 0, sizeof(obj->buffer));
|
||||
obj->position = 0.0;
|
||||
|
||||
@ -63,24 +64,22 @@ int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_FreeResampler(void *resampInst)
|
||||
{
|
||||
resampler_t *obj = (resampler_t*) resampInst;
|
||||
int WebRtcAec_FreeResampler(void* resampInst) {
|
||||
resampler_t* obj = (resampler_t*)resampInst;
|
||||
free(obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void WebRtcAec_ResampleLinear(void *resampInst,
|
||||
const short *inspeech,
|
||||
void WebRtcAec_ResampleLinear(void* resampInst,
|
||||
const short* inspeech,
|
||||
int size,
|
||||
float skew,
|
||||
short *outspeech,
|
||||
int *size_out)
|
||||
{
|
||||
resampler_t *obj = (resampler_t*) resampInst;
|
||||
short* outspeech,
|
||||
int* size_out) {
|
||||
resampler_t* obj = (resampler_t*)resampInst;
|
||||
|
||||
short *y;
|
||||
short* y;
|
||||
float be, tnew, interp;
|
||||
int tn, mm;
|
||||
|
||||
@ -103,25 +102,24 @@ void WebRtcAec_ResampleLinear(void *resampInst,
|
||||
y = &obj->buffer[FRAME_LEN]; // Point at current frame
|
||||
|
||||
tnew = be * mm + obj->position;
|
||||
tn = (int) tnew;
|
||||
tn = (int)tnew;
|
||||
|
||||
while (tn < size) {
|
||||
|
||||
// Interpolation
|
||||
interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]);
|
||||
interp = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
|
||||
|
||||
if (interp > 32767) {
|
||||
interp = 32767;
|
||||
}
|
||||
else if (interp < -32768) {
|
||||
} else if (interp < -32768) {
|
||||
interp = -32768;
|
||||
}
|
||||
|
||||
outspeech[mm] = (short) interp;
|
||||
outspeech[mm] = (short)interp;
|
||||
mm++;
|
||||
|
||||
tnew = be * mm + obj->position;
|
||||
tn = (int) tnew;
|
||||
tn = (int)tnew;
|
||||
}
|
||||
|
||||
*size_out = mm;
|
||||
@ -133,24 +131,19 @@ void WebRtcAec_ResampleLinear(void *resampInst,
|
||||
(kResamplerBufferSize - size) * sizeof(short));
|
||||
}
|
||||
|
||||
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst)
|
||||
{
|
||||
resampler_t *obj = (resampler_t*)resampInst;
|
||||
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
|
||||
resampler_t* obj = (resampler_t*)resampInst;
|
||||
int err = 0;
|
||||
|
||||
if (obj->skewDataIndex < kEstimateLengthFrames) {
|
||||
obj->skewData[obj->skewDataIndex] = rawSkew;
|
||||
obj->skewDataIndex++;
|
||||
}
|
||||
else if (obj->skewDataIndex == kEstimateLengthFrames) {
|
||||
err = EstimateSkew(obj->skewData,
|
||||
kEstimateLengthFrames,
|
||||
obj->deviceSampleRateHz,
|
||||
skewEst);
|
||||
} else if (obj->skewDataIndex == kEstimateLengthFrames) {
|
||||
err = EstimateSkew(
|
||||
obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst);
|
||||
obj->skewEstimate = *skewEst;
|
||||
obj->skewDataIndex++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
*skewEst = obj->skewEstimate;
|
||||
}
|
||||
|
||||
@ -160,8 +153,7 @@ int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst)
|
||||
int EstimateSkew(const int* rawSkew,
|
||||
int size,
|
||||
int deviceSampleRateHz,
|
||||
float *skewEst)
|
||||
{
|
||||
float* skewEst) {
|
||||
const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
|
||||
const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
|
||||
int i = 0;
|
||||
@ -212,7 +204,7 @@ int EstimateSkew(const int* rawSkew,
|
||||
n++;
|
||||
cumSum += rawSkew[i];
|
||||
x += n;
|
||||
x2 += n*n;
|
||||
x2 += n * n;
|
||||
y += cumSum;
|
||||
xy += n * cumSum;
|
||||
}
|
||||
@ -223,10 +215,10 @@ int EstimateSkew(const int* rawSkew,
|
||||
}
|
||||
assert(n > 0);
|
||||
xAvg = x / n;
|
||||
denom = x2 - xAvg*x;
|
||||
denom = x2 - xAvg * x;
|
||||
|
||||
if (denom != 0) {
|
||||
skew = (xy - xAvg*y) / denom;
|
||||
skew = (xy - xAvg * y) / denom;
|
||||
}
|
||||
|
||||
*skewEst = skew;
|
||||
|
@ -13,23 +13,27 @@
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
enum { kResamplingDelay = 1 };
|
||||
enum { kResamplerBufferSize = FRAME_LEN * 4 };
|
||||
enum {
|
||||
kResamplingDelay = 1
|
||||
};
|
||||
enum {
|
||||
kResamplerBufferSize = FRAME_LEN * 4
|
||||
};
|
||||
|
||||
// Unless otherwise specified, functions return 0 on success and -1 on error
|
||||
int WebRtcAec_CreateResampler(void **resampInst);
|
||||
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz);
|
||||
int WebRtcAec_FreeResampler(void *resampInst);
|
||||
int WebRtcAec_CreateResampler(void** resampInst);
|
||||
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
|
||||
int WebRtcAec_FreeResampler(void* resampInst);
|
||||
|
||||
// Estimates skew from raw measurement.
|
||||
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst);
|
||||
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
|
||||
|
||||
// Resamples input using linear interpolation.
|
||||
void WebRtcAec_ResampleLinear(void *resampInst,
|
||||
const short *inspeech,
|
||||
void WebRtcAec_ResampleLinear(void* resampInst,
|
||||
const short* inspeech,
|
||||
int size,
|
||||
float skew,
|
||||
short *outspeech,
|
||||
int *size_out);
|
||||
short* outspeech,
|
||||
int* size_out);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
|
||||
|
@ -99,18 +99,27 @@ int webrtc_aec_instance_count = 0;
|
||||
|
||||
// Estimates delay to set the position of the far-end buffer read pointer
|
||||
// (controlled by knownDelay)
|
||||
static void EstBufDelayNormal(aecpc_t *aecInst);
|
||||
static void EstBufDelayExtended(aecpc_t *aecInst);
|
||||
static int ProcessNormal(aecpc_t* self, const int16_t* near,
|
||||
const int16_t* near_high, int16_t* out, int16_t* out_high,
|
||||
int16_t num_samples, int16_t reported_delay_ms, int32_t skew);
|
||||
static void ProcessExtended(aecpc_t* self, const int16_t* near,
|
||||
const int16_t* near_high, int16_t* out, int16_t* out_high,
|
||||
int16_t num_samples, int16_t reported_delay_ms, int32_t skew);
|
||||
static void EstBufDelayNormal(aecpc_t* aecInst);
|
||||
static void EstBufDelayExtended(aecpc_t* aecInst);
|
||||
static int ProcessNormal(aecpc_t* self,
|
||||
const int16_t* near,
|
||||
const int16_t* near_high,
|
||||
int16_t* out,
|
||||
int16_t* out_high,
|
||||
int16_t num_samples,
|
||||
int16_t reported_delay_ms,
|
||||
int32_t skew);
|
||||
static void ProcessExtended(aecpc_t* self,
|
||||
const int16_t* near,
|
||||
const int16_t* near_high,
|
||||
int16_t* out,
|
||||
int16_t* out_high,
|
||||
int16_t num_samples,
|
||||
int16_t reported_delay_ms,
|
||||
int32_t skew);
|
||||
|
||||
int32_t WebRtcAec_Create(void **aecInst)
|
||||
{
|
||||
aecpc_t *aecpc;
|
||||
int32_t WebRtcAec_Create(void** aecInst) {
|
||||
aecpc_t* aecpc;
|
||||
if (aecInst == NULL) {
|
||||
return -1;
|
||||
}
|
||||
@ -135,8 +144,8 @@ int32_t WebRtcAec_Create(void **aecInst)
|
||||
// Create far-end pre-buffer. The buffer size has to be large enough for
|
||||
// largest possible drift compensation (kResamplerBufferSize) + "almost" an
|
||||
// FFT buffer (PART_LEN2 - 1).
|
||||
aecpc->far_pre_buf = WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize,
|
||||
sizeof(float));
|
||||
aecpc->far_pre_buf =
|
||||
WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
|
||||
if (!aecpc->far_pre_buf) {
|
||||
WebRtcAec_Free(aecpc);
|
||||
aecpc = NULL;
|
||||
@ -147,8 +156,8 @@ int32_t WebRtcAec_Create(void **aecInst)
|
||||
aecpc->lastError = 0;
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
aecpc->far_pre_buf_s16 = WebRtc_CreateBuffer(
|
||||
PART_LEN2 + kResamplerBufferSize, sizeof(int16_t));
|
||||
aecpc->far_pre_buf_s16 =
|
||||
WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(int16_t));
|
||||
if (!aecpc->far_pre_buf_s16) {
|
||||
WebRtcAec_Free(aecpc);
|
||||
aecpc = NULL;
|
||||
@ -169,9 +178,8 @@ int32_t WebRtcAec_Create(void **aecInst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_Free(void *aecInst)
|
||||
{
|
||||
aecpc_t *aecpc = aecInst;
|
||||
int32_t WebRtcAec_Free(void* aecInst) {
|
||||
aecpc_t* aecpc = aecInst;
|
||||
|
||||
if (aecpc == NULL) {
|
||||
return -1;
|
||||
@ -193,9 +201,8 @@ int32_t WebRtcAec_Free(void *aecInst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq)
|
||||
{
|
||||
aecpc_t *aecpc = aecInst;
|
||||
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
|
||||
aecpc_t* aecpc = aecInst;
|
||||
AecConfig aecConfig;
|
||||
|
||||
if (sampFreq != 8000 && sampFreq != 16000 && sampFreq != 32000) {
|
||||
@ -231,8 +238,7 @@ int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq)
|
||||
|
||||
if (aecpc->sampFreq == 32000) {
|
||||
aecpc->splitSampFreq = 16000;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aecpc->splitSampFreq = sampFreq;
|
||||
}
|
||||
|
||||
@ -285,12 +291,12 @@ int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq)
|
||||
}
|
||||
|
||||
// only buffer L band for farend
|
||||
int32_t WebRtcAec_BufferFarend(void *aecInst, const int16_t *farend,
|
||||
int16_t nrOfSamples)
|
||||
{
|
||||
aecpc_t *aecpc = aecInst;
|
||||
int32_t WebRtcAec_BufferFarend(void* aecInst,
|
||||
const int16_t* farend,
|
||||
int16_t nrOfSamples) {
|
||||
aecpc_t* aecpc = aecInst;
|
||||
int32_t retVal = 0;
|
||||
int newNrOfSamples = (int) nrOfSamples;
|
||||
int newNrOfSamples = (int)nrOfSamples;
|
||||
short newFarend[MAX_RESAMP_LEN];
|
||||
const int16_t* farend_ptr = farend;
|
||||
float tmp_farend[MAX_RESAMP_LEN];
|
||||
@ -318,41 +324,44 @@ int32_t WebRtcAec_BufferFarend(void *aecInst, const int16_t *farend,
|
||||
|
||||
if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
|
||||
// Resample and get a new number of samples
|
||||
WebRtcAec_ResampleLinear(aecpc->resampler, farend, nrOfSamples, skew,
|
||||
newFarend, &newNrOfSamples);
|
||||
farend_ptr = (const int16_t*) newFarend;
|
||||
WebRtcAec_ResampleLinear(aecpc->resampler,
|
||||
farend,
|
||||
nrOfSamples,
|
||||
skew,
|
||||
newFarend,
|
||||
&newNrOfSamples);
|
||||
farend_ptr = (const int16_t*)newFarend;
|
||||
}
|
||||
|
||||
aecpc->farend_started = 1;
|
||||
WebRtcAec_SetSystemDelay(aecpc->aec, WebRtcAec_system_delay(aecpc->aec) +
|
||||
newNrOfSamples);
|
||||
WebRtcAec_SetSystemDelay(aecpc->aec,
|
||||
WebRtcAec_system_delay(aecpc->aec) + newNrOfSamples);
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
WebRtc_WriteBuffer(aecpc->far_pre_buf_s16, farend_ptr,
|
||||
(size_t) newNrOfSamples);
|
||||
WebRtc_WriteBuffer(
|
||||
aecpc->far_pre_buf_s16, farend_ptr, (size_t)newNrOfSamples);
|
||||
#endif
|
||||
// Cast to float and write the time-domain data to |far_pre_buf|.
|
||||
for (i = 0; i < newNrOfSamples; i++) {
|
||||
tmp_farend[i] = (float) farend_ptr[i];
|
||||
tmp_farend[i] = (float)farend_ptr[i];
|
||||
}
|
||||
WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_float,
|
||||
(size_t) newNrOfSamples);
|
||||
WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_float, (size_t)newNrOfSamples);
|
||||
|
||||
// Transform to frequency domain if we have enough data.
|
||||
while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
|
||||
// We have enough data to pass to the FFT, hence read PART_LEN2 samples.
|
||||
WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**) &farend_float, tmp_farend,
|
||||
PART_LEN2);
|
||||
WebRtc_ReadBuffer(
|
||||
aecpc->far_pre_buf, (void**)&farend_float, tmp_farend, PART_LEN2);
|
||||
|
||||
WebRtcAec_BufferFarendPartition(aecpc->aec, farend_float);
|
||||
|
||||
// Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
|
||||
WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
WebRtc_ReadBuffer(aecpc->far_pre_buf_s16, (void**) &farend_ptr, newFarend,
|
||||
PART_LEN2);
|
||||
WebRtc_WriteBuffer(WebRtcAec_far_time_buf(aecpc->aec),
|
||||
&farend_ptr[PART_LEN], 1);
|
||||
WebRtc_ReadBuffer(
|
||||
aecpc->far_pre_buf_s16, (void**)&farend_ptr, newFarend, PART_LEN2);
|
||||
WebRtc_WriteBuffer(
|
||||
WebRtcAec_far_time_buf(aecpc->aec), &farend_ptr[PART_LEN], 1);
|
||||
WebRtc_MoveReadPtr(aecpc->far_pre_buf_s16, -PART_LEN);
|
||||
#endif
|
||||
}
|
||||
@ -360,12 +369,15 @@ int32_t WebRtcAec_BufferFarend(void *aecInst, const int16_t *farend,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_Process(void *aecInst, const int16_t *nearend,
|
||||
const int16_t *nearendH, int16_t *out, int16_t *outH,
|
||||
int16_t nrOfSamples, int16_t msInSndCardBuf,
|
||||
int32_t skew)
|
||||
{
|
||||
aecpc_t *aecpc = aecInst;
|
||||
int32_t WebRtcAec_Process(void* aecInst,
|
||||
const int16_t* nearend,
|
||||
const int16_t* nearendH,
|
||||
int16_t* out,
|
||||
int16_t* outH,
|
||||
int16_t nrOfSamples,
|
||||
int16_t msInSndCardBuf,
|
||||
int32_t skew) {
|
||||
aecpc_t* aecpc = aecInst;
|
||||
int32_t retVal = 0;
|
||||
if (nearend == NULL) {
|
||||
aecpc->lastError = AEC_NULL_POINTER_ERROR;
|
||||
@ -398,8 +410,7 @@ int32_t WebRtcAec_Process(void *aecInst, const int16_t *nearend,
|
||||
msInSndCardBuf = 0;
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
}
|
||||
else if (msInSndCardBuf > kMaxTrustedDelayMs) {
|
||||
} else if (msInSndCardBuf > kMaxTrustedDelayMs) {
|
||||
// The clamping is now done in ProcessExtended/Normal().
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
@ -407,11 +418,17 @@ int32_t WebRtcAec_Process(void *aecInst, const int16_t *nearend,
|
||||
|
||||
// This returns the value of aec->extended_filter_enabled.
|
||||
if (WebRtcAec_delay_correction_enabled(aecpc->aec)) {
|
||||
ProcessExtended(aecpc, nearend, nearendH, out, outH, nrOfSamples,
|
||||
msInSndCardBuf, skew);
|
||||
ProcessExtended(
|
||||
aecpc, nearend, nearendH, out, outH, nrOfSamples, msInSndCardBuf, skew);
|
||||
} else {
|
||||
if (ProcessNormal(aecpc, nearend, nearendH, out, outH, nrOfSamples,
|
||||
msInSndCardBuf, skew) != 0) {
|
||||
if (ProcessNormal(aecpc,
|
||||
nearend,
|
||||
nearendH,
|
||||
out,
|
||||
outH,
|
||||
nrOfSamples,
|
||||
msInSndCardBuf,
|
||||
skew) != 0) {
|
||||
retVal = -1;
|
||||
}
|
||||
}
|
||||
@ -421,8 +438,8 @@ int32_t WebRtcAec_Process(void *aecInst, const int16_t *nearend,
|
||||
int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) /
|
||||
(sampMsNb * aecpc->rate_factor));
|
||||
(void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile);
|
||||
(void)fwrite(&aecpc->knownDelay, sizeof(aecpc->knownDelay), 1,
|
||||
aecpc->delayFile);
|
||||
(void)fwrite(
|
||||
&aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -442,8 +459,9 @@ int WebRtcAec_set_config(void* handle, AecConfig config) {
|
||||
}
|
||||
self->skewMode = config.skewMode;
|
||||
|
||||
if (config.nlpMode != kAecNlpConservative && config.nlpMode != kAecNlpModerate
|
||||
&& config.nlpMode != kAecNlpAggressive) {
|
||||
if (config.nlpMode != kAecNlpConservative &&
|
||||
config.nlpMode != kAecNlpModerate &&
|
||||
config.nlpMode != kAecNlpAggressive) {
|
||||
self->lastError = AEC_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
@ -458,14 +476,14 @@ int WebRtcAec_set_config(void* handle, AecConfig config) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebRtcAec_SetConfigCore(self->aec, config.nlpMode, config.metricsMode,
|
||||
config.delay_logging);
|
||||
WebRtcAec_SetConfigCore(
|
||||
self->aec, config.nlpMode, config.metricsMode, config.delay_logging);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_get_echo_status(void* handle, int* status) {
|
||||
aecpc_t* self = (aecpc_t*)handle;
|
||||
if (status == NULL ) {
|
||||
if (status == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
@ -488,10 +506,10 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
|
||||
Stats erle;
|
||||
Stats a_nlp;
|
||||
|
||||
if (handle == NULL ) {
|
||||
if (handle == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (metrics == NULL ) {
|
||||
if (metrics == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
@ -503,46 +521,46 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
|
||||
WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
|
||||
|
||||
// ERL
|
||||
metrics->erl.instant = (int) erl.instant;
|
||||
metrics->erl.instant = (int)erl.instant;
|
||||
|
||||
if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average;
|
||||
metrics->erl.average = (int) dtmp;
|
||||
metrics->erl.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->erl.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->erl.max = (int) erl.max;
|
||||
metrics->erl.max = (int)erl.max;
|
||||
|
||||
if (erl.min < (kOffsetLevel * (-1))) {
|
||||
metrics->erl.min = (int) erl.min;
|
||||
metrics->erl.min = (int)erl.min;
|
||||
} else {
|
||||
metrics->erl.min = kOffsetLevel;
|
||||
}
|
||||
|
||||
// ERLE
|
||||
metrics->erle.instant = (int) erle.instant;
|
||||
metrics->erle.instant = (int)erle.instant;
|
||||
|
||||
if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average;
|
||||
metrics->erle.average = (int) dtmp;
|
||||
metrics->erle.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->erle.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->erle.max = (int) erle.max;
|
||||
metrics->erle.max = (int)erle.max;
|
||||
|
||||
if (erle.min < (kOffsetLevel * (-1))) {
|
||||
metrics->erle.min = (int) erle.min;
|
||||
metrics->erle.min = (int)erle.min;
|
||||
} else {
|
||||
metrics->erle.min = kOffsetLevel;
|
||||
}
|
||||
|
||||
// RERL
|
||||
if ((metrics->erl.average > kOffsetLevel)
|
||||
&& (metrics->erle.average > kOffsetLevel)) {
|
||||
if ((metrics->erl.average > kOffsetLevel) &&
|
||||
(metrics->erle.average > kOffsetLevel)) {
|
||||
stmp = metrics->erl.average + metrics->erle.average;
|
||||
} else {
|
||||
stmp = kOffsetLevel;
|
||||
@ -555,20 +573,20 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
|
||||
metrics->rerl.min = stmp;
|
||||
|
||||
// A_NLP
|
||||
metrics->aNlp.instant = (int) a_nlp.instant;
|
||||
metrics->aNlp.instant = (int)a_nlp.instant;
|
||||
|
||||
if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average;
|
||||
metrics->aNlp.average = (int) dtmp;
|
||||
metrics->aNlp.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->aNlp.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->aNlp.max = (int) a_nlp.max;
|
||||
metrics->aNlp.max = (int)a_nlp.max;
|
||||
|
||||
if (a_nlp.min < (kOffsetLevel * (-1))) {
|
||||
metrics->aNlp.min = (int) a_nlp.min;
|
||||
metrics->aNlp.min = (int)a_nlp.min;
|
||||
} else {
|
||||
metrics->aNlp.min = kOffsetLevel;
|
||||
}
|
||||
@ -599,9 +617,8 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_get_error_code(void *aecInst)
|
||||
{
|
||||
aecpc_t *aecpc = aecInst;
|
||||
int32_t WebRtcAec_get_error_code(void* aecInst) {
|
||||
aecpc_t* aecpc = aecInst;
|
||||
return aecpc->lastError;
|
||||
}
|
||||
|
||||
@ -609,12 +626,16 @@ AecCore* WebRtcAec_aec_core(void* handle) {
|
||||
if (!handle) {
|
||||
return NULL;
|
||||
}
|
||||
return ((aecpc_t*) handle)->aec;
|
||||
return ((aecpc_t*)handle)->aec;
|
||||
}
|
||||
|
||||
static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
const int16_t *nearendH, int16_t *out, int16_t *outH,
|
||||
int16_t nrOfSamples, int16_t msInSndCardBuf,
|
||||
static int ProcessNormal(aecpc_t* aecpc,
|
||||
const int16_t* nearend,
|
||||
const int16_t* nearendH,
|
||||
int16_t* out,
|
||||
int16_t* outH,
|
||||
int16_t nrOfSamples,
|
||||
int16_t msInSndCardBuf,
|
||||
int32_t skew) {
|
||||
int retVal = 0;
|
||||
short i;
|
||||
@ -624,8 +645,8 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
const float minSkewEst = -0.5f;
|
||||
const float maxSkewEst = 1.0f;
|
||||
|
||||
msInSndCardBuf = msInSndCardBuf > kMaxTrustedDelayMs ?
|
||||
kMaxTrustedDelayMs : msInSndCardBuf;
|
||||
msInSndCardBuf =
|
||||
msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf;
|
||||
// TODO(andrew): we need to investigate if this +10 is really wanted.
|
||||
msInSndCardBuf += 10;
|
||||
aecpc->msInSndCardBuf = msInSndCardBuf;
|
||||
@ -633,27 +654,24 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
if (aecpc->skewMode == kAecTrue) {
|
||||
if (aecpc->skewFrCtr < 25) {
|
||||
aecpc->skewFrCtr++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
|
||||
if (retVal == -1) {
|
||||
aecpc->skew = 0;
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
|
||||
}
|
||||
|
||||
aecpc->skew /= aecpc->sampFactor*nrOfSamples;
|
||||
aecpc->skew /= aecpc->sampFactor * nrOfSamples;
|
||||
|
||||
if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) {
|
||||
aecpc->resample = kAecFalse;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aecpc->resample = kAecTrue;
|
||||
}
|
||||
|
||||
if (aecpc->skew < minSkewEst) {
|
||||
aecpc->skew = minSkewEst;
|
||||
}
|
||||
else if (aecpc->skew > maxSkewEst) {
|
||||
} else if (aecpc->skew > maxSkewEst) {
|
||||
aecpc->skew = maxSkewEst;
|
||||
}
|
||||
|
||||
@ -695,8 +713,7 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
|
||||
aecpc->sum += aecpc->msInSndCardBuf;
|
||||
aecpc->counter++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
aecpc->counter = 0;
|
||||
}
|
||||
|
||||
@ -704,8 +721,9 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
// The far-end buffer size is determined in partitions of
|
||||
// PART_LEN samples. Use 75% of the average value of the system
|
||||
// delay as buffer size to start with.
|
||||
aecpc->bufSizeStart = WEBRTC_SPL_MIN((3 * aecpc->sum *
|
||||
aecpc->rate_factor * 8) / (4 * aecpc->counter * PART_LEN),
|
||||
aecpc->bufSizeStart =
|
||||
WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
|
||||
(4 * aecpc->counter * PART_LEN),
|
||||
kMaxBufSizeStart);
|
||||
// Buffer size has now been determined.
|
||||
aecpc->checkBuffSize = 0;
|
||||
@ -714,8 +732,9 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) {
|
||||
// For really bad systems, don't disable the echo canceller for
|
||||
// more than 0.5 sec.
|
||||
aecpc->bufSizeStart = WEBRTC_SPL_MIN((aecpc->msInSndCardBuf *
|
||||
aecpc->rate_factor * 3) / 40, kMaxBufSizeStart);
|
||||
aecpc->bufSizeStart = WEBRTC_SPL_MIN(
|
||||
(aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
|
||||
kMaxBufSizeStart);
|
||||
aecpc->checkBuffSize = 0;
|
||||
}
|
||||
}
|
||||
@ -765,9 +784,14 @@ static int ProcessNormal(aecpc_t *aecpc, const int16_t *nearend,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
static void ProcessExtended(aecpc_t* self, const int16_t* near,
|
||||
const int16_t* near_high, int16_t* out, int16_t* out_high,
|
||||
int16_t num_samples, int16_t reported_delay_ms, int32_t skew) {
|
||||
static void ProcessExtended(aecpc_t* self,
|
||||
const int16_t* near,
|
||||
const int16_t* near_high,
|
||||
int16_t* out,
|
||||
int16_t* out_high,
|
||||
int16_t num_samples,
|
||||
int16_t reported_delay_ms,
|
||||
int32_t skew) {
|
||||
int i;
|
||||
const int num_frames = num_samples / FRAME_LEN;
|
||||
#if defined(WEBRTC_UNTRUSTED_DELAY)
|
||||
@ -779,14 +803,16 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near,
|
||||
// Due to the longer filter, we no longer add 10 ms to the reported delay
|
||||
// to reduce chance of non-causality. Instead we apply a minimum here to avoid
|
||||
// issues with the read pointer jumping around needlessly.
|
||||
reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs ?
|
||||
kMinTrustedDelayMs : reported_delay_ms;
|
||||
reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs
|
||||
? kMinTrustedDelayMs
|
||||
: reported_delay_ms;
|
||||
// If the reported delay appears to be bogus, we attempt to recover by using
|
||||
// the measured fixed delay values. We use >= here because higher layers
|
||||
// may already clamp to this maximum value, and we would otherwise not
|
||||
// detect it here.
|
||||
reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs ?
|
||||
kFixedDelayMs : reported_delay_ms;
|
||||
reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs
|
||||
? kFixedDelayMs
|
||||
: reported_delay_ms;
|
||||
#endif
|
||||
self->msInSndCardBuf = reported_delay_ms;
|
||||
|
||||
@ -805,10 +831,11 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near,
|
||||
// action on the first frame. In the trusted delay case, we'll take the
|
||||
// current reported delay, unless it's less then our conservative
|
||||
// measurement.
|
||||
int startup_size_ms = reported_delay_ms < kFixedDelayMs ?
|
||||
kFixedDelayMs : reported_delay_ms;
|
||||
int startup_size_ms =
|
||||
reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
|
||||
int overhead_elements = (WebRtcAec_system_delay(self->aec) -
|
||||
startup_size_ms / 2 * self->rate_factor * 8) / PART_LEN;
|
||||
startup_size_ms / 2 * self->rate_factor * 8) /
|
||||
PART_LEN;
|
||||
WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
|
||||
self->startup_phase = 0;
|
||||
}
|
||||
@ -823,9 +850,12 @@ static void ProcessExtended(aecpc_t* self, const int16_t* near,
|
||||
WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset);
|
||||
|
||||
for (i = 0; i < num_frames; ++i) {
|
||||
WebRtcAec_ProcessFrame(self->aec, &near[FRAME_LEN * i],
|
||||
&near_high[FRAME_LEN * i], adjusted_known_delay,
|
||||
&out[FRAME_LEN * i], &out_high[FRAME_LEN * i]);
|
||||
WebRtcAec_ProcessFrame(self->aec,
|
||||
&near[FRAME_LEN * i],
|
||||
&near_high[FRAME_LEN * i],
|
||||
adjusted_known_delay,
|
||||
&out[FRAME_LEN * i],
|
||||
&out_high[FRAME_LEN * i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -857,8 +887,8 @@ static void EstBufDelayNormal(aecpc_t* aecpc) {
|
||||
// We use -1 to signal an initialized state in the "extended" implementation;
|
||||
// compensate for that.
|
||||
aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay;
|
||||
aecpc->filtDelay = WEBRTC_SPL_MAX(0, (short) (0.8 * aecpc->filtDelay +
|
||||
0.2 * current_delay));
|
||||
aecpc->filtDelay =
|
||||
WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay));
|
||||
|
||||
delay_difference = aecpc->filtDelay - aecpc->knownDelay;
|
||||
if (delay_difference > 224) {
|
||||
@ -879,7 +909,7 @@ static void EstBufDelayNormal(aecpc_t* aecpc) {
|
||||
aecpc->lastDelayDiff = delay_difference;
|
||||
|
||||
if (aecpc->timeForDelayChange > 25) {
|
||||
aecpc->knownDelay = WEBRTC_SPL_MAX((int) aecpc->filtDelay - 160, 0);
|
||||
aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -910,8 +940,8 @@ static void EstBufDelayExtended(aecpc_t* self) {
|
||||
if (self->filtDelay == -1) {
|
||||
self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay);
|
||||
} else {
|
||||
self->filtDelay = WEBRTC_SPL_MAX(0, (short) (0.95 * self->filtDelay +
|
||||
0.05 * current_delay));
|
||||
self->filtDelay = WEBRTC_SPL_MAX(
|
||||
0, (short)(0.95 * self->filtDelay + 0.05 * current_delay));
|
||||
}
|
||||
|
||||
delay_difference = self->filtDelay - self->knownDelay;
|
||||
@ -933,6 +963,6 @@ static void EstBufDelayExtended(aecpc_t* self) {
|
||||
self->lastDelayDiff = delay_difference;
|
||||
|
||||
if (self->timeForDelayChange > 25) {
|
||||
self->knownDelay = WEBRTC_SPL_MAX((int) self->filtDelay - 256, 0);
|
||||
self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0);
|
||||
}
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ typedef struct {
|
||||
int16_t skewMode; // default kAecFalse
|
||||
int16_t metricsMode; // default kAecFalse
|
||||
int delay_logging; // default kAecFalse
|
||||
//float realSkew;
|
||||
// float realSkew;
|
||||
} AecConfig;
|
||||
|
||||
typedef struct {
|
||||
@ -76,7 +76,7 @@ extern "C" {
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Create(void **aecInst);
|
||||
int32_t WebRtcAec_Create(void** aecInst);
|
||||
|
||||
/*
|
||||
* This function releases the memory allocated by WebRtcAec_Create().
|
||||
@ -90,7 +90,7 @@ int32_t WebRtcAec_Create(void **aecInst);
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Free(void *aecInst);
|
||||
int32_t WebRtcAec_Free(void* aecInst);
|
||||
|
||||
/*
|
||||
* Initializes an AEC instance.
|
||||
@ -106,7 +106,7 @@ int32_t WebRtcAec_Free(void *aecInst);
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq);
|
||||
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
|
||||
|
||||
/*
|
||||
* Inserts an 80 or 160 sample block of data into the farend buffer.
|
||||
@ -123,8 +123,8 @@ int32_t WebRtcAec_Init(void *aecInst, int32_t sampFreq, int32_t scSampFreq);
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_BufferFarend(void *aecInst,
|
||||
const int16_t *farend,
|
||||
int32_t WebRtcAec_BufferFarend(void* aecInst,
|
||||
const int16_t* farend,
|
||||
int16_t nrOfSamples);
|
||||
|
||||
/*
|
||||
@ -153,11 +153,11 @@ int32_t WebRtcAec_BufferFarend(void *aecInst,
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Process(void *aecInst,
|
||||
const int16_t *nearend,
|
||||
const int16_t *nearendH,
|
||||
int16_t *out,
|
||||
int16_t *outH,
|
||||
int32_t WebRtcAec_Process(void* aecInst,
|
||||
const int16_t* nearend,
|
||||
const int16_t* nearendH,
|
||||
int16_t* out,
|
||||
int16_t* outH,
|
||||
int16_t nrOfSamples,
|
||||
int16_t msInSndCardBuf,
|
||||
int32_t skew);
|
||||
@ -238,7 +238,7 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std);
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 11000-11100: error code
|
||||
*/
|
||||
int32_t WebRtcAec_get_error_code(void *aecInst);
|
||||
int32_t WebRtcAec_get_error_code(void* aecInst);
|
||||
|
||||
// Returns a pointer to the low level AEC handle.
|
||||
//
|
||||
|
@ -52,9 +52,7 @@ class SystemDelayTest : public ::testing::Test {
|
||||
};
|
||||
|
||||
SystemDelayTest::SystemDelayTest()
|
||||
: handle_(NULL),
|
||||
self_(NULL),
|
||||
samples_per_frame_(0) {
|
||||
: handle_(NULL), self_(NULL), samples_per_frame_(0) {
|
||||
// Dummy input data are set with more or less arbitrary non-zero values.
|
||||
memset(far_, 1, sizeof(far_));
|
||||
memset(near_, 2, sizeof(near_));
|
||||
@ -74,7 +72,7 @@ void SystemDelayTest::TearDown() {
|
||||
|
||||
// In SWB mode nothing is added to the buffer handling with respect to
|
||||
// functionality compared to WB. We therefore only verify behavior in NB and WB.
|
||||
static const int kSampleRateHz[] = { 8000, 16000 };
|
||||
static const int kSampleRateHz[] = {8000, 16000};
|
||||
static const size_t kNumSampleRates =
|
||||
sizeof(kSampleRateHz) / sizeof(*kSampleRateHz);
|
||||
|
||||
@ -100,8 +98,15 @@ void SystemDelayTest::Init(int sample_rate_hz) {
|
||||
|
||||
void SystemDelayTest::RenderAndCapture(int device_buffer_ms) {
|
||||
EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
|
||||
EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL,
|
||||
samples_per_frame_, device_buffer_ms, 0));
|
||||
EXPECT_EQ(0,
|
||||
WebRtcAec_Process(handle_,
|
||||
near_,
|
||||
NULL,
|
||||
out_,
|
||||
NULL,
|
||||
samples_per_frame_,
|
||||
device_buffer_ms,
|
||||
0));
|
||||
}
|
||||
|
||||
int SystemDelayTest::BufferFillUp() {
|
||||
@ -254,8 +259,15 @@ TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
|
||||
// can make that assumption since we have a separate stability test.
|
||||
int process_time_ms = 0;
|
||||
for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
|
||||
EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL,
|
||||
samples_per_frame_, kDeviceBufMs, 0));
|
||||
EXPECT_EQ(0,
|
||||
WebRtcAec_Process(handle_,
|
||||
near_,
|
||||
NULL,
|
||||
out_,
|
||||
NULL,
|
||||
samples_per_frame_,
|
||||
kDeviceBufMs,
|
||||
0));
|
||||
}
|
||||
// Verify that a buffer size has been established.
|
||||
EXPECT_EQ(0, self_->checkBuffSize);
|
||||
@ -301,8 +313,15 @@ TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
|
||||
// |kStableConvergenceMs| in the buffer. Keep on calling Process() until
|
||||
// we run out of data and verify that the system delay is non-negative.
|
||||
for (int j = 0; j <= kStableConvergenceMs; j += 10) {
|
||||
EXPECT_EQ(0, WebRtcAec_Process(handle_, near_, NULL, out_, NULL,
|
||||
samples_per_frame_, kDeviceBufMs, 0));
|
||||
EXPECT_EQ(0,
|
||||
WebRtcAec_Process(handle_,
|
||||
near_,
|
||||
NULL,
|
||||
out_,
|
||||
NULL,
|
||||
samples_per_frame_,
|
||||
kDeviceBufMs,
|
||||
0));
|
||||
EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user