Merge AEC changes.

R=bjornv@webrtc.org
BUG=

Review URL: https://webrtc-codereview.appspot.com/34459004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7877 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
pbos@webrtc.org 2014-12-11 13:46:59 +00:00
parent 2b19f06312
commit 5f162c8509
3 changed files with 154 additions and 20 deletions

View File

@ -97,6 +97,10 @@ ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = {
1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
2.0000f}; 2.0000f};
// TODO(bjornv): These parameters will be tuned.
static const float kDelayQualityThresholdMax = 0.07f;
static const int kInitialShiftOffset = 5;
// Target suppression levels for nlp modes. // Target suppression levels for nlp modes.
// log{0.001, 0.00001, 0.00000001} // log{0.001, 0.00001, 0.00000001}
static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f};
@ -790,6 +794,61 @@ static void TimeToFrequency(float time_data[PART_LEN2],
} }
} }
static int SignalBasedDelayCorrection(AecCore* self) {
int delay_correction = 0;
int last_delay = -2;
assert(self != NULL);
// 1. Check for non-negative delay estimate. Note that the estimates we get
// from the delay estimation are not compensated for lookahead. Hence, a
// negative |last_delay| is an invalid one.
// 2. Verify that there is a delay change. In addition, only allow a change
// if the delay is outside a certain region taking the AEC filter length
// into account.
// TODO(bjornv): Investigate if we can remove the non-zero delay change check.
// 3. Only allow delay correction if the delay estimation quality exceeds
// |delay_quality_threshold|.
// 4. Finally, verify that the proposed |delay_correction| is feasible by
// comparing with the size of the far-end buffer.
last_delay = WebRtc_last_delay(self->delay_estimator);
if ((last_delay >= 0) &&
(last_delay != self->previous_delay) &&
(WebRtc_last_delay_quality(self->delay_estimator) >
self->delay_quality_threshold)) {
int delay = last_delay - WebRtc_lookahead(self->delay_estimator);
// Allow for a slack in the actual delay. The adaptive echo cancellation
// filter is currently |num_partitions| (of 64 samples) long. If the
// delay estimate indicates a delay of at least one quarter of the filter
// length we open up for correction.
if (delay <= 0 || delay > (self->num_partitions / 4)) {
int available_read = (int)WebRtc_available_read(self->far_buf);
// Adjust w.r.t. a |shift_offset| to account for not as reliable estimates
// in the beginning, hence we are more conservative.
delay_correction = -(delay - self->shift_offset);
self->shift_offset--;
self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);
if (delay_correction > available_read - self->mult - 1) {
// There is not enough data in the buffer to perform this shift. Hence,
// we do not rely on the delay estimate and do nothing.
delay_correction = 0;
} else {
self->previous_delay = last_delay;
++self->delay_correction_count;
}
}
}
// Update the |delay_quality_threshold| once we have our first delay
// correction.
if (self->delay_correction_count > 0) {
float delay_quality = WebRtc_last_delay_quality(self->delay_estimator);
delay_quality = (delay_quality > kDelayQualityThresholdMax ?
kDelayQualityThresholdMax : delay_quality);
self->delay_quality_threshold =
(delay_quality > self->delay_quality_threshold ? delay_quality :
self->delay_quality_threshold);
}
return delay_correction;
}
static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) { static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
float efw[2][PART_LEN1], xfw[2][PART_LEN1]; float efw[2][PART_LEN1], xfw[2][PART_LEN1];
complex_t comfortNoiseHband[PART_LEN1]; complex_t comfortNoiseHband[PART_LEN1];
@ -1286,13 +1345,22 @@ int WebRtcAec_CreateAec(AecCore** aecInst) {
aec = NULL; aec = NULL;
return -1; return -1;
} }
// We create the delay_estimator with the same amount of maximum lookahead as
// the delay history size (kHistorySizeBlocks) for symmetry reasons.
aec->delay_estimator = WebRtc_CreateDelayEstimator( aec->delay_estimator = WebRtc_CreateDelayEstimator(
aec->delay_estimator_farend, kLookaheadBlocks); aec->delay_estimator_farend, kHistorySizeBlocks);
if (aec->delay_estimator == NULL) { if (aec->delay_estimator == NULL) {
WebRtcAec_FreeAec(aec); WebRtcAec_FreeAec(aec);
aec = NULL; aec = NULL;
return -1; return -1;
} }
#ifdef WEBRTC_ANDROID
// DA-AEC assumes the system is causal from the beginning and will self adjust
// the lookahead when shifting is required.
WebRtc_set_lookahead(aec->delay_estimator, 0);
#else
WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks);
#endif
// Assembly optimization // Assembly optimization
WebRtcAec_FilterFar = FilterFar; WebRtcAec_FilterFar = FilterFar;
@ -1356,7 +1424,7 @@ static void ReopenWav(rtc_WavWriter** wav_file,
int seq1, int seq1,
int seq2, int seq2,
int sample_rate) { int sample_rate) {
int written ATTRIBUTE_UNUSED; int written UNUSED;
char filename[64]; char filename[64];
if (*wav_file) { if (*wav_file) {
if (rtc_WavSampleRate(*wav_file) == sample_rate) if (rtc_WavSampleRate(*wav_file) == sample_rate)
@ -1435,7 +1503,17 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
aec->delay_logging_enabled = 0; aec->delay_logging_enabled = 0;
memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
aec->signal_delay_correction = 0;
aec->previous_delay = -2; // (-2): Uninitialized.
aec->delay_correction_count = 0;
aec->shift_offset = kInitialShiftOffset;
aec->delay_quality_threshold = 0;
#ifdef WEBRTC_ANDROID
aec->reported_delay_enabled = 0; // Disabled by default.
#else
aec->reported_delay_enabled = 1; aec->reported_delay_enabled = 1;
#endif
aec->extended_filter_enabled = 0; aec->extended_filter_enabled = 0;
aec->num_partitions = kNormalNumPartitions; aec->num_partitions = kNormalNumPartitions;
@ -1570,7 +1648,9 @@ void WebRtcAec_ProcessFrame(AecCore* aec,
// For each frame the process is as follows: // For each frame the process is as follows:
// 1) If the system_delay indicates on being too small for processing a // 1) If the system_delay indicates on being too small for processing a
// frame we stuff the buffer with enough data for 10 ms. // frame we stuff the buffer with enough data for 10 ms.
// 2) Adjust the buffer to the system delay, by moving the read pointer. // 2 a) Adjust the buffer to the system delay, by moving the read pointer.
// b) Apply signal based delay correction, if we have detected poor AEC
// performance.
// 3) TODO(bjornv): Investigate if we need to add this: // 3) TODO(bjornv): Investigate if we need to add this:
// If we can't move read pointer due to buffer size limitations we // If we can't move read pointer due to buffer size limitations we
// flush/stuff the buffer. // flush/stuff the buffer.
@ -1581,14 +1661,16 @@ void WebRtcAec_ProcessFrame(AecCore* aec,
// amount of data we input and output in audio_processing. // amount of data we input and output in audio_processing.
// 6) Update the outputs. // 6) Update the outputs.
// TODO(bjornv): Investigate how we should round the delay difference; right // The AEC has two different delay estimation algorithms built in. The
// now we know that incoming |knownDelay| is underestimated when it's less // first relies on delay input values from the user and the amount of
// than |aec->knownDelay|. We therefore, round (-32) in that direction. In // shifted buffer elements is controlled by |knownDelay|. This delay will
// the other direction, we don't have this situation, but might flush one // give a guess on how much we need to shift far-end buffers to align with
// partition too little. This can cause non-causality, which should be // the near-end signal. The other delay estimation algorithm uses the
// investigated. Maybe, allow for a non-symmetric rounding, like -16. // far- and near-end signals to find the offset between them. This one
int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; // (called "signal delay") is then used to fine tune the alignment, or
int moved_elements = 0; // simply compensate for errors in the system based one.
// Note that the two algorithms operate independently. Currently, we only
// allow one algorithm to be turned on.
// TODO(bjornv): Change the near-end buffer handling to be the same as for // TODO(bjornv): Change the near-end buffer handling to be the same as for
// far-end, that is, with a near_pre_buf. // far-end, that is, with a near_pre_buf.
@ -1607,13 +1689,53 @@ void WebRtcAec_ProcessFrame(AecCore* aec,
WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
} }
// 2) Compensate for a possible change in the system delay. if (aec->reported_delay_enabled) {
WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements); // 2 a) Compensate for a possible change in the system delay.
moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
aec->knownDelay -= moved_elements * PART_LEN; // TODO(bjornv): Investigate how we should round the delay difference; right
// now we know that incoming |knownDelay| is underestimated when it's less
// than |aec->knownDelay|. We therefore, round (-32) in that direction. In
// the other direction, we don't have this situation, but might flush one
// partition too little. This can cause non-causality, which should be
// investigated. Maybe, allow for a non-symmetric rounding, like -16.
int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
aec->knownDelay -= moved_elements * PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP #ifdef WEBRTC_AEC_DEBUG_DUMP
WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
#endif #endif
} else {
// 2 b) Apply signal based delay correction.
int move_elements = SignalBasedDelayCorrection(aec);
int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
#ifdef WEBRTC_AEC_DEBUG_DUMP
WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
#endif
WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
moved_elements);
aec->signal_delay_correction += moved_elements;
// TODO(bjornv): Investigate if this is reasonable. I had to add this
// guard when the signal based delay correction replaces the system based
// one. Otherwise there was a buffer underrun in the "qa-new/01/" recording
// when adding 44 ms extra delay. This was not seen if we kept both delay
// correction algorithms running in parallel.
// A first investigation showed that we have a drift in this case that
// causes the buffer underrun. Compared to when delay correction was
// turned off, we get buffer underrun as well which was triggered in 1)
// above. In addition there was a shift in |knownDelay| later increasing
// the buffer. When running in parallel, this if statement was not
// triggered. This suggests two alternatives; (a) use both algorithms, or
// (b) allow for smaller delay corrections when we operate close to the
// buffer limit. At the time of testing we required a change of 6 blocks,
// but could change it to, e.g., 2 blocks. It requires some testing though.
if ((int)WebRtc_available_read(aec->far_buf) < (aec->mult + 1)) {
// We don't have enough data so we stuff the far-end buffers.
WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
}
}
// 4) Process as many blocks as possible. // 4) Process as many blocks as possible.
while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
@ -1681,7 +1803,7 @@ int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) {
} }
} }
// Account for lookahead. // Account for lookahead.
*median = (my_median - kLookaheadBlocks) * kMsPerBlock; *median = (my_median - WebRtc_lookahead(self->delay_estimator)) * kMsPerBlock;
// Calculate the L1 norm, with median value as central moment. // Calculate the L1 norm, with median value as central moment.
for (i = 0; i < kHistorySizeBlocks; i++) { for (i = 0; i < kHistorySizeBlocks; i++) {
@ -1754,4 +1876,3 @@ void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
assert(delay >= 0); assert(delay >= 0);
self->system_delay = delay; self->system_delay = delay;
} }

View File

@ -32,7 +32,12 @@ enum {
kLookaheadBlocks = 15 kLookaheadBlocks = 15
}; };
enum { enum {
#ifdef WEBRTC_ANDROID
// 500 ms for 16 kHz which is equivalent with the limit of reported delays.
kHistorySizeBlocks = 125
#else
kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks
#endif
}; };
// Extended filter adaptation parameters. // Extended filter adaptation parameters.
@ -130,8 +135,17 @@ struct AecCore {
int delay_logging_enabled; int delay_logging_enabled;
void* delay_estimator_farend; void* delay_estimator_farend;
void* delay_estimator; void* delay_estimator;
// Variables associated with delay correction through signal based delay
// estimation feedback.
int signal_delay_correction;
int previous_delay;
int delay_correction_count;
int shift_offset;
float delay_quality_threshold;
int reported_delay_enabled; // 0 = disabled, otherwise enabled. // 0 = reported delay mode disabled (signal based delay correction enabled).
// otherwise enabled
int reported_delay_enabled;
// 1 = extended filter mode enabled, 0 = disabled. // 1 = extended filter mode enabled, 0 = disabled.
int extended_filter_enabled; int extended_filter_enabled;
// Runtime selection of number of filter partitions. // Runtime selection of number of filter partitions.

View File

@ -120,7 +120,6 @@ static float32x4_t vsqrtq_f32(float32x4_t s) {
// sqrt(s) = s * 1/sqrt(s) // sqrt(s) = s * 1/sqrt(s)
return vmulq_f32(s, x);; return vmulq_f32(s, x);;
} }
#endif // WEBRTC_ARCH_ARM64_NEON #endif // WEBRTC_ARCH_ARM64_NEON
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {