From 5c64508b03d3fd177201769c32fb2d002c66927e Mon Sep 17 00:00:00 2001 From: "bjornv@webrtc.org" Date: Mon, 16 Dec 2013 10:57:53 +0000 Subject: [PATCH] Adds robust validation functionality to the delay estimator Evaluated over a 51 recordings: False positives went from 4.4% to 0.7% Missed detections unchanged at 0.8% No increase in complexity, but need to re-evaluate that. TESTED=trybots, unittests, verified against Matlab implementation BUG=None R=aluebs@webrtc.org, andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/5419004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@5296 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../utility/delay_estimator.c | 262 +++++++++++++++++- .../utility/delay_estimator.h | 5 + 2 files changed, 264 insertions(+), 3 deletions(-) diff --git a/webrtc/modules/audio_processing/utility/delay_estimator.c b/webrtc/modules/audio_processing/utility/delay_estimator.c index 062874dc2..3b7412a1b 100644 --- a/webrtc/modules/audio_processing/utility/delay_estimator.c +++ b/webrtc/modules/audio_processing/utility/delay_estimator.c @@ -23,6 +23,22 @@ static const int32_t kProbabilityOffset = 1024; // 2 in Q9. static const int32_t kProbabilityLowerLimit = 8704; // 17 in Q9. static const int32_t kProbabilityMinSpread = 2816; // 5.5 in Q9. +// Robust validation settings +static const float kHistogramMax = 3000.f; +static const float kLastHistogramMax = 250.f; +static const float kMinHistogramThreshold = 1.5f; +static const int kMinRequiredHits = 10; +static const int kMaxHitsWhenPossiblyNonCausal = 10; +static const int kMaxHitsWhenPossiblyCausal = 1000; +// TODO(bjornv): Make kMaxDelayDifference a configurable parameter, since it +// corresponds to the filter length if the delay estimation is used in echo +// control. +static const int kMaxDelayDifference = 32; +static const float kQ14Scaling = 1.f / (1 << 14); // Scaling by 2^14 to get Q0. +static const float kFractionSlope = 0.05f; +static const float kMinFractionWhenPossiblyCausal = 0.5f; +static const float kMinFractionWhenPossiblyNonCausal = 0.25f; + // Counts and returns number of bits of a 32-bit word. static int BitCount(uint32_t u32) { uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) - @@ -59,6 +75,219 @@ static void BitCountComparison(uint32_t binary_vector, } } +// Collects necessary statistics for the HistogramBasedValidation(). This +// function has to be called prior to calling HistogramBasedValidation(). The +// statistics updated and used by the HistogramBasedValidation() are: +// 1. the number of |candidate_hits|, which states for how long we have had the +// same |candidate_delay| +// 2. the |histogram| of candidate delays over time. This histogram is +// weighted with respect to a reliability measure and time-varying to cope +// with possible delay shifts. +// For further description see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - valley_depth_q14 : The cost function has a valley/minimum at the +// |candidate_delay| location. |valley_depth_q14| is the +// cost function difference between the minimum and +// maximum locations. The value is in the Q14 domain. +// - valley_level_q14 : Is the cost function value at the minimum, in Q14. +static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self, + int candidate_delay, + int32_t valley_depth_q14, + int32_t valley_level_q14) { + const float valley_depth = valley_depth_q14 * kQ14Scaling; + float decrease_in_last_set = valley_depth; + const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ? + kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal; + int i = 0; + + // Reset |candidate_hits| if we have a new candidate. + if (candidate_delay != self->last_candidate_delay) { + self->candidate_hits = 0; + self->last_candidate_delay = candidate_delay; + } + self->candidate_hits++; + + // The |histogram| is updated differently across the bins. + // 1. The |candidate_delay| histogram bin is increased with the + // |valley_depth|, which is a simple measure of how reliable the + // |candidate_delay| is. The histogram is not increased above + // |kHistogramMax|. + self->histogram[candidate_delay] += valley_depth; + if (self->histogram[candidate_delay] > kHistogramMax) { + self->histogram[candidate_delay] = kHistogramMax; + } + // 2. The histogram bins in the neighborhood of |candidate_delay| are + // unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}. + // 3. The histogram bins in the neighborhood of |last_delay| are decreased + // with |decrease_in_last_set|. This value equals the difference between + // the cost function values at the locations |candidate_delay| and + // |last_delay| until we reach |max_hits_for_slow_change| consecutive hits + // at the |candidate_delay|. If we exceed this amount of hits the + // |candidate_delay| is a "potential" candidate and we start decreasing + // these histogram bins more rapidly with |valley_depth|. + if (self->candidate_hits < max_hits_for_slow_change) { + decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] - + valley_level_q14) * kQ14Scaling; + } + // 4. All other bins are decreased with |valley_depth|. + // TODO(bjornv): Investigate how to make this loop more efficient. Split up + // the loop? Remove parts that doesn't add too much. + for (i = 0; i < self->farend->history_size; ++i) { + int is_in_last_set = (i >= self->last_delay - 2) && + (i <= self->last_delay + 1) && (i != candidate_delay); + int is_in_candidate_set = (i >= candidate_delay - 2) && + (i <= candidate_delay + 1); + self->histogram[i] -= decrease_in_last_set * is_in_last_set + + valley_depth * (!is_in_last_set && !is_in_candidate_set); + // 5. No histogram bin can go below 0. + if (self->histogram[i] < 0) { + self->histogram[i] = 0; + } + } +} + +// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(), +// based on a mix of counting concurring hits with a modified histogram +// of recent delay estimates. In brief a candidate is valid (returns 1) if it +// is the most likely according to the histogram. There are a couple of +// exceptions that are worth mentioning: +// 1. If the |candidate_delay| < |last_delay| it can be that we are in a +// non-causal state, breaking a possible echo control algorithm. Hence, we +// open up for a quicker change by allowing the change even if the +// |candidate_delay| is not the most likely one according to the histogram. +// 2. There's a minimum number of hits (kMinRequiredHits) and the histogram +// value has to reached a minimum (kMinHistogramThreshold) to be valid. +// 3. The action is also depending on the filter length used for echo control. +// If the delay difference is larger than what the filter can capture, we +// also move quicker towards a change. +// For further description see commented code. +// +// Input: +// - candidate_delay : The delay to validate. +// +// Return value: +// - is_histogram_valid : 1 - The |candidate_delay| is valid. +// 0 - Otherwise. +static int HistogramBasedValidation(const BinaryDelayEstimator* self, + int candidate_delay) { + float fraction = 1.f; + float histogram_threshold = self->histogram[self->compare_delay]; + const int delay_difference = candidate_delay - self->last_delay; + int is_histogram_valid = 0; + + // The histogram based validation of |candidate_delay| is done by comparing + // the |histogram| at bin |candidate_delay| with a |histogram_threshold|. + // This |histogram_threshold| equals a |fraction| of the |histogram| at bin + // |last_delay|. The |fraction| is a piecewise linear function of the + // |delay_difference| between the |candidate_delay| and the |last_delay| + // allowing for a quicker move if + // i) a potential echo control filter can not handle these large differences. + // ii) keeping |last_delay| instead of updating to |candidate_delay| could + // force an echo control into a non-causal state. + // We further require the histogram to have reached a minimum value of + // |kMinHistogramThreshold|. In addition, we also require the number of + // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious + // values. + + // Calculate a comparison histogram value (|histogram_threshold|) that is + // depending on the distance between the |candidate_delay| and |last_delay|. + // TODO(bjornv): How much can we gain by turning the fraction calculation + // into tables? + if (delay_difference >= kMaxDelayDifference) { + fraction = 1.f - kFractionSlope * (delay_difference - kMaxDelayDifference); + fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction : + kMinFractionWhenPossiblyCausal); + } else if (delay_difference < 0) { + fraction = kMinFractionWhenPossiblyNonCausal - + kFractionSlope * delay_difference; + fraction = (fraction > 1.f ? 1.f : fraction); + } + histogram_threshold *= fraction; + histogram_threshold = (histogram_threshold > kMinHistogramThreshold ? + histogram_threshold : kMinHistogramThreshold); + + is_histogram_valid = + (self->histogram[candidate_delay] >= histogram_threshold) && + (self->candidate_hits > kMinRequiredHits); + + return is_histogram_valid; +} + +// Performs a robust validation of the |candidate_delay| estimated in +// WebRtc_ProcessBinarySpectrum(). The algorithm takes the +// |is_instantaneous_valid| and the |is_histogram_valid| and combines them +// into a robust validation. The HistogramBasedValidation() has to be called +// prior to this call. +// For further description on how the combination is done, see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - is_instantaneous_valid : The instantaneous validation performed in +// WebRtc_ProcessBinarySpectrum(). +// - is_histogram_valid : The histogram based validation. +// +// Return value: +// - is_robust : 1 - The candidate_delay is valid according to a +// combination of the two inputs. +// : 0 - Otherwise. +static int RobustValidation(const BinaryDelayEstimator* self, + int candidate_delay, + int is_instantaneous_valid, + int is_histogram_valid) { + int is_robust = 0; + + // The final robust validation is based on the two algorithms; 1) the + // |is_instantaneous_valid| and 2) the histogram based with result stored in + // |is_histogram_valid|. + // i) Before we actually have a valid estimate (|last_delay| == -2), we say + // a candidate is valid if either algorithm states so + // (|is_instantaneous_valid| OR |is_histogram_valid|). + is_robust = (self->last_delay < 0) && + (is_instantaneous_valid || is_histogram_valid); + // ii) Otherwise, we need both algorithms to be certain + // (|is_instantaneous_valid| AND |is_histogram_valid|) + is_robust |= is_instantaneous_valid && is_histogram_valid; + // iii) With one exception, i.e., the histogram based algorithm can overrule + // the instantaneous one if |is_histogram_valid| = 1 and the histogram + // is significantly strong. + is_robust |= is_histogram_valid && + (self->histogram[candidate_delay] > self->last_delay_histogram); + + return is_robust; +} + +// UpdatesMadeUponChange() makes two parameter updates only done when we have a +// change/jump in delay. For further description, see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +static void UpdatesMadeUponChange(BinaryDelayEstimator* self, + int candidate_delay) { + int i = 0; + + self->last_delay_histogram = + (self->histogram[candidate_delay] > kLastHistogramMax ? + kLastHistogramMax : self->histogram[candidate_delay]); + // TODO(bjornv): Investigate if we can simplify to only change + // self->histogram[self->last_delay] instead of looping through all histogram + // bins. Looping through all bins is to ensure + // self->histogram[candidate_delay] is currently the most likely bin. + // Otherwise we might jump back too easy to a neighbor even for spurious + // changes. + // Since we may jump to a new delay even if it is not the most likely + // according to the histogram, we here adjust the histogram to make sure the + // |candidate_delay| now is the most likely one. + if (self->histogram[candidate_delay] < self->histogram[self->compare_delay]) { + for (i = 0; i < self->farend->history_size; ++i) { + if (self->histogram[i] > self->histogram[candidate_delay]) { + self->histogram[i] = self->histogram[candidate_delay]; + } + } + } +} + void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { if (self == NULL) { @@ -139,6 +368,9 @@ void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) { free(self->binary_near_history); self->binary_near_history = NULL; + free(self->histogram); + self->histogram = NULL; + // BinaryDelayEstimator does not have ownership of |farend|, hence we do not // free the memory here. That should be handled separately by the user. self->farend = NULL; @@ -161,8 +393,11 @@ BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( self->farend = farend; self->near_history_size = lookahead + 1; - // Allocate memory for spectrum buffers. - self->mean_bit_counts = malloc(farend->history_size * sizeof(int32_t)); + // Allocate memory for spectrum buffers. The extra array element in + // |mean_bit_counts| and |histogram| is a dummy element only used while + // |last_delay| == -2, i.e., before we have a valid estimate. + self->mean_bit_counts = + malloc((farend->history_size + 1) * sizeof(int32_t)); malloc_fail |= (self->mean_bit_counts == NULL); self->bit_counts = malloc(farend->history_size * sizeof(int32_t)); @@ -172,6 +407,9 @@ BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( self->binary_near_history = malloc((lookahead + 1) * sizeof(uint32_t)); malloc_fail |= (self->binary_near_history == NULL); + self->histogram = malloc((farend->history_size + 1) * sizeof(float)); + malloc_fail |= (self->histogram == NULL); + if (malloc_fail) { WebRtc_FreeBinaryDelayEstimator(self); self = NULL; @@ -188,8 +426,9 @@ void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { memset(self->bit_counts, 0, sizeof(int32_t) * self->farend->history_size); memset(self->binary_near_history, 0, sizeof(uint32_t) * self->near_history_size); - for (i = 0; i < self->farend->history_size; ++i) { + for (i = 0; i <= self->farend->history_size; ++i) { self->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + self->histogram[i] = 0.f; } self->minimum_probability = (32 << 9); // 32 in Q9. self->last_delay_probability = (32 << 9); // 32 in Q9. @@ -198,6 +437,10 @@ void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { self->last_delay = -2; self->robust_validation_enabled = 0; // Disabled by default. + self->last_candidate_delay = -2; + self->compare_delay = self->farend->history_size; + self->candidate_hits = 0; + self->last_delay_histogram = 0.f; } int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, @@ -298,11 +541,24 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, ((value_best_candidate < self->minimum_probability) || (value_best_candidate < self->last_delay_probability))); + if (self->robust_validation_enabled) { + int is_histogram_valid = 0; + UpdateRobustValidationStatistics(self, candidate_delay, valley_depth, + value_best_candidate); + is_histogram_valid = HistogramBasedValidation(self, candidate_delay); + valid_candidate = RobustValidation(self, candidate_delay, valid_candidate, + is_histogram_valid); + + } if (valid_candidate) { + if (candidate_delay != self->last_delay) { + UpdatesMadeUponChange(self, candidate_delay); + } self->last_delay = candidate_delay; if (value_best_candidate < self->last_delay_probability) { self->last_delay_probability = value_best_candidate; } + self->compare_delay = self->last_delay; } return self->last_delay; diff --git a/webrtc/modules/audio_processing/utility/delay_estimator.h b/webrtc/modules/audio_processing/utility/delay_estimator.h index 561514b48..7ffb81b8b 100644 --- a/webrtc/modules/audio_processing/utility/delay_estimator.h +++ b/webrtc/modules/audio_processing/utility/delay_estimator.h @@ -44,6 +44,11 @@ typedef struct { // Robust validation int robust_validation_enabled; + int last_candidate_delay; + int compare_delay; + int candidate_hits; + float* histogram; + float last_delay_histogram; // Far-end binary spectrum history buffer etc. BinaryDelayEstimatorFarend* farend;