diff --git a/src/modules/audio_processing/aec/aec_core.c b/src/modules/audio_processing/aec/aec_core.c index b47891f3b..6718dec3f 100644 --- a/src/modules/audio_processing/aec/aec_core.c +++ b/src/modules/audio_processing/aec/aec_core.c @@ -751,8 +751,7 @@ static void ProcessBlock(aec_t* aec) { delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator, abs_far_spectrum, abs_near_spectrum, - PART_LEN1, - aec->echoState); + PART_LEN1); if (delay_estimate >= 0) { // Update delay estimate buffer. aec->delay_histogram[delay_estimate]++; diff --git a/src/modules/audio_processing/aecm/aecm_core.c b/src/modules/audio_processing/aecm/aecm_core.c index f2e468328..3bb2125b2 100644 --- a/src/modules/audio_processing/aecm/aecm_core.c +++ b/src/modules/audio_processing/aecm/aecm_core.c @@ -1612,7 +1612,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, dfaNoisy, PART_LEN1, far_q, - aecm->currentVADValue); + zerosDBufNoisy); if (delay == -1) { return -1; diff --git a/src/modules/audio_processing/utility/delay_estimator.c b/src/modules/audio_processing/utility/delay_estimator.c index af1d2e331..24ee74d7f 100644 --- a/src/modules/audio_processing/utility/delay_estimator.c +++ b/src/modules/audio_processing/utility/delay_estimator.c @@ -14,7 +14,25 @@ #include #include -#include "signal_processing_library.h" +// Number of right shifts for scaling is linearly depending on number of bits in +// the far-end binary spectrum. +static const int kShiftsAtZero = 13; // Right shifts at zero binary spectrum. +static const int kShiftsLinearSlope = 3; + +static const int32_t kProbabilityOffset = 1024; // 2 in Q9. +static const int32_t kProbabilityLowerLimit = 8704; // 17 in Q9. +static const int32_t kProbabilityMinSpread = 2816; // 5.5 in Q9. + +// Counts and returns number of bits of a 32-bit word. 
+static int BitCount(uint32_t u32) { + uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) - + ((u32 >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + return ((int) tmp); +} // Compares the |binary_vector| with all rows of the |binary_matrix| and counts // per row the number of times they have the same value. @@ -34,23 +52,14 @@ static void BitCountComparison(uint32_t binary_vector, int matrix_size, int32_t* bit_counts) { int n = 0; - uint32_t a = binary_vector; - register uint32_t tmp; - // compare |binary_vector| with all rows of the |binary_matrix| + // Compare |binary_vector| with all rows of the |binary_matrix| for (; n < matrix_size; n++) { - a = (binary_vector ^ binary_matrix[n]); - // Returns bit counts in tmp - tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); - tmp = ((tmp + (tmp >> 3)) & 030707070707); - tmp = (tmp + (tmp >> 6)); - tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; - - bit_counts[n] = (int32_t) tmp; + bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]); } } -int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { +int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* handle) { assert(handle != NULL); if (handle->mean_bit_counts != NULL) { @@ -69,9 +78,9 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { free(handle->binary_near_history); handle->binary_near_history = NULL; } - if (handle->delay_histogram != NULL) { - free(handle->delay_histogram); - handle->delay_histogram = NULL; + if (handle->far_bit_counts != NULL) { + free(handle->far_bit_counts); + handle->far_bit_counts = NULL; } free(handle); @@ -79,10 +88,10 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { return 0; } -int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, +int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator** handle, int max_delay, int lookahead) { - 
BinaryDelayEstimator_t* self = NULL; + BinaryDelayEstimator* self = NULL; int history_size = max_delay + lookahead; if (handle == NULL) { @@ -99,7 +108,7 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, return -1; } - self = malloc(sizeof(BinaryDelayEstimator_t)); + self = malloc(sizeof(BinaryDelayEstimator)); *handle = self; if (self == NULL) { return -1; @@ -108,12 +117,12 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, self->mean_bit_counts = NULL; self->bit_counts = NULL; self->binary_far_history = NULL; - self->delay_histogram = NULL; + self->far_bit_counts = NULL; self->history_size = history_size; self->near_history_size = lookahead + 1; - // Allocate memory for spectrum buffers + // Allocate memory for spectrum buffers. self->mean_bit_counts = malloc(history_size * sizeof(int32_t)); if (self->mean_bit_counts == NULL) { WebRtc_FreeBinaryDelayEstimator(self); @@ -126,7 +135,7 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, self = NULL; return -1; } - // Allocate memory for history buffers + // Allocate memory for history buffers. 
self->binary_far_history = malloc(history_size * sizeof(uint32_t)); if (self->binary_far_history == NULL) { WebRtc_FreeBinaryDelayEstimator(self); @@ -140,8 +149,8 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, self = NULL; return -1; } - self->delay_histogram = malloc(history_size * sizeof(int)); - if (self->delay_histogram == NULL) { + self->far_bit_counts = malloc(history_size * sizeof(int)); + if (self->far_bit_counts == NULL) { WebRtc_FreeBinaryDelayEstimator(self); self = NULL; return -1; @@ -150,48 +159,52 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, return 0; } -int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { +int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* handle) { + int i = 0; assert(handle != NULL); - memset(handle->mean_bit_counts, 0, sizeof(int32_t) * handle->history_size); memset(handle->bit_counts, 0, sizeof(int32_t) * handle->history_size); memset(handle->binary_far_history, 0, sizeof(uint32_t) * handle->history_size); memset(handle->binary_near_history, 0, sizeof(uint32_t) * handle->near_history_size); - memset(handle->delay_histogram, 0, sizeof(int) * handle->history_size); + memset(handle->far_bit_counts, 0, sizeof(int) * handle->history_size); + for (i = 0; i < handle->history_size; ++i) { + handle->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + } + handle->minimum_probability = (32 << 9); // 32 in Q9. + handle->last_delay_probability = (32 << 9); // 32 in Q9. - handle->vad_counter = 0; - - // Default value to return if we're unable to estimate. -1 is used for - // errors. + // Default return value if we're unable to estimate. -1 is used for errors. 
handle->last_delay = -2; return 0; } -int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* handle, uint32_t binary_far_spectrum, - uint32_t binary_near_spectrum, - int vad_value) { - const int kVadCountThreshold = 25; - const int kMaxHistogram = 600; - - int histogram_bin = 0; + uint32_t binary_near_spectrum) { int i = 0; - int max_histogram_level = 0; - int min_position = -1; + int candidate_delay = -1; - int32_t bit_counts_tmp = 0; + int32_t value_best_candidate = 16384; // 32 in Q9, (max |mean_bit_counts|). + int32_t value_worst_candidate = 0; assert(handle != NULL); - // Shift binary spectrum history + // Shift binary spectrum history and insert current |binary_far_spectrum|. memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]), (handle->history_size - 1) * sizeof(uint32_t)); - // Insert new binary spectrum handle->binary_far_history[0] = binary_far_spectrum; + // Shift history of far-end binary spectrum bit counts and insert bit count + // of current |binary_far_spectrum|. + memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]), + (handle->history_size - 1) * sizeof(int)); + handle->far_bit_counts[0] = BitCount(binary_far_spectrum); + if (handle->near_history_size > 1) { + // If we apply lookahead, shift near-end binary spectrum history. Insert + // current |binary_near_spectrum| and pull out the delayed one. memmove(&(handle->binary_near_history[1]), &(handle->binary_near_history[0]), (handle->near_history_size - 1) * sizeof(uint32_t)); @@ -200,66 +213,93 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, handle->binary_near_history[handle->near_history_size - 1]; } - // Compare with delayed spectra + // Compare with delayed spectra and store the |bit_counts| for each delay. 
BitCountComparison(binary_near_spectrum, handle->binary_far_history, handle->history_size, handle->bit_counts); - // Smooth bit count curve + // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|. for (i = 0; i < handle->history_size; i++) { - // Update sum // |bit_counts| is constrained to [0, 32], meaning we can smooth with a // factor up to 2^26. We use Q9. - bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(handle->bit_counts[i], 9); // Q9 - WebRtc_MeanEstimatorFix(bit_counts_tmp, 9, &(handle->mean_bit_counts[i])); + int32_t bit_count = (handle->bit_counts[i] << 9); // Q9. + + // Update |mean_bit_counts| only when far-end signal has something to + // contribute. If |far_bit_counts| is zero the far-end signal is weak and + // we likely have a poor echo condition, hence don't update. + if (handle->far_bit_counts[i] > 0) { + // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|. + int shifts = kShiftsAtZero; + shifts -= (kShiftsLinearSlope * handle->far_bit_counts[i]) >> 4; + WebRtc_MeanEstimatorFix(bit_count, shifts, &(handle->mean_bit_counts[i])); + } } - // Find minimum position of bit count curve - min_position = (int) WebRtcSpl_MinIndexW32(handle->mean_bit_counts, - (int16_t) handle->history_size); - - // If the far end has been active sufficiently long, begin accumulating a - // histogram of the minimum positions. Search for the maximum bin to - // determine the delay. - if (vad_value == 1) { - if (handle->vad_counter >= kVadCountThreshold) { - // Increment the histogram at the current minimum position. - if (handle->delay_histogram[min_position] < kMaxHistogram) { - handle->delay_histogram[min_position] += 3; - } - - for (i = 0; i < handle->history_size; i++) { - histogram_bin = handle->delay_histogram[i]; - - // Decrement the histogram bin. - if (histogram_bin > 0) { - histogram_bin--; - handle->delay_histogram[i] = histogram_bin; - // Select the histogram index corresponding to the maximum bin as the - // delay. 
- if (histogram_bin > max_histogram_level) { - max_histogram_level = histogram_bin; - handle->last_delay = i; - } - } - } - } else { - handle->vad_counter++; + // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate| + // of |mean_bit_counts|. + for (i = 0; i < handle->history_size; i++) { + if (handle->mean_bit_counts[i] < value_best_candidate) { + value_best_candidate = handle->mean_bit_counts[i]; + candidate_delay = i; + } + if (handle->mean_bit_counts[i] > value_worst_candidate) { + value_worst_candidate = handle->mean_bit_counts[i]; + } + } + + // The |value_best_candidate| is a good indicator on the probability of + // |candidate_delay| being an accurate delay (a small |value_best_candidate| + // means a good binary match). In the following sections we make a decision + // whether to update |last_delay| or not. + // 1) If the difference bit counts between the best and the worst delay + // candidates is too small we consider the situation to be unreliable and + // don't update |last_delay|. + // 2) If the situation is reliable we update |last_delay| if the value of the + // best candidate delay has a value less than + // i) an adaptive threshold |minimum_probability|, or + // ii) this corresponding value |last_delay_probability|, but updated at + // this time instant. + + // Update |minimum_probability|. + if ((handle->minimum_probability > kProbabilityLowerLimit) && + (value_worst_candidate - value_best_candidate > kProbabilityMinSpread)) { + // The "hard" threshold can't be lower than 17 (in Q9). + // The valley in the curve also has to be distinct, i.e., the + // difference between |value_worst_candidate| and |value_best_candidate| has + // to be large enough. 
+ int32_t threshold = value_best_candidate + kProbabilityOffset; + if (threshold < kProbabilityLowerLimit) { + threshold = kProbabilityLowerLimit; + } + if (handle->minimum_probability > threshold) { + handle->minimum_probability = threshold; + } + } + // Update |last_delay_probability|. + // We use a Markov type model, i.e., a slowly increasing level over time. + handle->last_delay_probability++; + if (value_worst_candidate > value_best_candidate + kProbabilityOffset) { + // Reliable delay value for usage. + if (value_best_candidate < handle->minimum_probability) { + handle->last_delay = candidate_delay; + } + if (value_best_candidate < handle->last_delay_probability) { + handle->last_delay = candidate_delay; + // Reset |last_delay_probability|. + handle->last_delay_probability = value_best_candidate; } - } else { - handle->vad_counter = 0; } return handle->last_delay; } -int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle) { +int WebRtc_binary_last_delay(BinaryDelayEstimator* handle) { assert(handle != NULL); return handle->last_delay; } -int WebRtc_history_size(BinaryDelayEstimator_t* handle) { +int WebRtc_history_size(BinaryDelayEstimator* handle) { assert(handle != NULL); return handle->history_size; } @@ -267,16 +307,13 @@ int WebRtc_history_size(BinaryDelayEstimator_t* handle) { void WebRtc_MeanEstimatorFix(int32_t new_value, int factor, int32_t* mean_value) { - int32_t mean_new = *mean_value; - int32_t diff = new_value - mean_new; + int32_t diff = new_value - *mean_value; // mean_new = mean_value + ((new_value - mean_value) >> factor); if (diff < 0) { - diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor); + diff = -((-diff) >> factor); } else { - diff = WEBRTC_SPL_RSHIFT_W32(diff, factor); + diff = (diff >> factor); } - mean_new += diff; - - *mean_value = mean_new; + *mean_value += diff; } diff --git a/src/modules/audio_processing/utility/delay_estimator.h b/src/modules/audio_processing/utility/delay_estimator.h index 8ff65cb83..a376dfeb6 100644 --- 
a/src/modules/audio_processing/utility/delay_estimator.h +++ b/src/modules/audio_processing/utility/delay_estimator.h @@ -14,12 +14,12 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ -#include "signal_processing_library.h" #include "typedefs.h" typedef struct { - // Pointer to bit counts + // Pointer to bit counts. int32_t* mean_bit_counts; + int* far_bit_counts; // Array only used locally in ProcessBinarySpectrum() but whose size is // determined at run-time. @@ -29,9 +29,9 @@ typedef struct { uint32_t* binary_far_history; uint32_t* binary_near_history; - // Delay histogram variables. - int* delay_histogram; - int vad_counter; + // Delay estimation variables. + int32_t minimum_probability; + int last_delay_probability; // Delay memory. int last_delay; @@ -41,75 +41,73 @@ typedef struct { // Near-end buffer size. int near_history_size; -} BinaryDelayEstimator_t; +} BinaryDelayEstimator; -// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...) +// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...). // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // -int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle); +int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* handle); -// Refer to WebRtc_CreateDelayEstimator() in delay_estimator_wrapper.h -int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, +// Refer to WebRtc_CreateDelayEstimator() in delay_estimator_wrapper.h. +int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator** handle, int max_delay, int lookahead); // Initializes the delay estimation instance created with -// WebRtc_CreateBinaryDelayEstimator(...) +// WebRtc_CreateBinaryDelayEstimator(...). // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. 
// // Output: -// - handle : Initialized instance +// - handle : Initialized instance. // -int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle); +int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* handle); // Estimates and returns the delay between the binary far-end and binary near- // end spectra. The value will be offset by the lookahead (i.e. the lookahead // should be subtracted from the returned value). // Inputs: -// - handle : Pointer to the delay estimation instance -// - binary_far_spectrum : Far-end binary spectrum -// - binary_near_spectrum : Near-end binary spectrum of the current block -// - vad_value : The VAD decision of the current block +// - handle : Pointer to the delay estimation instance. +// - binary_far_spectrum : Far-end binary spectrum. +// - binary_near_spectrum : Near-end binary spectrum of the current block. // // Output: -// - handle : Updated instance +// - handle : Updated instance. // // Return value: -// - delay : >= 0 - Calculated delay value -// -1 - Error +// - delay : >= 0 - Calculated delay value. +// -1 - Error. // -2 - Insufficient data for estimation. // -int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* handle, uint32_t binary_far_spectrum, - uint32_t binary_near_spectrum, - int vad_value); + uint32_t binary_near_spectrum); // Returns the last calculated delay updated by the function -// WebRtc_ProcessBinarySpectrum(...) +// WebRtc_ProcessBinarySpectrum(...). // // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // // Return value: // - delay : >= 0 - Last calculated delay value // -1 - Error // -2 - Insufficient data for estimation. // -int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle); +int WebRtc_binary_last_delay(BinaryDelayEstimator* handle); // Returns the history size used in the far-end buffers to calculate the delay // over. 
// // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // // Return value: -// - history_size : > 0 - Far-end history size -// -1 - Error +// - history_size : > 0 - Far-end history size. +// -1 - Error. // -int WebRtc_history_size(BinaryDelayEstimator_t* handle); +int WebRtc_history_size(BinaryDelayEstimator* handle); // Updates the |mean_value| recursively with a step size of 2^-|factor|. This // function is used internally in the Binary Delay Estimator as well as the diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.c b/src/modules/audio_processing/utility/delay_estimator_wrapper.c index 6e4e44a11..438c95f5e 100644 --- a/src/modules/audio_processing/utility/delay_estimator_wrapper.c +++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.c @@ -15,32 +15,33 @@ #include #include "delay_estimator.h" -#include "signal_processing_library.h" typedef union { float float_; int32_t int32_; -} SpectrumType_t; +} SpectrumType; typedef struct { - // Pointers to mean values of spectrum - SpectrumType_t* mean_far_spectrum; - SpectrumType_t* mean_near_spectrum; + // Pointers to mean values of spectrum. + SpectrumType* mean_far_spectrum; + SpectrumType* mean_near_spectrum; + // |mean_*_spectrum| initialization indicator. + int far_spectrum_initialized; + int near_spectrum_initialized; - // Spectrum size int spectrum_size; // Binary spectrum based delay estimator - BinaryDelayEstimator_t* binary_handle; -} DelayEstimator_t; + BinaryDelayEstimator* binary_handle; +} DelayEstimator; -// Only bit |kBandFirst| through bit |kBandLast| are processed -// |kBandFirst| - |kBandLast| must be < 32 +// Only bit |kBandFirst| through bit |kBandLast| are processed and +// |kBandLast| - |kBandFirst| must be < 32.
static const int kBandFirst = 12; static const int kBandLast = 43; static __inline uint32_t SetBit(uint32_t in, int pos) { - uint32_t mask = WEBRTC_SPL_LSHIFT_W32(1, pos); + uint32_t mask = ((uint32_t) 1 << pos); uint32_t out = (in | mask); return out; @@ -50,17 +51,16 @@ static __inline uint32_t SetBit(uint32_t in, int pos) { // but for float. // // Inputs: -// - new_value : new additional value. -// - scale : scale for smoothing (should be less than 1.0). +// - new_value : New additional value. +// - scale : Scale for smoothing (should be less than 1.0). // // Input/Output: -// - mean_value : pointer to the mean value for updating. +// - mean_value : Pointer to the mean value for updating. // static void MeanEstimatorFloat(float new_value, float scale, float* mean_value) { assert(scale < 1.0f); - // mean_new = mean_value + ((new_value - mean_value) * scale); *mean_value += (new_value - *mean_value) * scale; } @@ -73,19 +73,37 @@ static void MeanEstimatorFloat(float new_value, // - threshold_spectrum : Threshold spectrum with which the input // spectrum is compared. // Return: -// - out : Binary spectrum +// - out : Binary spectrum. // static uint32_t BinarySpectrumFix(uint16_t* spectrum, - SpectrumType_t* threshold_spectrum) { - int k = kBandFirst; + SpectrumType* threshold_spectrum, + int q_domain, + int* threshold_initialized) { + int i = kBandFirst; uint32_t out = 0; - for (; k <= kBandLast; k++) { - WebRtc_MeanEstimatorFix((int32_t) spectrum[k], - 6, - &(threshold_spectrum[k].int32_)); - if (spectrum[k] > threshold_spectrum[k].int32_) { - out = SetBit(out, k - kBandFirst); + assert(q_domain < 16); + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0) { + // Convert input spectrum from Q(|q_domain|) to Q15.
+ int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + threshold_spectrum[i].int32_ = (spectrum_q15 >> 1); + *threshold_initialized = 1; + } + } + } + for (i = kBandFirst; i <= kBandLast; i++) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + // Update the |threshold_spectrum|. + WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum_q15 > threshold_spectrum[i].int32_) { + out = SetBit(out, i - kBandFirst); } } @@ -93,15 +111,29 @@ static uint32_t BinarySpectrumFix(uint16_t* spectrum, } static uint32_t BinarySpectrumFloat(float* spectrum, - SpectrumType_t* threshold_spectrum) { - int k = kBandFirst; + SpectrumType* threshold_spectrum, + int* threshold_initialized) { + int i = kBandFirst; uint32_t out = 0; - float scale = 1 / 64.0; + const float kScale = 1 / 64.0; - for (; k <= kBandLast; k++) { - MeanEstimatorFloat(spectrum[k], scale, &(threshold_spectrum[k].float_)); - if (spectrum[k] > threshold_spectrum[k].float_) { - out = SetBit(out, k - kBandFirst); + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0.0f) { + threshold_spectrum[i].float_ = (spectrum[i] / 2); + *threshold_initialized = 1; + } + } + } + + for (i = kBandFirst; i <= kBandLast; i++) { + // Update the |threshold_spectrum|. + MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_)); + // Convert |spectrum| at current frequency bin to a binary value. 
+ if (spectrum[i] > threshold_spectrum[i].float_) { + out = SetBit(out, i - kBandFirst); } } @@ -109,7 +141,7 @@ static uint32_t BinarySpectrumFloat(float* spectrum, } int WebRtc_FreeDelayEstimator(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; + DelayEstimator* self = (DelayEstimator*) handle; if (self == NULL) { return -1; @@ -135,10 +167,10 @@ int WebRtc_CreateDelayEstimator(void** handle, int spectrum_size, int max_delay, int lookahead) { - DelayEstimator_t *self = NULL; + DelayEstimator* self = NULL; - // Check if the sub band used in the delay estimation is small enough to - // fit the binary spectra in a uint32. + // Check if the sub band used in the delay estimation is small enough to fit + // the binary spectra in a uint32_t. assert(kBandLast - kBandFirst < 32); if (handle == NULL) { @@ -148,7 +180,7 @@ int WebRtc_CreateDelayEstimator(void** handle, return -1; } - self = malloc(sizeof(DelayEstimator_t)); + self = malloc(sizeof(DelayEstimator)); *handle = self; if (self == NULL) { return -1; @@ -165,14 +197,14 @@ int WebRtc_CreateDelayEstimator(void** handle, self = NULL; return -1; } - // Allocate memory for spectrum buffers - self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType_t)); + // Allocate memory for spectrum buffers. 
+ self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType)); if (self->mean_far_spectrum == NULL) { WebRtc_FreeDelayEstimator(self); self = NULL; return -1; } - self->mean_near_spectrum = malloc(spectrum_size * sizeof(SpectrumType_t)); + self->mean_near_spectrum = malloc(spectrum_size * sizeof(SpectrumType)); if (self->mean_near_spectrum == NULL) { WebRtc_FreeDelayEstimator(self); self = NULL; @@ -185,23 +217,24 @@ int WebRtc_CreateDelayEstimator(void** handle, } int WebRtc_InitDelayEstimator(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; + DelayEstimator* self = (DelayEstimator*) handle; if (self == NULL) { return -1; } - // Initialize binary delay estimator + // Initialize binary delay estimator. if (WebRtc_InitBinaryDelayEstimator(self->binary_handle) != 0) { return -1; } - // Set averaged far and near end spectra to zero - memset(self->mean_far_spectrum, - 0, - sizeof(SpectrumType_t) * self->spectrum_size); - memset(self->mean_near_spectrum, - 0, - sizeof(SpectrumType_t) * self->spectrum_size); + // Set averaged far and near end spectra to zero. + memset(self->mean_far_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + memset(self->mean_near_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->far_spectrum_initialized = 0; + self->near_spectrum_initialized = 0; return 0; } @@ -211,8 +244,8 @@ int WebRtc_DelayEstimatorProcessFix(void* handle, uint16_t* near_spectrum, int spectrum_size, int far_q, - int vad_value) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; + int near_q) { + DelayEstimator* self = (DelayEstimator*) handle; uint32_t binary_far_spectrum = 0; uint32_t binary_near_spectrum = 0; @@ -220,40 +253,46 @@ int WebRtc_DelayEstimatorProcessFix(void* handle, return -1; } if (far_spectrum == NULL) { - // Empty far end spectrum + // Empty far end spectrum. 
return -1; } if (near_spectrum == NULL) { - // Empty near end spectrum + // Empty near end spectrum. return -1; } if (spectrum_size != self->spectrum_size) { - // Data sizes don't match + // Data sizes don't match. return -1; } if (far_q > 15) { - // If |far_q| is larger than 15 we cannot guarantee no wrap around + // If |far_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + if (near_q > 15) { + // If |near_q| is larger than 15 we cannot guarantee no wrap around. return -1; } - // Get binary spectra + // Get binary spectra. binary_far_spectrum = BinarySpectrumFix(far_spectrum, - self->mean_far_spectrum); + self->mean_far_spectrum, + far_q, + &(self->far_spectrum_initialized)); binary_near_spectrum = BinarySpectrumFix(near_spectrum, - self->mean_near_spectrum); + self->mean_near_spectrum, + near_q, + &(self->near_spectrum_initialized)); return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_far_spectrum, - binary_near_spectrum, - vad_value); + binary_near_spectrum); } int WebRtc_DelayEstimatorProcessFloat(void* handle, float* far_spectrum, float* near_spectrum, - int spectrum_size, - int vad_value) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*) handle; uint32_t binary_far_spectrum = 0; uint32_t binary_near_spectrum = 0; @@ -261,32 +300,33 @@ int WebRtc_DelayEstimatorProcessFloat(void* handle, return -1; } if (far_spectrum == NULL) { - // Empty far end spectrum + // Empty far end spectrum. return -1; } if (near_spectrum == NULL) { - // Empty near end spectrum + // Empty near end spectrum. return -1; } if (spectrum_size != self->spectrum_size) { - // Data sizes don't match + // Data sizes don't match. return -1; } - // Get binary spectra + // Get binary spectra. 
binary_far_spectrum = BinarySpectrumFloat(far_spectrum, - self->mean_far_spectrum); + self->mean_far_spectrum, + &(self->far_spectrum_initialized)); binary_near_spectrum = BinarySpectrumFloat(near_spectrum, - self->mean_near_spectrum); + self->mean_near_spectrum, + &(self->near_spectrum_initialized)); return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_far_spectrum, - binary_near_spectrum, - vad_value); + binary_near_spectrum); } int WebRtc_last_delay(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; + DelayEstimator* self = (DelayEstimator*) handle; if (self == NULL) { return -1; diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.h b/src/modules/audio_processing/utility/delay_estimator_wrapper.h index c1f5cc12c..2a47b5d85 100644 --- a/src/modules/audio_processing/utility/delay_estimator_wrapper.h +++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -// Performs delay estimation on a block by block basis +// Performs delay estimation on block by block basis. // The return value is 0 - OK and -1 - Error, unless otherwise stated. #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ @@ -18,7 +18,7 @@ // Releases the memory allocated by WebRtc_CreateDelayEstimator(...) // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // int WebRtc_FreeDelayEstimator(void* handle); @@ -30,20 +30,20 @@ int WebRtc_FreeDelayEstimator(void* handle); // - spectrum_size : Size of the spectrum used both in far-end and // near-end. Used to allocate memory for spectrum // specific buffers. -// - max_delay : The maximum delay which can be estimated. Needed -// to allocate memory for history buffers. -// - lookahead : Amount of non-causal lookahead to use. 
This can detect -// cases in which a near-end signal occurs before the -// corresponding far-end signal. It will delay the +// - max_delay : The maximum delay which can be estimated. Needed to +// allocate memory for history buffers. +// - lookahead : Amount of non-causal lookahead to use. This can +// detect cases in which a near-end signal occurs before +// the corresponding far-end signal. It will delay the // estimate for the current block by an equal amount, // and the returned values will be offset by it. // -// A value of zero is the typical no-lookahead case. This -// also represents the minimum delay which can be +// A value of zero is the typical no-lookahead case. +// This also represents the minimum delay which can be // estimated. // // Output: -// - handle : Created instance +// - handle : Created instance. // int WebRtc_CreateDelayEstimator(void** handle, int spectrum_size, @@ -53,10 +53,10 @@ int WebRtc_CreateDelayEstimator(void** handle, // Initializes the delay estimation instance created with // WebRtc_CreateDelayEstimator(...) // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // // Output: -// - handle : Initialized instance +// - handle : Initialized instance. // int WebRtc_InitDelayEstimator(void* handle); @@ -64,21 +64,21 @@ int WebRtc_InitDelayEstimator(void* handle); // value will be offset by the lookahead (i.e. the lookahead should be // subtracted from the returned value). // Inputs: -// - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far-end spectrum data +// - handle : Pointer to the delay estimation instance. +// - far_spectrum : Pointer to the far-end spectrum data. // - near_spectrum : Pointer to the near-end spectrum data of the current -// block +// block. 
// - spectrum_size : The size of the data arrays (same for both far- and -// near-end) -// - far_q : The Q-domain of the far-end data -// - vad_value : The VAD decision of the current block +// near-end). +// - far_q : The Q-domain of the far-end data. +// - near_q : The Q-domain of the near-end data. // // Output: -// - handle : Updated instance +// - handle : Updated instance. // // Return value: -// - delay : >= 0 - Calculated delay value -// -1 - Error +// - delay : >= 0 - Calculated delay value. +// -1 - Error. // -2 - Insufficient data for estimation. // int WebRtc_DelayEstimatorProcessFix(void* handle, @@ -86,24 +86,23 @@ int WebRtc_DelayEstimatorProcessFix(void* handle, uint16_t* near_spectrum, int spectrum_size, int far_q, - int vad_value); + int near_q); // See WebRtc_DelayEstimatorProcessFix() for description. int WebRtc_DelayEstimatorProcessFloat(void* handle, float* far_spectrum, float* near_spectrum, - int spectrum_size, - int vad_value); + int spectrum_size); // Returns the last calculated delay updated by the function -// WebRtc_DelayEstimatorProcess(...) +// WebRtc_DelayEstimatorProcess(...). // // Input: -// - handle : Pointer to the delay estimation instance +// - handle : Pointer to the delay estimation instance. // // Return value: -// - delay : >= 0 - Last calculated delay value -// -1 - Error +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. // -2 - Insufficient data for estimation. 
// int WebRtc_last_delay(void* handle); diff --git a/test/data/audio_processing/output_data_fixed.pb b/test/data/audio_processing/output_data_fixed.pb index 7476a5250..81bc5af68 100644 Binary files a/test/data/audio_processing/output_data_fixed.pb and b/test/data/audio_processing/output_data_fixed.pb differ diff --git a/test/data/audio_processing/output_data_float.pb b/test/data/audio_processing/output_data_float.pb index d608e5fb0..ccd750926 100644 Binary files a/test/data/audio_processing/output_data_float.pb and b/test/data/audio_processing/output_data_float.pb differ