diff --git a/src/modules/audio_processing/aecm/main/source/aecm.gypi b/src/modules/audio_processing/aecm/main/source/aecm.gypi index 3c63c52b7..bbfb1ca82 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm.gypi +++ b/src/modules/audio_processing/aecm/main/source/aecm.gypi @@ -28,8 +28,6 @@ 'echo_control_mobile.c', 'aecm_core.c', 'aecm_core.h', - 'aecm_delay_estimator.c', - 'aecm_delay_estimator.h', ], }, ], diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.c b/src/modules/audio_processing/aecm/main/source/aecm_core.c index b7dae909a..13bffaea6 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm_core.c +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.c @@ -13,8 +13,8 @@ #include #include -#include "aecm_delay_estimator.h" #include "echo_control_mobile.h" +#include "delay_estimator.h" #include "ring_buffer.h" #include "typedefs.h" @@ -153,11 +153,13 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) return -1; } - if (WebRtcAecm_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, MAX_DELAY) == -1) - { - WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator, + PART_LEN1, + MAX_DELAY, + 1) == -1) { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; } // Init some aecm pointers. 
16 and 32 byte alignment is only necessary @@ -242,9 +244,8 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) aecm->seed = 666; aecm->totCount = 0; - if (WebRtcAecm_InitDelayEstimator(aecm->delay_estimator) != 0) - { - return -1; + if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { + return -1; } // Initialize to reasonable values @@ -339,7 +340,7 @@ int WebRtcAecm_FreeCore(AecmCore_t *aecm) WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf); WebRtcApm_FreeBuffer(aecm->outFrameBuf); - WebRtcAecm_FreeDelayEstimator(aecm->delay_estimator); + WebRtc_FreeDelayEstimator(aecm->delay_estimator); free(aecm); return 0; @@ -1161,6 +1162,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, WebRtc_Word16 supGain; WebRtc_Word16 zeros32, zeros16; WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; WebRtc_Word16 resolutionDiff, qDomainDiff; const int kMinPrefBand = 4; @@ -1200,10 +1202,10 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, #endif // Transform far end signal from time domain to frequency domain. - zerosXBuf = TimeToFrequencyDomain(aecm->xBuf, - dfw, - xfa, - &xfaSum); + far_q = TimeToFrequencyDomain(aecm->xBuf, + dfw, + xfa, + &xfaSum); // Transform noisy near end signal from time domain to frequency domain. 
zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy, @@ -1211,7 +1213,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, dfaNoisy, &dfaNoisySum); aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; - aecm->dfaNoisyQDomain = zerosDBufNoisy; + aecm->dfaNoisyQDomain = (WebRtc_Word16)zerosDBufNoisy; if (nearendClean == NULL) @@ -1228,7 +1230,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, dfaClean, &dfaCleanSum); aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; - aecm->dfaCleanQDomain = zerosDBufClean; + aecm->dfaCleanQDomain = (WebRtc_Word16)zerosDBufClean; } #ifdef ARM_WINM_LOG_ @@ -1243,12 +1245,12 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, // Get the delay // Save far-end history and estimate delay - delay = WebRtcAecm_DelayEstimatorProcess(aecm->delay_estimator, - xfa, - dfaNoisy, - PART_LEN1, - zerosXBuf, - aecm->currentVADValue); + delay = WebRtc_DelayEstimatorProcess(aecm->delay_estimator, + xfa, + dfaNoisy, + PART_LEN1, + far_q, + aecm->currentVADValue); if (delay < 0) { return -1; @@ -1272,16 +1274,21 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif // Get aligned far end spectrum - far_spectrum_ptr = WebRtcAecm_GetAlignedFarend(aecm->delay_estimator, - PART_LEN1, - &zerosXBuf); + far_spectrum_ptr = WebRtc_AlignedFarend(aecm->delay_estimator, + PART_LEN1, + &far_q); + zerosXBuf = (WebRtc_Word16) far_q; if (far_spectrum_ptr == NULL) { return -1; } // Calculate log(energy) and update energy threshold levels - WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, echoEst32); + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); // Calculate stepsize mu = WebRtcAecm_CalcStepSize(aecm); @@ -1923,4 +1930,3 @@ void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm) } #endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) - diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.h 
b/src/modules/audio_processing/aecm/main/source/aecm_core.h index e431c71af..0dfdb040d 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm_core.h +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.h @@ -178,7 +178,7 @@ typedef struct WebRtc_Word16 farEnergyMaxMin; WebRtc_Word16 farEnergyVAD; WebRtc_Word16 farEnergyMSE; - WebRtc_Word16 currentVADValue; + int currentVADValue; WebRtc_Word16 vadUpdateCount; WebRtc_Word16 startupState; diff --git a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c deleted file mode 100644 index 2ed9e037a..000000000 --- a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c +++ /dev/null @@ -1,604 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "aecm_delay_estimator.h" - -#include -#include - -#include "signal_processing_library.h" -#include "typedefs.h" - -typedef struct -{ - // Pointers to mean values of spectrum and bit counts - WebRtc_Word32* mean_far_spectrum; - WebRtc_Word32* mean_near_spectrum; - WebRtc_Word32* mean_bit_counts; - - // Arrays only used locally in DelayEstimatorProcess() but whose size - // is determined at run-time. 
- WebRtc_Word32* bit_counts; - WebRtc_Word32* far_spectrum_32; - WebRtc_Word32* near_spectrum_32; - - // Binary history variables - WebRtc_UWord32* binary_far_history; - - // Far end history variables - WebRtc_UWord16* far_history; - int far_history_position; - WebRtc_Word16* far_q_domains; - - // Delay histogram variables - WebRtc_Word16* delay_histogram; - WebRtc_Word16 vad_counter; - - // Delay memory - int last_delay; - - // Buffer size parameters - int history_size; - int spectrum_size; - -} DelayEstimator_t; - -// Only bit |kBandFirst| through bit |kBandLast| are processed -// |kBandFirst| - |kBandLast| must be < 32 -static const int kBandFirst = 12; -static const int kBandLast = 43; - -static __inline WebRtc_UWord32 SetBit(WebRtc_UWord32 in, - WebRtc_Word32 pos) -{ - WebRtc_UWord32 mask = WEBRTC_SPL_LSHIFT_W32(1, pos); - WebRtc_UWord32 out = (in | mask); - - return out; -} - -// Compares the binary vector |binary_vector| with all rows of the binary -// matrix |binary_matrix| and counts per row the number of times they have the -// same value. 
-// Input: -// - binary_vector : binary "vector" stored in a long -// - binary_matrix : binary "matrix" stored as a vector of long -// - matrix_size : size of binary "matrix" -// Output: -// - bit_counts : "Vector" stored as a long, containing for each -// row the number of times the matrix row and the -// input vector have the same value -// -static void BitCountComparison(const WebRtc_UWord32 binary_vector, - const WebRtc_UWord32* binary_matrix, - int matrix_size, - WebRtc_Word32* bit_counts) -{ - int n = 0; - WebRtc_UWord32 a = binary_vector; - register WebRtc_UWord32 tmp; - - // compare binary vector |binary_vector| with all rows of the binary matrix - // |binary_matrix| - for (; n < matrix_size; n++) - { - a = (binary_vector ^ binary_matrix[n]); - // Returns bit counts in tmp - tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); - tmp = ((tmp + (tmp >> 3)) & 030707070707); - tmp = (tmp + (tmp >> 6)); - tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; - - bit_counts[n] = (WebRtc_Word32)tmp; - } -} - -// Computes the binary spectrum by comparing the input |spectrum| with a -// |threshold_spectrum|. -// -// Input: -// - spectrum : Spectrum of which the binary spectrum should -// be calculated. -// - threshold_spectrum : Threshold spectrum with which the input -// spectrum is compared. -// Return: -// - out : Binary spectrum -// -static WebRtc_UWord32 GetBinarySpectrum(WebRtc_Word32* spectrum, - WebRtc_Word32* threshold_spectrum) -{ - int k = kBandFirst; - WebRtc_UWord32 out = 0; - - for (; k <= kBandLast; k++) - { - if (spectrum[k] > threshold_spectrum[k]) - { - out = SetBit(out, k - kBandFirst); - } - } - - return out; -} - -// Calculates the mean recursively. 
-// -// Input: -// - new_value : new additional value -// - factor : factor for smoothing -// -// Input/Output: -// - mean_value : pointer to the mean value that should be updated -// -static void MeanEstimator(const WebRtc_Word32 new_value, - int factor, - WebRtc_Word32* mean_value) -{ - WebRtc_Word32 mean_new = *mean_value; - WebRtc_Word32 diff = new_value - mean_new; - - // mean_new = mean_value + ((new_value - mean_value) >> factor); - if (diff < 0) - { - diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor); - } - else - { - diff = WEBRTC_SPL_RSHIFT_W32(diff, factor); - } - mean_new += diff; - - *mean_value = mean_new; -} - -// Moves the pointer to the next entry and inserts new far end spectrum and -// corresponding Q-domain in its buffer. -// -// Input: -// - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum -// - far_q : Q-domain of far end spectrum -// -static void UpdateFarHistory(DelayEstimator_t* self, - WebRtc_UWord16* far_spectrum, - WebRtc_Word16 far_q) -{ - // Get new buffer position - self->far_history_position++; - if (self->far_history_position >= self->history_size) - { - self->far_history_position = 0; - } - // Update Q-domain buffer - self->far_q_domains[self->far_history_position] = far_q; - // Update far end spectrum buffer - memcpy(&(self->far_history[self->far_history_position * self->spectrum_size]), - far_spectrum, - sizeof(WebRtc_UWord16) * self->spectrum_size); -} - -int WebRtcAecm_FreeDelayEstimator(void* handle) -{ - DelayEstimator_t* self = (DelayEstimator_t*)handle; - - if (self == NULL) - { - return -1; - } - - if (self->mean_far_spectrum != NULL) - { - free(self->mean_far_spectrum); - self->mean_far_spectrum = NULL; - } - if (self->mean_near_spectrum != NULL) - { - free(self->mean_near_spectrum); - self->mean_near_spectrum = NULL; - } - if (self->mean_bit_counts != NULL) - { - free(self->mean_bit_counts); - self->mean_bit_counts = NULL; - } - if (self->bit_counts != NULL) - { - 
free(self->bit_counts); - self->bit_counts = NULL; - } - if (self->far_spectrum_32 != NULL) - { - free(self->far_spectrum_32); - self->far_spectrum_32 = NULL; - } - if (self->near_spectrum_32 != NULL) - { - free(self->near_spectrum_32); - self->near_spectrum_32 = NULL; - } - if (self->far_history != NULL) - { - free(self->far_history); - self->far_history = NULL; - } - if (self->binary_far_history != NULL) - { - free(self->binary_far_history); - self->binary_far_history = NULL; - } - if (self->far_q_domains != NULL) - { - free(self->far_q_domains); - self->far_q_domains = NULL; - } - if (self->delay_histogram != NULL) - { - free(self->delay_histogram); - self->delay_histogram = NULL; - } - - free(self); - - return 0; -} - -int WebRtcAecm_CreateDelayEstimator(void** handle, - int spectrum_size, - int history_size) -{ - DelayEstimator_t *self = NULL; - // Check if the sub band used in the delay estimation is small enough to - // fit in a Word32. - assert(kBandLast - kBandFirst < 32); - if (spectrum_size < kBandLast) - { - return -1; - } - if (history_size < 0) - { - return -1; - } - - self = malloc(sizeof(DelayEstimator_t)); - *handle = self; - if (self == NULL) - { - return -1; - } - - self->mean_far_spectrum = NULL; - self->mean_near_spectrum = NULL; - self->bit_counts = NULL; - self->far_spectrum_32 = NULL; - self->near_spectrum_32 = NULL; - self->far_history = NULL; - self->mean_bit_counts = NULL; - self->binary_far_history = NULL; - self->far_q_domains = NULL; - self->delay_histogram = NULL; - - // Allocate memory for spectrum buffers - self->mean_far_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32)); - if (self->mean_far_spectrum == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->mean_near_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32)); - if (self->mean_near_spectrum == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->mean_bit_counts = malloc(history_size * 
sizeof(WebRtc_Word32)); - if (self->mean_bit_counts == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->bit_counts = malloc(history_size * sizeof(WebRtc_Word32)); - if (self->bit_counts == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->far_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32)); - if (self->far_spectrum_32 == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->near_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32)); - if (self->near_spectrum_32 == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - // Allocate memory for history buffers - self->far_history = malloc(spectrum_size * history_size * - sizeof(WebRtc_UWord16)); - if (self->far_history == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->binary_far_history = malloc(history_size * sizeof(WebRtc_UWord32)); - if (self->binary_far_history == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->far_q_domains = malloc(history_size * sizeof(WebRtc_Word16)); - if (self->far_q_domains == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->delay_histogram = malloc(history_size * sizeof(WebRtc_Word16)); - if (self->delay_histogram == NULL) - { - WebRtcAecm_FreeDelayEstimator(self); - self = NULL; - return -1; - } - - self->spectrum_size = spectrum_size; - self->history_size = history_size; - - return 0; -} - -int WebRtcAecm_InitDelayEstimator(void* handle) -{ - DelayEstimator_t* self = (DelayEstimator_t*)handle; - - if (self == NULL) - { - return -1; - } - // Set averaged far and near end spectra to zero - memset(self->mean_far_spectrum, - 0, - sizeof(WebRtc_Word32) * self->spectrum_size); - memset(self->mean_near_spectrum, - 0, - sizeof(WebRtc_Word32) * self->spectrum_size); - // Set averaged bit counts to zero - 
memset(self->mean_bit_counts, - 0, - sizeof(WebRtc_Word32) * self->history_size); - memset(self->bit_counts, - 0, - sizeof(WebRtc_Word32) * self->history_size); - memset(self->far_spectrum_32, - 0, - sizeof(WebRtc_Word32) * self->spectrum_size); - memset(self->near_spectrum_32, - 0, - sizeof(WebRtc_Word32) * self->spectrum_size); - // Set far end histories to zero - memset(self->binary_far_history, - 0, - sizeof(WebRtc_UWord32) * self->history_size); - memset(self->far_history, - 0, - sizeof(WebRtc_UWord16) * self->spectrum_size * - self->history_size); - memset(self->far_q_domains, - 0, - sizeof(WebRtc_Word16) * self->history_size); - - self->far_history_position = self->history_size; - // Set delay histogram to zero - memset(self->delay_histogram, - 0, - sizeof(WebRtc_Word16) * self->history_size); - // Set VAD counter to zero - self->vad_counter = 0; - // Set delay memory to zero - self->last_delay = 0; - - return 0; -} - -int WebRtcAecm_DelayEstimatorProcess(void* handle, - WebRtc_UWord16* far_spectrum, - WebRtc_UWord16* near_spectrum, - int spectrum_size, - WebRtc_Word16 far_q, - WebRtc_Word16 vad_value) -{ - DelayEstimator_t* self = (DelayEstimator_t*)handle; - - WebRtc_UWord32 bxspectrum, byspectrum; - - int i; - - WebRtc_Word32 dtmp1; - - WebRtc_Word16 maxHistLvl = 0; - WebRtc_Word16 minpos = -1; - - const int kVadCountThreshold = 25; - const int kMaxHistogram = 600; - - if (self == NULL) - { - return -1; - } - - if (spectrum_size != self->spectrum_size) - { - // Data sizes don't match - return -1; - } - if (far_q > 15) - { - // If far_Q is larger than 15 we can not guarantee no wrap around - return -1; - } - - // Update far end history - UpdateFarHistory(self, far_spectrum, far_q); - // Update the far and near end means - for (i = 0; i < self->spectrum_size; i++) - { - self->far_spectrum_32[i] = (WebRtc_Word32)far_spectrum[i]; - MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i])); - - self->near_spectrum_32[i] = 
(WebRtc_Word32)near_spectrum[i]; - MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i])); - } - - // Shift binary spectrum history - memmove(&(self->binary_far_history[1]), - &(self->binary_far_history[0]), - (self->history_size - 1) * sizeof(WebRtc_UWord32)); - - // Get binary spectra - bxspectrum = GetBinarySpectrum(self->far_spectrum_32, self->mean_far_spectrum); - byspectrum = GetBinarySpectrum(self->near_spectrum_32, self->mean_near_spectrum); - // Insert new binary spectrum - self->binary_far_history[0] = bxspectrum; - - // Compare with delayed spectra - BitCountComparison(byspectrum, - self->binary_far_history, - self->history_size, - self->bit_counts); - - // Smooth bit count curve - for (i = 0; i < self->history_size; i++) - { - // Update sum - // |bit_counts| is constrained to [0, 32], meaning we can smooth with a - // factor up to 2^26. We use Q9. - dtmp1 = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9 - MeanEstimator(dtmp1, 9, &(self->mean_bit_counts[i])); - } - - // Find minimum position of bit count curve - minpos = WebRtcSpl_MinIndexW32(self->mean_bit_counts, self->history_size); - - // If the farend has been active sufficiently long, begin accumulating a - // histogram of the minimum positions. Search for the maximum bin to - // determine the delay. - if (vad_value == 1) - { - if (self->vad_counter >= kVadCountThreshold) - { - // Increment the histogram at the current minimum position. - if (self->delay_histogram[minpos] < kMaxHistogram) - { - self->delay_histogram[minpos] += 3; - } - -#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) - // Decrement the entire histogram. - // Select the histogram index corresponding to the maximum bin as - // the delay. 
- self->last_delay = 0; - for (i = 0; i < self->history_size; i++) - { - if (self->delay_histogram[i] > 0) - { - self->delay_histogram[i]--; - } - if (self->delay_histogram[i] > maxHistLvl) - { - maxHistLvl = self->delay_histogram[i]; - self->last_delay = i; - } - } -#else - self->last_delay = 0; - - for (i = 0; i < self->history_size; i++) - { - WebRtc_Word16 tempVar = self->delay_histogram[i]; - - // Decrement the entire histogram. - if (tempVar > 0) - { - tempVar--; - self->delay_histogram[i] = tempVar; - - // Select the histogram index corresponding to the maximum - // bin as the delay. - if (tempVar > maxHistLvl) - { - maxHistLvl = tempVar; - self->last_delay = i; - } - } - } -#endif - } else - { - self->vad_counter++; - } - } else - { - self->vad_counter = 0; - } - - return self->last_delay; -} - -const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle, - int far_spectrum_size, - WebRtc_Word16* far_q) -{ - DelayEstimator_t* self = (DelayEstimator_t*)handle; - int buffer_position = 0; - - if (self == NULL) - { - return NULL; - } - if (far_spectrum_size != self->spectrum_size) - { - return NULL; - } - - // Get buffer position - buffer_position = self->far_history_position - self->last_delay; - if (buffer_position < 0) - { - buffer_position += self->history_size; - } - // Get Q-domain - *far_q = self->far_q_domains[buffer_position]; - // Return far end spectrum - return (self->far_history + (buffer_position * self->spectrum_size)); - -} - -int WebRtcAecm_GetLastDelay(void* handle) -{ - DelayEstimator_t* self = (DelayEstimator_t*)handle; - - if (self == NULL) - { - return -1; - } - - // Return last calculated delay - return self->last_delay; -} diff --git a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h deleted file mode 100644 index 5ce57facc..000000000 --- a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - 
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Performs delay estimation on a block by block basis -// The return value is 0 - OK and -1 - Error, unless otherwise stated. - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_ - -#include "typedefs.h" - -// Releases the memory allocated by WebRtcAecm_CreateDelayEstimator(...) -// Input: -// - handle : Pointer to the delay estimation instance -// -int WebRtcAecm_FreeDelayEstimator(void* handle); - -// Allocates the memory needed by the delay estimation. The memory needs to be -// initialized separately using the WebRtcAecm_InitDelayEstimator(...) function. -// -// Input: -// - handle : Instance that should be created -// - spectrum_size : Size of the spectrum used both in far end and near -// end. Used to allocate memory for spectrum specific -// buffers. -// - history_size : Size of the far end history used to estimate the -// delay from. Used to allocate memory for history -// specific buffers. -// -// Output: -// - handle : Created instance -// -int WebRtcAecm_CreateDelayEstimator(void** handle, - int spectrum_size, - int history_size); - -// Initializes the delay estimation instance created with -// WebRtcAecm_CreateDelayEstimator(...) -// Input: -// - handle : Pointer to the delay estimation instance -// -// Output: -// - handle : Initialized instance -// -int WebRtcAecm_InitDelayEstimator(void* handle); - -// Estimates and returns the delay between the far end and near end blocks. 
-// Input: -// - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum data -// - near_spectrum : Pointer to the near end spectrum data of the current -// block -// - spectrum_size : The size of the data arrays (same for both far and -// near end) -// - far_q : The Q-domain of the far end data -// - vad_value : The VAD decision of the current block -// -// Output: -// - handle : Updated instance -// -// Return value: -// - delay : >= 0 - Calculated delay value -// -1 - Error -// -int WebRtcAecm_DelayEstimatorProcess(void* handle, - WebRtc_UWord16* far_spectrum, - WebRtc_UWord16* near_spectrum, - int spectrum_size, - WebRtc_Word16 far_q, - WebRtc_Word16 vad_value); - -// Returns a pointer to the far end spectrum aligned to current near end -// spectrum. The function WebRtcAecm_DelayEstimatorProcess(...) should -// have been called before WebRtcAecm_GetAlignedFarend(...). Otherwise, you get -// the pointer to the previous frame. The memory is only valid until the next -// call of WebRtcAecm_DelayEstimatorProcess(...). -// -// Inputs: -// - handle : Pointer to the delay estimation instance -// -// Output: -// - far_spectrum_size : Size of far_spectrum allocated by the caller -// - far_q : The Q-domain of the aligned far end spectrum -// -// Return value: -// - far_spectrum : Pointer to the aligned far end spectrum -// NULL - Error -// -const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle, - int far_spectrum_size, - WebRtc_Word16* far_q); - -// Returns the last calculated delay updated by the function -// WebRtcAecm_DelayEstimatorProcess(...) 
-// -// Inputs: -// - handle : Pointer to the delay estimation instance -// -// Return value: -// - delay : >= 0 - Last calculated delay value -// -1 - Error -// -int WebRtcAecm_GetLastDelay(void* handle); - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_ diff --git a/src/modules/audio_processing/utility/delay_estimator.c b/src/modules/audio_processing/utility/delay_estimator.c new file mode 100644 index 000000000..cb65c20d7 --- /dev/null +++ b/src/modules/audio_processing/utility/delay_estimator.c @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "delay_estimator.h" + +#include +#include +#include +#include + +#include "signal_processing_library.h" + +typedef struct { + // Pointers to mean values of spectrum and bit counts + int32_t* mean_far_spectrum; + int32_t* mean_near_spectrum; + int32_t* mean_bit_counts; + + // Arrays only used locally in DelayEstimatorProcess() but whose size + // is determined at run-time. + int32_t* bit_counts; + int32_t* far_spectrum_32; + int32_t* near_spectrum_32; + + // Binary history variables + uint32_t* binary_far_history; + + // Far end history variables + uint16_t* far_history; + int far_history_pos; + int* far_q_domains; + + // Delay histogram variables + int* delay_histogram; + int vad_counter; + + // Delay memory + int last_delay; + + // Used to enable far end alignment. 
If it is disabled, only delay values are + // produced + int alignment_enabled; + + // Buffer size parameters + int history_size; + int spectrum_size; + +} DelayEstimator_t; + +// Only bit |kBandFirst| through bit |kBandLast| are processed +// |kBandLast| - |kBandFirst| must be < 32 +static const int kBandFirst = 12; +static const int kBandLast = 43; + +static __inline uint32_t SetBit(uint32_t in, int32_t pos) { + uint32_t mask = WEBRTC_SPL_LSHIFT_W32(1, pos); + uint32_t out = (in | mask); + + return out; +} + +// Compares the |binary_vector| with all rows of the |binary_matrix| and counts +// per row the number of bit positions in which they differ. +// +// Inputs: +// - binary_vector : binary "vector" stored in a uint32_t +// - binary_matrix : binary "matrix" stored as an array of uint32_t +// - matrix_size : size of binary "matrix" +// +// Output: +// - bit_counts : Array of int32_t, containing for each +// row the number of bit positions in which the +// matrix row and the input vector differ +// +static void BitCountComparison(uint32_t binary_vector, + const uint32_t* binary_matrix, + int matrix_size, + int32_t* bit_counts) { + int n = 0; + uint32_t a = binary_vector; + register uint32_t tmp; + + // compare |binary_vector| with all rows of the |binary_matrix| + for (; n < matrix_size; n++) { + a = (binary_vector ^ binary_matrix[n]); + // Returns bit counts in tmp + tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + bit_counts[n] = (int32_t) tmp; + } +} + +// Computes the binary spectrum by comparing the input |spectrum| with a +// |threshold_spectrum|. +// +// Inputs: +// - spectrum : Spectrum of which the binary spectrum should be +// calculated. +// - threshold_spectrum : Threshold spectrum with which the input +// spectrum is compared. 
+// Return: +// - out : Binary spectrum +// +static uint32_t BinarySpectrum(int32_t* spectrum, int32_t* threshold_spectrum) { + int k = kBandFirst; + uint32_t out = 0; + + for (; k <= kBandLast; k++) { + if (spectrum[k] > threshold_spectrum[k]) { + out = SetBit(out, k - kBandFirst); + } + } + + return out; +} + +// Calculates the mean recursively. +// +// Inputs: +// - new_value : new additional value +// - factor : factor for smoothing +// +// Input/Output: +// - mean_value : pointer to the mean value that should be updated +// +static void MeanEstimator(const int32_t new_value, + int factor, + int32_t* mean_value) { + int32_t mean_new = *mean_value; + int32_t diff = new_value - mean_new; + + // mean_new = mean_value + ((new_value - mean_value) >> factor); + if (diff < 0) { + diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor); + } else { + diff = WEBRTC_SPL_RSHIFT_W32(diff, factor); + } + mean_new += diff; + + *mean_value = mean_new; +} + +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. 
+// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +static void UpdateFarHistory(DelayEstimator_t* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= self->history_size) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * self->spectrum_size]), + far_spectrum, + sizeof(uint16_t) * self->spectrum_size); +} + +int WebRtc_FreeDelayEstimator(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + + if (self->mean_far_spectrum != NULL) { + free(self->mean_far_spectrum); + self->mean_far_spectrum = NULL; + } + if (self->mean_near_spectrum != NULL) { + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + } + if (self->mean_bit_counts != NULL) { + free(self->mean_bit_counts); + self->mean_bit_counts = NULL; + } + if (self->bit_counts != NULL) { + free(self->bit_counts); + self->bit_counts = NULL; + } + if (self->far_spectrum_32 != NULL) { + free(self->far_spectrum_32); + self->far_spectrum_32 = NULL; + } + if (self->near_spectrum_32 != NULL) { + free(self->near_spectrum_32); + self->near_spectrum_32 = NULL; + } + if (self->binary_far_history != NULL) { + free(self->binary_far_history); + self->binary_far_history = NULL; + } + if (self->far_history != NULL) { + free(self->far_history); + self->far_history = NULL; + } + if (self->far_q_domains != NULL) { + free(self->far_q_domains); + self->far_q_domains = NULL; + } + if (self->delay_histogram != NULL) { + free(self->delay_histogram); + self->delay_histogram = NULL; + } + + free(self); + + return 0; +} + +int WebRtc_CreateDelayEstimator(void** handle, + int spectrum_size, + int history_size, + int 
enable_alignment) { + // Check if the sub band used in the delay estimation is small enough to + // fit the binary spectra in a uint32. + assert(kBandLast - kBandFirst < 32); + + DelayEstimator_t *self = NULL; + if (spectrum_size < kBandLast) { + return -1; + } + if (history_size < 0) { + return -1; + } + if ((enable_alignment != 0) && (enable_alignment != 1)) { + return -1; + } + + self = malloc(sizeof(DelayEstimator_t)); + *handle = self; + if (self == NULL) { + return -1; + } + + self->mean_far_spectrum = NULL; + self->mean_near_spectrum = NULL; + self->mean_bit_counts = NULL; + self->bit_counts = NULL; + self->far_spectrum_32 = NULL; + self->near_spectrum_32 = NULL; + self->binary_far_history = NULL; + self->far_history = NULL; + self->far_q_domains = NULL; + self->delay_histogram = NULL; + + // Allocate memory for spectrum buffers + self->mean_far_spectrum = malloc(spectrum_size * sizeof(int32_t)); + if (self->mean_far_spectrum == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->mean_near_spectrum = malloc(spectrum_size * sizeof(int32_t)); + if (self->mean_near_spectrum == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->mean_bit_counts = malloc(history_size * sizeof(int32_t)); + if (self->mean_bit_counts == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->bit_counts = malloc(history_size * sizeof(int32_t)); + if (self->bit_counts == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->far_spectrum_32 = malloc(spectrum_size * sizeof(int32_t)); + if (self->far_spectrum_32 == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->near_spectrum_32 = malloc(spectrum_size * sizeof(int32_t)); + if (self->near_spectrum_32 == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + // Allocate memory for history buffers + self->binary_far_history = malloc(history_size * sizeof(uint32_t)); + 
if (self->binary_far_history == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + if (enable_alignment) { + self->far_history = malloc(spectrum_size * history_size * sizeof(uint16_t)); + if (self->far_history == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->far_q_domains = malloc(history_size * sizeof(int)); + if (self->far_q_domains == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + } + self->delay_histogram = malloc(history_size * sizeof(int)); + if (self->delay_histogram == NULL) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + return -1; + } + + self->spectrum_size = spectrum_size; + self->history_size = history_size; + self->alignment_enabled = enable_alignment; + + return 0; +} + +int WebRtc_InitDelayEstimator(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + // Set averaged far and near end spectra to zero + memset(self->mean_far_spectrum, 0, sizeof(int32_t) * self->spectrum_size); + memset(self->mean_near_spectrum, 0, sizeof(int32_t) * self->spectrum_size); + // Set averaged bit counts to zero + memset(self->mean_bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->far_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size); + memset(self->near_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size); + // Set far end histories to zero + memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size); + if (self->alignment_enabled) { + memset(self->far_history, + 0, + sizeof(uint16_t) * self->spectrum_size * self->history_size); + memset(self->far_q_domains, 0, sizeof(int) * self->history_size); + self->far_history_pos = self->history_size; + } + // Set delay histogram to zero + memset(self->delay_histogram, 0, sizeof(int) * self->history_size); + // Set VAD counter to zero + self->vad_counter = 0; + // Set 
delay memory to zero + self->last_delay = 0; + + return 0; +} + +int WebRtc_DelayEstimatorProcess(void* handle, + uint16_t* far_spectrum, + uint16_t* near_spectrum, + int spectrum_size, + int far_q, + int vad_value) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + const int kVadCountThreshold = 25; + const int kMaxHistogram = 600; + + int histogram_bin = 0; + int i = 0; + int max_histogram_level = 0; + int min_position = -1; + + uint32_t binary_far_spectrum = 0; + uint32_t binary_near_spectrum = 0; + + int32_t bit_counts_tmp = 0; + + if (self == NULL) { + return -1; + } + + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match + return -1; + } + if (far_q > 15) { + // If |far_q| is larger than 15 we cannot guarantee no wrap around + return -1; + } + + if (self->alignment_enabled) { + // Update far end history + UpdateFarHistory(self, far_spectrum, far_q); + } // Update the far and near end means + for (i = 0; i < self->spectrum_size; i++) { + self->far_spectrum_32[i] = (int32_t) far_spectrum[i]; + MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i])); + + self->near_spectrum_32[i] = (int32_t) near_spectrum[i]; + MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i])); + } + + // Shift binary spectrum history + memmove(&(self->binary_far_history[1]), &(self->binary_far_history[0]), + (self->history_size - 1) * sizeof(uint32_t)); + + // Get binary spectra + binary_far_spectrum = BinarySpectrum(self->far_spectrum_32, + self->mean_far_spectrum); + binary_near_spectrum = BinarySpectrum(self->near_spectrum_32, + self->mean_near_spectrum); + // Insert new binary spectrum + self->binary_far_history[0] = binary_far_spectrum; + + // Compare with delayed spectra + BitCountComparison(binary_near_spectrum, + self->binary_far_history, + self->history_size, + self->bit_counts); + + // Smooth bit count curve + for (i = 0; i < self->history_size; i++) { + // Update sum + // |bit_counts| is constrained to [0, 32], 
meaning we can smooth with a + // factor up to 2^26. We use Q9. + bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9 + MeanEstimator(bit_counts_tmp, 9, &(self->mean_bit_counts[i])); + } + + // Find minimum position of bit count curve + min_position = (int) WebRtcSpl_MinIndexW32(self->mean_bit_counts, + (int16_t) self->history_size); + + // If the far end has been active sufficiently long, begin accumulating a + // histogram of the minimum positions. Search for the maximum bin to + // determine the delay. + if (vad_value == 1) { + if (self->vad_counter >= kVadCountThreshold) { + // Increment the histogram at the current minimum position. + if (self->delay_histogram[min_position] < kMaxHistogram) { + self->delay_histogram[min_position] += 3; + } + + self->last_delay = 0; + for (i = 0; i < self->history_size; i++) { + histogram_bin = self->delay_histogram[i]; + + // Decrement the histogram bin. + if (histogram_bin > 0) { + histogram_bin--; + self->delay_histogram[i] = histogram_bin; + // Select the histogram index corresponding to the maximum bin as the + // delay. 
+ if (histogram_bin > max_histogram_level) { + max_histogram_level = histogram_bin; + self->last_delay = i; + } + } + } + } else { + self->vad_counter++; + } + } else { + self->vad_counter = 0; + } + + return self->last_delay; +} + +const uint16_t* WebRtc_AlignedFarend(void* handle, + int far_spectrum_size, + int* far_q) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + int buffer_position = 0; + + if (self == NULL) { + return NULL; + } + if ((far_spectrum_size != self->spectrum_size) || (self->history_size <= 0)) { + return NULL; + } + if ((self->alignment_enabled == 0) || (far_q == NULL)) { + return NULL; + } + + // Get buffer position + buffer_position = self->far_history_pos - self->last_delay; + // Wrap into [0, history_size). WebRtc_InitDelayEstimator() leaves + // |far_history_pos| == |history_size|, so the upper edge has to wrap as well. + buffer_position = (buffer_position + self->history_size) % self->history_size; + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return (self->far_history + (buffer_position * far_spectrum_size)); + +} + +int WebRtc_last_delay(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + + return self->last_delay; +} + +int WebRtc_history_size(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + + return self->history_size; +} + +int WebRtc_spectrum_size(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + + return self->spectrum_size; +} + +int WebRtc_is_alignment_enabled(void* handle) { + DelayEstimator_t* self = (DelayEstimator_t*) handle; + + if (self == NULL) { + return -1; + } + + return self->alignment_enabled; +} diff --git a/src/modules/audio_processing/utility/delay_estimator.h b/src/modules/audio_processing/utility/delay_estimator.h new file mode 100644 index 000000000..190de19ab --- /dev/null +++ b/src/modules/audio_processing/utility/delay_estimator.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on a block by block basis +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ + +#include + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +// Input: +// - handle : Pointer to the delay estimation instance +// +int WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately using the WebRtc_InitDelayEstimator(...) +// function. +// +// Inputs: +// - handle : Instance that should be created +// - spectrum_size : Size of the spectrum used both in far end and +// near end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : Size of the far end history used to estimate the +// delay from. Used to allocate memory for history +// specific buffers. +// - enable_alignment : With this mode set to 1, a far end history is +// created, so that the user can retrieve aligned +// far end spectra using +// WebRtc_AlignedFarend(...). Otherwise, only delay +// values are calculated. +// +// Output: +// - handle : Created instance +// +int WebRtc_CreateDelayEstimator(void** handle, + int spectrum_size, + int history_size, + int enable_alignment); + +// Initializes the delay estimation instance created with +// WebRtc_CreateDelayEstimator(...) 
+// Input: +// - handle : Pointer to the delay estimation instance +// +// Output: +// - handle : Initialized instance +// +int WebRtc_InitDelayEstimator(void* handle); + +// Estimates and returns the delay between the far end and near end blocks. +// Inputs: +// - handle : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum data +// - near_spectrum : Pointer to the near end spectrum data of the current +// block +// - spectrum_size : The size of the data arrays (same for both far and +// near end) +// - far_q : The Q-domain of the far end data +// - vad_value : The VAD decision of the current block +// +// Output: +// - handle : Updated instance +// +// Return value: +// - delay : >= 0 - Calculated delay value +// -1 - Error +// +int WebRtc_DelayEstimatorProcess(void* handle, + uint16_t* far_spectrum, + uint16_t* near_spectrum, + int spectrum_size, + int far_q, + int vad_value); + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcess(...) should have been +// called before WebRtc_AlignedFarend(...). Otherwise, you get the pointer to +// the previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcess(...). +// +// Inputs: +// - handle : Pointer to the delay estimation instance +// - far_spectrum_size : Size of far_spectrum allocated by the caller +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtc_AlignedFarend(void* handle, + int far_spectrum_size, + int* far_q); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcess(...) 
+// +// Input: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -1 - Error +// +int WebRtc_last_delay(void* handle); + +// Returns the history size used in the far end buffers to calculate the delay +// over. +// +// Input: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - history_size : > 0 - Far end history size +// -1 - Error +// +int WebRtc_history_size(void* handle); + +// Returns the fixed spectrum size used in the algorithm. +// +// Input: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - spectrum_size : > 0 - Spectrum size +// -1 - Error +// +int WebRtc_spectrum_size(void* handle); + +// Returns 1 if the far end alignment is enabled and 0 otherwise. +// +// Input: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - alignment_enabled : 1 - Enabled +// 0 - Disabled +// -1 - Error +// +int WebRtc_is_alignment_enabled(void* handle); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ diff --git a/src/modules/audio_processing/utility/delay_estimator_float.c b/src/modules/audio_processing/utility/delay_estimator_float.c new file mode 100644 index 000000000..8312c56d0 --- /dev/null +++ b/src/modules/audio_processing/utility/delay_estimator_float.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "delay_estimator_float.h" + +#include +#include +#include +#include +#include + +#include "delay_estimator.h" +#include "signal_processing_library.h" + +typedef struct { + // Fixed point spectra + uint16_t* far_spectrum_u16; + uint16_t* near_spectrum_u16; + + // Far end history variables + float* far_history; + int far_history_pos; + + // Fixed point delay estimator + void* fixed_handle; + +} DelayEstimatorFloat_t; + +// Moves the pointer to the next buffer entry and inserts new far end spectrum. +// Only used when alignment is enabled. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// +static void UpdateFarHistory(DelayEstimatorFloat_t* self, float* far_spectrum) { + int spectrum_size = WebRtc_spectrum_size(self->fixed_handle); + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= WebRtc_history_size(self->fixed_handle)) { + self->far_history_pos = 0; + } + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * spectrum_size]), + far_spectrum, + sizeof(float) * spectrum_size); +} + +int WebRtc_FreeDelayEstimatorFloat(void* handle) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + + if (self == NULL) { + return -1; + } + + if (self->far_history != NULL) { + free(self->far_history); + self->far_history = NULL; + } + if (self->far_spectrum_u16 != NULL) { + free(self->far_spectrum_u16); + self->far_spectrum_u16 = NULL; + } + if (self->near_spectrum_u16 != NULL) { + free(self->near_spectrum_u16); + self->near_spectrum_u16 = NULL; + } + + WebRtc_FreeDelayEstimator(self->fixed_handle); + free(self); + + return 0; +} + +int WebRtc_CreateDelayEstimatorFloat(void** handle, + int spectrum_size, + int history_size, + int enable_alignment) { + DelayEstimatorFloat_t *self = NULL; + if ((enable_alignment != 0) && (enable_alignment != 1)) { + return -1; + } + + self = 
malloc(sizeof(DelayEstimatorFloat_t)); + *handle = self; + if (self == NULL) { + return -1; + } + + self->far_history = NULL; + self->far_spectrum_u16 = NULL; + self->near_spectrum_u16 = NULL; + + // Create fixed point core delay estimator + if (WebRtc_CreateDelayEstimator(&self->fixed_handle, spectrum_size, + history_size, enable_alignment) != 0) { + // Not set, or already freed by the callee (confirm Free(NULL) is safe). + self->fixed_handle = NULL; + WebRtc_FreeDelayEstimatorFloat(self); + *handle = NULL; + return -1; + } + + // Allocate memory for far history buffer + if (enable_alignment) { + self->far_history = malloc(spectrum_size * history_size * sizeof(float)); + if (self->far_history == NULL) { + WebRtc_FreeDelayEstimatorFloat(self); + *handle = NULL; + return -1; + } + } + // Allocate memory for fixed point spectra + self->far_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t)); + if (self->far_spectrum_u16 == NULL) { + WebRtc_FreeDelayEstimatorFloat(self); + *handle = NULL; + return -1; + } + self->near_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t)); + if (self->near_spectrum_u16 == NULL) { + WebRtc_FreeDelayEstimatorFloat(self); + *handle = NULL; + return -1; + } + + return 0; +} + +int WebRtc_InitDelayEstimatorFloat(void* handle) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + + if (self == NULL) { + return -1; + } + + if (WebRtc_InitDelayEstimator(self->fixed_handle) != 0) { + return -1; + } + + { + int history_size = WebRtc_history_size(self->fixed_handle); + int spectrum_size = WebRtc_spectrum_size(self->fixed_handle); + if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) { + // Set far end histories to zero + memset(self->far_history, + 0, + sizeof(float) * spectrum_size * history_size); + self->far_history_pos = history_size; + } + // Set fixed point spectra to zero + memset(self->far_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size); + memset(self->near_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size); + } + + return 0; +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + float* far_spectrum, + float*
near_spectrum, + int spectrum_size, + int vad_value) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + + const float kFftSize = (float) (2 * (spectrum_size - 1)); + float max_value = 0.0f; + float freq_scaling = 0; + + int far_q = 0; + int freq_scaling_log = 0; + int i = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum + return -1; + } + if (spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) { + // Data sizes don't match + return -1; + } + + // Convert floating point spectrum to fixed point + // 1) Find largest value + // 2) Scale largest value to fit in Word16 + for (i = 0; i < spectrum_size; ++i) { + if (near_spectrum[i] > max_value) { + max_value = near_spectrum[i]; + } + } + // Find the largest possible scaling that is a power of two. + // With largest we mean to fit in a Word16. + // TODO(bjornv): I've taken the size of FFT into account, since there is a + // different scaling in float vs fixed point FFTs. I'm not completely sure + // this is necessary. + freq_scaling_log = 14 - (int) log2(max_value / kFftSize + 1); + freq_scaling = (float) (1 << freq_scaling_log) / kFftSize; + for (i = 0; i < spectrum_size; ++i) { + self->near_spectrum_u16[i] = (uint16_t) (near_spectrum[i] * freq_scaling); + } + + // Same for far end + max_value = 0.0f; + for (i = 0; i < spectrum_size; ++i) { + if (far_spectrum[i] > max_value) { + max_value = far_spectrum[i]; + } + } + // Find the largest possible scaling that is a power of two. + // With largest we mean to fit in a Word16.
+ freq_scaling_log = 14 - (int) log2(max_value / kFftSize + 1); + freq_scaling = (float) (1 << freq_scaling_log) / kFftSize; + for (i = 0; i < spectrum_size; ++i) { + self->far_spectrum_u16[i] = (uint16_t) (far_spectrum[i] * freq_scaling); + } + far_q = (int) freq_scaling_log; + assert(far_q < 16); // Catch too large scaling, which should never be able to + // occur. + + if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) { + // Update far end history + UpdateFarHistory(self, far_spectrum); + } + + return WebRtc_DelayEstimatorProcess(self->fixed_handle, + self->far_spectrum_u16, + self->near_spectrum_u16, + spectrum_size, + far_q, + vad_value); +} + +const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + int buffer_pos = 0; + + if (self == NULL) { + return NULL; + } + if (far_spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) { + return NULL; + } + if ((WebRtc_is_alignment_enabled(self->fixed_handle) != 1) || (WebRtc_history_size(self->fixed_handle) <= 0)) { + return NULL; + } + + // Get buffer position + buffer_pos = self->far_history_pos - WebRtc_last_delay(self->fixed_handle); + // Wrap into [0, history_size). WebRtc_InitDelayEstimatorFloat() leaves + // |far_history_pos| equal to the history size, so the upper edge wraps too. + buffer_pos = (buffer_pos + WebRtc_history_size(self->fixed_handle)) % WebRtc_history_size(self->fixed_handle); + // Return pointer to far end spectrum + return (self->far_history + (buffer_pos * far_spectrum_size)); +} + +int WebRtc_last_delay_float(void* handle) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_last_delay(self->fixed_handle); +} + +int WebRtc_is_alignment_enabled_float(void* handle) { + DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_is_alignment_enabled(self->fixed_handle); +} diff --git a/src/modules/audio_processing/utility/delay_estimator_float.h b/src/modules/audio_processing/utility/delay_estimator_float.h new file mode 100644 index 000000000..308996523 --- /dev/null +++
b/src/modules/audio_processing/utility/delay_estimator_float.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on a block by block basis +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ + +// Releases the memory allocated by WebRtc_CreateDelayEstimatorFloat(...) +// Input: +// - handle : Pointer to the delay estimation instance +// +int WebRtc_FreeDelayEstimatorFloat(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately using the WebRtc_InitDelayEstimatorFloat(...) +// function. +// +// Inputs: +// - handle : Instance that should be created +// - spectrum_size : Size of the spectrum used both in far end and +// near end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : Size of the far end history used to estimate the +// delay from. Used to allocate memory for history +// specific buffers. +// - enable_alignment : With this mode set to 1, a far end history is +// created, so that the user can retrieve aligned +// far end spectra using +// WebRtc_AlignedFarendFloat(...). Otherwise, only +// delay values are calculated. +// +// Output: +// - handle : Created instance +// +int WebRtc_CreateDelayEstimatorFloat(void** handle, + int spectrum_size, + int history_size, + int enable_alignment); + +// Initializes the delay estimation instance created with +// WebRtc_CreateDelayEstimatorFloat(...) 
+// Input: +// - handle : Pointer to the delay estimation instance +// +// Output: +// - handle : Initialized instance +// +int WebRtc_InitDelayEstimatorFloat(void* handle); + +// Estimates and returns the delay between the far end and near end blocks. +// Inputs: +// - handle : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum data +// - near_spectrum : Pointer to the near end spectrum data of the current +// block +// - spectrum_size : The size of the data arrays (same for both far and +// near end) +// (no far_q input: the far end Q-domain is derived internally) +// - vad_value : The VAD decision of the current block +// +// Output: +// - handle : Updated instance +// +// Return value: +// - delay : >= 0 - Calculated delay value +// -1 - Error +// +int WebRtc_DelayEstimatorProcessFloat(void* handle, + float* far_spectrum, + float* near_spectrum, + int spectrum_size, + int vad_value); + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFloat(...) should +// have been called before WebRtc_AlignedFarendFloat(...). Otherwise, you get +// the pointer to the previous frame. The memory is only valid until the +// next call of WebRtc_DelayEstimatorProcessFloat(...). +// +// Inputs: +// - handle : Pointer to the delay estimation instance +// - far_spectrum_size : Must equal the spectrum size of the instance +// +// Output: +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcessFloat(...)
+// +// Inputs: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -1 - Error +// +int WebRtc_last_delay_float(void* handle); + +// Returns 1 if the far end alignment is enabled and 0 otherwise. +// +// Input: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - alignment_enabled : 1 - Enabled +// 0 - Disabled +// -1 - Error +// +int WebRtc_is_alignment_enabled_float(void* handle); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ diff --git a/src/modules/audio_processing/utility/util.gypi b/src/modules/audio_processing/utility/util.gypi index 331de4b3d..c088e9858 100644 --- a/src/modules/audio_processing/utility/util.gypi +++ b/src/modules/audio_processing/utility/util.gypi @@ -11,16 +11,23 @@ { 'target_name': 'apm_util', 'type': '<(library)', + 'dependencies': [ + '<(webrtc_root)/common_audio/common_audio.gyp:spl', + ], 'direct_dependent_settings': { 'include_dirs': [ '.', ], }, 'sources': [ - 'ring_buffer.c', - 'ring_buffer.h', + 'delay_estimator_float.c', + 'delay_estimator_float.h', + 'delay_estimator.c', + 'delay_estimator.h', 'fft4g.c', 'fft4g.h', + 'ring_buffer.c', + 'ring_buffer.h', ], }, ],