This CL moves the fixed point delay estimator from aecm to apm/utility. It also adds a code change that makes it possible to enable/disable the far end alignment, so that we save complexity when the estimator is used only as a quality metric.

Review URL: http://webrtc-codereview.appspot.com/135014

git-svn-id: http://webrtc.googlecode.com/svn/trunk@599 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@google.com 2011-09-15 12:27:36 +00:00
parent 29fd9a5f30
commit b47d4b287d
10 changed files with 1160 additions and 748 deletions

View File

@ -28,8 +28,6 @@
'echo_control_mobile.c',
'aecm_core.c',
'aecm_core.h',
'aecm_delay_estimator.c',
'aecm_delay_estimator.h',
],
},
],

View File

@ -13,8 +13,8 @@
#include <assert.h>
#include <stdlib.h>
#include "aecm_delay_estimator.h"
#include "echo_control_mobile.h"
#include "delay_estimator.h"
#include "ring_buffer.h"
#include "typedefs.h"
@ -153,11 +153,13 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
return -1;
}
if (WebRtcAecm_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, MAX_DELAY) == -1)
{
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator,
PART_LEN1,
MAX_DELAY,
1) == -1) {
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
}
// Init some aecm pointers. 16 and 32 byte alignment is only necessary
@ -242,9 +244,8 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
aecm->seed = 666;
aecm->totCount = 0;
if (WebRtcAecm_InitDelayEstimator(aecm->delay_estimator) != 0)
{
return -1;
if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
return -1;
}
// Initialize to reasonable values
@ -339,7 +340,7 @@ int WebRtcAecm_FreeCore(AecmCore_t *aecm)
WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf);
WebRtcApm_FreeBuffer(aecm->outFrameBuf);
WebRtcAecm_FreeDelayEstimator(aecm->delay_estimator);
WebRtc_FreeDelayEstimator(aecm->delay_estimator);
free(aecm);
return 0;
@ -1161,6 +1162,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
WebRtc_Word16 supGain;
WebRtc_Word16 zeros32, zeros16;
WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf;
int far_q;
WebRtc_Word16 resolutionDiff, qDomainDiff;
const int kMinPrefBand = 4;
@ -1200,10 +1202,10 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
#endif
// Transform far end signal from time domain to frequency domain.
zerosXBuf = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
far_q = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
// Transform noisy near end signal from time domain to frequency domain.
zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy,
@ -1211,7 +1213,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaNoisy,
&dfaNoisySum);
aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
aecm->dfaNoisyQDomain = zerosDBufNoisy;
aecm->dfaNoisyQDomain = (WebRtc_Word16)zerosDBufNoisy;
if (nearendClean == NULL)
@ -1228,7 +1230,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaClean,
&dfaCleanSum);
aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
aecm->dfaCleanQDomain = zerosDBufClean;
aecm->dfaCleanQDomain = (WebRtc_Word16)zerosDBufClean;
}
#ifdef ARM_WINM_LOG_
@ -1243,12 +1245,12 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
// Get the delay
// Save far-end history and estimate delay
delay = WebRtcAecm_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
zerosXBuf,
aecm->currentVADValue);
delay = WebRtc_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
far_q,
aecm->currentVADValue);
if (delay < 0)
{
return -1;
@ -1272,16 +1274,21 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
QueryPerformanceCounter((LARGE_INTEGER*)&start);
#endif
// Get aligned far end spectrum
far_spectrum_ptr = WebRtcAecm_GetAlignedFarend(aecm->delay_estimator,
PART_LEN1,
&zerosXBuf);
far_spectrum_ptr = WebRtc_AlignedFarend(aecm->delay_estimator,
PART_LEN1,
&far_q);
zerosXBuf = (WebRtc_Word16) far_q;
if (far_spectrum_ptr == NULL)
{
return -1;
}
// Calculate log(energy) and update energy threshold levels
WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, echoEst32);
WebRtcAecm_CalcEnergies(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisySum,
echoEst32);
// Calculate stepsize
mu = WebRtcAecm_CalcStepSize(aecm);
@ -1923,4 +1930,3 @@ void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm)
}
#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))

View File

@ -178,7 +178,7 @@ typedef struct
WebRtc_Word16 farEnergyMaxMin;
WebRtc_Word16 farEnergyVAD;
WebRtc_Word16 farEnergyMSE;
WebRtc_Word16 currentVADValue;
int currentVADValue;
WebRtc_Word16 vadUpdateCount;
WebRtc_Word16 startupState;

View File

@ -1,604 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "aecm_delay_estimator.h"
#include <assert.h>
#include <stdlib.h>
#include "signal_processing_library.h"
#include "typedefs.h"
// State of one fixed point delay estimator instance. All buffers are allocated
// by WebRtcAecm_CreateDelayEstimator() and released by
// WebRtcAecm_FreeDelayEstimator().
typedef struct
{
// Pointers to mean values of spectrum and bit counts
// (|spectrum_size| entries for the two spectra, |history_size| entries for
// the bit counts)
WebRtc_Word32* mean_far_spectrum;
WebRtc_Word32* mean_near_spectrum;
WebRtc_Word32* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
WebRtc_Word32* bit_counts;
WebRtc_Word32* far_spectrum_32;
WebRtc_Word32* near_spectrum_32;
// Binary history variables
// (|history_size| binary far end spectra, the newest one at index 0)
WebRtc_UWord32* binary_far_history;
// Far end history variables
// |far_history| is a circular buffer of |history_size| spectra with
// |spectrum_size| values each, |far_history_position| is the entry that was
// written most recently and |far_q_domains| holds the Q-domain per entry.
WebRtc_UWord16* far_history;
int far_history_position;
WebRtc_Word16* far_q_domains;
// Delay histogram variables
// |vad_counter| counts consecutive blocks processed with vad_value == 1, up
// to the threshold used in WebRtcAecm_DelayEstimatorProcess().
WebRtc_Word16* delay_histogram;
WebRtc_Word16 vad_counter;
// Delay memory
// (index of the largest |delay_histogram| bin found by the latest update)
int last_delay;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed.
// |kBandLast| - |kBandFirst| must be < 32, so that one binary spectrum fits in
// a 32-bit word (asserted in WebRtcAecm_CreateDelayEstimator()).
static const int kBandFirst = 12;
static const int kBandLast = 43;
// Returns |in| with bit number |pos| set.
//
// Inputs:
//      - in    : Word to set the bit in
//      - pos   : Bit position, must be in [0, 31]
//
// Return:
//      - |in| with bit |pos| set
//
static __inline WebRtc_UWord32 SetBit(WebRtc_UWord32 in,
                                      WebRtc_Word32 pos)
{
    // Shift an unsigned one. The callers use |pos| up to 31, and shifting the
    // signed literal 1 into the sign bit is undefined behavior.
    return in | ((WebRtc_UWord32)1 << pos);
}
// Computes, for every row of |binary_matrix|, the number of bit positions in
// which the row differs from |binary_vector|. A smaller count therefore means
// a better match between the row and the vector.
//
// Input:
//      - binary_vector     : binary "vector" packed in one 32-bit word
//      - binary_matrix     : binary "matrix", one 32-bit word per row
//      - matrix_size       : number of rows in |binary_matrix|
// Output:
//      - bit_counts        : |matrix_size| entries; entry n is the number of
//                            set bits in (binary_vector ^ binary_matrix[n])
//
static void BitCountComparison(const WebRtc_UWord32 binary_vector,
                               const WebRtc_UWord32* binary_matrix,
                               int matrix_size,
                               WebRtc_Word32* bit_counts)
{
    int row;

    for (row = 0; row < matrix_size; row++)
    {
        // Bits set in |diff| mark the positions where the two words disagree.
        const WebRtc_UWord32 diff = binary_vector ^ binary_matrix[row];
        // Population count with octal masks: per 3-bit group first, then per
        // 6-bit group, and finally all groups folded into the lowest 6 bits.
        WebRtc_UWord32 count = diff - ((diff >> 1) & 033333333333)
                                    - ((diff >> 2) & 011111111111);

        count = (count + (count >> 3)) & 030707070707;
        count += (count >> 6);
        count = (count + (count >> 12) + (count >> 24)) & 077;

        bit_counts[row] = (WebRtc_Word32)count;
    }
}
// Builds a binary spectrum: one bit per band in [kBandFirst, kBandLast], set
// when the band of |spectrum| is strictly larger than the same band of
// |threshold_spectrum|. Band |kBandFirst| maps to bit 0.
//
// Input:
//      - spectrum              : Spectrum to binarize
//      - threshold_spectrum    : Per band threshold to compare against
// Return:
//      - Binary spectrum packed in one 32-bit word
//
static WebRtc_UWord32 GetBinarySpectrum(WebRtc_Word32* spectrum,
                                        WebRtc_Word32* threshold_spectrum)
{
    const int num_bands = kBandLast - kBandFirst + 1;
    WebRtc_UWord32 binary = 0;
    int bit;

    for (bit = 0; bit < num_bands; bit++)
    {
        const int band = kBandFirst + bit;

        if (spectrum[band] > threshold_spectrum[band])
        {
            binary = SetBit(binary, bit);
        }
    }

    return binary;
}
// Updates a recursive mean: the mean is moved towards |new_value| by the
// difference between the two scaled down by |factor| bits. The magnitude of
// the difference is what gets shifted, for positive and negative differences
// alike.
//
// Input:
//      - new_value     : new sample to include in the mean
//      - factor        : smoothing factor, used as a shift count
//
// Input/Output:
//      - mean_value    : pointer to the mean value that is updated in place
//
static void MeanEstimator(const WebRtc_Word32 new_value,
                          int factor,
                          WebRtc_Word32* mean_value)
{
    WebRtc_Word32 step = new_value - *mean_value;

    if (step >= 0)
    {
        step = WEBRTC_SPL_RSHIFT_W32(step, factor);
    }
    else
    {
        // Shift the magnitude and restore the sign afterwards.
        step = -WEBRTC_SPL_RSHIFT_W32(-step, factor);
    }

    *mean_value += step;
}
// Advances the circular far end history by one entry (wrapping to entry 0
// after the last one) and stores |far_spectrum| together with its Q-domain in
// the new entry.
//
// Input:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Far end spectrum, |self->spectrum_size| values
//      - far_q         : Q-domain of the far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
                             WebRtc_UWord16* far_spectrum,
                             WebRtc_Word16 far_q)
{
    int slot = self->far_history_position + 1;

    if (slot >= self->history_size)
    {
        slot = 0;
    }
    self->far_history_position = slot;

    // Store the Q-domain and the spectrum in the entry just selected.
    self->far_q_domains[slot] = far_q;
    memcpy(self->far_history + slot * self->spectrum_size,
           far_spectrum,
           sizeof(WebRtc_UWord16) * self->spectrum_size);
}
// Releases a delay estimator instance together with all of its buffers.
// Buffers that were never allocated (NULL members) are handled, so this is
// also used to clean up a partially created instance.
//
// Input:
//      - handle    : Pointer to the delay estimation instance
//
// Return value:
//      -  0 - OK
//      - -1 - |handle| is NULL
//
int WebRtcAecm_FreeDelayEstimator(void* handle)
{
    DelayEstimator_t* estimator = (DelayEstimator_t*)handle;

    if (estimator == NULL)
    {
        return -1;
    }

    // free() accepts NULL, so members that were never allocated need no
    // special treatment.
    free(estimator->mean_far_spectrum);
    free(estimator->mean_near_spectrum);
    free(estimator->mean_bit_counts);
    free(estimator->bit_counts);
    free(estimator->far_spectrum_32);
    free(estimator->near_spectrum_32);
    free(estimator->far_history);
    free(estimator->binary_far_history);
    free(estimator->far_q_domains);
    free(estimator->delay_histogram);

    free(estimator);

    return 0;
}
// Allocates a delay estimator instance and all of its buffers. The instance
// must be initialized with WebRtcAecm_InitDelayEstimator() before use.
//
// Input:
//      - spectrum_size : Number of values per spectrum; must be larger than
//                        |kBandLast| since band |kBandLast| is read
//      - history_size  : Number of far end blocks kept in the history; must
//                        be at least 1
//
// Output:
//      - handle        : The created instance on success, NULL on failure
//
// Return value:
//      -  0 - OK
//      - -1 - Invalid argument or out of memory
//
int WebRtcAecm_CreateDelayEstimator(void** handle,
                                    int spectrum_size,
                                    int history_size)
{
    DelayEstimator_t* self = NULL;
    size_t spectrum_len = 0;
    size_t history_len = 0;

    // Check if the sub band used in the delay estimation is small enough to
    // fit in a Word32.
    assert(kBandLast - kBandFirst < 32);

    if (handle == NULL)
    {
        return -1;
    }
    // Never leave a stale or dangling pointer with the caller: the handle is
    // only set once the instance is complete.
    *handle = NULL;

    // The binary spectrum reads index |kBandLast|, so |spectrum_size| has to
    // be strictly larger than that.
    if (spectrum_size <= kBandLast)
    {
        return -1;
    }
    // An empty history cannot hold even the current block.
    if (history_size <= 0)
    {
        return -1;
    }

    spectrum_len = (size_t)spectrum_size;
    history_len = (size_t)history_size;
    // Make sure the size of the far end history is representable.
    if (history_len > ((size_t)-1) / sizeof(WebRtc_UWord16) / spectrum_len)
    {
        return -1;
    }

    self = malloc(sizeof(*self));
    if (self == NULL)
    {
        return -1;
    }

    // Allocate memory for spectrum buffers
    self->mean_far_spectrum = malloc(spectrum_len * sizeof(WebRtc_Word32));
    self->mean_near_spectrum = malloc(spectrum_len * sizeof(WebRtc_Word32));
    self->far_spectrum_32 = malloc(spectrum_len * sizeof(WebRtc_Word32));
    self->near_spectrum_32 = malloc(spectrum_len * sizeof(WebRtc_Word32));
    self->mean_bit_counts = malloc(history_len * sizeof(WebRtc_Word32));
    self->bit_counts = malloc(history_len * sizeof(WebRtc_Word32));

    // Allocate memory for history buffers
    self->far_history = malloc(spectrum_len * history_len *
                               sizeof(WebRtc_UWord16));
    self->binary_far_history = malloc(history_len * sizeof(WebRtc_UWord32));
    self->far_q_domains = malloc(history_len * sizeof(WebRtc_Word16));
    self->delay_histogram = malloc(history_len * sizeof(WebRtc_Word16));

    if ((self->mean_far_spectrum == NULL) ||
        (self->mean_near_spectrum == NULL) ||
        (self->far_spectrum_32 == NULL) ||
        (self->near_spectrum_32 == NULL) ||
        (self->mean_bit_counts == NULL) ||
        (self->bit_counts == NULL) ||
        (self->far_history == NULL) ||
        (self->binary_far_history == NULL) ||
        (self->far_q_domains == NULL) ||
        (self->delay_histogram == NULL))
    {
        // Releases whatever was allocated; |*handle| stays NULL so the caller
        // cannot free the instance a second time.
        WebRtcAecm_FreeDelayEstimator(self);
        return -1;
    }

    self->spectrum_size = spectrum_size;
    self->history_size = history_size;

    *handle = self;

    return 0;
}
// Resets a created delay estimator instance: all spectrum, history and
// histogram buffers are zeroed, and the VAD counter and the last delay are set
// to zero.
//
// Input:
//      - handle    : Pointer to the delay estimation instance
//
// Return value:
//      -  0 - OK
//      - -1 - |handle| is NULL
//
int WebRtcAecm_InitDelayEstimator(void* handle)
{
    DelayEstimator_t* self = (DelayEstimator_t*)handle;
    size_t spectrum_bytes = 0;
    size_t history_bytes = 0;

    if (self == NULL)
    {
        return -1;
    }

    spectrum_bytes = sizeof(WebRtc_Word32) * self->spectrum_size;
    history_bytes = sizeof(WebRtc_Word32) * self->history_size;

    // Averaged far and near end spectra and their 32-bit work copies
    memset(self->mean_far_spectrum, 0, spectrum_bytes);
    memset(self->mean_near_spectrum, 0, spectrum_bytes);
    memset(self->far_spectrum_32, 0, spectrum_bytes);
    memset(self->near_spectrum_32, 0, spectrum_bytes);

    // Averaged and instantaneous bit counts
    memset(self->mean_bit_counts, 0, history_bytes);
    memset(self->bit_counts, 0, history_bytes);

    // Far end histories
    memset(self->binary_far_history,
           0,
           sizeof(WebRtc_UWord32) * self->history_size);
    memset(self->far_history,
           0,
           sizeof(WebRtc_UWord16) * self->spectrum_size * self->history_size);
    memset(self->far_q_domains,
           0,
           sizeof(WebRtc_Word16) * self->history_size);
    // One past the last entry, so that the first history update wraps to
    // entry 0.
    self->far_history_position = self->history_size;

    // Delay histogram
    memset(self->delay_histogram,
           0,
           sizeof(WebRtc_Word16) * self->history_size);

    self->vad_counter = 0;
    self->last_delay = 0;

    return 0;
}
// Processes one block: stores the far end spectrum in the history, updates the
// binary spectrum comparison and, while the far end is flagged active, the
// delay histogram from which the delay is picked.
//
// Input:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far end spectrum, |spectrum_size| values
//      - near_spectrum : Near end spectrum, |spectrum_size| values
//      - spectrum_size : Must equal the size given at creation
//      - far_q         : Q-domain of the far end spectrum, at most 15
//      - vad_value     : 1 when the block is flagged as active, any other
//                        value resets the activity counter
//
// Return value:
//      - >= 0 - The delay stored in |last_delay|
//      -   -1 - NULL instance, size mismatch or |far_q| > 15
//
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
WebRtc_UWord32 bxspectrum, byspectrum;
int i;
WebRtc_Word32 dtmp1;
WebRtc_Word16 maxHistLvl = 0;
WebRtc_Word16 minpos = -1;
// Number of consecutive active blocks required before the histogram is
// updated, and the level above which a histogram bin is no longer increased.
const int kVadCountThreshold = 25;
const int kMaxHistogram = 600;
if (self == NULL)
{
return -1;
}
if (spectrum_size != self->spectrum_size)
{
// Data sizes don't match
return -1;
}
if (far_q > 15)
{
// If far_Q is larger than 15 we can not guarantee no wrap around
return -1;
}
// Update far end history
UpdateFarHistory(self, far_spectrum, far_q);
// Update the far and near end means
// (smoothing shift of 6; the spectra are first widened to 32 bits)
for (i = 0; i < self->spectrum_size; i++)
{
self->far_spectrum_32[i] = (WebRtc_Word32)far_spectrum[i];
MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
self->near_spectrum_32[i] = (WebRtc_Word32)near_spectrum[i];
MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
}
// Shift binary spectrum history
// (every entry moves one step back, which frees index 0 for the new block)
memmove(&(self->binary_far_history[1]),
&(self->binary_far_history[0]),
(self->history_size - 1) * sizeof(WebRtc_UWord32));
// Get binary spectra
// (each spectrum is thresholded against its own running mean)
bxspectrum = GetBinarySpectrum(self->far_spectrum_32, self->mean_far_spectrum);
byspectrum = GetBinarySpectrum(self->near_spectrum_32, self->mean_near_spectrum);
// Insert new binary spectrum
self->binary_far_history[0] = bxspectrum;
// Compare with delayed spectra
// (|bit_counts[i]| becomes the number of bits in which the near end binary
// spectrum differs from the far end binary spectrum i blocks back)
BitCountComparison(byspectrum,
self->binary_far_history,
self->history_size,
self->bit_counts);
// Smooth bit count curve
for (i = 0; i < self->history_size; i++)
{
// Update sum
// |bit_counts| is constrained to [0, 32], meaning we can smooth with a
// factor up to 2^26. We use Q9.
dtmp1 = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9
MeanEstimator(dtmp1, 9, &(self->mean_bit_counts[i]));
}
// Find minimum position of bit count curve
// NOTE(review): presumably returns the index of the smallest element;
// confirm against the signal processing library.
minpos = WebRtcSpl_MinIndexW32(self->mean_bit_counts, self->history_size);
// If the farend has been active sufficiently long, begin accumulating a
// histogram of the minimum positions. Search for the maximum bin to
// determine the delay.
if (vad_value == 1)
{
if (self->vad_counter >= kVadCountThreshold)
{
// Increment the histogram at the current minimum position.
// (by 3; every positive bin is decremented by 1 in the loop below)
if (self->delay_histogram[minpos] < kMaxHistogram)
{
self->delay_histogram[minpos] += 3;
}
#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
// Decrement the entire histogram.
// Select the histogram index corresponding to the maximum bin as
// the delay.
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
if (self->delay_histogram[i] > 0)
{
self->delay_histogram[i]--;
}
if (self->delay_histogram[i] > maxHistLvl)
{
maxHistLvl = self->delay_histogram[i];
self->last_delay = i;
}
}
#else
// Variant of the loop above that keeps the bin in a local variable and only
// compares bins that were positive before the decrement.
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
WebRtc_Word16 tempVar = self->delay_histogram[i];
// Decrement the entire histogram.
if (tempVar > 0)
{
tempVar--;
self->delay_histogram[i] = tempVar;
// Select the histogram index corresponding to the maximum
// bin as the delay.
if (tempVar > maxHistLvl)
{
maxHistLvl = tempVar;
self->last_delay = i;
}
}
}
#endif
} else
{
// Still counting active blocks; the histogram and delay are left untouched.
self->vad_counter++;
}
} else
{
// Not flagged as active: restart the count of consecutive active blocks.
self->vad_counter = 0;
}
return self->last_delay;
}
// Returns the far end spectrum in the history that lies |last_delay| entries
// behind the most recently stored one, together with its Q-domain.
//
// Input:
//      - handle            : Pointer to the delay estimation instance
//      - far_spectrum_size : Spectrum size expected by the caller; must equal
//                            the size given at creation
//
// Output:
//      - far_q             : Q-domain of the returned spectrum
//
// Return value:
//      - Pointer into the far end history (owned by the instance), or NULL if
//        |handle| or |far_q| is NULL or the sizes do not match
//
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
                                                  int far_spectrum_size,
                                                  WebRtc_Word16* far_q)
{
    DelayEstimator_t* self = (DelayEstimator_t*)handle;
    int buffer_position = 0;

    if ((self == NULL) || (far_q == NULL))
    {
        return NULL;
    }
    if (far_spectrum_size != self->spectrum_size)
    {
        return NULL;
    }

    // Get buffer position
    buffer_position = self->far_history_position - self->last_delay;
    if (buffer_position < 0)
    {
        buffer_position += self->history_size;
    }
    else if (buffer_position >= self->history_size)
    {
        // Right after initialization |far_history_position| equals
        // |history_size|. Wrap around instead of reading one entry past the
        // end of the history buffers.
        buffer_position -= self->history_size;
    }

    // Get Q-domain
    *far_q = self->far_q_domains[buffer_position];

    // Return far end spectrum
    return (self->far_history + (buffer_position * self->spectrum_size));
}
// Returns the delay stored by the latest histogram update of the instance.
//
// Input:
//      - handle    : Pointer to the delay estimation instance
//
// Return value:
//      - >= 0 - Last calculated delay value
//      -   -1 - |handle| is NULL
//
int WebRtcAecm_GetLastDelay(void* handle)
{
    const DelayEstimator_t* estimator = (const DelayEstimator_t*)handle;

    return (estimator != NULL) ? estimator->last_delay : -1;
}

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#include "typedefs.h"
// Releases the memory allocated by WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtcAecm_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtcAecm_InitDelayEstimator(...) function.
//
// Input:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and near
// end. Used to allocate memory for spectrum specific
// buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
//
// Output:
// - handle : Created instance
//
int WebRtcAecm_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size);
// Initializes the delay estimation instance created with
// WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtcAecm_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Input:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtcAecm_DelayEstimatorProcess(...) should
// have been called before WebRtcAecm_GetAlignedFarend(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the next
// call of WebRtcAecm_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Spectrum size expected by the caller; must match the
// size of the instance, otherwise NULL is returned
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
int far_spectrum_size,
WebRtc_Word16* far_q);
// Returns the last calculated delay updated by the function
// WebRtcAecm_DelayEstimatorProcess(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtcAecm_GetLastDelay(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_

View File

@ -0,0 +1,550 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "signal_processing_library.h"
// State of one fixed point delay estimator instance. All buffers are allocated
// by WebRtc_CreateDelayEstimator() and released by
// WebRtc_FreeDelayEstimator().
typedef struct {
// Pointers to mean values of spectrum and bit counts
// (|spectrum_size| entries for the two spectra, |history_size| entries for
// the bit counts)
int32_t* mean_far_spectrum;
int32_t* mean_near_spectrum;
int32_t* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
int32_t* bit_counts;
int32_t* far_spectrum_32;
int32_t* near_spectrum_32;
// Binary history variables
// (|history_size| binary far end spectra, the newest one at index 0)
uint32_t* binary_far_history;
// Far end history variables
// Only allocated when |alignment_enabled| is set. |far_history| is a circular
// buffer of |history_size| spectra with |spectrum_size| values each,
// |far_history_pos| is the entry that was written most recently and
// |far_q_domains| holds the Q-domain per entry.
uint16_t* far_history;
int far_history_pos;
int* far_q_domains;
// Delay histogram variables
int* delay_histogram;
int vad_counter;
// Delay memory
int last_delay;
// Used to enable far end alignment. If it is disabled, only delay values are
// produced
int alignment_enabled;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed.
// |kBandLast| - |kBandFirst| must be < 32, so that one binary spectrum fits in
// a uint32_t (asserted in WebRtc_CreateDelayEstimator()).
static const int kBandFirst = 12;
static const int kBandLast = 43;
// Returns |in| with bit number |pos| set. |pos| must be in [0, 31].
static __inline uint32_t SetBit(uint32_t in, int32_t pos) {
  // Shift an unsigned one. The callers use |pos| up to 31, and shifting the
  // signed literal 1 into the sign bit is undefined behavior.
  return in | ((uint32_t) 1 << pos);
}
// Computes, for every row of |binary_matrix|, the number of bit positions in
// which the row differs from |binary_vector|. A smaller count therefore means
// a better match between the row and the vector.
//
// Inputs:
//      - binary_vector     : binary "vector" packed in one 32-bit word
//      - binary_matrix     : binary "matrix", one 32-bit word per row
//      - matrix_size       : number of rows in |binary_matrix|
//
// Output:
//      - bit_counts        : |matrix_size| entries; entry n is the number of
//                            set bits in (binary_vector ^ binary_matrix[n])
//
static void BitCountComparison(uint32_t binary_vector,
                               const uint32_t* binary_matrix,
                               int matrix_size,
                               int32_t* bit_counts) {
  int row;

  for (row = 0; row < matrix_size; row++) {
    // Bits set in |diff| mark the positions where the two words disagree.
    const uint32_t diff = binary_vector ^ binary_matrix[row];
    // Population count with octal masks: per 3-bit group first, then per
    // 6-bit group, and finally all groups folded into the lowest 6 bits.
    uint32_t count = diff - ((diff >> 1) & 033333333333)
                          - ((diff >> 2) & 011111111111);

    count = (count + (count >> 3)) & 030707070707;
    count += (count >> 6);
    count = (count + (count >> 12) + (count >> 24)) & 077;

    bit_counts[row] = (int32_t) count;
  }
}
// Builds a binary spectrum: one bit per band in [kBandFirst, kBandLast], set
// when the band of |spectrum| is strictly larger than the same band of
// |threshold_spectrum|. Band |kBandFirst| maps to bit 0.
//
// Inputs:
//      - spectrum              : Spectrum to binarize
//      - threshold_spectrum    : Per band threshold to compare against
// Return:
//      - Binary spectrum packed in one 32-bit word
//
static uint32_t BinarySpectrum(int32_t* spectrum, int32_t* threshold_spectrum) {
  const int num_bands = kBandLast - kBandFirst + 1;
  uint32_t binary = 0;
  int bit;

  for (bit = 0; bit < num_bands; bit++) {
    const int band = kBandFirst + bit;

    if (spectrum[band] > threshold_spectrum[band]) {
      binary = SetBit(binary, bit);
    }
  }

  return binary;
}
// Updates a recursive mean: the mean is moved towards |new_value| by the
// difference between the two scaled down by |factor| bits. The magnitude of
// the difference is what gets shifted, for positive and negative differences
// alike.
//
// Inputs:
//      - new_value     : new sample to include in the mean
//      - factor        : smoothing factor, used as a shift count
//
// Input/Output:
//      - mean_value    : pointer to the mean value that is updated in place
//
static void MeanEstimator(const int32_t new_value,
                          int factor,
                          int32_t* mean_value) {
  int32_t step = new_value - *mean_value;

  if (step >= 0) {
    step = WEBRTC_SPL_RSHIFT_W32(step, factor);
  } else {
    // Shift the magnitude and restore the sign afterwards.
    step = -WEBRTC_SPL_RSHIFT_W32(-step, factor);
  }

  *mean_value += step;
}
// Advances the circular far end history by one entry (wrapping to entry 0
// after the last one) and stores |far_spectrum| together with its Q-domain in
// the new entry.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Far end spectrum, |self->spectrum_size| values
//      - far_q         : Q-domain of the far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
                             uint16_t* far_spectrum,
                             int far_q) {
  int slot = self->far_history_pos + 1;

  if (slot >= self->history_size) {
    slot = 0;
  }
  self->far_history_pos = slot;

  // Store the Q-domain and the spectrum in the entry just selected.
  self->far_q_domains[slot] = far_q;
  memcpy(self->far_history + slot * self->spectrum_size,
         far_spectrum,
         sizeof(uint16_t) * self->spectrum_size);
}
// Releases a delay estimator instance together with all of its buffers.
// Buffers that were never allocated (NULL members, e.g. the far end history
// when alignment is disabled) are handled, so this is also used to clean up a
// partially created instance.
//
// Input:
//      - handle    : Pointer to the delay estimation instance
//
// Return value:
//      -  0 - OK
//      - -1 - |handle| is NULL
//
int WebRtc_FreeDelayEstimator(void* handle) {
  DelayEstimator_t* estimator = (DelayEstimator_t*) handle;

  if (estimator == NULL) {
    return -1;
  }

  // free() accepts NULL, so members that were never allocated need no special
  // treatment.
  free(estimator->mean_far_spectrum);
  free(estimator->mean_near_spectrum);
  free(estimator->mean_bit_counts);
  free(estimator->bit_counts);
  free(estimator->far_spectrum_32);
  free(estimator->near_spectrum_32);
  free(estimator->binary_far_history);
  free(estimator->far_history);
  free(estimator->far_q_domains);
  free(estimator->delay_histogram);

  free(estimator);

  return 0;
}
// Allocates a delay estimator instance and all of its buffers. The instance
// must be initialized with WebRtc_InitDelayEstimator() before use.
//
// Inputs:
//      - spectrum_size     : Number of values per spectrum; must be larger
//                            than |kBandLast| since band |kBandLast| is read
//      - history_size      : Number of far end blocks kept in the history;
//                            must be at least 1
//      - enable_alignment  : 1 to also keep the far end spectra (needed for
//                            far end alignment), 0 to only produce delays
//
// Output:
//      - handle            : The created instance on success, NULL on failure
//
// Return value:
//      -  0 - OK
//      - -1 - Invalid argument or out of memory
//
int WebRtc_CreateDelayEstimator(void** handle,
                                int spectrum_size,
                                int history_size,
                                int enable_alignment) {
  DelayEstimator_t* self = NULL;
  size_t spectrum_len = 0;
  size_t history_len = 0;

  // Check if the sub band used in the delay estimation is small enough to
  // fit the binary spectra in a uint32.
  assert(kBandLast - kBandFirst < 32);

  if (handle == NULL) {
    return -1;
  }
  // Never leave a stale or dangling pointer with the caller: the handle is
  // only set once the instance is complete.
  *handle = NULL;

  // The binary spectrum reads index |kBandLast|, so |spectrum_size| has to be
  // strictly larger than that.
  if (spectrum_size <= kBandLast) {
    return -1;
  }
  // An empty history cannot hold even the current block.
  if (history_size <= 0) {
    return -1;
  }
  if ((enable_alignment != 0) && (enable_alignment != 1)) {
    return -1;
  }

  spectrum_len = (size_t) spectrum_size;
  history_len = (size_t) history_size;

  self = malloc(sizeof(*self));
  if (self == NULL) {
    return -1;
  }

  // Allocate memory for spectrum buffers
  self->mean_far_spectrum = malloc(spectrum_len * sizeof(int32_t));
  self->mean_near_spectrum = malloc(spectrum_len * sizeof(int32_t));
  self->far_spectrum_32 = malloc(spectrum_len * sizeof(int32_t));
  self->near_spectrum_32 = malloc(spectrum_len * sizeof(int32_t));
  self->mean_bit_counts = malloc(history_len * sizeof(int32_t));
  self->bit_counts = malloc(history_len * sizeof(int32_t));

  // Allocate memory for history buffers
  self->binary_far_history = malloc(history_len * sizeof(uint32_t));
  self->delay_histogram = malloc(history_len * sizeof(int));

  // The far end history is only needed for alignment.
  self->far_history = NULL;
  self->far_q_domains = NULL;
  if (enable_alignment &&
      (history_len <= ((size_t) -1) / sizeof(uint16_t) / spectrum_len)) {
    // The size is computed in size_t and known to be representable here. If
    // it is not, the pointers stay NULL and creation fails below.
    self->far_history = malloc(spectrum_len * history_len * sizeof(uint16_t));
    self->far_q_domains = malloc(history_len * sizeof(int));
  }

  if ((self->mean_far_spectrum == NULL) ||
      (self->mean_near_spectrum == NULL) ||
      (self->far_spectrum_32 == NULL) ||
      (self->near_spectrum_32 == NULL) ||
      (self->mean_bit_counts == NULL) ||
      (self->bit_counts == NULL) ||
      (self->binary_far_history == NULL) ||
      (self->delay_histogram == NULL) ||
      (enable_alignment &&
       ((self->far_history == NULL) || (self->far_q_domains == NULL)))) {
    // Releases whatever was allocated; |*handle| stays NULL so the caller
    // cannot free the instance a second time.
    WebRtc_FreeDelayEstimator(self);
    return -1;
  }

  self->spectrum_size = spectrum_size;
  self->history_size = history_size;
  self->alignment_enabled = enable_alignment;

  *handle = self;

  return 0;
}
// Resets a created delay estimator instance: all spectrum, history and
// histogram buffers are zeroed, and the VAD counter and the last delay are set
// to zero. The far end history is only touched when alignment is enabled.
//
// Input:
//      - handle    : Pointer to the delay estimation instance
//
// Return value:
//      -  0 - OK
//      - -1 - |handle| is NULL
//
int WebRtc_InitDelayEstimator(void* handle) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  size_t spectrum_bytes = 0;
  size_t history_bytes = 0;

  if (self == NULL) {
    return -1;
  }

  spectrum_bytes = sizeof(int32_t) * self->spectrum_size;
  history_bytes = sizeof(int32_t) * self->history_size;

  // Averaged far and near end spectra and their 32-bit work copies
  memset(self->mean_far_spectrum, 0, spectrum_bytes);
  memset(self->mean_near_spectrum, 0, spectrum_bytes);
  memset(self->far_spectrum_32, 0, spectrum_bytes);
  memset(self->near_spectrum_32, 0, spectrum_bytes);

  // Averaged and instantaneous bit counts
  memset(self->mean_bit_counts, 0, history_bytes);
  memset(self->bit_counts, 0, history_bytes);

  // Far end histories
  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
  if (self->alignment_enabled) {
    memset(self->far_history,
           0,
           sizeof(uint16_t) * self->spectrum_size * self->history_size);
    memset(self->far_q_domains, 0, sizeof(int) * self->history_size);
    // One past the last entry, so that the first history update wraps to
    // entry 0.
    self->far_history_pos = self->history_size;
  }

  // Delay histogram
  memset(self->delay_histogram, 0, sizeof(int) * self->history_size);

  self->vad_counter = 0;
  self->last_delay = 0;

  return 0;
}
int WebRtc_DelayEstimatorProcess(void* handle,
uint16_t* far_spectrum,
uint16_t* near_spectrum,
int spectrum_size,
int far_q,
int vad_value) {
DelayEstimator_t* self = (DelayEstimator_t*) handle;
const int kVadCountThreshold = 25;
const int kMaxHistogram = 600;
int histogram_bin = 0;
int i = 0;
int max_histogram_level = 0;
int min_position = -1;
uint32_t binary_far_spectrum = 0;
uint32_t binary_near_spectrum = 0;
int32_t bit_counts_tmp = 0;
if (self == NULL) {
return -1;
}
if (spectrum_size != self->spectrum_size) {
// Data sizes don't match
return -1;
}
if (far_q > 15) {
// If |far_q| is larger than 15 we cannot guarantee no wrap around
return -1;
}
if (self->alignment_enabled) {
// Update far end history
UpdateFarHistory(self, far_spectrum, far_q);
} // Update the far and near end means
for (i = 0; i < self->spectrum_size; i++) {
self->far_spectrum_32[i] = (int32_t) far_spectrum[i];
MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
self->near_spectrum_32[i] = (int32_t) near_spectrum[i];
MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
}
// Shift binary spectrum history
memmove(&(self->binary_far_history[1]), &(self->binary_far_history[0]),
(self->history_size - 1) * sizeof(uint32_t));
// Get binary spectra
binary_far_spectrum = BinarySpectrum(self->far_spectrum_32,
self->mean_far_spectrum);
binary_near_spectrum = BinarySpectrum(self->near_spectrum_32,
self->mean_near_spectrum);
// Insert new binary spectrum
self->binary_far_history[0] = binary_far_spectrum;
// Compare with delayed spectra
BitCountComparison(binary_near_spectrum,
self->binary_far_history,
self->history_size,
self->bit_counts);
// Smooth bit count curve
for (i = 0; i < self->history_size; i++) {
// Update sum
// |bit_counts| is constrained to [0, 32], meaning we can smooth with a
// factor up to 2^26. We use Q9.
bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9
MeanEstimator(bit_counts_tmp, 9, &(self->mean_bit_counts[i]));
}
// Find minimum position of bit count curve
min_position = (int) WebRtcSpl_MinIndexW32(self->mean_bit_counts,
(int16_t) self->history_size);
// If the far end has been active sufficiently long, begin accumulating a
// histogram of the minimum positions. Search for the maximum bin to
// determine the delay.
if (vad_value == 1) {
if (self->vad_counter >= kVadCountThreshold) {
// Increment the histogram at the current minimum position.
if (self->delay_histogram[min_position] < kMaxHistogram) {
self->delay_histogram[min_position] += 3;
}
self->last_delay = 0;
for (i = 0; i < self->history_size; i++) {
histogram_bin = self->delay_histogram[i];
// Decrement the histogram bin.
if (histogram_bin > 0) {
histogram_bin--;
self->delay_histogram[i] = histogram_bin;
// Select the histogram index corresponding to the maximum bin as the
// delay.
if (histogram_bin > max_histogram_level) {
max_histogram_level = histogram_bin;
self->last_delay = i;
}
}
}
} else {
self->vad_counter++;
}
} else {
self->vad_counter = 0;
}
return self->last_delay;
}
// Returns the stored far end spectrum that lies |last_delay| entries behind
// the most recently written history entry, together with its Q-domain.
//
// Inputs:
//      - handle            : Pointer to the delay estimation instance
//      - far_spectrum_size : Must equal the spectrum size of the instance
//
// Output:
//      - far_q             : Q-domain stored with the returned spectrum
//
// Return value:
//      - far_spectrum      : Pointer into the far end history
//                            NULL - Error (NULL argument, size mismatch or
//                                   alignment disabled)
const uint16_t* WebRtc_AlignedFarend(void* handle,
                                     int far_spectrum_size,
                                     int* far_q) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  int buffer_position = 0;

  if (self == NULL) {
    return NULL;
  }
  if (far_q == NULL) {
    return NULL;
  }
  if (far_spectrum_size != self->spectrum_size) {
    return NULL;
  }
  if (self->alignment_enabled == 0) {
    return NULL;
  }

  // Get buffer position
  buffer_position = self->far_history_pos - self->last_delay;
  if (buffer_position < 0) {
    buffer_position += self->history_size;
  } else if (buffer_position >= self->history_size) {
    // WebRtc_InitDelayEstimator(...) leaves |far_history_pos| equal to
    // |history_size|, so a call made before the first block is processed
    // would otherwise index one entry past the end of both history buffers.
    buffer_position -= self->history_size;
  }

  // Get Q-domain
  *far_q = self->far_q_domains[buffer_position];

  // Return far end spectrum
  return (self->far_history + (buffer_position * far_spectrum_size));
}
// Returns the delay value currently stored in the instance, or -1 if
// |handle| is NULL.
int WebRtc_last_delay(void* handle) {
  const DelayEstimator_t* estimator = (const DelayEstimator_t*) handle;

  return (estimator != NULL) ? estimator->last_delay : -1;
}
// Returns the history size the instance was created with, or -1 if |handle|
// is NULL.
int WebRtc_history_size(void* handle) {
  const DelayEstimator_t* estimator = (const DelayEstimator_t*) handle;

  return (estimator != NULL) ? estimator->history_size : -1;
}
// Returns the spectrum size the instance was created with, or -1 if |handle|
// is NULL.
int WebRtc_spectrum_size(void* handle) {
  const DelayEstimator_t* estimator = (const DelayEstimator_t*) handle;

  return (estimator != NULL) ? estimator->spectrum_size : -1;
}
// Returns the alignment flag the instance was created with, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled(void* handle) {
  const DelayEstimator_t* estimator = (const DelayEstimator_t*) handle;

  return (estimator != NULL) ? estimator->alignment_enabled : -1;
}

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#include <stdint.h>
// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimator(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarend(...). Otherwise, only delay
// values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcess(void* handle,
uint16_t* far_spectrum,
uint16_t* near_spectrum,
int spectrum_size,
int far_q,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcess(...) should have been
// called before WebRtc_AlignedFarend(...). Otherwise, you get the pointer to
// the previous frame. The memory is only valid until the next call of
// WebRtc_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Expected size of the returned spectrum. Must equal
// the spectrum size the instance was created with,
// otherwise NULL is returned.
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const uint16_t* WebRtc_AlignedFarend(void* handle,
int far_spectrum_size,
int* far_q);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcess(...)
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay(void* handle);
// Returns the history size used in the far end buffers to calculate the delay
// over.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - history_size : > 0 - Far end history size
// -1 - Error
//
int WebRtc_history_size(void* handle);
// Returns the fixed spectrum size used in the algorithm.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - spectrum_size : > 0 - Spectrum size
// -1 - Error
//
int WebRtc_spectrum_size(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_

View File

@ -0,0 +1,288 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator_float.h"
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "delay_estimator.h"
#include "signal_processing_library.h"
// State of the floating point delay estimator. It wraps a fixed point
// estimator and keeps the scratch buffers needed to feed it.
typedef struct {
  // Fixed point spectra: scratch buffers of spectrum_size values each that
  // receive the scaled uint16_t copies of the float input spectra.
  uint16_t* far_spectrum_u16;
  uint16_t* near_spectrum_u16;
  // Far end history variables
  // Circular buffer of spectrum_size * history_size floats. Only allocated
  // when alignment is enabled; NULL otherwise.
  float* far_history;
  // Index of the most recently written entry in |far_history|. Set to
  // history_size by WebRtc_InitDelayEstimatorFloat(...) when alignment is
  // enabled.
  int far_history_pos;
  // Fixed point delay estimator
  // Handle created with WebRtc_CreateDelayEstimator(...).
  void* fixed_handle;
} DelayEstimatorFloat_t;
// Advances the circular far end history by one entry, wrapping to the start
// when the end is reached, and copies the new far end spectrum into that
// entry. Only used when alignment is enabled.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Pointer to the far end spectrum
//
static void UpdateFarHistory(DelayEstimatorFloat_t* self, float* far_spectrum) {
  const int entry_size = WebRtc_spectrum_size(self->fixed_handle);
  const int num_entries = WebRtc_history_size(self->fixed_handle);
  int write_pos = self->far_history_pos + 1;

  // Wrap around at the end of the buffer
  if (write_pos >= num_entries) {
    write_pos = 0;
  }
  self->far_history_pos = write_pos;

  // Store the new spectrum in the selected entry
  memcpy(self->far_history + write_pos * entry_size,
         far_spectrum,
         sizeof(float) * entry_size);
}
// Releases a floating point delay estimator: its buffers, the wrapped fixed
// point estimator and the instance itself.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value:
//      0 on success, -1 if |handle| is NULL.
int WebRtc_FreeDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* estimator = (DelayEstimatorFloat_t*) handle;

  if (estimator == NULL) {
    return -1;
  }

  // free(NULL) is a no-op, so buffers that were never allocated need no guard.
  free(estimator->far_history);
  free(estimator->far_spectrum_u16);
  free(estimator->near_spectrum_u16);

  WebRtc_FreeDelayEstimator(estimator->fixed_handle);

  free(estimator);

  return 0;
}
int WebRtc_CreateDelayEstimatorFloat(void** handle,
int spectrum_size,
int history_size,
int enable_alignment) {
DelayEstimatorFloat_t *self = NULL;
if ((enable_alignment != 0) && (enable_alignment != 1)) {
return -1;
}
self = malloc(sizeof(DelayEstimatorFloat_t));
*handle = self;
if (self == NULL) {
return -1;
}
self->far_history = NULL;
self->far_spectrum_u16 = NULL;
self->near_spectrum_u16 = NULL;
// Create fixed point core delay estimator
if (WebRtc_CreateDelayEstimator(&self->fixed_handle,
spectrum_size,
history_size,
enable_alignment) != 0) {
WebRtc_FreeDelayEstimatorFloat(self);
self = NULL;
return -1;
}
// Allocate memory for far history buffer
if (enable_alignment) {
self->far_history = malloc(spectrum_size * history_size * sizeof(float));
if (self->far_history == NULL) {
WebRtc_FreeDelayEstimatorFloat(self);
self = NULL;
return -1;
}
}
// Allocate memory for fixed point spectra
self->far_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
if (self->far_spectrum_u16 == NULL) {
WebRtc_FreeDelayEstimatorFloat(self);
self = NULL;
return -1;
}
self->near_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
if (self->near_spectrum_u16 == NULL) {
WebRtc_FreeDelayEstimatorFloat(self);
self = NULL;
return -1;
}
return 0;
}
// Initializes a floating point delay estimator: the wrapped fixed point
// estimator is initialized first, then the fixed point scratch spectra and,
// when alignment is enabled, the float far end history are zeroed.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value:
//      0 on success, -1 if |handle| is NULL or the fixed point estimator
//      fails to initialize.
int WebRtc_InitDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int num_entries = 0;
  int entry_size = 0;

  if (self == NULL) {
    return -1;
  }
  if (WebRtc_InitDelayEstimator(self->fixed_handle) != 0) {
    return -1;
  }

  num_entries = WebRtc_history_size(self->fixed_handle);
  entry_size = WebRtc_spectrum_size(self->fixed_handle);

  // Set fixed point spectra to zero
  memset(self->near_spectrum_u16, 0, sizeof(uint16_t) * entry_size);
  memset(self->far_spectrum_u16, 0, sizeof(uint16_t) * entry_size);

  // The far end history only exists when alignment is enabled
  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    memset(self->far_history, 0, sizeof(float) * entry_size * num_entries);
    self->far_history_pos = num_entries;
  }

  return 0;
}
// Converts a float spectrum to uint16_t using a power-of-two scaling derived
// from its largest value, and returns the exponent (Q-domain) that was used.
//
// TODO(bjornv): I've taken the size of FFT into account, since there is a
// different scaling in float vs fixed point FFTs. I'm not completely sure
// this is necessary.
static int FloatSpectrumToFixed(const float* spectrum,
                                int spectrum_size,
                                float fft_size,
                                uint16_t* fixed_spectrum) {
  float max_value = 0.0f;
  float freq_scaling = 0;
  int freq_scaling_log = 0;
  int i = 0;

  // 1) Find largest value
  for (i = 0; i < spectrum_size; ++i) {
    if (spectrum[i] > max_value) {
      max_value = spectrum[i];
    }
  }

  // 2) Find the largest power-of-two scaling for which the largest value
  //    still fits in a Word16.
  freq_scaling_log = 14 - (int) log2(max_value / fft_size + 1);
  // ldexpf() gives exactly 2^freq_scaling_log, matching the previous
  // (1 << freq_scaling_log) for non-negative exponents. It stays well defined
  // when a large input drives the exponent negative, where shifting by a
  // negative count is undefined behavior.
  freq_scaling = ldexpf(1.0f, freq_scaling_log) / fft_size;

  for (i = 0; i < spectrum_size; ++i) {
    fixed_spectrum[i] = (uint16_t) (spectrum[i] * freq_scaling);
  }

  return freq_scaling_log;
}

// Estimates the delay from one block of float far end and near end spectra.
// Both spectra are converted to fixed point and handed to the wrapped fixed
// point estimator; with alignment enabled the float far end spectrum is also
// stored in the far end history.
//
// Inputs:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far end spectrum, |spectrum_size| values
//      - near_spectrum : Near end spectrum, |spectrum_size| values
//      - spectrum_size : Must equal the spectrum size of the instance
//      - vad_value     : VAD decision of the current block
//
// Return value:
//      - delay         : >= 0 - Calculated delay value
//                          -1 - Error
int WebRtc_DelayEstimatorProcessFloat(void* handle,
                                      float* far_spectrum,
                                      float* near_spectrum,
                                      int spectrum_size,
                                      int vad_value) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  const float kFftSize = (float) (2 * (spectrum_size - 1));
  int far_q = 0;

  if (self == NULL) {
    return -1;
  }
  if (far_spectrum == NULL) {
    // Empty far end spectrum
    return -1;
  }
  if (near_spectrum == NULL) {
    // Empty near end spectrum
    return -1;
  }
  if (spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) {
    // Data sizes don't match
    return -1;
  }

  // Convert floating point spectra to fixed point. Only the Q-domain of the
  // far end is passed on to the fixed point estimator.
  (void) FloatSpectrumToFixed(near_spectrum,
                              spectrum_size,
                              kFftSize,
                              self->near_spectrum_u16);
  far_q = FloatSpectrumToFixed(far_spectrum,
                               spectrum_size,
                               kFftSize,
                               self->far_spectrum_u16);
  assert(far_q < 16);  // Catch too large scaling, which should never be able to
                       // occur.

  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    // Update far end history
    UpdateFarHistory(self, far_spectrum);
  }

  return WebRtc_DelayEstimatorProcess(self->fixed_handle,
                                      self->far_spectrum_u16,
                                      self->near_spectrum_u16,
                                      spectrum_size,
                                      far_q,
                                      vad_value);
}
// Returns the stored float far end spectrum that lies |last delay| entries
// behind the most recently written history entry.
//
// Inputs:
//      - handle            : Pointer to the delay estimation instance
//      - far_spectrum_size : Must equal the spectrum size of the instance
//
// Return value:
//      - far_spectrum      : Pointer into the far end history
//                            NULL - Error (NULL handle, size mismatch or
//                                   alignment disabled)
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int buffer_pos = 0;
  int history_size = 0;

  if (self == NULL) {
    return NULL;
  }
  if (far_spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) {
    return NULL;
  }
  if (WebRtc_is_alignment_enabled(self->fixed_handle) != 1) {
    return NULL;
  }

  history_size = WebRtc_history_size(self->fixed_handle);

  // Get buffer position
  buffer_pos = self->far_history_pos - WebRtc_last_delay(self->fixed_handle);
  if (buffer_pos < 0) {
    buffer_pos += history_size;
  } else if (buffer_pos >= history_size) {
    // WebRtc_InitDelayEstimatorFloat(...) leaves |far_history_pos| equal to
    // the history size, so a call made before the first block is processed
    // would otherwise return a pointer one entry past the end of the buffer.
    buffer_pos -= history_size;
  }

  // Return pointer to far end spectrum
  return (self->far_history + (buffer_pos * far_spectrum_size));
}
// Returns the last delay reported by the wrapped fixed point estimator, or
// -1 if |handle| is NULL.
int WebRtc_last_delay_float(void* handle) {
  DelayEstimatorFloat_t* estimator = (DelayEstimatorFloat_t*) handle;

  return (estimator != NULL) ? WebRtc_last_delay(estimator->fixed_handle) : -1;
}
// Returns the alignment flag of the wrapped fixed point estimator, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled_float(void* handle) {
  DelayEstimatorFloat_t* estimator = (DelayEstimatorFloat_t*) handle;

  return (estimator != NULL)
      ? WebRtc_is_alignment_enabled(estimator->fixed_handle)
      : -1;
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
// Releases the memory allocated by WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimatorFloat(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimatorFloat(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarendFloat(...). Otherwise, only
// delay values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimatorFloat(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimatorFloat(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
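// - vad_value : The VAD decision of the current block
//
// Note: there is no far_q argument. The Q-domain of the far end data is
// derived internally when the float spectrum is converted to fixed point.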
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcessFloat(void* handle,
float* far_spectrum,
float* near_spectrum,
int spectrum_size,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcessFloat(...) should
// have been called before WebRtc_AlignedFarendFloat(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the
// next call of WebRtc_DelayEstimatorProcessFloat(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Expected size of the returned spectrum. Must equal
// the spectrum size the instance was created with,
// otherwise NULL is returned.
//
// Output: None
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcessFloat(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay_float(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled_float(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_

View File

@ -11,16 +11,23 @@
{
'target_name': 'apm_util',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'direct_dependent_settings': {
'include_dirs': [
'.',
],
},
'sources': [
'ring_buffer.c',
'ring_buffer.h',
'delay_estimator_float.c',
'delay_estimator_float.h',
'delay_estimator.c',
'delay_estimator.h',
'fft4g.c',
'fft4g.h',
'ring_buffer.c',
'ring_buffer.h',
],
},
],