Refactor vad_filterbank: Local functions made static.
Review URL: http://webrtc-codereview.appspot.com/342002 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1357 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -19,26 +19,32 @@
|
||||
#include "typedefs.h"
|
||||
#include "vad_defines.h"
|
||||
|
||||
// Constant 160*log10(2) in Q9
|
||||
// Constant 160*log10(2) in Q9.
|
||||
static const int16_t kLogConst = 24660;
|
||||
|
||||
// Coefficients used by WebRtcVad_HpOutput, Q14
|
||||
// Coefficients used by HighPassFilter, Q14.
|
||||
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
|
||||
static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };
|
||||
|
||||
// Allpass filter coefficients, upper and lower, in Q15
|
||||
// Allpass filter coefficients, upper and lower, in Q15.
|
||||
// Upper: 0.64, Lower: 0.17
|
||||
static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
|
||||
|
||||
// Adjustment for division with two in WebRtcVad_SplitFilter
|
||||
// Adjustment for division with two in SplitFilter.
|
||||
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
|
||||
|
||||
void WebRtcVad_HpOutput(int16_t* in_vector,
|
||||
int in_vector_length,
|
||||
int16_t* filter_state,
|
||||
int16_t* out_vector) {
|
||||
// High pass filtering, with a cut-off frequency at 80 Hz, if the |in_vector| is
|
||||
// sampled at 500 Hz.
|
||||
//
|
||||
// - in_vector [i] : Input audio data sampled at 500 Hz.
|
||||
// - in_vector_length [i] : Length of input and output data.
|
||||
// - filter_state [i/o] : State of the filter.
|
||||
// - out_vector [o] : Output audio data in the frequency interval
|
||||
// 80 - 250 Hz.
|
||||
static void HighPassFilter(const int16_t* in_vector, int in_vector_length,
|
||||
int16_t* filter_state, int16_t* out_vector) {
|
||||
int i;
|
||||
int16_t* in_ptr = in_vector;
|
||||
const int16_t* in_ptr = in_vector;
|
||||
int16_t* out_ptr = out_vector;
|
||||
int32_t tmp32 = 0;
|
||||
|
||||
@@ -70,11 +76,19 @@ void WebRtcVad_HpOutput(int16_t* in_vector,
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcVad_Allpass(int16_t* in_vector,
|
||||
int16_t filter_coefficients,
|
||||
int vector_length,
|
||||
int16_t* filter_state,
|
||||
int16_t* out_vector) {
|
||||
// All pass filtering of |in_vector|, used before splitting the signal into two
|
||||
// frequency bands (low pass vs high pass).
|
||||
// Note that |in_vector| and |out_vector| can NOT correspond to the same
|
||||
// address.
|
||||
//
|
||||
// - in_vector [i] : Input audio signal given in Q0.
|
||||
// - vector_length [i] : Length of input and output data.
|
||||
// - filter_coefficient [i] : Given in Q15.
|
||||
// - filter_state [i/o] : State of the filter given in Q(-1).
|
||||
// - out_vector [o] : Output audio signal given in Q(-1).
|
||||
static void AllPassFilter(const int16_t* in_vector, int vector_length,
|
||||
int16_t filter_coefficient, int16_t* filter_state,
|
||||
int16_t* out_vector) {
|
||||
// The filter can only cause overflow (in the w16 output variable)
|
||||
// if more than 4 consecutive input numbers are of maximum value and
|
||||
// have the same sign as the impulse response's first taps.
|
||||
@@ -87,11 +101,11 @@ void WebRtcVad_Allpass(int16_t* in_vector,
|
||||
int32_t state32 = WEBRTC_SPL_LSHIFT_W32((int32_t) (*filter_state), 16); // Q31
|
||||
|
||||
for (i = 0; i < vector_length; i++) {
|
||||
tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
|
||||
tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficient, (*in_vector));
|
||||
tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
|
||||
*out_vector++ = tmp16;
|
||||
in32 = WEBRTC_SPL_LSHIFT_W32(((int32_t) (*in_vector)), 14);
|
||||
state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
|
||||
state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficient, tmp16);
|
||||
state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
|
||||
in_vector += 2;
|
||||
}
|
||||
@@ -99,23 +113,32 @@ void WebRtcVad_Allpass(int16_t* in_vector,
|
||||
*filter_state = (int16_t) WEBRTC_SPL_RSHIFT_W32(state32, 16);
|
||||
}
|
||||
|
||||
void WebRtcVad_SplitFilter(int16_t* in_vector,
|
||||
int in_vector_length,
|
||||
int16_t* upper_state,
|
||||
int16_t* lower_state,
|
||||
int16_t* out_vector_hp,
|
||||
int16_t* out_vector_lp) {
|
||||
// Splits |in_vector| into |out_vector_hp| and |out_vector_lp| corresponding to
|
||||
// an upper (high pass) part and a lower (low pass) part respectively.
|
||||
//
|
||||
// - in_vector [i] : Input audio data to be split into two frequency
|
||||
// bands.
|
||||
// - in_vector_length [i] : Length of |in_vector|.
|
||||
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
|
||||
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
|
||||
// - out_vector_hp [o] : Output audio data of the upper half of the
|
||||
// spectrum. The length is |in_vector_length| / 2.
|
||||
// - out_vector_lp [o] : Output audio data of the lower half of the
|
||||
// spectrum. The length is |in_vector_length| / 2.
|
||||
static void SplitFilter(const int16_t* in_vector, int in_vector_length,
|
||||
int16_t* upper_state, int16_t* lower_state,
|
||||
int16_t* out_vector_hp, int16_t* out_vector_lp) {
|
||||
int16_t tmp_out;
|
||||
int i;
|
||||
int half_length = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
|
||||
|
||||
// All-pass filtering upper branch
|
||||
WebRtcVad_Allpass(&in_vector[0], kAllPassCoefsQ15[0], half_length,
|
||||
upper_state, out_vector_hp);
|
||||
AllPassFilter(&in_vector[0], half_length, kAllPassCoefsQ15[0], upper_state,
|
||||
out_vector_hp);
|
||||
|
||||
// All-pass filtering lower branch
|
||||
WebRtcVad_Allpass(&in_vector[1], kAllPassCoefsQ15[1], half_length,
|
||||
lower_state, out_vector_lp);
|
||||
AllPassFilter(&in_vector[1], half_length, kAllPassCoefsQ15[1], lower_state,
|
||||
out_vector_lp);
|
||||
|
||||
// Make LP and HP signals
|
||||
for (i = 0; i < half_length; i++) {
|
||||
@@ -125,113 +148,24 @@ void WebRtcVad_SplitFilter(int16_t* in_vector,
|
||||
}
|
||||
}
|
||||
|
||||
int16_t WebRtcVad_get_features(VadInstT* inst,
|
||||
int16_t* in_vector,
|
||||
int frame_size,
|
||||
int16_t* out_vector) {
|
||||
int16_t power = 0;
|
||||
// We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to
|
||||
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
|
||||
// have at most 120 samples after the first split and at most 60 samples after
|
||||
// the second split.
|
||||
int16_t hp_120[120], lp_120[120];
|
||||
int16_t hp_60[60], lp_60[60];
|
||||
// Initialize variables for the first SplitFilter().
|
||||
int length = frame_size;
|
||||
int frequency_band = 0;
|
||||
int16_t* in_ptr = in_vector;
|
||||
int16_t* hp_out_ptr = hp_120;
|
||||
int16_t* lp_out_ptr = lp_120;
|
||||
|
||||
// Split at 2000 Hz and downsample
|
||||
WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr,
|
||||
lp_out_ptr);
|
||||
|
||||
// Split at 3000 Hz and downsample
|
||||
frequency_band = 1;
|
||||
in_ptr = hp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
|
||||
|
||||
WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr,
|
||||
lp_out_ptr);
|
||||
|
||||
// Energy in 3000 Hz - 4000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[5], &power,
|
||||
&out_vector[5]);
|
||||
|
||||
// Energy in 2000 Hz - 3000 Hz
|
||||
WebRtcVad_LogOfEnergy(lp_60, length, kOffsetVector[4], &power,
|
||||
&out_vector[4]);
|
||||
|
||||
// Split at 1000 Hz and downsample
|
||||
frequency_band = 2;
|
||||
in_ptr = lp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
|
||||
WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr,
|
||||
lp_out_ptr);
|
||||
|
||||
// Energy in 1000 Hz - 2000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[3], &power,
|
||||
&out_vector[3]);
|
||||
|
||||
// Split at 500 Hz
|
||||
frequency_band = 3;
|
||||
in_ptr = lp_60;
|
||||
hp_out_ptr = hp_120;
|
||||
lp_out_ptr = lp_120;
|
||||
|
||||
WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr,
|
||||
lp_out_ptr);
|
||||
|
||||
// Energy in 500 Hz - 1000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[2], &power,
|
||||
&out_vector[2]);
|
||||
|
||||
// Split at 250 Hz
|
||||
frequency_band = 4;
|
||||
in_ptr = lp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
|
||||
WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr,
|
||||
lp_out_ptr);
|
||||
|
||||
// Energy in 250 Hz - 500 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[1], &power,
|
||||
&out_vector[1]);
|
||||
|
||||
// Remove DC and LFs
|
||||
WebRtcVad_HpOutput(lp_60, length, inst->hp_filter_state, hp_120);
|
||||
|
||||
// Power in 80 Hz - 250 Hz
|
||||
WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[0], &power,
|
||||
&out_vector[0]);
|
||||
|
||||
return power;
|
||||
}
|
||||
|
||||
void WebRtcVad_LogOfEnergy(int16_t* vector,
|
||||
int vector_length,
|
||||
int16_t offset,
|
||||
int16_t* power,
|
||||
int16_t* log_energy) {
|
||||
// Calculates the energy in dB of |in_vector|, and also updates an overall
|
||||
// |power| if necessary.
|
||||
//
|
||||
// - in_vector [i] : Input audio data for energy calculation.
|
||||
// - vector_length [i] : Length of input data.
|
||||
// - offset [i] : Offset value added to |log_energy|.
|
||||
// - power [i/o] : Signal power updated with the energy from
|
||||
// |in_vector|.
|
||||
// NOTE: |power| is only updated if
|
||||
// |power| < MIN_ENERGY.
|
||||
// - log_energy [o] : 10 * log10("energy of |in_vector|") given in Q4.
|
||||
static void LogOfEnergy(const int16_t* in_vector, int vector_length,
|
||||
int16_t offset, int16_t* power, int16_t* log_energy) {
|
||||
int shfts = 0, shfts2 = 0;
|
||||
int16_t energy_s16 = 0;
|
||||
int16_t zeros = 0, frac = 0, log2 = 0;
|
||||
int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
|
||||
int32_t energy = WebRtcSpl_Energy((int16_t*) in_vector, vector_length,
|
||||
&shfts);
|
||||
|
||||
if (energy > 0) {
|
||||
|
||||
@@ -276,3 +210,88 @@ void WebRtcVad_LogOfEnergy(int16_t* vector,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int16_t WebRtcVad_get_features(VadInstT* inst, const int16_t* in_vector,
|
||||
int frame_size, int16_t* out_vector) {
|
||||
int16_t power = 0;
|
||||
// We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to
|
||||
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
|
||||
// have at most 120 samples after the first split and at most 60 samples after
|
||||
// the second split.
|
||||
int16_t hp_120[120], lp_120[120];
|
||||
int16_t hp_60[60], lp_60[60];
|
||||
// Initialize variables for the first SplitFilter().
|
||||
int length = frame_size;
|
||||
int frequency_band = 0;
|
||||
const int16_t* in_ptr = in_vector;
|
||||
int16_t* hp_out_ptr = hp_120;
|
||||
int16_t* lp_out_ptr = lp_120;
|
||||
|
||||
// Split at 2000 Hz and downsample
|
||||
SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
|
||||
|
||||
// Split at 3000 Hz and downsample
|
||||
frequency_band = 1;
|
||||
in_ptr = hp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
|
||||
|
||||
SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
|
||||
|
||||
// Energy in 3000 Hz - 4000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[5], &power, &out_vector[5]);
|
||||
|
||||
// Energy in 2000 Hz - 3000 Hz
|
||||
LogOfEnergy(lp_60, length, kOffsetVector[4], &power, &out_vector[4]);
|
||||
|
||||
// Split at 1000 Hz and downsample
|
||||
frequency_band = 2;
|
||||
in_ptr = lp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
|
||||
SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
|
||||
|
||||
// Energy in 1000 Hz - 2000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[3], &power, &out_vector[3]);
|
||||
|
||||
// Split at 500 Hz
|
||||
frequency_band = 3;
|
||||
in_ptr = lp_60;
|
||||
hp_out_ptr = hp_120;
|
||||
lp_out_ptr = lp_120;
|
||||
|
||||
SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
|
||||
|
||||
// Energy in 500 Hz - 1000 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[2], &power, &out_vector[2]);
|
||||
|
||||
// Split at 250 Hz
|
||||
frequency_band = 4;
|
||||
in_ptr = lp_120;
|
||||
hp_out_ptr = hp_60;
|
||||
lp_out_ptr = lp_60;
|
||||
|
||||
SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
|
||||
&inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
|
||||
|
||||
// Energy in 250 Hz - 500 Hz
|
||||
length = WEBRTC_SPL_RSHIFT_W16(length, 1);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[1], &power, &out_vector[1]);
|
||||
|
||||
// Remove DC and LFs
|
||||
HighPassFilter(lp_60, length, inst->hp_filter_state, hp_120);
|
||||
|
||||
// Power in 80 Hz - 250 Hz
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[0], &power, &out_vector[0]);
|
||||
|
||||
return power;
|
||||
}
|
||||
|
||||
@@ -19,89 +19,19 @@
|
||||
#include "typedefs.h"
|
||||
#include "vad_core.h"
|
||||
|
||||
// TODO(bjornv): Move local functions to vad_filterbank.c and make static.
|
||||
/****************************************************************************
|
||||
* WebRtcVad_HpOutput(...)
|
||||
*
|
||||
* This function removes DC from the lowest frequency band
|
||||
*
|
||||
* Input:
|
||||
* - in_vector : Samples in the frequency interval 0 - 250 Hz
|
||||
* - in_vector_length : Length of input and output vector
|
||||
* - filter_state : Current state of the filter
|
||||
*
|
||||
* Output:
|
||||
* - out_vector : Samples in the frequency interval 80 - 250 Hz
|
||||
* - filter_state : Updated state of the filter
|
||||
*
|
||||
*/
|
||||
void WebRtcVad_HpOutput(int16_t* in_vector,
|
||||
int in_vector_length,
|
||||
int16_t* filter_state,
|
||||
int16_t* out_vector);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcVad_Allpass(...)
|
||||
*
|
||||
* This function is used before splitting a speech file into
|
||||
* different frequency bands
|
||||
*
|
||||
* Note! Do NOT let the arrays in_vector and out_vector correspond to the same address.
|
||||
*
|
||||
* Input:
|
||||
* - in_vector : (Q0)
|
||||
* - filter_coefficients : (Q15)
|
||||
* - vector_length : Length of input and output vector
|
||||
* - filter_state : Current state of the filter (Q(-1))
|
||||
*
|
||||
* Output:
|
||||
* - out_vector : Output speech signal (Q(-1))
|
||||
* - filter_state : Updated state of the filter (Q(-1))
|
||||
*
|
||||
*/
|
||||
void WebRtcVad_Allpass(int16_t* in_vector,
|
||||
int16_t filter_coefficients,
|
||||
int vector_length,
|
||||
int16_t* filter_state,
|
||||
int16_t* outw16);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcVad_SplitFilter(...)
|
||||
*
|
||||
* This function is used to split a speech file into
|
||||
* different frequency bands
|
||||
*
|
||||
* Input:
|
||||
* - in_vector : Input signal to be split into two frequency bands.
|
||||
* - upper_state : Current state of the upper filter
|
||||
* - lower_state : Current state of the lower filter
|
||||
* - in_vector_length : Length of input vector
|
||||
*
|
||||
* Output:
|
||||
* - out_vector_hp : Upper half of the spectrum
|
||||
* - out_vector_lp : Lower half of the spectrum
|
||||
* - upper_state : Updated state of the upper filter
|
||||
* - lower_state : Updated state of the lower filter
|
||||
*
|
||||
*/
|
||||
void WebRtcVad_SplitFilter(int16_t* in_vector,
|
||||
int in_vector_length,
|
||||
int16_t* upper_state,
|
||||
int16_t* lower_state,
|
||||
int16_t* out_vector_hp,
|
||||
int16_t* out_vector_lp);
|
||||
|
||||
// TODO(bjornv): Rename to CalcFeatures() or similar. Update at the same time
|
||||
// comments and parameter order.
|
||||
/****************************************************************************
|
||||
* WebRtcVad_get_features(...)
|
||||
*
|
||||
* This function is used to get the logarithm of the power of each of the
|
||||
* This function is used to get the logarithm of the power of each of the
|
||||
* 6 frequency bands used by the VAD:
|
||||
* 80 Hz - 250 Hz
|
||||
* 250 Hz - 500 Hz
|
||||
* 500 Hz - 1000 Hz
|
||||
* 1000 Hz - 2000 Hz
|
||||
* 2000 Hz - 3000 Hz
|
||||
* 3000 Hz - 4000 Hz
|
||||
* 3000 Hz - 4000 Hz
|
||||
*
|
||||
* Input:
|
||||
* - inst : Pointer to VAD instance
|
||||
@@ -110,35 +40,13 @@ void WebRtcVad_SplitFilter(int16_t* in_vector,
|
||||
*
|
||||
* Output:
|
||||
* - out_vector : 10*log10(power in each freq. band), Q4
|
||||
*
|
||||
*
|
||||
* Return: total power in the signal (NOTE! This value is not exact since it
|
||||
* is only used in a comparison.)
|
||||
*/
|
||||
int16_t WebRtcVad_get_features(VadInstT* inst,
|
||||
int16_t* in_vector,
|
||||
const int16_t* in_vector,
|
||||
int frame_size,
|
||||
int16_t* out_vector);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcVad_LogOfEnergy(...)
|
||||
*
|
||||
* This function is used to get the logarithm of the power of one frequency band.
|
||||
*
|
||||
* Input:
|
||||
* - vector : Input speech samples for one frequency band
|
||||
* - offset : Offset value for the current frequency band
|
||||
* - vector_length : Length of input vector
|
||||
*
|
||||
* Output:
|
||||
* - log_energy : 10*log10(energy);
|
||||
* - power : Update total power in speech frame. NOTE! This value
|
||||
* is not exact since it is only used in a comparison.
|
||||
*
|
||||
*/
|
||||
void WebRtcVad_LogOfEnergy(int16_t* vector,
|
||||
int vector_length,
|
||||
int16_t offset,
|
||||
int16_t* power,
|
||||
int16_t* log_energy);
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
|
||||
|
||||
Reference in New Issue
Block a user