Refactoring vad_filterbank
Made the internal function LogOfEnergy() more efficient. Includes: name change "vector" -> "data"; complete refactor of LogOfEnergy(); removed lint warning. Major changes: removed unnecessary variables; reduced the number of shifts; removed one norm calculation. TEST=vad_unittests, audioproc_unittest. Review URL: http://webrtc-codereview.appspot.com/347004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1407 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
b39a3b4a7a
commit
40ea5106f6
@ -16,8 +16,9 @@
|
||||
#include "typedefs.h"
|
||||
#include "vad_defines.h"
|
||||
|
||||
// Constant 160*log10(2) in Q9.
|
||||
static const int16_t kLogConst = 24660;
|
||||
// Constants used in LogOfEnergy().
|
||||
static const int16_t kLogConst = 24660; // 160*log10(2) in Q9.
|
||||
static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10
|
||||
|
||||
// Coefficients used by HighPassFilter, Q14.
|
||||
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
|
||||
@ -30,19 +31,19 @@ static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
|
||||
// Adjustment for division with two in SplitFilter.
|
||||
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
|
||||
|
||||
// High pass filtering, with a cut-off frequency at 80 Hz, if the |in_vector| is
|
||||
// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
|
||||
// sampled at 500 Hz.
|
||||
//
|
||||
// - in_vector [i] : Input audio data sampled at 500 Hz.
|
||||
// - in_vector_length [i] : Length of input and output data.
|
||||
// - filter_state [i/o] : State of the filter.
|
||||
// - out_vector [o] : Output audio data in the frequency interval
|
||||
// 80 - 250 Hz.
|
||||
static void HighPassFilter(const int16_t* in_vector, int in_vector_length,
|
||||
int16_t* filter_state, int16_t* out_vector) {
|
||||
// - data_in [i] : Input audio data sampled at 500 Hz.
|
||||
// - data_length [i] : Length of input and output data.
|
||||
// - filter_state [i/o] : State of the filter.
|
||||
// - data_out [o] : Output audio data in the frequency interval
|
||||
// 80 - 250 Hz.
|
||||
static void HighPassFilter(const int16_t* data_in, int data_length,
|
||||
int16_t* filter_state, int16_t* data_out) {
|
||||
int i;
|
||||
const int16_t* in_ptr = in_vector;
|
||||
int16_t* out_ptr = out_vector;
|
||||
const int16_t* in_ptr = data_in;
|
||||
int16_t* out_ptr = data_out;
|
||||
int32_t tmp32 = 0;
|
||||
|
||||
|
||||
@ -54,7 +55,7 @@ static void HighPassFilter(const int16_t* in_vector, int in_vector_length,
|
||||
// The all-pole section has a max amplification of a single sample of: 1.9931
|
||||
// Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532
|
||||
|
||||
for (i = 0; i < in_vector_length; i++) {
|
||||
for (i = 0; i < data_length; i++) {
|
||||
// All-zero section (filter coefficients in Q14).
|
||||
tmp32 = WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], *in_ptr);
|
||||
tmp32 += WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
|
||||
@ -71,19 +72,18 @@ static void HighPassFilter(const int16_t* in_vector, int in_vector_length,
|
||||
}
|
||||
}
|
||||
|
||||
// All pass filtering of |in_vector|, used before splitting the signal into two
|
||||
// All pass filtering of |data_in|, used before splitting the signal into two
|
||||
// frequency bands (low pass vs high pass).
|
||||
// Note that |in_vector| and |out_vector| can NOT correspond to the same
|
||||
// address.
|
||||
// Note that |data_in| and |data_out| can NOT correspond to the same address.
|
||||
//
|
||||
// - in_vector [i] : Input audio signal given in Q0.
|
||||
// - vector_length [i] : Length of input and output data.
|
||||
// - data_in [i] : Input audio signal given in Q0.
|
||||
// - data_length [i] : Length of input and output data.
|
||||
// - filter_coefficient [i] : Given in Q15.
|
||||
// - filter_state [i/o] : State of the filter given in Q(-1).
|
||||
// - out_vector [o] : Output audio signal given in Q(-1).
|
||||
static void AllPassFilter(const int16_t* in_vector, int vector_length,
|
||||
// - data_out [o] : Output audio signal given in Q(-1).
|
||||
static void AllPassFilter(const int16_t* data_in, int data_length,
|
||||
int16_t filter_coefficient, int16_t* filter_state,
|
||||
int16_t* out_vector) {
|
||||
int16_t* data_out) {
|
||||
// The filter can only cause overflow (in the w16 output variable)
|
||||
// if more than 4 consecutive input numbers are of maximum value and
|
||||
// have the same sign as the impulse response's first taps.
|
||||
@ -95,120 +95,159 @@ static void AllPassFilter(const int16_t* in_vector, int vector_length,
|
||||
int32_t tmp32 = 0;
|
||||
int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15
|
||||
|
||||
for (i = 0; i < vector_length; i++) {
|
||||
tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficient, *in_vector);
|
||||
for (i = 0; i < data_length; i++) {
|
||||
tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficient, *data_in);
|
||||
tmp16 = (int16_t) (tmp32 >> 16); // Q(-1)
|
||||
*out_vector++ = tmp16;
|
||||
state32 = (((int32_t) (*in_vector)) << 14); // Q14
|
||||
*data_out++ = tmp16;
|
||||
state32 = (((int32_t) (*data_in)) << 14); // Q14
|
||||
state32 -= WEBRTC_SPL_MUL_16_16(filter_coefficient, tmp16); // Q14
|
||||
state32 <<= 1; // Q15.
|
||||
in_vector += 2;
|
||||
data_in += 2;
|
||||
}
|
||||
|
||||
*filter_state = (int16_t) (state32 >> 16); // Q(-1)
|
||||
}
|
||||
|
||||
// Splits |in_vector| into |out_vector_hp| and |out_vector_lp| corresponding to
|
||||
// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
|
||||
// an upper (high pass) part and a lower (low pass) part respectively.
|
||||
//
|
||||
// - in_vector [i] : Input audio data to be split into two frequency
|
||||
// bands.
|
||||
// - in_vector_length [i] : Length of |in_vector|.
|
||||
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
|
||||
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
|
||||
// - out_vector_hp [o] : Output audio data of the upper half of the
|
||||
// spectrum. The length is |in_vector_length| / 2.
|
||||
// - out_vector_lp [o] : Output audio data of the lower half of the
|
||||
// spectrum. The length is |in_vector_length| / 2.
|
||||
static void SplitFilter(const int16_t* in_vector, int in_vector_length,
|
||||
// - data_in [i] : Input audio data to be split into two frequency bands.
|
||||
// - data_length [i] : Length of |data_in|.
|
||||
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
|
||||
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
|
||||
// - hp_data_out [o] : Output audio data of the upper half of the spectrum.
|
||||
// The length is |data_length| / 2.
|
||||
// - lp_data_out [o] : Output audio data of the lower half of the spectrum.
|
||||
// The length is |data_length| / 2.
|
||||
static void SplitFilter(const int16_t* data_in, int data_length,
|
||||
int16_t* upper_state, int16_t* lower_state,
|
||||
int16_t* out_vector_hp, int16_t* out_vector_lp) {
|
||||
int16_t* hp_data_out, int16_t* lp_data_out) {
|
||||
int i;
|
||||
int half_length = in_vector_length >> 1; // Downsampling by 2.
|
||||
int half_length = data_length >> 1; // Downsampling by 2.
|
||||
int16_t tmp_out;
|
||||
|
||||
// All-pass filtering upper branch.
|
||||
AllPassFilter(&in_vector[0], half_length, kAllPassCoefsQ15[0], upper_state,
|
||||
out_vector_hp);
|
||||
AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,
|
||||
hp_data_out);
|
||||
|
||||
// All-pass filtering lower branch.
|
||||
AllPassFilter(&in_vector[1], half_length, kAllPassCoefsQ15[1], lower_state,
|
||||
out_vector_lp);
|
||||
AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,
|
||||
lp_data_out);
|
||||
|
||||
// Make LP and HP signals.
|
||||
for (i = 0; i < half_length; i++) {
|
||||
tmp_out = *out_vector_hp;
|
||||
*out_vector_hp++ -= *out_vector_lp;
|
||||
*out_vector_lp++ += tmp_out;
|
||||
tmp_out = *hp_data_out;
|
||||
*hp_data_out++ -= *lp_data_out;
|
||||
*lp_data_out++ += tmp_out;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculates the energy in dB of |in_vector|, and also updates an overall
|
||||
// |power| if necessary.
|
||||
// Calculates the energy of |data_in| in dB, and also updates an overall
|
||||
// |total_energy| if necessary.
|
||||
//
|
||||
// - in_vector [i] : Input audio data for energy calculation.
|
||||
// - vector_length [i] : Length of input data.
|
||||
// - offset [i] : Offset value added to |log_energy|.
|
||||
// - power [i/o] : Signal power updated with the energy from
|
||||
// |in_vector|.
|
||||
// NOTE: |power| is only updated if
|
||||
// |power| < MIN_ENERGY.
|
||||
// - log_energy [o] : 10 * log10("energy of |in_vector|") given in Q4.
|
||||
static void LogOfEnergy(const int16_t* in_vector, int vector_length,
|
||||
int16_t offset, int16_t* power, int16_t* log_energy) {
|
||||
int shfts = 0, shfts2 = 0;
|
||||
int16_t energy_s16 = 0;
|
||||
int16_t zeros = 0, frac = 0, log2 = 0;
|
||||
int32_t energy = WebRtcSpl_Energy((int16_t*) in_vector, vector_length,
|
||||
&shfts);
|
||||
// - data_in [i] : Input audio data for energy calculation.
|
||||
// - data_length [i] : Length of input data.
|
||||
// - offset [i] : Offset value added to |log_energy|.
|
||||
// - total_energy [i/o] : An external energy updated with the energy of
|
||||
// |data_in|.
|
||||
// NOTE: |total_energy| is only updated if
|
||||
// |total_energy| <= MIN_ENERGY.
|
||||
// - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4.
|
||||
static void LogOfEnergy(const int16_t* data_in, int data_length,
|
||||
int16_t offset, int16_t* total_energy,
|
||||
int16_t* log_energy) {
|
||||
// |tot_rshifts| accumulates the number of right shifts performed on |energy|.
|
||||
int tot_rshifts = 0;
|
||||
// The |energy| will be normalized to 15 bits. We use unsigned integer because
|
||||
// we eventually will mask out the fractional part.
|
||||
uint32_t energy = 0;
|
||||
|
||||
if (energy > 0) {
|
||||
assert(data_in != NULL);
|
||||
assert(data_length > 0);
|
||||
|
||||
shfts2 = 16 - WebRtcSpl_NormW32(energy);
|
||||
shfts += shfts2;
|
||||
// "shfts" is the total number of right shifts that has been done to
|
||||
// energy_s16.
|
||||
energy_s16 = (int16_t) WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
|
||||
energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
|
||||
&tot_rshifts);
|
||||
|
||||
// Find:
|
||||
// 160*log10(energy_s16*2^shfts) = 160*log10(2)*log2(energy_s16*2^shfts) =
|
||||
// 160*log10(2)*(log2(energy_s16) + log2(2^shfts)) =
|
||||
// 160*log10(2)*(log2(energy_s16) + shfts)
|
||||
if (energy != 0) {
|
||||
// By construction, normalizing to 15 bits is equivalent with 17 leading
|
||||
// zeros of an unsigned 32 bit value.
|
||||
int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
|
||||
// In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
|
||||
// (14 << 10), which is what we initialize |log2_energy| with. For a more
|
||||
// detailed derivation, see below.
|
||||
int16_t log2_energy = kLogEnergyIntPart;
|
||||
|
||||
zeros = WebRtcSpl_NormU32(energy_s16);
|
||||
frac = (int16_t) (((uint32_t) ((int32_t) (energy_s16) << zeros)
|
||||
& 0x7FFFFFFF) >> 21);
|
||||
log2 = (int16_t) (((31 - zeros) << 10) + frac);
|
||||
tot_rshifts += normalizing_rshifts;
|
||||
// Normalize |energy| to 15 bits.
|
||||
// |tot_rshifts| is now the total number of right shifts performed on
|
||||
// |energy| after normalization. This means that |energy| is in
|
||||
// Q(-tot_rshifts).
|
||||
if (normalizing_rshifts < 0) {
|
||||
energy <<= -normalizing_rshifts;
|
||||
} else {
|
||||
energy >>= normalizing_rshifts;
|
||||
}
|
||||
|
||||
*log_energy = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
|
||||
+ (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
|
||||
// Calculate the energy of |data_in| in dB, in Q4.
|
||||
//
|
||||
// 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
|
||||
// 160 * log10(|energy| * 2^|tot_rshifts|) =
|
||||
// 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
|
||||
// 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
|
||||
// (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
|
||||
// |kLogConst| * (|log2_energy| + |tot_rshifts|)
|
||||
//
|
||||
// We know by construction that |energy| is normalized to 15 bits. Hence,
|
||||
// |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
|
||||
// Further, we'd like |log2_energy| in Q10
|
||||
// log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
|
||||
// 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
|
||||
// 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
|
||||
// (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
|
||||
// (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
|
||||
//
|
||||
// Note that frac_Q15 = (|energy| & 0x00003FFF)
|
||||
|
||||
// Calculate and add the fractional part to |log2_energy|.
|
||||
log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
|
||||
|
||||
// |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
|
||||
// Note that we in our derivation above have accounted for an output in Q4.
|
||||
*log_energy = (int16_t) (WEBRTC_SPL_MUL_16_16_RSFT(
|
||||
kLogConst, log2_energy, 19) +
|
||||
WEBRTC_SPL_MUL_16_16_RSFT(tot_rshifts, kLogConst, 9));
|
||||
|
||||
if (*log_energy < 0) {
|
||||
*log_energy = 0;
|
||||
}
|
||||
} else {
|
||||
*log_energy = 0;
|
||||
shfts = -15;
|
||||
energy_s16 = 0;
|
||||
*log_energy = offset;
|
||||
return;
|
||||
}
|
||||
|
||||
*log_energy += offset;
|
||||
|
||||
// Total power in frame
|
||||
if (*power <= MIN_ENERGY) {
|
||||
if (shfts > 0) {
|
||||
*power += MIN_ENERGY + 1;
|
||||
} else if (WEBRTC_SPL_SHIFT_W16(energy_s16, shfts) > MIN_ENERGY) {
|
||||
*power += MIN_ENERGY + 1;
|
||||
// Update the approximate |total_energy| with the energy of |data_in|, if
|
||||
// |total_energy| has not exceeded MIN_ENERGY. |total_energy| is used as an
|
||||
// energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
|
||||
if (*total_energy <= MIN_ENERGY) {
|
||||
if (tot_rshifts >= 0) {
|
||||
// We know by construction that the |energy| > MIN_ENERGY in Q0, so add an
|
||||
// arbitrary value such that |total_energy| exceeds MIN_ENERGY.
|
||||
*total_energy += MIN_ENERGY + 1;
|
||||
} else {
|
||||
*power += WEBRTC_SPL_SHIFT_W16(energy_s16, shfts);
|
||||
// By construction |energy| is represented by 15 bits, hence any number of
|
||||
// right shifted |energy| will fit in an int16_t. In addition, adding the
|
||||
// value to |total_energy| is wrap around safe as long as
|
||||
// MIN_ENERGY < 8192.
|
||||
*total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
int data_length, int16_t* data_out) {
|
||||
int16_t power = 0;
|
||||
int data_length, int16_t* features) {
|
||||
int16_t total_energy = 0;
|
||||
// We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
|
||||
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
|
||||
// have at most 120 samples after the first split and at most 60 samples after
|
||||
@ -244,10 +283,10 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
// Energy in 3000 Hz - 4000 Hz.
|
||||
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
|
||||
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[5], &power, &data_out[5]);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
|
||||
|
||||
// Energy in 2000 Hz - 3000 Hz.
|
||||
LogOfEnergy(lp_60, length, kOffsetVector[4], &power, &data_out[4]);
|
||||
LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]);
|
||||
|
||||
// For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample.
|
||||
frequency_band = 2;
|
||||
@ -260,7 +299,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
|
||||
// Energy in 1000 Hz - 2000 Hz.
|
||||
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[3], &power, &data_out[3]);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
|
||||
|
||||
// For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
|
||||
frequency_band = 3;
|
||||
@ -272,7 +311,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
|
||||
// Energy in 500 Hz - 1000 Hz.
|
||||
length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz.
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[2], &power, &data_out[2]);
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
|
||||
|
||||
// For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
|
||||
frequency_band = 4;
|
||||
@ -284,13 +323,13 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
|
||||
// Energy in 250 Hz - 500 Hz.
|
||||
length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz.
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[1], &power, &data_out[1]);
|
||||
LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
|
||||
|
||||
// Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
|
||||
HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);
|
||||
|
||||
// Energy in 80 Hz - 250 Hz.
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[0], &power, &data_out[0]);
|
||||
LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);
|
||||
|
||||
return power;
|
||||
return total_energy;
|
||||
}
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "vad_core.h"
|
||||
|
||||
// Takes |data_length| samples of |data_in| and calculates the logarithm of the
|
||||
// power of each of the |NUM_CHANNELS| = 6 frequency bands used by the VAD:
|
||||
// energy of each of the |NUM_CHANNELS| = 6 frequency bands used by the VAD:
|
||||
// 80 Hz - 250 Hz
|
||||
// 250 Hz - 500 Hz
|
||||
// 500 Hz - 1000 Hz
|
||||
@ -27,18 +27,18 @@
|
||||
// 2000 Hz - 3000 Hz
|
||||
// 3000 Hz - 4000 Hz
|
||||
//
|
||||
// The values are given in Q4 and written to |data_out|. Further, an approximate
|
||||
// overall power is returned. The return value is used in
|
||||
// The values are given in Q4 and written to |features|. Further, an approximate
|
||||
// overall energy is returned. The return value is used in
|
||||
// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
|
||||
// the threshold MIN_ENERGY.
|
||||
//
|
||||
// - self [i/o] : State information of the VAD.
|
||||
// - data_in [i] : Input audio data, for feature extraction.
|
||||
// - data_length [i] : Audio data size, in number of samples.
|
||||
// - data_out [o] : 10 * log10(power in each frequency band), Q4.
|
||||
// - returns : Total power of the signal (NOTE! This value is not
|
||||
// - features [o] : 10 * log10(energy in each frequency band), Q4.
|
||||
// - returns : Total energy of the signal (NOTE! This value is not
|
||||
// exact. It is only used in a comparison.)
|
||||
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
|
||||
int data_length, int16_t* data_out);
|
||||
int data_length, int16_t* features);
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
|
||||
|
@ -25,17 +25,16 @@ namespace {
|
||||
enum { kNumValidFrameLengths = 3 };
|
||||
|
||||
TEST_F(VadTest, vad_filterbank) {
|
||||
VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT));
|
||||
static const int16_t kReference[kNumValidFrameLengths] = { 15, 11, 11 };
|
||||
static const int16_t kReferencePowers[kNumValidFrameLengths * NUM_CHANNELS] =
|
||||
{
|
||||
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
|
||||
static const int16_t kReference[kNumValidFrameLengths] = { 48, 11, 11 };
|
||||
static const int16_t kFeatures[kNumValidFrameLengths * NUM_CHANNELS] = {
|
||||
1213, 759, 587, 462, 434, 272,
|
||||
1479, 1385, 1291, 1200, 1103, 1099,
|
||||
1732, 1692, 1681, 1629, 1436, 1436
|
||||
};
|
||||
static const int16_t kOffsetVector[NUM_CHANNELS] = {
|
||||
368, 368, 272, 176, 176, 176 };
|
||||
int16_t data_out[NUM_CHANNELS];
|
||||
int16_t features[NUM_CHANNELS];
|
||||
|
||||
// Construct a speech signal that will trigger the VAD in all modes. It is
|
||||
// known that (i * i) will wrap around, but that doesn't matter in this case.
|
||||
@ -50,10 +49,10 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
EXPECT_EQ(kReference[frame_length_index],
|
||||
WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
|
||||
data_out));
|
||||
features));
|
||||
for (int k = 0; k < NUM_CHANNELS; ++k) {
|
||||
EXPECT_EQ(kReferencePowers[k + frame_length_index * NUM_CHANNELS],
|
||||
data_out[k]);
|
||||
EXPECT_EQ(kFeatures[k + frame_length_index * NUM_CHANNELS],
|
||||
features[k]);
|
||||
}
|
||||
frame_length_index++;
|
||||
}
|
||||
@ -66,9 +65,9 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
|
||||
data_out));
|
||||
features));
|
||||
for (int k = 0; k < NUM_CHANNELS; ++k) {
|
||||
EXPECT_EQ(kOffsetVector[k], data_out[k]);
|
||||
EXPECT_EQ(kOffsetVector[k], features[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -82,9 +81,9 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
|
||||
data_out));
|
||||
features));
|
||||
for (int k = 0; k < NUM_CHANNELS; ++k) {
|
||||
EXPECT_EQ(kOffsetVector[k], data_out[k]);
|
||||
EXPECT_EQ(kOffsetVector[k], features[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user