VAD refactoring: Replaced hard-coded array sizes with enum constants.

Also replaced the hard-coded per-Gaussian calculations with a for loop to make the code easier to follow.

Tested with vad_unittests and audioproc_unittest.

BUG=None
TEST=None

Review URL: https://webrtc-codereview.appspot.com/519002

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2162 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-05-02 12:14:57 +00:00
parent 404843e6e5
commit b286bfb13e

View File

@ -17,40 +17,40 @@
#include "vad_sp.h"
// Spectrum weighting, one weight per channel
static const int16_t kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 };
static const int16_t kNoiseUpdateConst = 655; // Q15
static const int16_t kSpeechUpdateConst = 6554; // Q15
static const int16_t kBackEta = 154; // Q8
// Minimum difference between the two models, Q5
static const int16_t kMinimumDifference[6] = {
static const int16_t kMinimumDifference[kNumChannels] = {
544, 544, 576, 576, 576, 576 };
// Upper limit of mean value for speech model, Q7
static const int16_t kMaximumSpeech[6] = {
static const int16_t kMaximumSpeech[kNumChannels] = {
11392, 11392, 11520, 11520, 11520, 11520 };
// Lower limit of mean value, one entry per Gaussian
static const int16_t kMinimumMean[2] = { 640, 768 };
static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 };
// Upper limit of mean value for noise model, Q7
static const int16_t kMaximumNoise[6] = {
static const int16_t kMaximumNoise[kNumChannels] = {
9216, 9088, 8960, 8832, 8704, 8576 };
// Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataWeights[12] = {
static const int16_t kNoiseDataWeights[kTableSize] = {
34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
// Weights for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataWeights[12] = {
static const int16_t kSpeechDataWeights[kTableSize] = {
48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
// Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[12] = {
static const int16_t kNoiseDataMeans[kTableSize] = {
6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
// Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[12] = {
static const int16_t kSpeechDataMeans[kTableSize] = {
8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
};
// Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[12] = {
static const int16_t kNoiseDataStds[kTableSize] = {
378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
// Stds for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataStds[12] = {
static const int16_t kSpeechDataStds[kTableSize] = {
555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
// Constants used in GmmProbability().
@ -105,7 +105,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
int n, k;
int16_t feature_minimum;
int16_t h0, h1;
int16_t log_likelihood_ratio, feature;
int16_t log_likelihood_ratio;
int16_t vadflag = 0;
int16_t shifts0, shifts1;
int16_t tmp_s16, tmp1_s16, tmp2_s16;
@ -145,39 +145,32 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
if (total_power > kMinEnergy) {
// We have a signal present.
// Set pointers to the Gaussian parameters.
nmean1ptr = &self->noise_means[0];
nmean2ptr = &self->noise_means[kNumChannels];
smean1ptr = &self->speech_means[0];
smean2ptr = &self->speech_means[kNumChannels];
nstd1ptr = &self->noise_stds[0];
nstd2ptr = &self->noise_stds[kNumChannels];
sstd1ptr = &self->speech_stds[0];
sstd2ptr = &self->speech_stds[kNumChannels];
for (n = 0; n < kNumChannels; n++) {
// Perform for all channels.
pos = (n << 1);
feature = feature_vector[n];
h0_test = 0;
h1_test = 0;
// Probability for Noise, Q7 * Q20 = Q27.
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *nmean1ptr++,
*nstd1ptr++, &deltaN[pos]);
noise_probability[0] = kNoiseDataWeights[n] * tmp1_s32;
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *nmean2ptr++,
*nstd2ptr++, &deltaN[pos + 1]);
noise_probability[1] = kNoiseDataWeights[n + kNumChannels] * tmp1_s32;
h0_test = noise_probability[0] + noise_probability[1]; // Q27
for (k = 0; k < kNumGaussians; k++) {
nr = n + k * kNumChannels;
// Probability for Noise, Q7 * Q20 = Q27.
tmp1_s32 = WebRtcVad_GaussianProbability(feature_vector[n],
self->noise_means[nr],
self->noise_stds[nr],
&deltaN[pos + k]);
noise_probability[k] = kNoiseDataWeights[nr] * tmp1_s32;
h0_test += noise_probability[k]; // Q27
// Probability for Speech.
tmp1_s32 = WebRtcVad_GaussianProbability(feature_vector[n],
self->speech_means[nr],
self->speech_stds[nr],
&deltaS[pos + k]);
speech_probability[k] = kSpeechDataWeights[nr] * tmp1_s32;
h1_test += speech_probability[k]; // Q27
}
h0 = (int16_t) (h0_test >> 12); // Q15
// Probability for Speech.
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *smean1ptr++,
*sstd1ptr++, &deltaS[pos]);
speech_probability[0] = kSpeechDataWeights[n] * tmp1_s32;
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *smean2ptr++,
*sstd2ptr++, &deltaS[pos + 1]);
speech_probability[1] = kSpeechDataWeights[n + kNumChannels] * tmp1_s32;
h1_test = speech_probability[0] + speech_probability[1]; // Q27
h1 = (int16_t) (h1_test >> 12); // Q15
// Calculate the log likelihood ratio. Approximate log2(H1/H0) with