VAD refactoring: Local variable name changes

Changes tested with vad_unittests and audioproc_unittest

BUG=None
TEST=None

Review URL: https://webrtc-codereview.appspot.com/485004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2093 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-04-23 14:50:11 +00:00
parent 5284d6e905
commit 11654c2344

View File

@ -103,12 +103,12 @@ static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
int16_t total_power, int frame_length) {
int n, k;
int16_t backval;
int16_t feature_minimum;
int16_t h0, h1;
int16_t ratvec, xval;
int16_t log_likelihood_ratio, feature;
int16_t vadflag = 0;
int16_t shifts0, shifts1;
int16_t tmp16, tmp16_1, tmp16_2;
int16_t tmp_s16, tmp1_s16, tmp2_s16;
int16_t diff;
int nr, pos;
int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
@ -116,11 +116,11 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
int16_t maxspe, maxmu;
int16_t deltaN[kTableSize], deltaS[kTableSize];
int16_t ngprvec[kTableSize], sgprvec[kTableSize];
int32_t h0test, h1test;
int32_t tmp32_1, tmp32_2;
int32_t dotVal;
int32_t nmid, smid;
int32_t probn[kNumGaussians], probs[kNumGaussians];
int32_t h0_test, h1_test;
int32_t tmp1_s32, tmp2_s32;
int32_t sum_log_likelihood_ratios = 0;
int32_t noise_global_mean, speech_global_mean;
int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians];
int16_t *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr;
int16_t *nstd1ptr, *nstd2ptr, *sstd1ptr, *sstd2ptr;
int16_t overhead1, overhead2, individualTest, totalTest;
@ -155,60 +155,60 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
sstd1ptr = &self->speech_stds[0];
sstd2ptr = &self->speech_stds[kNumChannels];
dotVal = 0;
for (n = 0; n < kNumChannels; n++) {
// Perform for all channels.
pos = (n << 1);
xval = feature_vector[n];
feature = feature_vector[n];
// Probability for Noise, Q7 * Q20 = Q27.
tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++,
&deltaN[pos]);
probn[0] = kNoiseDataWeights[n] * tmp32_1;
tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++,
&deltaN[pos + 1]);
probn[1] = kNoiseDataWeights[n + kNumChannels] * tmp32_1;
h0test = probn[0] + probn[1]; // Q27
h0 = (int16_t) (h0test >> 12); // Q15
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *nmean1ptr++,
*nstd1ptr++, &deltaN[pos]);
noise_probability[0] = kNoiseDataWeights[n] * tmp1_s32;
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *nmean2ptr++,
*nstd2ptr++, &deltaN[pos + 1]);
noise_probability[1] = kNoiseDataWeights[n + kNumChannels] * tmp1_s32;
h0_test = noise_probability[0] + noise_probability[1]; // Q27
h0 = (int16_t) (h0_test >> 12); // Q15
// Probability for Speech.
tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++,
&deltaS[pos]);
probs[0] = kSpeechDataWeights[n] * tmp32_1;
tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++,
&deltaS[pos + 1]);
probs[1] = kSpeechDataWeights[n + kNumChannels] * tmp32_1;
h1test = probs[0] + probs[1]; // Q27
h1 = (int16_t) (h1test >> 12); // Q15
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *smean1ptr++,
*sstd1ptr++, &deltaS[pos]);
speech_probability[0] = kSpeechDataWeights[n] * tmp1_s32;
tmp1_s32 = WebRtcVad_GaussianProbability(feature, *smean2ptr++,
*sstd2ptr++, &deltaS[pos + 1]);
speech_probability[1] = kSpeechDataWeights[n + kNumChannels] * tmp1_s32;
h1_test = speech_probability[0] + speech_probability[1]; // Q27
h1 = (int16_t) (h1_test >> 12); // Q15
// Calculate the log likelihood ratio. Approximate log2(H1/H0) with
// |shifts0| - |shifts1|.
shifts0 = WebRtcSpl_NormW32(h0test);
shifts1 = WebRtcSpl_NormW32(h1test);
shifts0 = WebRtcSpl_NormW32(h0_test);
shifts1 = WebRtcSpl_NormW32(h1_test);
if ((h0test > 0) && (h1test > 0)) {
ratvec = shifts0 - shifts1;
} else if (h1test > 0) {
ratvec = 31 - shifts1;
} else if (h0test > 0) {
ratvec = shifts0 - 31;
if ((h0_test > 0) && (h1_test > 0)) {
log_likelihood_ratio = shifts0 - shifts1;
} else if (h1_test > 0) {
log_likelihood_ratio = 31 - shifts1;
} else if (h0_test > 0) {
log_likelihood_ratio = shifts0 - 31;
} else {
ratvec = 0;
log_likelihood_ratio = 0;
}
// VAD decision with spectrum weighting.
dotVal += WEBRTC_SPL_MUL_16_16(ratvec, kSpectrumWeight[n]);
sum_log_likelihood_ratios += WEBRTC_SPL_MUL_16_16(log_likelihood_ratio,
kSpectrumWeight[n]);
// Individual channel test.
if ((ratvec << 2) > individualTest) {
if ((log_likelihood_ratio << 2) > individualTest) {
vadflag = 1;
}
// Probabilities used when updating model.
if (h0 > 0) {
tmp32_1 = probn[0] & 0xFFFFF000; // Q27
tmp32_2 = (tmp32_1 << 2); // Q29
ngprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp32_2, h0); // Q14
tmp1_s32 = noise_probability[0] & 0xFFFFF000; // Q27
tmp2_s32 = (tmp1_s32 << 2); // Q29
ngprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, h0); // Q14
ngprvec[pos + 1] = 16384 - ngprvec[pos];
} else {
ngprvec[pos] = 16384;
@ -217,9 +217,9 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
// Probabilities used when updating model.
if (h1 > 0) {
tmp32_1 = probs[0] & 0xFFFFF000;
tmp32_2 = (tmp32_1 << 2);
sgprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp32_2, h1);
tmp1_s32 = speech_probability[0] & 0xFFFFF000;
tmp2_s32 = (tmp1_s32 << 2);
sgprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, h1);
sgprvec[pos + 1] = 16384 - sgprvec[pos];
} else {
sgprvec[pos] = 0;
@ -228,7 +228,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
}
// Overall test.
if (dotVal >= totalTest) {
if (sum_log_likelihood_ratios >= totalTest) {
vadflag |= 1;
}
@ -244,14 +244,16 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
for (n = 0; n < kNumChannels; n++) {
pos = (n << 1);
// Get minimum value in past which is used for long term correction.
backval = WebRtcVad_FindMinimum(self, feature_vector[n], n); // Q4
// Get minimum value in past which is used for long term correction in Q4.
feature_minimum = WebRtcVad_FindMinimum(self, feature_vector[n], n);
// Compute the "global" mean, that is the sum of the two means weighted.
nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n + kNumChannels],
*(nmean1ptr + kNumChannels));
tmp16_1 = (int16_t) (nmid >> 6); // Q8
noise_global_mean = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n],
*nmean1ptr); // Q7 * Q7
noise_global_mean +=
WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n + kNumChannels],
*(nmean1ptr + kNumChannels));
tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8
for (k = 0; k < kNumGaussians; k++) {
nr = pos + k;
@ -269,7 +271,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
nmk2 = nmk;
if (!vadflag) {
// deltaN = (x-mu)/sigma^2
// ngprvec[k] = probn[k]/(probn[0] + probn[1])
// ngprvec[k] = |noise_probability[k]| /
// (|noise_probability[0]| + |noise_probability[1]|)
// (Q14 * Q11 >> 11) = Q14.
delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr], deltaN[nr],
@ -282,34 +285,36 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
// Long term correction of the noise mean.
// Q8 - Q8 = Q8.
ndelt = (backval << 4) - tmp16_1;
ndelt = (feature_minimum << 4) - tmp1_s16;
// Q7 + (Q8 * Q8) >> 9 = Q7.
nmk3 = nmk2 + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ndelt, kBackEta, 9);
// Control that the noise mean does not drift to much.
tmp16 = (int16_t) ((k + 5) << 7);
if (nmk3 < tmp16) {
nmk3 = tmp16;
tmp_s16 = (int16_t) ((k + 5) << 7);
if (nmk3 < tmp_s16) {
nmk3 = tmp_s16;
}
tmp16 = (int16_t) ((72 + k - n) << 7);
if (nmk3 > tmp16) {
nmk3 = tmp16;
tmp_s16 = (int16_t) ((72 + k - n) << 7);
if (nmk3 > tmp_s16) {
nmk3 = tmp_s16;
}
*nmean2ptr = nmk3;
if (vadflag) {
// Update speech mean vector:
// |deltaS| = (x-mu)/sigma^2
// sgprvec[k] = probn[k]/(probn[0] + probn[1])
// sgprvec[k] = |speech_probability[k]| /
// (|speech_probability[0]| + |speech_probability[1]|)
// (Q14 * Q11) >> 11 = Q14.
delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr], deltaS[nr],
11);
// Q14 * Q15 >> 21 = Q8.
tmp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt, kSpeechUpdateConst,
21);
tmp_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt,
kSpeechUpdateConst,
21);
// Q7 + (Q8 >> 1) = Q7. With rounding.
smk2 = smk + ((tmp16 + 1) >> 1);
smk2 = smk + ((tmp_s16 + 1) >> 1);
// Control that the speech mean does not drift to much.
maxmu = maxspe + 640;
@ -322,30 +327,30 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
*smean2ptr = smk2; // Q7.
// (Q7 >> 3) = Q4. With rounding.
tmp16 = ((smk + 4) >> 3);
tmp_s16 = ((smk + 4) >> 3);
tmp16 = feature_vector[n] - tmp16; // Q4
tmp_s16 = feature_vector[n] - tmp_s16; // Q4
// (Q11 * Q4 >> 3) = Q12.
tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3);
tmp32_2 = tmp32_1 - 4096;
tmp16 = (sgprvec[nr] >> 2);
tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp_s16, 3);
tmp2_s32 = tmp1_s32 - 4096;
tmp_s16 = (sgprvec[nr] >> 2);
// (Q14 >> 2) * Q12 = Q24.
tmp32_1 = tmp16 * tmp32_2;
tmp1_s32 = tmp_s16 * tmp2_s32;
tmp32_2 = (tmp32_1 >> 4); // Q20
tmp2_s32 = (tmp1_s32 >> 4); // Q20
// 0.1 * Q20 / Q7 = Q13.
if (tmp32_2 > 0) {
tmp16 = (int16_t) WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
if (tmp2_s32 > 0) {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10);
} else {
tmp16 = (int16_t) WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
tmp16 = -tmp16;
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10);
tmp_s16 = -tmp_s16;
}
// Divide by 4 giving an update factor of 0.025 (= 0.1 / 4).
// Note that division by 4 equals shift by 2, hence,
// (Q13 >> 8) = (Q13 >> 6) / 4 = Q7.
tmp16 += 128; // Rounding.
ssk += (tmp16 >> 8);
tmp_s16 += 128; // Rounding.
ssk += (tmp_s16 >> 8);
if (ssk < kMinStd) {
ssk = kMinStd;
}
@ -354,26 +359,26 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
// Update GMM variance vectors.
// deltaN * (feature_vector[n] - nmk) - 1
// Q4 - (Q7 >> 3) = Q4.
tmp16 = feature_vector[n] - (nmk >> 3);
tmp_s16 = feature_vector[n] - (nmk >> 3);
// (Q11 * Q4 >> 3) = Q12.
tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096;
tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp_s16, 3) - 4096;
// (Q14 >> 2) * Q12 = Q24.
tmp16 = ((ngprvec[nr] + 2) >> 2);
tmp32_2 = tmp16 * tmp32_1;
tmp_s16 = ((ngprvec[nr] + 2) >> 2);
tmp2_s32 = tmp_s16 * tmp1_s32;
// Q20 * approx 0.001 (2^-10=0.0009766), hence,
// (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
tmp32_1 = (tmp32_2 >> 14);
tmp1_s32 = (tmp2_s32 >> 14);
// Q20 / Q7 = Q13.
if (tmp32_1 > 0) {
tmp16 = (int16_t) WebRtcSpl_DivW32W16(tmp32_1, nsk);
if (tmp1_s32 > 0) {
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk);
} else {
tmp16 = (int16_t) WebRtcSpl_DivW32W16(-tmp32_1, nsk);
tmp16 = -tmp16;
tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk);
tmp_s16 = -tmp_s16;
}
tmp16 += 32; // Rounding
nsk += (tmp16 >> 6); // Q13 >> 6 = Q7.
tmp_s16 += 32; // Rounding
nsk += (tmp_s16 >> 6); // Q13 >> 6 = Q7.
if (nsk < kMinStd) {
nsk = kMinStd;
}
@ -382,67 +387,73 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
}
// Separate models if they are too close.
// |nmid| in Q14 (= Q7 * Q7).
nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr);
nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n + kNumChannels],
*nmean2ptr);
// |noise_global_mean| in Q14 (= Q7 * Q7).
noise_global_mean = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n],
*nmean1ptr);
noise_global_mean +=
WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n + kNumChannels],
*nmean2ptr);
// |smid| in Q14 (= Q7 * Q7).
smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr);
smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n + kNumChannels],
*smean2ptr);
// |speech_global_mean| in Q14 (= Q7 * Q7).
speech_global_mean = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n],
*smean1ptr);
speech_global_mean +=
WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n + kNumChannels],
*smean2ptr);
// |diff| = "global" speech mean - "global" noise mean.
// (Q14 >> 9) - (Q14 >> 9) = Q5.
diff = (int16_t) (smid >> 9) - (int16_t) (nmid >> 9);
diff = (int16_t) (speech_global_mean >> 9) -
(int16_t) (noise_global_mean >> 9);
if (diff < kMinimumDifference[n]) {
tmp16 = kMinimumDifference[n] - diff;
tmp_s16 = kMinimumDifference[n] - diff;
// |tmp16_1| = ~0.8 * (kMinimumDifference - diff) in Q7.
// |tmp16_2| = ~0.2 * (kMinimumDifference - diff) in Q7.
tmp16_1 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
tmp16_2 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
// |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
// |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
tmp1_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(13, tmp_s16, 2);
tmp2_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(3, tmp_s16, 2);
// First Gaussian, speech model.
tmp16 = tmp16_1 + *smean1ptr;
*smean1ptr = tmp16;
smid = WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n]);
tmp_s16 = tmp1_s16 + *smean1ptr;
*smean1ptr = tmp_s16;
speech_global_mean = WEBRTC_SPL_MUL_16_16(tmp_s16,
kSpeechDataWeights[n]);
// Second Gaussian, speech model.
tmp16 = tmp16_1 + *smean2ptr;
*smean2ptr = tmp16;
smid += WEBRTC_SPL_MUL_16_16(tmp16,
kSpeechDataWeights[n + kNumChannels]);
tmp_s16 = tmp1_s16 + *smean2ptr;
*smean2ptr = tmp_s16;
speech_global_mean +=
WEBRTC_SPL_MUL_16_16(tmp_s16, kSpeechDataWeights[n + kNumChannels]);
// First Gaussian, noise model.
tmp16 = *nmean1ptr - tmp16_2;
*nmean1ptr = tmp16;
nmid = WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n]);
tmp_s16 = *nmean1ptr - tmp2_s16;
*nmean1ptr = tmp_s16;
noise_global_mean = WEBRTC_SPL_MUL_16_16(tmp_s16, kNoiseDataWeights[n]);
// Second Gaussian, noise model.
tmp16 = *nmean2ptr - tmp16_2;
*nmean2ptr = tmp16;
nmid += WEBRTC_SPL_MUL_16_16(tmp16,
kNoiseDataWeights[n + kNumChannels]);
tmp_s16 = *nmean2ptr - tmp2_s16;
*nmean2ptr = tmp_s16;
noise_global_mean +=
WEBRTC_SPL_MUL_16_16(tmp_s16, kNoiseDataWeights[n + kNumChannels]);
}
// Control that the speech & noise means do not drift to much.
maxspe = kMaximumSpeech[n];
tmp16_2 = (int16_t) (smid >> 7);
if (tmp16_2 > maxspe) {
tmp2_s16 = (int16_t) (speech_global_mean >> 7);
if (tmp2_s16 > maxspe) {
// Upper limit of speech model.
tmp16_2 -= maxspe;
tmp2_s16 -= maxspe;
*smean1ptr -= tmp16_2;
*smean2ptr -= tmp16_2;
*smean1ptr -= tmp2_s16;
*smean2ptr -= tmp2_s16;
}
tmp16_2 = (int16_t) (nmid >> 7);
if (tmp16_2 > kMaximumNoise[n]) {
tmp16_2 -= kMaximumNoise[n];
tmp2_s16 = (int16_t) (noise_global_mean >> 7);
if (tmp2_s16 > kMaximumNoise[n]) {
tmp2_s16 -= kMaximumNoise[n];
*nmean1ptr -= tmp16_2;
*nmean2ptr -= tmp16_2;
*nmean1ptr -= tmp2_s16;
*nmean2ptr -= tmp2_s16;
}
nmean1ptr++;