VAD refactoring: Removed macro file.

In this CL we've replaced the VAD macros with static const or enum. Priority=low BUG= TEST=vad_unittest Review URL: https://webrtc-codereview.appspot.com/453004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1913 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-03-20 12:53:06 +00:00 · 2012-03-20 12:53:06 +00:00 · a496b03c78
commit a496b03c78
parent ac9fd8af09
8 changed files with 145 additions and 234 deletions
--- a/src/common_audio/vad/vad_core.c
+++ b/src/common_audio/vad/vad_core.c
@ -12,7 +12,6 @@

 #include "signal_processing_library.h"
 #include "typedefs.h"
-#include "vad_defines.h"
 #include "vad_filterbank.h"
 #include "vad_gmm.h"
 #include "vad_sp.h"
@ -66,6 +65,31 @@ static const int16_t kMinStd = 384;
 static const short kDefaultMode = 0;
 static const int kInitCheck = 42;

+// Constants used in WebRtcVad_set_mode_core().
+//
+// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms).
+//
+// Mode 0, Quality.
+static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 };
+static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 };
+static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 };
+static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 };
+// Mode 1, Low bitrate.
+static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 };
+static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 };
+static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 };
+static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 };
+// Mode 2, Aggressive.
+static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 };
+static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 };
+static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 };
+static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 };
+// Mode 3, Very aggressive.
+static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 };
+static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 };
+static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 };
+static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
+
 // Calculates the probabilities for both speech and background noise using
 // Gaussian Mixture Models. A hypothesis-test is performed to decide which type
 // of signal is most probable.
@ -90,13 +114,13 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
    WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
    WebRtc_Word16 delt, ndelt;
    WebRtc_Word16 maxspe, maxmu;
-    WebRtc_Word16 deltaN[NUM_TABLE_VALUES], deltaS[NUM_TABLE_VALUES];
-    WebRtc_Word16 ngprvec[NUM_TABLE_VALUES], sgprvec[NUM_TABLE_VALUES];
+    WebRtc_Word16 deltaN[kTableSize], deltaS[kTableSize];
+    WebRtc_Word16 ngprvec[kTableSize], sgprvec[kTableSize];
    WebRtc_Word32 h0test, h1test;
    WebRtc_Word32 tmp32_1, tmp32_2;
    WebRtc_Word32 dotVal;
    WebRtc_Word32 nmid, smid;
-    WebRtc_Word32 probn[NUM_MODELS], probs[NUM_MODELS];
+    WebRtc_Word32 probn[kNumGaussians], probs[kNumGaussians];
    WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
            *sstd1ptr, *sstd2ptr;
    WebRtc_Word16 overhead1, overhead2, individualTest, totalTest;
@ -125,22 +149,22 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
        totalTest = inst->total[2];
    }

-    if (total_power > MIN_ENERGY)
+    if (total_power > kMinEnergy)
    { // If signal present at all

        // Set pointers to the gaussian parameters
        nmean1ptr = &inst->noise_means[0];
-        nmean2ptr = &inst->noise_means[NUM_CHANNELS];
+        nmean2ptr = &inst->noise_means[kNumChannels];
        smean1ptr = &inst->speech_means[0];
-        smean2ptr = &inst->speech_means[NUM_CHANNELS];
+        smean2ptr = &inst->speech_means[kNumChannels];
        nstd1ptr = &inst->noise_stds[0];
-        nstd2ptr = &inst->noise_stds[NUM_CHANNELS];
+        nstd2ptr = &inst->noise_stds[kNumChannels];
        sstd1ptr = &inst->speech_stds[0];
-        sstd2ptr = &inst->speech_stds[NUM_CHANNELS];
+        sstd2ptr = &inst->speech_stds[kNumChannels];

        vadflag = 0;
        dotVal = 0;
-        for (n = 0; n < NUM_CHANNELS; n++)
+        for (n = 0; n < kNumChannels; n++)
        { // For all channels

            pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
@ -152,7 +176,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
            probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1);
            tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++,
                                                    &deltaN[pos + 1]);
-            probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + NUM_CHANNELS] * tmp32_1);
+            probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + kNumChannels] * tmp32_1);
            h0test = probn[0] + probn[1]; // Q27
            h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15

@ -162,7 +186,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
            probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1);
            tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++,
                                                    &deltaS[pos + 1]);
-            probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + NUM_CHANNELS] * tmp32_1);
+            probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + kNumChannels] * tmp32_1);
            h1test = probs[0] + probs[1]; // Q27
            h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15

@ -235,7 +259,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
        maxspe = 12800;

        // Update the model's parameters
-        for (n = 0; n < NUM_CHANNELS; n++)
+        for (n = 0; n < kNumChannels; n++)
        {

            pos = WEBRTC_SPL_LSHIFT_W16(n, 1);
@ -245,19 +269,19 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,

            // Compute the "global" mean, that is the sum of the two means weighted
            nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
-            nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS],
-                    *(nmean1ptr+NUM_CHANNELS));
+            nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+kNumChannels],
+                    *(nmean1ptr+kNumChannels));
            tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8

-            for (k = 0; k < NUM_MODELS; k++)
+            for (k = 0; k < kNumGaussians; k++)
            {

                nr = pos + k;

-                nmean2ptr = nmean1ptr + k * NUM_CHANNELS;
-                smean2ptr = smean1ptr + k * NUM_CHANNELS;
-                nstd2ptr = nstd1ptr + k * NUM_CHANNELS;
-                sstd2ptr = sstd1ptr + k * NUM_CHANNELS;
+                nmean2ptr = nmean1ptr + k * kNumChannels;
+                smean2ptr = smean1ptr + k * kNumChannels;
+                nstd2ptr = nstd1ptr + k * kNumChannels;
+                sstd2ptr = sstd1ptr + k * kNumChannels;
                nmk = *nmean2ptr;
                smk = *smean2ptr;
                nsk = *nstd2ptr;
@ -376,11 +400,11 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,

            // Separate models if they are too close - nmid in Q14
            nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr);
-            nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+NUM_CHANNELS], *nmean2ptr);
+            nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+kNumChannels], *nmean2ptr);

            // smid in Q14
            smid = WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n], *smean1ptr);
-            smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+NUM_CHANNELS], *smean2ptr);
+            smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+kNumChannels], *smean2ptr);

            // diff = "global" speech mean - "global" noise mean
            diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9);
@ -405,7 +429,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                // Second Gauss, speech model
                tmp16 = tmp16_1 + *smean2ptr;
                *smean2ptr = tmp16;
-                smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+NUM_CHANNELS]);
+                smid += WEBRTC_SPL_MUL_16_16(tmp16, kSpeechDataWeights[n+kNumChannels]);

                // First Gauss, noise model
                tmp16 = *nmean1ptr - tmp16_2;
@ -416,7 +440,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                // Second Gauss, noise model
                tmp16 = *nmean2ptr - tmp16_2;
                *nmean2ptr = tmp16;
-                nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+NUM_CHANNELS]);
+                nmid += WEBRTC_SPL_MUL_16_16(tmp16, kNoiseDataWeights[n+kNumChannels]);
            }

            // Control that the speech & noise means do not drift too much
@ -491,7 +515,7 @@ int WebRtcVad_InitCore(VadInstT* self) {
         sizeof(self->downsampling_filter_states));

  // Read initial PDF parameters.
-  for (i = 0; i < NUM_TABLE_VALUES; i++) {
+  for (i = 0; i < kTableSize; i++) {
    self->noise_means[i] = kNoiseDataMeans[i];
    self->speech_means[i] = kSpeechDataMeans[i];
    self->noise_stds[i] = kNoiseDataStds[i];
@ -499,7 +523,7 @@ int WebRtcVad_InitCore(VadInstT* self) {
  }

  // Initialize Index and Minimum value vectors.
-  for (i = 0; i < 16 * NUM_CHANNELS; i++) {
+  for (i = 0; i < 16 * kNumChannels; i++) {
    self->low_value_vector[i] = 10000;
    self->index_vector[i] = 0;
  }
@ -512,7 +536,7 @@ int WebRtcVad_InitCore(VadInstT* self) {
  memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));

  // Initialize mean value memory, for WebRtcVad_FindMinimum().
-  for (i = 0; i < NUM_CHANNELS; i++) {
+  for (i = 0; i < kNumChannels; i++) {
    self->mean_value[i] = 1600;
  }

@ -527,83 +551,60 @@ int WebRtcVad_InitCore(VadInstT* self) {
 }

 // Set aggressiveness mode
-int WebRtcVad_set_mode_core(VadInstT *inst, int mode)
-{
+int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
+  int return_value = 0;

-    if (mode == 0)
-    {
-        // Quality mode
-        inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
-        inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
-        inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
-        inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
-        inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
-        inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
+  switch (mode) {
+    case 0:
+      // Quality mode.
+      memcpy(self->over_hang_max_1, kOverHangMax1Q,
+             sizeof(self->over_hang_max_1));
+      memcpy(self->over_hang_max_2, kOverHangMax2Q,
+             sizeof(self->over_hang_max_2));
+      memcpy(self->individual, kLocalThresholdQ,
+             sizeof(self->individual));
+      memcpy(self->total, kGlobalThresholdQ,
+             sizeof(self->total));
+      break;
+    case 1:
+      // Low bitrate mode.
+      memcpy(self->over_hang_max_1, kOverHangMax1LBR,
+             sizeof(self->over_hang_max_1));
+      memcpy(self->over_hang_max_2, kOverHangMax2LBR,
+             sizeof(self->over_hang_max_2));
+      memcpy(self->individual, kLocalThresholdLBR,
+             sizeof(self->individual));
+      memcpy(self->total, kGlobalThresholdLBR,
+             sizeof(self->total));
+      break;
+    case 2:
+      // Aggressive mode.
+      memcpy(self->over_hang_max_1, kOverHangMax1AGG,
+             sizeof(self->over_hang_max_1));
+      memcpy(self->over_hang_max_2, kOverHangMax2AGG,
+             sizeof(self->over_hang_max_2));
+      memcpy(self->individual, kLocalThresholdAGG,
+             sizeof(self->individual));
+      memcpy(self->total, kGlobalThresholdAGG,
+             sizeof(self->total));
+      break;
+    case 3:
+      // Very aggressive mode.
+      memcpy(self->over_hang_max_1, kOverHangMax1VAG,
+             sizeof(self->over_hang_max_1));
+      memcpy(self->over_hang_max_2, kOverHangMax2VAG,
+             sizeof(self->over_hang_max_2));
+      memcpy(self->individual, kLocalThresholdVAG,
+             sizeof(self->individual));
+      memcpy(self->total, kGlobalThresholdVAG,
+             sizeof(self->total));
+      break;
+    default:
+      return_value = -1;
+      break;
+  }

-        inst->individual[0] = INDIVIDUAL_10MS_Q;
-        inst->individual[1] = INDIVIDUAL_20MS_Q;
-        inst->individual[2] = INDIVIDUAL_30MS_Q;
-
-        inst->total[0] = TOTAL_10MS_Q;
-        inst->total[1] = TOTAL_20MS_Q;
-        inst->total[2] = TOTAL_30MS_Q;
-    } else if (mode == 1)
-    {
-        // Low bitrate mode
-        inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
-        inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
-        inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
-        inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
-        inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
-        inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst
-
-        inst->individual[0] = INDIVIDUAL_10MS_LBR;
-        inst->individual[1] = INDIVIDUAL_20MS_LBR;
-        inst->individual[2] = INDIVIDUAL_30MS_LBR;
-
-        inst->total[0] = TOTAL_10MS_LBR;
-        inst->total[1] = TOTAL_20MS_LBR;
-        inst->total[2] = TOTAL_30MS_LBR;
-    } else if (mode == 2)
-    {
-        // Aggressive mode
-        inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
-        inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
-        inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
-        inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
-        inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
-        inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst
-
-        inst->individual[0] = INDIVIDUAL_10MS_AGG;
-        inst->individual[1] = INDIVIDUAL_20MS_AGG;
-        inst->individual[2] = INDIVIDUAL_30MS_AGG;
-
-        inst->total[0] = TOTAL_10MS_AGG;
-        inst->total[1] = TOTAL_20MS_AGG;
-        inst->total[2] = TOTAL_30MS_AGG;
-    } else if (mode == 3)
-    {
-        // Very aggressive mode
-        inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
-        inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
-        inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
-        inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
-        inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
-        inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst
-
-        inst->individual[0] = INDIVIDUAL_10MS_VAG;
-        inst->individual[1] = INDIVIDUAL_20MS_VAG;
-        inst->individual[2] = INDIVIDUAL_30MS_VAG;
-
-        inst->total[0] = TOTAL_10MS_VAG;
-        inst->total[1] = TOTAL_20MS_VAG;
-        inst->total[2] = TOTAL_30MS_VAG;
-    } else
-    {
-        return -1;
-    }
-
-    return 0;
+  return return_value;
 }

 // Calculate VAD decision by first extracting feature values and then calculate
@ -650,7 +651,7 @@ WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame
 WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
                                    int frame_length)
 {
-    WebRtc_Word16 feature_vector[NUM_CHANNELS], total_power;
+    WebRtc_Word16 feature_vector[kNumChannels], total_power;

    // Get power in the bands
    total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,
--- a/src/common_audio/vad/vad_core.h
+++ b/src/common_audio/vad/vad_core.h
@ -17,26 +17,30 @@
 #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_

 #include "typedefs.h"
-#include "vad_defines.h"
+
+enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
+enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
+enum { kTableSize = kNumChannels * kNumGaussians };
+enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.

 typedef struct VadInstT_
 {

    WebRtc_Word16 vad;
    WebRtc_Word32 downsampling_filter_states[4];
-    WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
-    WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
-    WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
-    WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
+    WebRtc_Word16 noise_means[kTableSize];
+    WebRtc_Word16 speech_means[kTableSize];
+    WebRtc_Word16 noise_stds[kTableSize];
+    WebRtc_Word16 speech_stds[kTableSize];
    // TODO(bjornv): Change to |frame_count|.
    WebRtc_Word32 frame_counter;
    WebRtc_Word16 over_hang; // Over Hang
    WebRtc_Word16 num_of_speech;
    // TODO(bjornv): Change to |age_vector|.
-    WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
-    WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
+    WebRtc_Word16 index_vector[16 * kNumChannels];
+    WebRtc_Word16 low_value_vector[16 * kNumChannels];
    // TODO(bjornv): Change to |median|.
-    WebRtc_Word16 mean_value[NUM_CHANNELS];
+    WebRtc_Word16 mean_value[kNumChannels];
    WebRtc_Word16 upper_state[5];
    WebRtc_Word16 lower_state[5];
    WebRtc_Word16 hp_filter_state[4];
@ -75,7 +79,7 @@ int WebRtcVad_InitCore(VadInstT* self);
 *                    -1 - Error
 */

-int WebRtcVad_set_mode_core(VadInstT* inst, int mode);
+int WebRtcVad_set_mode_core(VadInstT* self, int mode);

 /****************************************************************************
 * WebRtcVad_CalcVad32khz(...) 
--- a/src/common_audio/vad/vad_defines.h
+++ b/src/common_audio/vad/vad_defines.h
@ -1,93 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This header file includes the macros used in VAD.
- */
-
-#ifndef WEBRTC_VAD_DEFINES_H_
-#define WEBRTC_VAD_DEFINES_H_
-
-#define NUM_CHANNELS        6   // Eight frequency bands
-#define NUM_MODELS          2   // Number of Gaussian models
-#define NUM_TABLE_VALUES    NUM_CHANNELS * NUM_MODELS
-
-#define MIN_ENERGY          10
-#define ALPHA1              6553    // 0.2 in Q15
-#define ALPHA2              32439   // 0.99 in Q15
-// Mode 0, Quality thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_Q   24
-#define INDIVIDUAL_20MS_Q   21      // (log10(2)*66)<<2 ~=16
-#define INDIVIDUAL_30MS_Q   24
-
-#define TOTAL_10MS_Q        57
-#define TOTAL_20MS_Q        48
-#define TOTAL_30MS_Q        57
-
-#define OHMAX1_10MS_Q       8  // Max Overhang 1
-#define OHMAX2_10MS_Q       14 // Max Overhang 2
-#define OHMAX1_20MS_Q       4  // Max Overhang 1
-#define OHMAX2_20MS_Q       7  // Max Overhang 2
-#define OHMAX1_30MS_Q       3
-#define OHMAX2_30MS_Q       5
-
-// Mode 1, Low bitrate thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_LBR 37
-#define INDIVIDUAL_20MS_LBR 32
-#define INDIVIDUAL_30MS_LBR 37
-
-#define TOTAL_10MS_LBR      100
-#define TOTAL_20MS_LBR      80
-#define TOTAL_30MS_LBR      100
-
-#define OHMAX1_10MS_LBR     8  // Max Overhang 1
-#define OHMAX2_10MS_LBR     14 // Max Overhang 2
-#define OHMAX1_20MS_LBR     4
-#define OHMAX2_20MS_LBR     7
-
-#define OHMAX1_30MS_LBR     3
-#define OHMAX2_30MS_LBR     5
-
-// Mode 2, Very aggressive thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_AGG 82
-#define INDIVIDUAL_20MS_AGG 78
-#define INDIVIDUAL_30MS_AGG 82
-
-#define TOTAL_10MS_AGG      285 //580
-#define TOTAL_20MS_AGG      260
-#define TOTAL_30MS_AGG      285
-
-#define OHMAX1_10MS_AGG     6  // Max Overhang 1
-#define OHMAX2_10MS_AGG     9  // Max Overhang 2
-#define OHMAX1_20MS_AGG     3
-#define OHMAX2_20MS_AGG     5
-
-#define OHMAX1_30MS_AGG     2
-#define OHMAX2_30MS_AGG     3
-
-// Mode 3, Super aggressive thresholds - Different thresholds for the different frame lengths
-#define INDIVIDUAL_10MS_VAG 94
-#define INDIVIDUAL_20MS_VAG 94
-#define INDIVIDUAL_30MS_VAG 94
-
-#define TOTAL_10MS_VAG      1100 //1700
-#define TOTAL_20MS_VAG      1050
-#define TOTAL_30MS_VAG      1100
-
-#define OHMAX1_10MS_VAG     6  // Max Overhang 1
-#define OHMAX2_10MS_VAG     9  // Max Overhang 2
-#define OHMAX1_20MS_VAG     3
-#define OHMAX2_20MS_VAG     5
-
-#define OHMAX1_30MS_VAG     2
-#define OHMAX2_30MS_VAG     3
-
-#endif // WEBRTC_VAD_DEFINES_H_
--- a/src/common_audio/vad/vad_filterbank.c
+++ b/src/common_audio/vad/vad_filterbank.c
@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@ -14,7 +14,6 @@

 #include "signal_processing_library.h"
 #include "typedefs.h"
-#include "vad_defines.h"

 // Constants used in LogOfEnergy().
 static const int16_t kLogConst = 24660;  // 160*log10(2) in Q9.
@ -151,7 +150,7 @@ static void SplitFilter(const int16_t* data_in, int data_length,
 // - total_energy [i/o] : An external energy updated with the energy of
 //                        |data_in|.
 //                        NOTE: |total_energy| is only updated if
-//                        |total_energy| <= MIN_ENERGY.
+//                        |total_energy| <= |kMinEnergy|.
 // - log_energy   [o]   : 10 * log10("energy of |data_in|") given in Q4.
 static void LogOfEnergy(const int16_t* data_in, int data_length,
                        int16_t offset, int16_t* total_energy,
@ -228,18 +227,18 @@ static void LogOfEnergy(const int16_t* data_in, int data_length,
  *log_energy += offset;

  // Update the approximate |total_energy| with the energy of |data_in|, if
-  // |total_energy| has not exceeded MIN_ENERGY. |total_energy| is used as an
+  // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
  // energy indicator in GmmProbability() in vad_core.c.
-  if (*total_energy <= MIN_ENERGY) {
+  if (*total_energy <= kMinEnergy) {
    if (tot_rshifts >= 0) {
-      // We know by construction that the |energy| > MIN_ENERGY in Q0, so add an
-      // arbitrary value such that |total_energy| exceeds MIN_ENERGY.
-      *total_energy += MIN_ENERGY + 1;
+      // We know by construction that the |energy| > |kMinEnergy| in Q0, so add
+      // an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
+      *total_energy += kMinEnergy + 1;
    } else {
      // By construction |energy| is represented by 15 bits, hence any number of
      // right shifted |energy| will fit in an int16_t. In addition, adding the
      // value to |total_energy| is wrap around safe as long as
-      // MIN_ENERGY < 8192.
+      // |kMinEnergy| < 8192.
      *total_energy += (int16_t) (energy >> -tot_rshifts);  // Q0.
    }
  }
@ -266,7 +265,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,

  assert(data_length >= 0);
  assert(data_length <= 240);
-  assert(4 < NUM_CHANNELS - 1);  // Checking maximum |frequency_band|.
+  assert(4 < kNumChannels - 1);  // Checking maximum |frequency_band|.

  // Split at 2000 Hz and downsample.
  SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
--- a/src/common_audio/vad/vad_filterbank.h
+++ b/src/common_audio/vad/vad_filterbank.h
@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@ -19,7 +19,7 @@
 #include "vad_core.h"

 // Takes |data_length| samples of |data_in| and calculates the logarithm of the
-// energy of each of the |NUM_CHANNELS| = 6 frequency bands used by the VAD:
+// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
 //        80 Hz - 250 Hz
 //        250 Hz - 500 Hz
 //        500 Hz - 1000 Hz
@ -30,7 +30,7 @@
 // The values are given in Q4 and written to |features|. Further, an approximate
 // overall energy is returned. The return value is used in
 // GmmProbability() as a signal indicator, hence it is arbitrary above
-// the threshold MIN_ENERGY.
+// the threshold |kMinEnergy|.
 //
 // - self         [i/o] : State information of the VAD.
 // - data_in      [i]   : Input audio data, for feature extraction.
--- a/src/common_audio/vad/vad_filterbank_unittest.cc
+++ b/src/common_audio/vad/vad_filterbank_unittest.cc
@ -16,7 +16,6 @@

 extern "C" {
 #include "vad_core.h"
-#include "vad_defines.h"
 #include "vad_filterbank.h"
 }

@ -27,14 +26,14 @@ enum { kNumValidFrameLengths = 3 };
 TEST_F(VadTest, vad_filterbank) {
  VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
  static const int16_t kReference[kNumValidFrameLengths] = { 48, 11, 11 };
-  static const int16_t kFeatures[kNumValidFrameLengths * NUM_CHANNELS] = {
+  static const int16_t kFeatures[kNumValidFrameLengths * kNumChannels] = {
      1213, 759, 587, 462, 434, 272,
      1479, 1385, 1291, 1200, 1103, 1099,
      1732, 1692, 1681, 1629, 1436, 1436
  };
-  static const int16_t kOffsetVector[NUM_CHANNELS] = {
+  static const int16_t kOffsetVector[kNumChannels] = {
      368, 368, 272, 176, 176, 176 };
-  int16_t features[NUM_CHANNELS];
+  int16_t features[kNumChannels];

  // Construct a speech signal that will trigger the VAD in all modes. It is
  // known that (i * i) will wrap around, but that doesn't matter in this case.
@ -50,8 +49,8 @@ TEST_F(VadTest, vad_filterbank) {
      EXPECT_EQ(kReference[frame_length_index],
                WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                            features));
-      for (int k = 0; k < NUM_CHANNELS; ++k) {
-        EXPECT_EQ(kFeatures[k + frame_length_index * NUM_CHANNELS],
+      for (int k = 0; k < kNumChannels; ++k) {
+        EXPECT_EQ(kFeatures[k + frame_length_index * kNumChannels],
                  features[k]);
      }
      frame_length_index++;
@ -66,7 +65,7 @@ TEST_F(VadTest, vad_filterbank) {
    if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
      EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                               features));
-      for (int k = 0; k < NUM_CHANNELS; ++k) {
+      for (int k = 0; k < kNumChannels; ++k) {
        EXPECT_EQ(kOffsetVector[k], features[k]);
      }
    }
@ -82,7 +81,7 @@ TEST_F(VadTest, vad_filterbank) {
      ASSERT_EQ(0, WebRtcVad_InitCore(self));
      EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
                                               features));
-      for (int k = 0; k < NUM_CHANNELS; ++k) {
+      for (int k = 0; k < kNumChannels; ++k) {
        EXPECT_EQ(kOffsetVector[k], features[k]);
      }
    }
--- a/src/common_audio/vad/vad_sp.c
+++ b/src/common_audio/vad/vad_sp.c
@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@ -14,11 +14,13 @@

 #include "signal_processing_library.h"
 #include "typedefs.h"
-#include "vad_defines.h"
+#include "vad_core.h"

 // Allpass filter coefficients, upper and lower, in Q13.
 // Upper: 0.64, Lower: 0.17.
-static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13
+static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13.
+static const int16_t kSmoothingDown = 6553;  // 0.2 in Q15.
+static const int16_t kSmoothingUp = 32439;  // 0.99 in Q15.

 // TODO(bjornv): Move this function to vad_filterbank.c.
 // Downsampling filter based on splitting filter and allpass functions.
@ -72,7 +74,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
  int16_t* value_ptr = &self->low_value_vector[offset];
  int16_t *p1, *p2, *p3;

-  assert(channel < NUM_CHANNELS);
+  assert(channel < kNumChannels);

  // Each value in |low_value_vector| is getting 1 loop older.
  // Update age of each value in |age_ptr|, and remove old values.
@ -167,9 +169,9 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
  // Smooth the median value.
  if (self->frame_counter > 0) {
    if (current_median < self->mean_value[channel]) {
-      alpha = (int16_t) ALPHA1;  // 0.2 in Q15.
+      alpha = kSmoothingDown;  // 0.2 in Q15.
    } else {
-      alpha = (int16_t) ALPHA2;  // 0.99 in Q15.
+      alpha = kSmoothingUp;  // 0.99 in Q15.
    }
  }
  tmp32 = WEBRTC_SPL_MUL_16_16(alpha + 1, self->mean_value[channel]);
--- a/src/common_audio/vad/vad_sp_unittest.cc
+++ b/src/common_audio/vad/vad_sp_unittest.cc
@ -16,7 +16,6 @@

 extern "C" {
 #include "vad_core.h"
-#include "vad_defines.h"
 #include "vad_sp.h"
 }

@ -63,7 +62,7 @@ TEST_F(VadTest, vad_sp) {
  // ordered.
  for (int16_t i = 0; i < 16; ++i) {
    int16_t value = 500 * (i + 1);
-    for (int j = 0; j < NUM_CHANNELS; ++j) {
+    for (int j = 0; j < kNumChannels; ++j) {
      // Use values both above and below initialized value.
      EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(self, value, j));
      EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(self, 12000, j));