Refactoring of vad_sp.[h/c]

- define guard name change
- changed to stdint
- added unit test
- removed shift macros
- style changes
- comments

Review URL: http://webrtc-codereview.appspot.com/336004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@1326 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-01-04 09:15:12 +00:00
parent cc33737a80
commit 226c5a1a95
4 changed files with 238 additions and 250 deletions
--- a/src/common_audio/vad/vad_core.h
+++ b/src/common_audio/vad/vad_core.h
@@ -28,11 +28,14 @@ typedef struct VadInstT_
    WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
    WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
    WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
    // TODO(bjornv): Change to |frame_count|.
    WebRtc_Word32 frame_counter;
    WebRtc_Word16 over_hang; // Over Hang
    WebRtc_Word16 num_of_speech;
    // TODO(bjornv): Change to |age_vector|.
    WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
    WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
    // TODO(bjornv): Change to |median|.
    WebRtc_Word16 mean_value[NUM_CHANNELS];
    WebRtc_Word16 upper_state[5];
    WebRtc_Word16 lower_state[5];
--- a/src/common_audio/vad/vad_sp.c
+++ b/src/common_audio/vad/vad_sp.c
@@ -8,229 +8,174 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */
 /*
 * This file includes the implementation of the VAD internal calls for
 * Downsampling and FindMinimum.
 * For function call descriptions; See vad_sp.h.
 */
 #include "vad_sp.h"
 #include <assert.h>
 #include "signal_processing_library.h"
 #include "typedefs.h"
 #include "vad_defines.h"
-// Allpass filter coefficients, upper and lower, in Q13
+// Allpass filter coefficients, upper and lower, in Q13.
-// Upper: 0.64, Lower: 0.17
+// Upper: 0.64, Lower: 0.17.
-static const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
+static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13
-// Downsampling filter based on the splitting filter and the allpass functions
+// TODO(bjornv): Move this function to vad_filterbank.c.
-// in vad_filterbank.c
+// Downsampling filter based on splitting filter and allpass functions.
-void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+void WebRtcVad_Downsampling(int16_t* signal_in,
-                            WebRtc_Word16* signal_out,
+                            int16_t* signal_out,
-                            WebRtc_Word32* filter_state,
+                            int32_t* filter_state,
-                            int inlen)
+                            int in_length) {
-{
+  int16_t tmp16_1 = 0, tmp16_2 = 0;
-    WebRtc_Word16 tmp16_1, tmp16_2;
+  int32_t tmp32_1 = filter_state[0];
-    WebRtc_Word32 tmp32_1, tmp32_2;
+  int32_t tmp32_2 = filter_state[1];
-    int n, halflen;
+  int n = 0;
  int half_length = (in_length >> 1);  // Downsampling by 2 gives half length.
-    // Downsampling by 2 and get two branches
+  // Filter coefficients in Q13, filter state in Q0.
-    halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1);
+  for (n = 0; n < half_length; n++) {
    // All-pass filtering upper branch.
    tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
        WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], *signal_in, 14));
    *signal_out = tmp16_1;
    tmp32_1 = (int32_t) (*signal_in++) -
        WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], tmp16_1, 12);
-    tmp32_1 = filter_state[0];
+    // All-pass filtering lower branch.
-    tmp32_2 = filter_state[1];
+    tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
-
+        WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], *signal_in, 14));
-    // Filter coefficients in Q13, filter state in Q0
+    *signal_out++ += tmp16_2;
-    for (n = 0; n < halflen; n++)
+    tmp32_2 = (int32_t) (*signal_in++) -
-    {
+        WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], tmp16_2, 12);
-        // All-pass filtering upper branch
+  }
-        tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1)
+  // Store the filter states.
-                + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]),
+  filter_state[0] = tmp32_1;
-                                                           *signal_in, 14);
+  filter_state[1] = tmp32_2;
        *signal_out = tmp16_1;
        tmp32_1 = (WebRtc_Word32)(*signal_in++)
                - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
        // All-pass filtering lower branch
        tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
                + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
                                                           *signal_in, 14);
        *signal_out++ += tmp16_2;
        tmp32_2 = (WebRtc_Word32)(*signal_in++)
                - (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
    }
    filter_state[0] = tmp32_1;
    filter_state[1] = tmp32_2;
 }
-WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst,
+// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
-                                    WebRtc_Word16 x,
+// smallest values of the last 100 frames. Then calculates and returns the median
-                                    int n)
+// of the five smallest values.
-{
+int16_t WebRtcVad_FindMinimum(VadInstT* self,
-    int i, j, k, II = -1, offset;
+                              int16_t feature_value,
-    WebRtc_Word16 meanV, alpha;
+                              int channel) {
-    WebRtc_Word32 tmp32, tmp32_1;
+  int i = 0, j = 0;
-    WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3;
+  int position = -1;
  // Offset to beginning of the 16 minimum values in memory.
  int offset = (channel << 4);
  int16_t current_median = 1600;
  int16_t alpha = 0;
  int32_t tmp32 = 0;
  // Pointer to memory for the 16 minimum values and the age of each value of
  // the |channel|.
  int16_t* age_ptr = &self->index_vector[offset];
  int16_t* value_ptr = &self->low_value_vector[offset];
  int16_t *p1, *p2, *p3;
-    // Offset to beginning of the 16 minimum values in memory
+  assert(channel < NUM_CHANNELS);
    offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
-    // Pointer to memory for the 16 minimum values and the age of each value
+  // Each value in |low_value_vector| is getting 1 loop older.
-    idxptr = &inst->index_vector[offset];
+  // Update age of each value in |age_ptr|, and remove old values.
-    valptr = &inst->low_value_vector[offset];
+  for (i = 0; i < 16; i++) {
    p3 = age_ptr + i;
    if (*p3 != 100) {
      *p3 += 1;
    } else {
      p1 = value_ptr + i + 1;
      p2 = p3 + 1;
      for (j = i; j < 16; j++) {
        *(value_ptr + j) = *p1++;
        *(age_ptr + j) = *p2++;
      }
      *(age_ptr + 15) = 101;
      *(value_ptr + 15) = 10000;
    }
  }
-    // Each value in low_value_vector is getting 1 loop older.
+  // Check if |feature_value| is smaller than any of the values in
-    // Update age of each value in indexVal, and remove old values.
+  // |low_value_vector|. If so, find the |position| where to insert the new
-    for (i = 0; i < 16; i++)
+  // value.
-    {
+  if (feature_value < *(value_ptr + 7)) {
-        p3 = idxptr + i;
+    if (feature_value < *(value_ptr + 3)) {
-        if (*p3 != 100)
+      if (feature_value < *(value_ptr + 1)) {
-        {
+        if (feature_value < *value_ptr) {
-            *p3 += 1;
+          position = 0;
-        } else
+        } else {
-        {
+          position = 1;
            p1 = valptr + i + 1;
            p2 = p3 + 1;
            for (j = i; j < 16; j++)
            {
                *(valptr + j) = *p1++;
                *(idxptr + j) = *p2++;
            }
            *(idxptr + 15) = 101;
            *(valptr + 15) = 10000;
        }
      } else if (feature_value < *(value_ptr + 2)) {
        position = 2;
      } else {
        position = 3;
      }
    } else if (feature_value < *(value_ptr + 5)) {
      if (feature_value < *(value_ptr + 4)) {
        position = 4;
      } else {
        position = 5;
      }
    } else if (feature_value < *(value_ptr + 6)) {
      position = 6;
    } else {
      position = 7;
    }
-
+  } else if (feature_value < *(value_ptr + 15)) {
-    // Check if x smaller than any of the values in low_value_vector.
+    if (feature_value < *(value_ptr + 11)) {
-    // If so, find position.
+      if (feature_value < *(value_ptr + 9)) {
-    if (x < *(valptr + 7))
+        if (feature_value < *(value_ptr + 8)) {
-    {
+          position = 8;
-        if (x < *(valptr + 3))
+        } else {
-        {
+          position = 9;
            if (x < *(valptr + 1))
            {
                if (x < *valptr)
                {
                    II = 0;
                } else
                {
                    II = 1;
                }
            } else if (x < *(valptr + 2))
            {
                II = 2;
            } else
            {
                II = 3;
            }
        } else if (x < *(valptr + 5))
        {
            if (x < *(valptr + 4))
            {
                II = 4;
            } else
            {
                II = 5;
            }
        } else if (x < *(valptr + 6))
        {
            II = 6;
        } else
        {
            II = 7;
        }
    } else if (x < *(valptr + 15))
    {
        if (x < *(valptr + 11))
        {
            if (x < *(valptr + 9))
            {
                if (x < *(valptr + 8))
                {
                    II = 8;
                } else
                {
                    II = 9;
                }
            } else if (x < *(valptr + 10))
            {
                II = 10;
            } else
            {
                II = 11;
            }
        } else if (x < *(valptr + 13))
        {
            if (x < *(valptr + 12))
            {
                II = 12;
            } else
            {
                II = 13;
            }
        } else if (x < *(valptr + 14))
        {
            II = 14;
        } else
        {
            II = 15;
        }
      } else if (feature_value < *(value_ptr + 10)) {
        position = 10;
      } else {
        position = 11;
      }
    } else if (feature_value < *(value_ptr + 13)) {
      if (feature_value < *(value_ptr + 12)) {
        position = 12;
      } else {
        position = 13;
      }
    } else if (feature_value < *(value_ptr + 14)) {
      position = 14;
    } else {
      position = 15;
    }
  }
-    // Put new min value on right position and shift bigger values up
+  // If we have a new small value, put it in the correct position and shift
-    if (II > -1)
+  // larger values up.
-    {
+  if (position > -1) {
-        for (i = 15; i > II; i--)
+    for (i = 15; i > position; i--) {
-        {
+      j = i - 1;
-            k = i - 1;
+      *(value_ptr + i) = *(value_ptr + j);
-            *(valptr + i) = *(valptr + k);
+      *(age_ptr + i) = *(age_ptr + j);
            *(idxptr + i) = *(idxptr + k);
        }
        *(valptr + II) = x;
        *(idxptr + II) = 1;
    }
    *(value_ptr + position) = feature_value;
    *(age_ptr + position) = 1;
  }
-    meanV = 0;
+  // Get |current_median|.
-    if ((inst->frame_counter) > 4)
+  if (self->frame_counter > 2) {
-    {
+    current_median = *(value_ptr + 2);
-        j = 5;
+  } else if (self->frame_counter > 0) {
-    } else
+    current_median = *value_ptr;
-    {
+  }
-        j = inst->frame_counter;
+
  // Smooth the median value.
  if (self->frame_counter > 0) {
    if (current_median < self->mean_value[channel]) {
      alpha = (int16_t) ALPHA1;  // 0.2 in Q15.
    } else {
      alpha = (int16_t) ALPHA2;  // 0.99 in Q15.
    }
  }
  tmp32 = WEBRTC_SPL_MUL_16_16(alpha + 1, self->mean_value[channel]);
  tmp32 += WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, current_median);
  tmp32 += 16384;
  self->mean_value[channel] = (int16_t) (tmp32 >> 15);
-    if (j > 2)
+  return self->mean_value[channel];
    {
        meanV = *(valptr + 2);
    } else if (j > 0)
    {
        meanV = *valptr;
    } else
    {
        meanV = 1600;
    }
    if (inst->frame_counter > 0)
    {
        if (meanV < inst->mean_value[n])
        {
            alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
        } else
        {
            alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
        }
    } else
    {
        alpha = 0;
    }
    tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
    tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
    tmp32 += tmp32_1;
    tmp32 += 16384;
    inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
    return inst->mean_value[n];
 }
--- a/src/common_audio/vad/vad_sp.h
+++ b/src/common_audio/vad/vad_sp.h
@@ -9,52 +9,46 @@
 */
-/*
+// This file includes specific signal processing tools used in vad_core.c.
 * This header file includes the VAD internal calls for Downsampling and FindMinimum.
 * Specific function calls are given below.
 */
-#ifndef WEBRTC_VAD_SP_H_
+#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
-#define WEBRTC_VAD_SP_H_
+#define WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
 #include "typedefs.h"
 #include "vad_core.h"
-/****************************************************************************
+// Downsamples the signal by a factor of 2, e.g. 32->16 or 16->8.
- * WebRtcVad_Downsampling(...)
+//
- *
+// Inputs:
- * Downsamples the signal a factor 2, eg. 32->16 or 16->8
+//      - signal_in     : Input signal.
- *
+//      - in_length     : Length of input signal in samples.
- * Input:
+//
- *      - signal_in     : Input signal
+// Input & Output:
- *      - in_length     : Length of input signal in samples
+//      - filter_state  : Current filter states of the two all-pass filters. The
- *
+//                        |filter_state| is updated after all samples have been
- * Input & Output:
+//                        processed.
- *      - filter_state  : Filter state for first all-pass filters
+//
- *
+// Output:
- * Output:
+//      - signal_out    : Downsampled signal (of length |in_length| / 2).
- *      - signal_out    : Downsampled signal (of length len/2)
+void WebRtcVad_Downsampling(int16_t* signal_in,
- */
+                            int16_t* signal_out,
-void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
+                            int32_t* filter_state,
                            WebRtc_Word16* signal_out,
                            WebRtc_Word32* filter_state,
                            int in_length);
-/****************************************************************************
+// Updates and returns the smoothed feature minimum. As minimum we use the
- * WebRtcVad_FindMinimum(...)
+// median of the five smallest feature values in a 100 frames long window.
- *
+//
- * Find the five lowest values of x in 100 frames long window. Return a mean
+// Inputs:
- * value of these five values.
+//      - feature_value : New feature value to update with.
- *
+//      - channel       : Channel number.
- * Input:
+//
- *      - feature_value : Feature value
+// Input & Output:
- *      - channel       : Channel number
+//      - handle        : State information of the VAD.
- *
+//
- * Input & Output:
+// Returns:
- *      - inst          : State information
+//                      : Smoothed minimum value for a moving window.
- *
+int16_t WebRtcVad_FindMinimum(VadInstT* handle,
- * Output:
+                              int16_t feature_value,
- *      return value    : Weighted minimum value for a moving window.
+                              int channel);
 */
 WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
-#endif // WEBRTC_VAD_SP_H_
+#endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
--- a/src/common_audio/vad/vad_unittest.cc
+++ b/src/common_audio/vad/vad_unittest.cc
@@ -15,12 +15,12 @@
 #include "typedefs.h"
 #include "webrtc_vad.h"
-#ifdef __cplusplus
+// TODO(bjornv): Move the internal unit tests to separate files.
-extern "C"
+extern "C" {
-{
+#include "vad_core.h"
 #include "vad_gmm.h"
 #include "vad_sp.h"
 }
 #endif
 namespace webrtc {
 namespace {
@@ -28,11 +28,12 @@ const int16_t kModes[] = { 0, 1, 2, 3 };
 const size_t kModesSize = sizeof(kModes) / sizeof(*kModes);
 // Rates we support.
-const int16_t kRates[] = { 8000, 16000, 32000 };
+const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 };
 const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
 // Frame lengths we support.
 const int16_t kMaxFrameLength = 960;
-const int16_t kFrameLengths[] = { 80, 160, 240, 320, 480, 640, 960 };
+const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640,
    kMaxFrameLength };
 const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths);
 // Returns true if the rate and frame length combination is valid.
@@ -182,6 +183,51 @@ TEST_F(VadTest, GMMTests) {
  EXPECT_EQ(13440, delta);
 }
 // Exercises the signal processing helpers declared in vad_sp.h:
 // WebRtcVad_Downsampling() and WebRtcVad_FindMinimum().
 TEST_F(VadTest, SPTests) {
  // VAD state used by the WebRtcVad_FindMinimum() part of the test; released
  // with free() at the end of the test body.
  // NOTE(review): the malloc() result is used without a NULL check.
  VadInstT* handle = (VadInstT*) malloc(sizeof(VadInstT));
  // All-zero input frame for the first downsampling call.
  int16_t zeros[kMaxFrameLength] = { 0 };
  // The two filter states passed to WebRtcVad_Downsampling(); start at zero
  // and are carried over from the first call to the second.
  int32_t state[2] = { 0 };
  int16_t data_in[kMaxFrameLength];
  int16_t data_out[kMaxFrameLength];
  // Expected return values of WebRtcVad_FindMinimum(). Entries [0, 15] are
  // compared against the calls fed with the increasing value 500 * (i + 1),
  // entries [16, 31] against the calls fed with the constant 12000. The same
  // expectation is applied to every channel.
  const int16_t kReferenceMin[32] = {
      1600, 720, 509, 512, 532, 552, 570, 588,
      606, 624, 642, 659, 675, 691, 707, 723,
      1600, 544, 502, 522, 542, 561, 579, 597,
      615, 633, 651, 667, 683, 699, 715, 731
  };

  // Construct a speech signal that will trigger the VAD in all modes. It is
  // known that (i * i) will wrap around, but that doesn't matter in this case.
  for (int16_t i = 0; i < kMaxFrameLength; ++i) {
    data_in[i] = (i * i);
  }
  // Input values all zeros, expect all zeros out.
  WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
  EXPECT_EQ(0, state[0]);
  EXPECT_EQ(0, state[1]);
  // Only the first half of |data_out| is checked, since downsampling by 2
  // produces kMaxFrameLength / 2 output samples.
  for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
    EXPECT_EQ(0, data_out[i]);
  }
  // Make a simple non-zero data test.
  // Only the resulting filter states are verified here; the contents of
  // |data_out| are not checked for this call.
  WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
  EXPECT_EQ(207, state[0]);
  EXPECT_EQ(2270, state[1]);

  // NOTE(review): ASSERT_EQ is a fatal assertion; if it fails the test body
  // presumably returns before free(handle) below - confirm whether that leak
  // matters for this test binary.
  ASSERT_EQ(0, WebRtcVad_InitCore(handle, 0));
  // Run 16 rounds. In each round every channel gets two updates: first the
  // round's ramp value, then 12000.
  for (int16_t i = 0; i < 16; ++i) {
    int16_t value = 500 * (i + 1);
    for (int j = 0; j < NUM_CHANNELS; ++j) {
      // Use values both above and below initialized value.
      EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j));
      EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(handle, 12000, j));
    }
    // The test advances the frame counter itself, once per round after all
    // channels have been updated; WebRtcVad_FindMinimum() only reads it.
    handle->frame_counter++;
  }
  free(handle);
 }
 // TODO(bjornv): Add a process test, run on file.
 }  // namespace