Refactor vad_filterbank: Local functions made static.

Review URL: http://webrtc-codereview.appspot.com/342002 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1357 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-01-09 13:11:29 +00:00
parent d8d85711c7
commit d9c87b2146
2 changed files with 155 additions and 228 deletions
--- a/src/common_audio/vad/vad_filterbank.c
+++ b/src/common_audio/vad/vad_filterbank.c
@@ -19,26 +19,32 @@
 #include "typedefs.h"
 #include "vad_defines.h"

-// Constant 160*log10(2) in Q9
+// Constant 160*log10(2) in Q9.
 static const int16_t kLogConst = 24660;

-// Coefficients used by WebRtcVad_HpOutput, Q14
+// Coefficients used by HighPassFilter, Q14.
 static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
 static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };

-// Allpass filter coefficients, upper and lower, in Q15
+// Allpass filter coefficients, upper and lower, in Q15.
 // Upper: 0.64, Lower: 0.17
 static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };

-// Adjustment for division with two in WebRtcVad_SplitFilter
+// Adjustment for division by two in SplitFilter.
 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };

-void WebRtcVad_HpOutput(int16_t* in_vector,
-                        int in_vector_length,
-                        int16_t* filter_state,
-                        int16_t* out_vector) {
+// High pass filtering, with a cut-off frequency at 80 Hz, if the |in_vector| is
+// sampled at 500 Hz.
+//
+// - in_vector        [i]   : Input audio data sampled at 500 Hz.
+// - in_vector_length [i]   : Length of input and output data.
+// - filter_state     [i/o] : State of the filter.
+// - out_vector       [o]   : Output audio data in the frequency interval
+//                            80 - 250 Hz.
+static void HighPassFilter(const int16_t* in_vector, int in_vector_length,
+                           int16_t* filter_state, int16_t* out_vector) {
  int i;
-  int16_t* in_ptr = in_vector;
+  const int16_t* in_ptr = in_vector;
  int16_t* out_ptr = out_vector;
  int32_t tmp32 = 0;

@@ -70,11 +76,19 @@ void WebRtcVad_HpOutput(int16_t* in_vector,
  }
 }

-void WebRtcVad_Allpass(int16_t* in_vector,
-                       int16_t filter_coefficients,
-                       int vector_length,
-                       int16_t* filter_state,
-                       int16_t* out_vector) {
+// All pass filtering of |in_vector|, used before splitting the signal into two
+// frequency bands (low pass vs high pass).
+// Note that |in_vector| and |out_vector| can NOT correspond to the same
+// address.
+//
+// - in_vector          [i]   : Input audio signal given in Q0.
+// - vector_length      [i]   : Length of input and output data.
+// - filter_coefficient [i]   : Given in Q15.
+// - filter_state       [i/o] : State of the filter given in Q(-1).
+// - out_vector         [o]   : Output audio signal given in Q(-1).
+static void AllPassFilter(const int16_t* in_vector, int vector_length,
+                          int16_t filter_coefficient, int16_t* filter_state,
+                          int16_t* out_vector) {
  // The filter can only cause overflow (in the w16 output variable)
  // if more than 4 consecutive input numbers are of maximum value and
  // have the same sign as the impulse response's first taps.
@@ -87,11 +101,11 @@ void WebRtcVad_Allpass(int16_t* in_vector,
  int32_t state32 = WEBRTC_SPL_LSHIFT_W32((int32_t) (*filter_state), 16); // Q31

  for (i = 0; i < vector_length; i++) {
-    tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
+    tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficient, (*in_vector));
    tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
    *out_vector++ = tmp16;
    in32 = WEBRTC_SPL_LSHIFT_W32(((int32_t) (*in_vector)), 14);
-    state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
+    state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficient, tmp16);
    state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
    in_vector += 2;
  }
@@ -99,23 +113,32 @@ void WebRtcVad_Allpass(int16_t* in_vector,
  *filter_state = (int16_t) WEBRTC_SPL_RSHIFT_W32(state32, 16);
 }

-void WebRtcVad_SplitFilter(int16_t* in_vector,
-                           int in_vector_length,
-                           int16_t* upper_state,
-                           int16_t* lower_state,
-                           int16_t* out_vector_hp,
-                           int16_t* out_vector_lp) {
+// Splits |in_vector| into |out_vector_hp| and |out_vector_lp| corresponding to
+// an upper (high pass) part and a lower (low pass) part respectively.
+//
+// - in_vector        [i]   : Input audio data to be split into two frequency
+//                            bands.
+// - in_vector_length [i]   : Length of |in_vector|.
+// - upper_state      [i/o] : State of the upper filter, given in Q(-1).
+// - lower_state      [i/o] : State of the lower filter, given in Q(-1).
+// - out_vector_hp    [o]   : Output audio data of the upper half of the
+//                            spectrum. The length is |in_vector_length| / 2.
+// - out_vector_lp    [o]   : Output audio data of the lower half of the
+//                            spectrum. The length is |in_vector_length| / 2.
+static void SplitFilter(const int16_t* in_vector, int in_vector_length,
+                        int16_t* upper_state, int16_t* lower_state,
+                        int16_t* out_vector_hp, int16_t* out_vector_lp) {
  int16_t tmp_out;
  int i;
  int half_length = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);

  // All-pass filtering upper branch
-  WebRtcVad_Allpass(&in_vector[0], kAllPassCoefsQ15[0], half_length,
-                    upper_state, out_vector_hp);
+  AllPassFilter(&in_vector[0], half_length, kAllPassCoefsQ15[0], upper_state,
+                out_vector_hp);

  // All-pass filtering lower branch
-  WebRtcVad_Allpass(&in_vector[1], kAllPassCoefsQ15[1], half_length,
-                    lower_state, out_vector_lp);
+  AllPassFilter(&in_vector[1], half_length, kAllPassCoefsQ15[1], lower_state,
+                out_vector_lp);

  // Make LP and HP signals
  for (i = 0; i < half_length; i++) {
@@ -125,113 +148,24 @@ void WebRtcVad_SplitFilter(int16_t* in_vector,
  }
 }

-int16_t WebRtcVad_get_features(VadInstT* inst,
-                               int16_t* in_vector,
-                               int frame_size,
-                               int16_t* out_vector) {
-  int16_t power = 0;
-  // We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to
-  // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
-  // have at most 120 samples after the first split and at most 60 samples after
-  // the second split.
-  int16_t hp_120[120], lp_120[120];
-  int16_t hp_60[60], lp_60[60];
-  // Initialize variables for the first SplitFilter().
-  int length = frame_size;
-  int frequency_band = 0;
-  int16_t* in_ptr = in_vector;
-  int16_t* hp_out_ptr = hp_120;
-  int16_t* lp_out_ptr = lp_120;
-
-  // Split at 2000 Hz and downsample
-  WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
-                        &inst->lower_state[frequency_band], hp_out_ptr,
-                        lp_out_ptr);
-
-  // Split at 3000 Hz and downsample
-  frequency_band = 1;
-  in_ptr = hp_120;
-  hp_out_ptr = hp_60;
-  lp_out_ptr = lp_60;
-  length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
-
-  WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
-                        &inst->lower_state[frequency_band], hp_out_ptr,
-                        lp_out_ptr);
-
-  // Energy in 3000 Hz - 4000 Hz
-  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
-  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[5], &power,
-                        &out_vector[5]);
-
-  // Energy in 2000 Hz - 3000 Hz
-  WebRtcVad_LogOfEnergy(lp_60, length, kOffsetVector[4], &power,
-                        &out_vector[4]);
-
-  // Split at 1000 Hz and downsample
-  frequency_band = 2;
-  in_ptr = lp_120;
-  hp_out_ptr = hp_60;
-  lp_out_ptr = lp_60;
-  length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
-  WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
-                        &inst->lower_state[frequency_band], hp_out_ptr,
-                        lp_out_ptr);
-
-  // Energy in 1000 Hz - 2000 Hz
-  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
-  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[3], &power,
-                        &out_vector[3]);
-
-  // Split at 500 Hz
-  frequency_band = 3;
-  in_ptr = lp_60;
-  hp_out_ptr = hp_120;
-  lp_out_ptr = lp_120;
-
-  WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
-                        &inst->lower_state[frequency_band], hp_out_ptr,
-                        lp_out_ptr);
-
-  // Energy in 500 Hz - 1000 Hz
-  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
-  WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[2], &power,
-                        &out_vector[2]);
-
-  // Split at 250 Hz
-  frequency_band = 4;
-  in_ptr = lp_120;
-  hp_out_ptr = hp_60;
-  lp_out_ptr = lp_60;
-
-  WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
-                        &inst->lower_state[frequency_band], hp_out_ptr,
-                        lp_out_ptr);
-
-  // Energy in 250 Hz - 500 Hz
-  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
-  WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[1], &power,
-                        &out_vector[1]);
-
-  // Remove DC and LFs
-  WebRtcVad_HpOutput(lp_60, length, inst->hp_filter_state, hp_120);
-
-  // Power in 80 Hz - 250 Hz
-  WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[0], &power,
-                        &out_vector[0]);
-
-  return power;
-}
-
-void WebRtcVad_LogOfEnergy(int16_t* vector,
-                           int vector_length,
-                           int16_t offset,
-                           int16_t* power,
-                           int16_t* log_energy) {
+// Calculates the energy in dB of |in_vector|, and also updates an overall
+// |power| if necessary.
+//
+// - in_vector      [i]   : Input audio data for energy calculation.
+// - vector_length  [i]   : Length of input data.
+// - offset         [i]   : Offset value added to |log_energy|.
+// - power          [i/o] : Signal power updated with the energy from
+//                          |in_vector|.
+//                          NOTE: |power| is only updated if
+//                          |power| < MIN_ENERGY.
+// - log_energy     [o]   : 10 * log10("energy of |in_vector|") given in Q4.
+static void LogOfEnergy(const int16_t* in_vector, int vector_length,
+                        int16_t offset, int16_t* power, int16_t* log_energy) {
  int shfts = 0, shfts2 = 0;
  int16_t energy_s16 = 0;
  int16_t zeros = 0, frac = 0, log2 = 0;
-  int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
+  int32_t energy = WebRtcSpl_Energy((int16_t*) in_vector, vector_length,
+                                    &shfts);

  if (energy > 0) {

@@ -276,3 +210,88 @@ void WebRtcVad_LogOfEnergy(int16_t* vector,
    }
  }
 }
+
+int16_t WebRtcVad_get_features(VadInstT* inst, const int16_t* in_vector,
+                               int frame_size, int16_t* out_vector) {
+  int16_t power = 0;
+  // We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to
+  // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
+  // have at most 120 samples after the first split and at most 60 samples after
+  // the second split. The four buffers are reused by the later, shorter stages.
+  int16_t hp_120[120], lp_120[120];
+  int16_t hp_60[60], lp_60[60];
+  // Initialize variables for the first SplitFilter().
+  int length = frame_size;
+  int frequency_band = 0;
+  const int16_t* in_ptr = in_vector;
+  int16_t* hp_out_ptr = hp_120;
+  int16_t* lp_out_ptr = lp_120;
+
+  // Split at 2000 Hz and downsample; upper half in |hp_120|, lower in |lp_120|.
+  SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
+              &inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Split the upper half (|hp_120|) at 3000 Hz and downsample.
+  frequency_band = 1;
+  in_ptr = hp_120;
+  hp_out_ptr = hp_60;
+  lp_out_ptr = lp_60;
+  length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+
+  SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
+              &inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 3000 Hz - 4000 Hz
+  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
+  LogOfEnergy(hp_60, length, kOffsetVector[5], &power, &out_vector[5]);
+
+  // Energy in 2000 Hz - 3000 Hz
+  LogOfEnergy(lp_60, length, kOffsetVector[4], &power, &out_vector[4]);
+
+  // Split the lower half (|lp_120|) at 1000 Hz and downsample.
+  frequency_band = 2;
+  in_ptr = lp_120;
+  hp_out_ptr = hp_60;
+  lp_out_ptr = lp_60;
+  length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
+  SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
+              &inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 1000 Hz - 2000 Hz
+  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
+  LogOfEnergy(hp_60, length, kOffsetVector[3], &power, &out_vector[3]);
+
+  // Split at 500 Hz and downsample; the 120-sample buffers are reused as output.
+  frequency_band = 3;
+  in_ptr = lp_60;
+  hp_out_ptr = hp_120;
+  lp_out_ptr = lp_120;
+
+  SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
+              &inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 500 Hz - 1000 Hz
+  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
+  LogOfEnergy(hp_120, length, kOffsetVector[2], &power, &out_vector[2]);
+
+  // Split at 250 Hz and downsample; the 60-sample buffers are reused as output.
+  frequency_band = 4;
+  in_ptr = lp_120;
+  hp_out_ptr = hp_60;
+  lp_out_ptr = lp_60;
+
+  SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
+              &inst->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 250 Hz - 500 Hz
+  length = WEBRTC_SPL_RSHIFT_W16(length, 1);
+  LogOfEnergy(hp_60, length, kOffsetVector[1], &power, &out_vector[1]);
+
+  // Remove DC and low frequencies (below 80 Hz) from the lowest band.
+  HighPassFilter(lp_60, length, inst->hp_filter_state, hp_120);
+
+  // Energy in 80 Hz - 250 Hz
+  LogOfEnergy(hp_120, length, kOffsetVector[0], &power, &out_vector[0]);
+
+  return power;
+}
--- a/src/common_audio/vad/vad_filterbank.h
+++ b/src/common_audio/vad/vad_filterbank.h
@@ -19,89 +19,19 @@
 #include "typedefs.h"
 #include "vad_core.h"

-// TODO(bjornv): Move local functions to vad_filterbank.c and make static.
-/****************************************************************************
- * WebRtcVad_HpOutput(...)
- *
- * This function removes DC from the lowest frequency band
- *
- * Input:
- *      - in_vector         : Samples in the frequency interval 0 - 250 Hz
- *      - in_vector_length  : Length of input and output vector
- *      - filter_state      : Current state of the filter
- *
- * Output:
- *      - out_vector        : Samples in the frequency interval 80 - 250 Hz
- *      - filter_state      : Updated state of the filter
- *
- */
-void WebRtcVad_HpOutput(int16_t* in_vector,
-                        int in_vector_length,
-                        int16_t* filter_state,
-                        int16_t* out_vector);
-
-/****************************************************************************
- * WebRtcVad_Allpass(...)
- *
- * This function is used when before splitting a speech file into 
- * different frequency bands
- *
- * Note! Do NOT let the arrays in_vector and out_vector correspond to the same address.
- *
- * Input:
- *      - in_vector             : (Q0)
- *      - filter_coefficients   : (Q15)
- *      - vector_length         : Length of input and output vector
- *      - filter_state          : Current state of the filter (Q(-1))
- *
- * Output:
- *      - out_vector            : Output speech signal (Q(-1))
- *      - filter_state          : Updated state of the filter (Q(-1))
- *
- */
-void WebRtcVad_Allpass(int16_t* in_vector,
-                       int16_t filter_coefficients,
-                       int vector_length,
-                       int16_t* filter_state,
-                       int16_t* outw16);
-
-/****************************************************************************
- * WebRtcVad_SplitFilter(...)
- *
- * This function is used when before splitting a speech file into 
- * different frequency bands
- *
- * Input:
- *      - in_vector         : Input signal to be split into two frequency bands.
- *      - upper_state       : Current state of the upper filter
- *      - lower_state       : Current state of the lower filter
- *      - in_vector_length  : Length of input vector
- *
- * Output:
- *      - out_vector_hp     : Upper half of the spectrum
- *      - out_vector_lp     : Lower half of the spectrum
- *      - upper_state       : Updated state of the upper filter
- *      - lower_state       : Updated state of the lower filter
- *
- */
-void WebRtcVad_SplitFilter(int16_t* in_vector,
-                           int in_vector_length,
-                           int16_t* upper_state,
-                           int16_t* lower_state,
-                           int16_t* out_vector_hp,
-                           int16_t* out_vector_lp);
-
+// TODO(bjornv): Rename to CalcFeatures() or similar. Update the comments and
+// parameter order at the same time.
 /****************************************************************************
 * WebRtcVad_get_features(...)
 *
- * This function is used to get the logarithm of the power of each of the 
+ * This function is used to get the logarithm of the power of each of the
 * 6 frequency bands used by the VAD:
 *        80 Hz - 250 Hz
 *        250 Hz - 500 Hz
 *        500 Hz - 1000 Hz
 *        1000 Hz - 2000 Hz
 *        2000 Hz - 3000 Hz
- *        3000 Hz - 4000 Hz 
+ *        3000 Hz - 4000 Hz
 *
 * Input:
 *      - inst        : Pointer to VAD instance
@@ -110,35 +40,13 @@ void WebRtcVad_SplitFilter(int16_t* in_vector,
 *
 * Output:
 *      - out_vector  : 10*log10(power in each freq. band), Q4
- *    
+ *
 * Return: total power in the signal (NOTE! This value is not exact since it
 *         is only used in a comparison).
 */
 int16_t WebRtcVad_get_features(VadInstT* inst,
-                               int16_t* in_vector,
+                               const int16_t* in_vector,
                               int frame_size,
                               int16_t* out_vector);

-/****************************************************************************
- * WebRtcVad_LogOfEnergy(...)
- *
- * This function is used to get the logarithm of the power of one frequency band.
- *
- * Input:
- *      - vector            : Input speech samples for one frequency band
- *      - offset            : Offset value for the current frequency band
- *      - vector_length     : Length of input vector
- *
- * Output:
- *      - log_energy        : 10*log10(energy);
- *      - power             : Update total power in speech frame. NOTE! This value
- *                            is not exact since it is only used in a comparison.
- *     
- */
-void WebRtcVad_LogOfEnergy(int16_t* vector,
-                           int vector_length,
-                           int16_t offset,
-                           int16_t* power,
-                           int16_t* log_energy);
-
 #endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_