Refactoring of vad_sp.[h/c]

- define guard name change
- changed to stdint
- added unit test
- removed shift macros
- style changes
- comments
Review URL: http://webrtc-codereview.appspot.com/336004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1326 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-01-04 09:15:12 +00:00
parent cc33737a80
commit 226c5a1a95
4 changed files with 238 additions and 250 deletions

View File

@ -28,11 +28,14 @@ typedef struct VadInstT_
WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
// TODO(bjornv): Change to |frame_count|.
WebRtc_Word32 frame_counter; WebRtc_Word32 frame_counter;
WebRtc_Word16 over_hang; // Over Hang WebRtc_Word16 over_hang; // Over Hang
WebRtc_Word16 num_of_speech; WebRtc_Word16 num_of_speech;
// TODO(bjornv): Change to |age_vector|.
WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
// TODO(bjornv): Change to |median|.
WebRtc_Word16 mean_value[NUM_CHANNELS]; WebRtc_Word16 mean_value[NUM_CHANNELS];
WebRtc_Word16 upper_state[5]; WebRtc_Word16 upper_state[5];
WebRtc_Word16 lower_state[5]; WebRtc_Word16 lower_state[5];

View File

@ -8,229 +8,174 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
/*
* This file includes the implementation of the VAD internal calls for
* Downsampling and FindMinimum.
* For function call descriptions; See vad_sp.h.
*/
#include "vad_sp.h" #include "vad_sp.h"
#include <assert.h>
#include "signal_processing_library.h" #include "signal_processing_library.h"
#include "typedefs.h" #include "typedefs.h"
#include "vad_defines.h" #include "vad_defines.h"
// Allpass filter coefficients, upper and lower, in Q13 // Allpass filter coefficients, upper and lower, in Q13.
// Upper: 0.64, Lower: 0.17 // Upper: 0.64, Lower: 0.17.
static const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13 static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13
// Downsampling filter based on the splitting filter and the allpass functions // TODO(bjornv): Move this function to vad_filterbank.c.
// in vad_filterbank.c // Downsampling filter based on splitting filter and allpass functions.
void WebRtcVad_Downsampling(WebRtc_Word16* signal_in, void WebRtcVad_Downsampling(int16_t* signal_in,
WebRtc_Word16* signal_out, int16_t* signal_out,
WebRtc_Word32* filter_state, int32_t* filter_state,
int inlen) int in_length) {
{ int16_t tmp16_1 = 0, tmp16_2 = 0;
WebRtc_Word16 tmp16_1, tmp16_2; int32_t tmp32_1 = filter_state[0];
WebRtc_Word32 tmp32_1, tmp32_2; int32_t tmp32_2 = filter_state[1];
int n, halflen; int n = 0;
int half_length = (in_length >> 1); // Downsampling by 2 gives half length.
// Downsampling by 2 and get two branches // Filter coefficients in Q13, filter state in Q0.
halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1); for (n = 0; n < half_length; n++) {
// All-pass filtering upper branch.
tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], *signal_in, 14));
*signal_out = tmp16_1;
tmp32_1 = (int32_t) (*signal_in++) -
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], tmp16_1, 12);
tmp32_1 = filter_state[0]; // All-pass filtering lower branch.
tmp32_2 = filter_state[1]; tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], *signal_in, 14));
// Filter coefficients in Q13, filter state in Q0 *signal_out++ += tmp16_2;
for (n = 0; n < halflen; n++) tmp32_2 = (int32_t) (*signal_in++) -
{ WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], tmp16_2, 12);
// All-pass filtering upper branch }
tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1) // Store the filter states.
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), filter_state[0] = tmp32_1;
*signal_in, 14); filter_state[1] = tmp32_2;
*signal_out = tmp16_1;
tmp32_1 = (WebRtc_Word32)(*signal_in++)
- (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
// All-pass filtering lower branch
tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
*signal_in, 14);
*signal_out++ += tmp16_2;
tmp32_2 = (WebRtc_Word32)(*signal_in++)
- (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
}
filter_state[0] = tmp32_1;
filter_state[1] = tmp32_2;
} }
WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, // Inserts |feature_value| into |low_value_vector|, if it is one of the 16
WebRtc_Word16 x, // smallest values the last 100 frames. Then calculates and returns the median
int n) // of the five smallest values.
{ int16_t WebRtcVad_FindMinimum(VadInstT* self,
int i, j, k, II = -1, offset; int16_t feature_value,
WebRtc_Word16 meanV, alpha; int channel) {
WebRtc_Word32 tmp32, tmp32_1; int i = 0, j = 0;
WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3; int position = -1;
// Offset to beginning of the 16 minimum values in memory.
int offset = (channel << 4);
int16_t current_median = 1600;
int16_t alpha = 0;
int32_t tmp32 = 0;
// Pointer to memory for the 16 minimum values and the age of each value of
// the |channel|.
int16_t* age_ptr = &self->index_vector[offset];
int16_t* value_ptr = &self->low_value_vector[offset];
int16_t *p1, *p2, *p3;
// Offset to beginning of the 16 minimum values in memory assert(channel < NUM_CHANNELS);
offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
// Pointer to memory for the 16 minimum values and the age of each value // Each value in |low_value_vector| is getting 1 loop older.
idxptr = &inst->index_vector[offset]; // Update age of each value in |age_ptr|, and remove old values.
valptr = &inst->low_value_vector[offset]; for (i = 0; i < 16; i++) {
p3 = age_ptr + i;
if (*p3 != 100) {
*p3 += 1;
} else {
p1 = value_ptr + i + 1;
p2 = p3 + 1;
for (j = i; j < 16; j++) {
*(value_ptr + j) = *p1++;
*(age_ptr + j) = *p2++;
}
*(age_ptr + 15) = 101;
*(value_ptr + 15) = 10000;
}
}
// Each value in low_value_vector is getting 1 loop older. // Check if |feature_value| is smaller than any of the values in
// Update age of each value in indexVal, and remove old values. // |low_value_vector|. If so, find the |position| where to insert the new
for (i = 0; i < 16; i++) // value.
{ if (feature_value < *(value_ptr + 7)) {
p3 = idxptr + i; if (feature_value < *(value_ptr + 3)) {
if (*p3 != 100) if (feature_value < *(value_ptr + 1)) {
{ if (feature_value < *value_ptr) {
*p3 += 1; position = 0;
} else } else {
{ position = 1;
p1 = valptr + i + 1;
p2 = p3 + 1;
for (j = i; j < 16; j++)
{
*(valptr + j) = *p1++;
*(idxptr + j) = *p2++;
}
*(idxptr + 15) = 101;
*(valptr + 15) = 10000;
} }
} else if (feature_value < *(value_ptr + 2)) {
position = 2;
} else {
position = 3;
}
} else if (feature_value < *(value_ptr + 5)) {
if (feature_value < *(value_ptr + 4)) {
position = 4;
} else {
position = 5;
}
} else if (feature_value < *(value_ptr + 6)) {
position = 6;
} else {
position = 7;
} }
} else if (feature_value < *(value_ptr + 15)) {
// Check if x smaller than any of the values in low_value_vector. if (feature_value < *(value_ptr + 11)) {
// If so, find position. if (feature_value < *(value_ptr + 9)) {
if (x < *(valptr + 7)) if (feature_value < *(value_ptr + 8)) {
{ position = 8;
if (x < *(valptr + 3)) } else {
{ position = 9;
if (x < *(valptr + 1))
{
if (x < *valptr)
{
II = 0;
} else
{
II = 1;
}
} else if (x < *(valptr + 2))
{
II = 2;
} else
{
II = 3;
}
} else if (x < *(valptr + 5))
{
if (x < *(valptr + 4))
{
II = 4;
} else
{
II = 5;
}
} else if (x < *(valptr + 6))
{
II = 6;
} else
{
II = 7;
}
} else if (x < *(valptr + 15))
{
if (x < *(valptr + 11))
{
if (x < *(valptr + 9))
{
if (x < *(valptr + 8))
{
II = 8;
} else
{
II = 9;
}
} else if (x < *(valptr + 10))
{
II = 10;
} else
{
II = 11;
}
} else if (x < *(valptr + 13))
{
if (x < *(valptr + 12))
{
II = 12;
} else
{
II = 13;
}
} else if (x < *(valptr + 14))
{
II = 14;
} else
{
II = 15;
} }
} else if (feature_value < *(value_ptr + 10)) {
position = 10;
} else {
position = 11;
}
} else if (feature_value < *(value_ptr + 13)) {
if (feature_value < *(value_ptr + 12)) {
position = 12;
} else {
position = 13;
}
} else if (feature_value < *(value_ptr + 14)) {
position = 14;
} else {
position = 15;
} }
}
// Put new min value on right position and shift bigger values up // If we have a new small value, put it in the correct position and shift
if (II > -1) // larger values up.
{ if (position > -1) {
for (i = 15; i > II; i--) for (i = 15; i > position; i--) {
{ j = i - 1;
k = i - 1; *(value_ptr + i) = *(value_ptr + j);
*(valptr + i) = *(valptr + k); *(age_ptr + i) = *(age_ptr + j);
*(idxptr + i) = *(idxptr + k);
}
*(valptr + II) = x;
*(idxptr + II) = 1;
} }
*(value_ptr + position) = feature_value;
*(age_ptr + position) = 1;
}
meanV = 0; // Get |current_median|.
if ((inst->frame_counter) > 4) if (self->frame_counter > 2) {
{ current_median = *(value_ptr + 2);
j = 5; } else if (self->frame_counter > 0) {
} else current_median = *value_ptr;
{ }
j = inst->frame_counter;
// Smooth the median value.
if (self->frame_counter > 0) {
if (current_median < self->mean_value[channel]) {
alpha = (int16_t) ALPHA1; // 0.2 in Q15.
} else {
alpha = (int16_t) ALPHA2; // 0.99 in Q15.
} }
}
tmp32 = WEBRTC_SPL_MUL_16_16(alpha + 1, self->mean_value[channel]);
tmp32 += WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, current_median);
tmp32 += 16384;
self->mean_value[channel] = (int16_t) (tmp32 >> 15);
if (j > 2) return self->mean_value[channel];
{
meanV = *(valptr + 2);
} else if (j > 0)
{
meanV = *valptr;
} else
{
meanV = 1600;
}
if (inst->frame_counter > 0)
{
if (meanV < inst->mean_value[n])
{
alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
} else
{
alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
}
} else
{
alpha = 0;
}
tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
tmp32 += tmp32_1;
tmp32 += 16384;
inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
return inst->mean_value[n];
} }

View File

@ -9,52 +9,46 @@
*/ */
/* // This file includes specific signal processing tools used in vad_core.c.
* This header file includes the VAD internal calls for Downsampling and FindMinimum.
* Specific function calls are given below.
*/
#ifndef WEBRTC_VAD_SP_H_ #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#define WEBRTC_VAD_SP_H_ #define WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#include "typedefs.h"
#include "vad_core.h" #include "vad_core.h"
/**************************************************************************** // Downsamples the signal by a factor 2, eg. 32->16 or 16->8.
* WebRtcVad_Downsampling(...) //
* // Inputs:
* Downsamples the signal a factor 2, eg. 32->16 or 16->8 // - signal_in : Input signal.
* // - in_length : Length of input signal in samples.
* Input: //
* - signal_in : Input signal // Input & Output:
* - in_length : Length of input signal in samples // - filter_state : Current filter states of the two all-pass filters. The
* // |filter_state| is updated after all samples have been
* Input & Output: // processed.
* - filter_state : Filter state for first all-pass filters //
* // Output:
* Output: // - signal_out : Downsampled signal (of length |in_length| / 2).
* - signal_out : Downsampled signal (of length len/2) void WebRtcVad_Downsampling(int16_t* signal_in,
*/ int16_t* signal_out,
void WebRtcVad_Downsampling(WebRtc_Word16* signal_in, int32_t* filter_state,
WebRtc_Word16* signal_out,
WebRtc_Word32* filter_state,
int in_length); int in_length);
/**************************************************************************** // Updates and returns the smoothed feature minimum. As minimum we use the
* WebRtcVad_FindMinimum(...) // median of the five smallest feature values in a 100 frames long window.
* //
* Find the five lowest values of x in 100 frames long window. Return a mean // Inputs:
* value of these five values. // - feature_value : New feature value to update with.
* // - channel : Channel number.
* Input: //
* - feature_value : Feature value // Input & Output:
* - channel : Channel number // - handle : State information of the VAD.
* //
* Input & Output: // Returns:
* - inst : State information // : Smoothed minimum value for a moving window.
* int16_t WebRtcVad_FindMinimum(VadInstT* handle,
* Output: int16_t feature_value,
* return value : Weighted minimum value for a moving window. int channel);
*/
WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
#endif // WEBRTC_VAD_SP_H_ #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_

View File

@ -15,12 +15,12 @@
#include "typedefs.h" #include "typedefs.h"
#include "webrtc_vad.h" #include "webrtc_vad.h"
#ifdef __cplusplus // TODO(bjornv): Move the internal unit tests to separate files.
extern "C" extern "C" {
{ #include "vad_core.h"
#include "vad_gmm.h" #include "vad_gmm.h"
#include "vad_sp.h"
} }
#endif
namespace webrtc { namespace webrtc {
namespace { namespace {
@ -28,11 +28,12 @@ const int16_t kModes[] = { 0, 1, 2, 3 };
const size_t kModesSize = sizeof(kModes) / sizeof(*kModes); const size_t kModesSize = sizeof(kModes) / sizeof(*kModes);
// Rates we support. // Rates we support.
const int16_t kRates[] = { 8000, 16000, 32000 }; const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 };
const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates); const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
// Frame lengths we support. // Frame lengths we support.
const int16_t kMaxFrameLength = 960; const int16_t kMaxFrameLength = 960;
const int16_t kFrameLengths[] = { 80, 160, 240, 320, 480, 640, 960 }; const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640,
kMaxFrameLength };
const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths); const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths);
// Returns true if the rate and frame length combination is valid. // Returns true if the rate and frame length combination is valid.
@ -182,6 +183,51 @@ TEST_F(VadTest, GMMTests) {
EXPECT_EQ(13440, delta); EXPECT_EQ(13440, delta);
} }
// Unit test for the signal processing helpers declared in vad_sp.h:
// WebRtcVad_Downsampling() and WebRtcVad_FindMinimum().
TEST_F(VadTest, SPTests) {
  // The VAD state is kept on the stack. A fatal ASSERT_* returns from the test
  // body immediately, so a heap allocation released at the end of the test
  // would leak on failure, and its allocation result would need checking.
  VadInstT instance;
  VadInstT* handle = &instance;
  int16_t zeros[kMaxFrameLength] = { 0 };
  int32_t state[2] = { 0 };
  int16_t data_in[kMaxFrameLength];
  int16_t data_out[kMaxFrameLength];

  // Expected return values of WebRtcVad_FindMinimum(). The first 16 entries
  // belong to the increasing input 500 * (i + 1), the last 16 entries to the
  // constant input 12000 that is fed right after it in each iteration.
  const int16_t kReferenceMin[32] = {
      1600, 720, 509, 512, 532, 552, 570, 588,
      606, 624, 642, 659, 675, 691, 707, 723,
      1600, 544, 502, 522, 542, 561, 579, 597,
      615, 633, 651, 667, 683, 699, 715, 731
  };

  // Construct a speech signal that will trigger the VAD in all modes. It is
  // known that (i * i) will wrap around, but that doesn't matter in this case.
  for (int16_t i = 0; i < kMaxFrameLength; ++i) {
    data_in[i] = static_cast<int16_t>(i * i);
  }

  // Input values all zeros, expect all zeros out. The filter state must stay
  // at zero as well.
  WebRtcVad_Downsampling(zeros, data_out, state,
                         static_cast<int>(kMaxFrameLength));
  EXPECT_EQ(0, state[0]);
  EXPECT_EQ(0, state[1]);
  // Downsampling by two produces kMaxFrameLength / 2 output samples.
  for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
    EXPECT_EQ(0, data_out[i]);
  }

  // Make a simple non-zero data test. Only the resulting filter states are
  // compared against reference values.
  WebRtcVad_Downsampling(data_in, data_out, state,
                         static_cast<int>(kMaxFrameLength));
  EXPECT_EQ(207, state[0]);
  EXPECT_EQ(2270, state[1]);

  // WebRtcVad_FindMinimum() needs an initialized VAD state.
  ASSERT_EQ(0, WebRtcVad_InitCore(handle, 0));
  for (int16_t i = 0; i < 16; ++i) {
    int16_t value = 500 * (i + 1);
    for (int j = 0; j < NUM_CHANNELS; ++j) {
      // Use values both above and below initialized value.
      EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j));
      EXPECT_EQ(kReferenceMin[i + 16],
                WebRtcVad_FindMinimum(handle, 12000, j));
    }
    // WebRtcVad_FindMinimum() reads |frame_counter| but does not advance it,
    // so the test steps it manually once per simulated frame.
    handle->frame_counter++;
  }
}
// TODO(bjornv): Add a process test, run on file. // TODO(bjornv): Add a process test, run on file.
} // namespace } // namespace