Refactoring of vad_sp.[h/c]

- define guard name change
- changed to stdint
- added unit test
- removed shift macros
- style changes
- comments
Review URL: http://webrtc-codereview.appspot.com/336004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1326 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-01-04 09:15:12 +00:00
parent cc33737a80
commit 226c5a1a95
4 changed files with 238 additions and 250 deletions

View File

@ -28,11 +28,14 @@ typedef struct VadInstT_
WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
// TODO(bjornv): Change to |frame_count|.
WebRtc_Word32 frame_counter;
WebRtc_Word16 over_hang; // Over Hang
WebRtc_Word16 num_of_speech;
// TODO(bjornv): Change to |age_vector|.
WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
// TODO(bjornv): Change to |median|.
WebRtc_Word16 mean_value[NUM_CHANNELS];
WebRtc_Word16 upper_state[5];
WebRtc_Word16 lower_state[5];

View File

@ -8,229 +8,174 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file includes the implementation of the VAD internal calls for
* Downsampling and FindMinimum.
* For function call descriptions, see vad_sp.h.
*/
#include "vad_sp.h"
#include <assert.h>
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
// Allpass filter coefficients, upper and lower, in Q13
// Upper: 0.64, Lower: 0.17
static const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
// Allpass filter coefficients, upper and lower, in Q13.
// Upper: 0.64, Lower: 0.17.
static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13
// Downsampling filter based on the splitting filter and the allpass functions
// in vad_filterbank.c
void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
WebRtc_Word16* signal_out,
WebRtc_Word32* filter_state,
int inlen)
{
WebRtc_Word16 tmp16_1, tmp16_2;
WebRtc_Word32 tmp32_1, tmp32_2;
int n, halflen;
// TODO(bjornv): Move this function to vad_filterbank.c.
// Downsampling filter based on splitting filter and allpass functions.
void WebRtcVad_Downsampling(int16_t* signal_in,
int16_t* signal_out,
int32_t* filter_state,
int in_length) {
int16_t tmp16_1 = 0, tmp16_2 = 0;
int32_t tmp32_1 = filter_state[0];
int32_t tmp32_2 = filter_state[1];
int n = 0;
int half_length = (in_length >> 1); // Downsampling by 2 gives half length.
// Downsampling by 2 and get two branches
halflen = WEBRTC_SPL_RSHIFT_W16(inlen, 1);
// Filter coefficients in Q13, filter state in Q0.
for (n = 0; n < half_length; n++) {
// All-pass filtering upper branch.
tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], *signal_in, 14));
*signal_out = tmp16_1;
tmp32_1 = (int32_t) (*signal_in++) -
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], tmp16_1, 12);
tmp32_1 = filter_state[0];
tmp32_2 = filter_state[1];
// Filter coefficients in Q13, filter state in Q0
for (n = 0; n < halflen; n++)
{
// All-pass filtering upper branch
tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_1, 1)
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]),
*signal_in, 14);
*signal_out = tmp16_1;
tmp32_1 = (WebRtc_Word32)(*signal_in++)
- (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[0]), tmp16_1, 12);
// All-pass filtering lower branch
tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32_2, 1)
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]),
*signal_in, 14);
*signal_out++ += tmp16_2;
tmp32_2 = (WebRtc_Word32)(*signal_in++)
- (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT((kAllPassCoefsQ13[1]), tmp16_2, 12);
}
filter_state[0] = tmp32_1;
filter_state[1] = tmp32_2;
// All-pass filtering lower branch.
tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], *signal_in, 14));
*signal_out++ += tmp16_2;
tmp32_2 = (int32_t) (*signal_in++) -
WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], tmp16_2, 12);
}
// Store the filter states.
filter_state[0] = tmp32_1;
filter_state[1] = tmp32_2;
}
WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst,
WebRtc_Word16 x,
int n)
{
int i, j, k, II = -1, offset;
WebRtc_Word16 meanV, alpha;
WebRtc_Word32 tmp32, tmp32_1;
WebRtc_Word16 *valptr, *idxptr, *p1, *p2, *p3;
// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
// smallest values of the last 100 frames. Then smooths and returns the median
// of the five smallest values.
int16_t WebRtcVad_FindMinimum(VadInstT* self,
int16_t feature_value,
int channel) {
int i = 0, j = 0;
int position = -1;
// Offset to beginning of the 16 minimum values in memory.
int offset = (channel << 4);
int16_t current_median = 1600;
int16_t alpha = 0;
int32_t tmp32 = 0;
// Pointer to memory for the 16 minimum values and the age of each value of
// the |channel|.
int16_t* age_ptr = &self->index_vector[offset];
int16_t* value_ptr = &self->low_value_vector[offset];
int16_t *p1, *p2, *p3;
// Offset to beginning of the 16 minimum values in memory
offset = WEBRTC_SPL_LSHIFT_W16(n, 4);
assert(channel < NUM_CHANNELS);
// Pointer to memory for the 16 minimum values and the age of each value
idxptr = &inst->index_vector[offset];
valptr = &inst->low_value_vector[offset];
// Each value in |low_value_vector| is getting 1 loop older.
// Update age of each value in |age_ptr|, and remove old values.
for (i = 0; i < 16; i++) {
p3 = age_ptr + i;
if (*p3 != 100) {
*p3 += 1;
} else {
p1 = value_ptr + i + 1;
p2 = p3 + 1;
for (j = i; j < 16; j++) {
*(value_ptr + j) = *p1++;
*(age_ptr + j) = *p2++;
}
*(age_ptr + 15) = 101;
*(value_ptr + 15) = 10000;
}
}
// Each value in low_value_vector is getting 1 loop older.
// Update age of each value in indexVal, and remove old values.
for (i = 0; i < 16; i++)
{
p3 = idxptr + i;
if (*p3 != 100)
{
*p3 += 1;
} else
{
p1 = valptr + i + 1;
p2 = p3 + 1;
for (j = i; j < 16; j++)
{
*(valptr + j) = *p1++;
*(idxptr + j) = *p2++;
}
*(idxptr + 15) = 101;
*(valptr + 15) = 10000;
// Check if |feature_value| is smaller than any of the values in
// |low_value_vector|. If so, find the |position| where to insert the new
// value.
if (feature_value < *(value_ptr + 7)) {
if (feature_value < *(value_ptr + 3)) {
if (feature_value < *(value_ptr + 1)) {
if (feature_value < *value_ptr) {
position = 0;
} else {
position = 1;
}
} else if (feature_value < *(value_ptr + 2)) {
position = 2;
} else {
position = 3;
}
} else if (feature_value < *(value_ptr + 5)) {
if (feature_value < *(value_ptr + 4)) {
position = 4;
} else {
position = 5;
}
} else if (feature_value < *(value_ptr + 6)) {
position = 6;
} else {
position = 7;
}
// Check if x smaller than any of the values in low_value_vector.
// If so, find position.
if (x < *(valptr + 7))
{
if (x < *(valptr + 3))
{
if (x < *(valptr + 1))
{
if (x < *valptr)
{
II = 0;
} else
{
II = 1;
}
} else if (x < *(valptr + 2))
{
II = 2;
} else
{
II = 3;
}
} else if (x < *(valptr + 5))
{
if (x < *(valptr + 4))
{
II = 4;
} else
{
II = 5;
}
} else if (x < *(valptr + 6))
{
II = 6;
} else
{
II = 7;
}
} else if (x < *(valptr + 15))
{
if (x < *(valptr + 11))
{
if (x < *(valptr + 9))
{
if (x < *(valptr + 8))
{
II = 8;
} else
{
II = 9;
}
} else if (x < *(valptr + 10))
{
II = 10;
} else
{
II = 11;
}
} else if (x < *(valptr + 13))
{
if (x < *(valptr + 12))
{
II = 12;
} else
{
II = 13;
}
} else if (x < *(valptr + 14))
{
II = 14;
} else
{
II = 15;
} else if (feature_value < *(value_ptr + 15)) {
if (feature_value < *(value_ptr + 11)) {
if (feature_value < *(value_ptr + 9)) {
if (feature_value < *(value_ptr + 8)) {
position = 8;
} else {
position = 9;
}
} else if (feature_value < *(value_ptr + 10)) {
position = 10;
} else {
position = 11;
}
} else if (feature_value < *(value_ptr + 13)) {
if (feature_value < *(value_ptr + 12)) {
position = 12;
} else {
position = 13;
}
} else if (feature_value < *(value_ptr + 14)) {
position = 14;
} else {
position = 15;
}
}
// Put new min value on right position and shift bigger values up
if (II > -1)
{
for (i = 15; i > II; i--)
{
k = i - 1;
*(valptr + i) = *(valptr + k);
*(idxptr + i) = *(idxptr + k);
}
*(valptr + II) = x;
*(idxptr + II) = 1;
// If we have a new small value, put it in the correct position and shift
// larger values up.
if (position > -1) {
for (i = 15; i > position; i--) {
j = i - 1;
*(value_ptr + i) = *(value_ptr + j);
*(age_ptr + i) = *(age_ptr + j);
}
*(value_ptr + position) = feature_value;
*(age_ptr + position) = 1;
}
meanV = 0;
if ((inst->frame_counter) > 4)
{
j = 5;
} else
{
j = inst->frame_counter;
// Get |current_median|.
if (self->frame_counter > 2) {
current_median = *(value_ptr + 2);
} else if (self->frame_counter > 0) {
current_median = *value_ptr;
}
// Smooth the median value.
if (self->frame_counter > 0) {
if (current_median < self->mean_value[channel]) {
alpha = (int16_t) ALPHA1; // 0.2 in Q15.
} else {
alpha = (int16_t) ALPHA2; // 0.99 in Q15.
}
}
tmp32 = WEBRTC_SPL_MUL_16_16(alpha + 1, self->mean_value[channel]);
tmp32 += WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, current_median);
tmp32 += 16384;
self->mean_value[channel] = (int16_t) (tmp32 >> 15);
if (j > 2)
{
meanV = *(valptr + 2);
} else if (j > 0)
{
meanV = *valptr;
} else
{
meanV = 1600;
}
if (inst->frame_counter > 0)
{
if (meanV < inst->mean_value[n])
{
alpha = (WebRtc_Word16)ALPHA1; // 0.2 in Q15
} else
{
alpha = (WebRtc_Word16)ALPHA2; // 0.99 in Q15
}
} else
{
alpha = 0;
}
tmp32 = WEBRTC_SPL_MUL_16_16((alpha+1), inst->mean_value[n]);
tmp32_1 = WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX - alpha, meanV);
tmp32 += tmp32_1;
tmp32 += 16384;
inst->mean_value[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 15);
return inst->mean_value[n];
return self->mean_value[channel];
}

View File

@ -9,52 +9,46 @@
*/
/*
* This header file includes the VAD internal calls for Downsampling and FindMinimum.
* Specific function calls are given below.
*/
// This file includes specific signal processing tools used in vad_core.c.
#ifndef WEBRTC_VAD_SP_H_
#define WEBRTC_VAD_SP_H_
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#include "typedefs.h"
#include "vad_core.h"
/****************************************************************************
* WebRtcVad_Downsampling(...)
*
* Downsamples the signal a factor 2, eg. 32->16 or 16->8
*
* Input:
* - signal_in : Input signal
* - in_length : Length of input signal in samples
*
* Input & Output:
* - filter_state : Filter state for first all-pass filters
*
* Output:
* - signal_out : Downsampled signal (of length len/2)
*/
void WebRtcVad_Downsampling(WebRtc_Word16* signal_in,
WebRtc_Word16* signal_out,
WebRtc_Word32* filter_state,
// Downsamples the signal by a factor of 2, e.g. 32->16 or 16->8.
//
// Inputs:
// - signal_in : Input signal.
// - in_length : Length of input signal in samples.
//
// Input & Output:
// - filter_state : Current filter states of the two all-pass filters. The
// |filter_state| is updated after all samples have been
// processed.
//
// Output:
// - signal_out : Downsampled signal (of length |in_length| / 2).
void WebRtcVad_Downsampling(int16_t* signal_in,
int16_t* signal_out,
int32_t* filter_state,
int in_length);
/****************************************************************************
* WebRtcVad_FindMinimum(...)
*
* Find the five lowest values of x in 100 frames long window. Return a mean
* value of these five values.
*
* Input:
* - feature_value : Feature value
* - channel : Channel number
*
* Input & Output:
* - inst : State information
*
* Output:
* return value : Weighted minimum value for a moving window.
*/
WebRtc_Word16 WebRtcVad_FindMinimum(VadInstT* inst, WebRtc_Word16 feature_value, int channel);
// Updates and returns the smoothed feature minimum. As minimum we use the
// median of the five smallest feature values in a 100-frame window.
//
// Inputs:
// - feature_value : New feature value to update with.
// - channel : Channel number.
//
// Input & Output:
// - handle : State information of the VAD.
//
// Returns:
// : Smoothed minimum value for a moving window.
int16_t WebRtcVad_FindMinimum(VadInstT* handle,
int16_t feature_value,
int channel);
#endif // WEBRTC_VAD_SP_H_
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_

View File

@ -15,12 +15,12 @@
#include "typedefs.h"
#include "webrtc_vad.h"
#ifdef __cplusplus
extern "C"
{
// TODO(bjornv): Move the internal unit tests to separate files.
extern "C" {
#include "vad_core.h"
#include "vad_gmm.h"
#include "vad_sp.h"
}
#endif
namespace webrtc {
namespace {
@ -28,11 +28,12 @@ const int16_t kModes[] = { 0, 1, 2, 3 };
const size_t kModesSize = sizeof(kModes) / sizeof(*kModes);
// Rates we support.
const int16_t kRates[] = { 8000, 16000, 32000 };
const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 };
const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
// Frame lengths we support.
const int16_t kMaxFrameLength = 960;
const int16_t kFrameLengths[] = { 80, 160, 240, 320, 480, 640, 960 };
const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640,
kMaxFrameLength };
const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths);
// Returns true if the rate and frame length combination is valid.
@ -182,6 +183,51 @@ TEST_F(VadTest, GMMTests) {
EXPECT_EQ(13440, delta);
}
// Exercises the signal processing helpers declared in vad_sp.h:
// WebRtcVad_Downsampling() and WebRtcVad_FindMinimum().
TEST_F(VadTest, SPTests) {
  int16_t zeros[kMaxFrameLength] = { 0 };
  int32_t state[2] = { 0 };
  int16_t data_in[kMaxFrameLength];
  int16_t data_out[kMaxFrameLength];

  // Expected return values of WebRtcVad_FindMinimum(). The first 16 entries
  // are for the increasing feature values, the last 16 for the constant one.
  const int16_t kReferenceMin[32] = {
      1600, 720, 509, 512, 532, 552, 570, 588,
      606, 624, 642, 659, 675, 691, 707, 723,
      1600, 544, 502, 522, 542, 561, 579, 597,
      615, 633, 651, 667, 683, 699, 715, 731
  };

  // Construct a non-zero input signal. It is known that (i * i) does not fit
  // in an int16_t for larger |i|, but that doesn't matter in this case; the
  // narrowing is made explicit with a cast.
  for (int16_t i = 0; i < kMaxFrameLength; ++i) {
    data_in[i] = static_cast<int16_t>(i * i);
  }

  // Input values all zeros, expect all zeros out.
  WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
  EXPECT_EQ(0, state[0]);
  EXPECT_EQ(0, state[1]);
  for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
    EXPECT_EQ(0, data_out[i]);
  }

  // Make a simple non-zero data test.
  WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
  EXPECT_EQ(207, state[0]);
  EXPECT_EQ(2270, state[1]);

  // The VAD instance is only needed from here on. Allocate it late and make
  // sure that it is released on every path out of the test.
  VadInstT* handle = (VadInstT*) malloc(sizeof(VadInstT));
  ASSERT_TRUE(handle != NULL);
  if (WebRtcVad_InitCore(handle, 0) != 0) {
    // A fatal gtest assertion returns immediately, so free first.
    free(handle);
    FAIL() << "WebRtcVad_InitCore() failed";
  }

  for (int16_t i = 0; i < 16; ++i) {
    int16_t value = 500 * (i + 1);
    for (int j = 0; j < NUM_CHANNELS; ++j) {
      // Use values both above and below initialized value.
      EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j));
      EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(handle, 12000, j));
    }
    // WebRtcVad_FindMinimum() reads |frame_counter| but does not advance it.
    handle->frame_counter++;
  }

  free(handle);
}
// TODO(bjornv): Add a process test, run on file.
} // namespace