VAD refactor: WebRtcVad_InitCore().

The impact is local only.
- Replaced for loops with memset().
- Added guard against NULL pointer.
- Removed mode as input parameter (never really used).
- Updated unit tests.
- Changed struct member init_flag from "short" to "int".
- Updated function description.
- Updated copyright notices to 2012.
- Removed some lint warnings.

TESTS=vad_unittests, audioproc_unittest
Review URL: https://webrtc-codereview.appspot.com/369005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1543 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-01-25 12:18:12 +00:00
parent 567b99be5f
commit 2a4dcd7d15
6 changed files with 82 additions and 99 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -61,6 +61,9 @@ static const int16_t kMaxSpeechFrames = 6;
// Minimum standard deviation for both speech and noise.
static const int16_t kMinStd = 384;
// Constants in WebRtcVad_InitCore().
// Default aggressiveness mode, applied through WebRtcVad_set_mode_core() at
// the end of initialization.
static const short kDefaultMode = 0;
// Marker value stored in |init_flag| once WebRtcVad_InitCore() has completed
// successfully.
static const int kInitCheck = 42;
// Calculates the probabilities for both speech and background noise using
@ -469,63 +472,56 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
return vadflag;
}
// Initialize VAD
int WebRtcVad_InitCore(VadInstT *inst, short mode)
{
// Initialize the VAD. Set aggressiveness mode to default value.
int WebRtcVad_InitCore(VadInstT* self) {
int i;
// Initialization of struct
inst->vad = 1;
inst->frame_counter = 0;
inst->over_hang = 0;
inst->num_of_speech = 0;
// Initialization of downsampling filter state
inst->downsampling_filter_states[0] = 0;
inst->downsampling_filter_states[1] = 0;
inst->downsampling_filter_states[2] = 0;
inst->downsampling_filter_states[3] = 0;
// Read initial PDF parameters
for (i = 0; i < NUM_TABLE_VALUES; i++)
{
inst->noise_means[i] = kNoiseDataMeans[i];
inst->speech_means[i] = kSpeechDataMeans[i];
inst->noise_stds[i] = kNoiseDataStds[i];
inst->speech_stds[i] = kSpeechDataStds[i];
}
// Index and Minimum value vectors are initialized
for (i = 0; i < 16 * NUM_CHANNELS; i++)
{
inst->low_value_vector[i] = 10000;
inst->index_vector[i] = 0;
}
for (i = 0; i < 5; i++)
{
inst->upper_state[i] = 0;
inst->lower_state[i] = 0;
}
for (i = 0; i < 4; i++)
{
inst->hp_filter_state[i] = 0;
}
// Init mean value memory, for FindMin function
inst->mean_value[0] = 1600;
inst->mean_value[1] = 1600;
inst->mean_value[2] = 1600;
inst->mean_value[3] = 1600;
inst->mean_value[4] = 1600;
inst->mean_value[5] = 1600;
if (WebRtcVad_set_mode_core(inst, mode) != 0) {
if (self == NULL) {
return -1;
}
inst->init_flag = kInitCheck;
// Initialization of general struct variables.
self->vad = 1; // Speech active (=1).
self->frame_counter = 0;
self->over_hang = 0;
self->num_of_speech = 0;
// Initialization of downsampling filter state.
memset(self->downsampling_filter_states, 0,
sizeof(self->downsampling_filter_states));
// Read initial PDF parameters.
for (i = 0; i < NUM_TABLE_VALUES; i++) {
self->noise_means[i] = kNoiseDataMeans[i];
self->speech_means[i] = kSpeechDataMeans[i];
self->noise_stds[i] = kNoiseDataStds[i];
self->speech_stds[i] = kSpeechDataStds[i];
}
// Initialize Index and Minimum value vectors.
for (i = 0; i < 16 * NUM_CHANNELS; i++) {
self->low_value_vector[i] = 10000;
self->index_vector[i] = 0;
}
// Initialize splitting filter states.
memset(self->upper_state, 0, sizeof(self->upper_state));
memset(self->lower_state, 0, sizeof(self->lower_state));
// Initialize high pass filter states.
memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
// Initialize mean value memory, for WebRtcVad_FindMinimum().
for (i = 0; i < NUM_CHANNELS; i++) {
self->mean_value[i] = 1600;
}
// Set aggressiveness mode to default (=|kDefaultMode|).
if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
return -1;
}
self->init_flag = kInitCheck;
return 0;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -45,27 +45,18 @@ typedef struct VadInstT_
WebRtc_Word16 individual[3];
WebRtc_Word16 total[3];
short init_flag;
int init_flag;
} VadInstT;
/****************************************************************************
* WebRtcVad_InitCore(...)
*
* This function initializes a VAD instance
*
* Input:
* - inst : Instance that should be initialized
* - mode : Aggressiveness degree
* 0 (High quality) - 3 (Highly aggressive)
*
* Output:
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcVad_InitCore(VadInstT* inst, short mode);
// Initializes the core VAD component and sets the aggressiveness mode to its
// default value. The default mode is controlled by |kDefaultMode| in
// vad_core.c.
//
// - self [i/o] : Instance that should be initialized.
//
// returns : 0 (OK), -1 (|self| is a NULL pointer, or the default mode
// could not be set).
int WebRtcVad_InitCore(VadInstT* self);
/****************************************************************************
* WebRtcVad_set_mode_core(...)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -21,16 +21,16 @@ extern "C" {
namespace {
TEST_F(VadTest, InitCore) {
// Test WebRtcVad_InitCore().
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
// TODO(bjornv): Add NULL pointer check if we take care of it in
// vad_core.c
// NULL pointer test.
EXPECT_EQ(-1, WebRtcVad_InitCore(NULL));
// Test WebRtcVad_InitCore().
// Verify return = 0 for all modes.
for (size_t j = 0; j < kModesSize; ++j) {
EXPECT_EQ(0, WebRtcVad_InitCore(self, kModes[j]));
}
// Verify return = 0 for non-NULL pointer.
EXPECT_EQ(0, WebRtcVad_InitCore(self));
// Verify init_flag is set.
EXPECT_EQ(42, self->init_flag);
free(self);
}
@ -41,11 +41,12 @@ TEST_F(VadTest, set_mode_core) {
// TODO(bjornv): Add NULL pointer check if we take care of it in
// vad_core.c
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
// Test WebRtcVad_set_mode_core().
// Invalid modes should return -1.
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, -1));
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, (short) kModesSize));
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self,
static_cast<int16_t>(kModesSize)));
// Valid modes should return 0.
for (size_t j = 0; j < kModesSize; ++j) {
EXPECT_EQ(0, WebRtcVad_set_mode_core(self, kModes[j]));
@ -64,7 +65,7 @@ TEST_F(VadTest, CalcVad) {
// Test WebRtcVad_CalcVadXXkhz()
// Verify that all zeros in gives VAD = 0 out.
memset(speech, 0, sizeof(speech));
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(0, WebRtcVad_CalcVad8khz(self, speech, kFrameLengths[j]));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -44,7 +44,7 @@ TEST_F(VadTest, vad_filterbank) {
}
int frame_length_index = 0;
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(kReference[frame_length_index],
@ -61,7 +61,7 @@ TEST_F(VadTest, vad_filterbank) {
// Verify that all zeros in gives kOffsetVector out.
memset(speech, 0, sizeof(speech));
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
@ -79,7 +79,7 @@ TEST_F(VadTest, vad_filterbank) {
}
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
features));
for (int k = 0; k < NUM_CHANNELS; ++k) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -23,7 +23,7 @@ extern "C" {
namespace {
TEST_F(VadTest, vad_sp) {
VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT));
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
int16_t zeros[kMaxFrameLength] = { 0 };
int32_t state[2] = { 0 };
int16_t data_in[kMaxFrameLength];
@ -44,18 +44,20 @@ TEST_F(VadTest, vad_sp) {
data_in[i] = (i * i);
}
// Input values all zeros, expect all zeros out.
WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
WebRtcVad_Downsampling(zeros, data_out, state,
static_cast<int>(kMaxFrameLength));
EXPECT_EQ(0, state[0]);
EXPECT_EQ(0, state[1]);
for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
EXPECT_EQ(0, data_out[i]);
}
// Make a simple non-zero data test.
WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
WebRtcVad_Downsampling(data_in, data_out, state,
static_cast<int>(kMaxFrameLength));
EXPECT_EQ(207, state[0]);
EXPECT_EQ(2270, state[1]);
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
ASSERT_EQ(0, WebRtcVad_InitCore(self));
// TODO(bjornv): Replace this part of the test with taking values from an
// array and calculate the reference value here. Make sure the values are not
// ordered.

View File

@ -101,16 +101,9 @@ WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
return 0;
}
int WebRtcVad_Init(VadInst *vad_inst)
{
short mode = 0; // Default high quality
if (vad_inst == NULL)
{
return -1;
}
return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
int WebRtcVad_Init(VadInst* handle) {
  // Reinterpret the public handle as the core instance and let the core
  // initializer do the work; its return value (0 on success, -1 on error) is
  // passed straight back to the caller.
  VadInstT* const core = (VadInstT*) handle;
  return WebRtcVad_InitCore(core);
}
int WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)