VAD refactor: WebRtcVad_InitCore().

Local impact only.
- Replaced for loops with memset().
- Added guard against NULL pointer.
- Removed mode as input parameter (never really used).
- Updated unit tests.
- Changed the type of struct member init_flag from "short" to "int".
- Updated function description.
- Updated copyright notices to 2012.
- Removed some lint warnings.

TESTS=vad_unittests, audioproc_unittest
Review URL: https://webrtc-codereview.appspot.com/369005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1543 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
bjornv@webrtc.org 2012-01-25 12:18:12 +00:00
parent 567b99be5f
commit 2a4dcd7d15
6 changed files with 82 additions and 99 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -61,6 +61,9 @@ static const int16_t kMaxSpeechFrames = 6;
// Minimum standard deviation for both speech and noise. // Minimum standard deviation for both speech and noise.
static const int16_t kMinStd = 384; static const int16_t kMinStd = 384;
// Constants in WebRtcVad_InitCore().
// Default aggressiveness mode.
static const short kDefaultMode = 0;
static const int kInitCheck = 42; static const int kInitCheck = 42;
// Calculates the probabilities for both speech and background noise using // Calculates the probabilities for both speech and background noise using
@ -469,63 +472,56 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
return vadflag; return vadflag;
} }
// Initialize VAD // Initialize the VAD. Set aggressiveness mode to default value.
int WebRtcVad_InitCore(VadInstT *inst, short mode) int WebRtcVad_InitCore(VadInstT* self) {
{
int i; int i;
// Initialization of struct if (self == NULL) {
inst->vad = 1;
inst->frame_counter = 0;
inst->over_hang = 0;
inst->num_of_speech = 0;
// Initialization of downsampling filter state
inst->downsampling_filter_states[0] = 0;
inst->downsampling_filter_states[1] = 0;
inst->downsampling_filter_states[2] = 0;
inst->downsampling_filter_states[3] = 0;
// Read initial PDF parameters
for (i = 0; i < NUM_TABLE_VALUES; i++)
{
inst->noise_means[i] = kNoiseDataMeans[i];
inst->speech_means[i] = kSpeechDataMeans[i];
inst->noise_stds[i] = kNoiseDataStds[i];
inst->speech_stds[i] = kSpeechDataStds[i];
}
// Index and Minimum value vectors are initialized
for (i = 0; i < 16 * NUM_CHANNELS; i++)
{
inst->low_value_vector[i] = 10000;
inst->index_vector[i] = 0;
}
for (i = 0; i < 5; i++)
{
inst->upper_state[i] = 0;
inst->lower_state[i] = 0;
}
for (i = 0; i < 4; i++)
{
inst->hp_filter_state[i] = 0;
}
// Init mean value memory, for FindMin function
inst->mean_value[0] = 1600;
inst->mean_value[1] = 1600;
inst->mean_value[2] = 1600;
inst->mean_value[3] = 1600;
inst->mean_value[4] = 1600;
inst->mean_value[5] = 1600;
if (WebRtcVad_set_mode_core(inst, mode) != 0) {
return -1; return -1;
} }
inst->init_flag = kInitCheck; // Initialization of general struct variables.
self->vad = 1; // Speech active (=1).
self->frame_counter = 0;
self->over_hang = 0;
self->num_of_speech = 0;
// Initialization of downsampling filter state.
memset(self->downsampling_filter_states, 0,
sizeof(self->downsampling_filter_states));
// Read initial PDF parameters.
for (i = 0; i < NUM_TABLE_VALUES; i++) {
self->noise_means[i] = kNoiseDataMeans[i];
self->speech_means[i] = kSpeechDataMeans[i];
self->noise_stds[i] = kNoiseDataStds[i];
self->speech_stds[i] = kSpeechDataStds[i];
}
// Initialize Index and Minimum value vectors.
for (i = 0; i < 16 * NUM_CHANNELS; i++) {
self->low_value_vector[i] = 10000;
self->index_vector[i] = 0;
}
// Initialize splitting filter states.
memset(self->upper_state, 0, sizeof(self->upper_state));
memset(self->lower_state, 0, sizeof(self->lower_state));
// Initialize high pass filter states.
memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
// Initialize mean value memory, for WebRtcVad_FindMinimum().
for (i = 0; i < NUM_CHANNELS; i++) {
self->mean_value[i] = 1600;
}
// Set aggressiveness mode to default (=|kDefaultMode|).
if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
return -1;
}
self->init_flag = kInitCheck;
return 0; return 0;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -45,27 +45,18 @@ typedef struct VadInstT_
WebRtc_Word16 individual[3]; WebRtc_Word16 individual[3];
WebRtc_Word16 total[3]; WebRtc_Word16 total[3];
short init_flag; int init_flag;
} VadInstT; } VadInstT;
/**************************************************************************** // Initializes the core VAD component. The default aggressiveness mode is
* WebRtcVad_InitCore(...) // controlled by |kDefaultMode| in vad_core.c.
* //
* This function initializes a VAD instance // - self [i/o] : Instance that should be initialized
* //
* Input: // returns : 0 (OK), -1 (NULL pointer in or if the default mode can't be
* - inst : Instance that should be initialized // set)
* - mode : Aggressiveness degree int WebRtcVad_InitCore(VadInstT* self);
* 0 (High quality) - 3 (Highly aggressive)
*
* Output:
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcVad_InitCore(VadInstT* inst, short mode);
/**************************************************************************** /****************************************************************************
* WebRtcVad_set_mode_core(...) * WebRtcVad_set_mode_core(...)

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -21,16 +21,16 @@ extern "C" {
namespace { namespace {
TEST_F(VadTest, InitCore) { TEST_F(VadTest, InitCore) {
// Test WebRtcVad_InitCore().
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT))); VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
// TODO(bjornv): Add NULL pointer check if we take care of it in // NULL pointer test.
// vad_core.c EXPECT_EQ(-1, WebRtcVad_InitCore(NULL));
// Test WebRtcVad_InitCore(). // Verify return = 0 for non-NULL pointer.
// Verify return = 0 for all modes. EXPECT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kModesSize; ++j) { // Verify init_flag is set.
EXPECT_EQ(0, WebRtcVad_InitCore(self, kModes[j])); EXPECT_EQ(42, self->init_flag);
}
free(self); free(self);
} }
@ -41,11 +41,12 @@ TEST_F(VadTest, set_mode_core) {
// TODO(bjornv): Add NULL pointer check if we take care of it in // TODO(bjornv): Add NULL pointer check if we take care of it in
// vad_core.c // vad_core.c
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
// Test WebRtcVad_set_mode_core(). // Test WebRtcVad_set_mode_core().
// Invalid modes should return -1. // Invalid modes should return -1.
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, -1)); EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, -1));
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, (short) kModesSize)); EXPECT_EQ(-1, WebRtcVad_set_mode_core(self,
static_cast<int16_t>(kModesSize)));
// Valid modes should return 0. // Valid modes should return 0.
for (size_t j = 0; j < kModesSize; ++j) { for (size_t j = 0; j < kModesSize; ++j) {
EXPECT_EQ(0, WebRtcVad_set_mode_core(self, kModes[j])); EXPECT_EQ(0, WebRtcVad_set_mode_core(self, kModes[j]));
@ -64,7 +65,7 @@ TEST_F(VadTest, CalcVad) {
// Test WebRtcVad_CalcVadXXkhz() // Test WebRtcVad_CalcVadXXkhz()
// Verify that all zeros in gives VAD = 0 out. // Verify that all zeros in gives VAD = 0 out.
memset(speech, 0, sizeof(speech)); memset(speech, 0, sizeof(speech));
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) { for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(0, WebRtcVad_CalcVad8khz(self, speech, kFrameLengths[j])); EXPECT_EQ(0, WebRtcVad_CalcVad8khz(self, speech, kFrameLengths[j]));

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -44,7 +44,7 @@ TEST_F(VadTest, vad_filterbank) {
} }
int frame_length_index = 0; int frame_length_index = 0;
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) { for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(kReference[frame_length_index], EXPECT_EQ(kReference[frame_length_index],
@ -61,7 +61,7 @@ TEST_F(VadTest, vad_filterbank) {
// Verify that all zeros in gives kOffsetVector out. // Verify that all zeros in gives kOffsetVector out.
memset(speech, 0, sizeof(speech)); memset(speech, 0, sizeof(speech));
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
for (size_t j = 0; j < kFrameLengthsSize; ++j) { for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j], EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
@ -79,7 +79,7 @@ TEST_F(VadTest, vad_filterbank) {
} }
for (size_t j = 0; j < kFrameLengthsSize; ++j) { for (size_t j = 0; j < kFrameLengthsSize; ++j) {
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j], EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
features)); features));
for (int k = 0; k < NUM_CHANNELS; ++k) { for (int k = 0; k < NUM_CHANNELS; ++k) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@ -23,7 +23,7 @@ extern "C" {
namespace { namespace {
TEST_F(VadTest, vad_sp) { TEST_F(VadTest, vad_sp) {
VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT)); VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
int16_t zeros[kMaxFrameLength] = { 0 }; int16_t zeros[kMaxFrameLength] = { 0 };
int32_t state[2] = { 0 }; int32_t state[2] = { 0 };
int16_t data_in[kMaxFrameLength]; int16_t data_in[kMaxFrameLength];
@ -44,18 +44,20 @@ TEST_F(VadTest, vad_sp) {
data_in[i] = (i * i); data_in[i] = (i * i);
} }
// Input values all zeros, expect all zeros out. // Input values all zeros, expect all zeros out.
WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength); WebRtcVad_Downsampling(zeros, data_out, state,
static_cast<int>(kMaxFrameLength));
EXPECT_EQ(0, state[0]); EXPECT_EQ(0, state[0]);
EXPECT_EQ(0, state[1]); EXPECT_EQ(0, state[1]);
for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) { for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
EXPECT_EQ(0, data_out[i]); EXPECT_EQ(0, data_out[i]);
} }
// Make a simple non-zero data test. // Make a simple non-zero data test.
WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength); WebRtcVad_Downsampling(data_in, data_out, state,
static_cast<int>(kMaxFrameLength));
EXPECT_EQ(207, state[0]); EXPECT_EQ(207, state[0]);
EXPECT_EQ(2270, state[1]); EXPECT_EQ(2270, state[1]);
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); ASSERT_EQ(0, WebRtcVad_InitCore(self));
// TODO(bjornv): Replace this part of the test with taking values from an // TODO(bjornv): Replace this part of the test with taking values from an
// array and calculate the reference value here. Make sure the values are not // array and calculate the reference value here. Make sure the values are not
// ordered. // ordered.

View File

@ -101,16 +101,9 @@ WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
return 0; return 0;
} }
int WebRtcVad_Init(VadInst *vad_inst) int WebRtcVad_Init(VadInst* handle) {
{ // Initialize the core VAD component.
short mode = 0; // Default high quality return WebRtcVad_InitCore((VadInstT*) handle);
if (vad_inst == NULL)
{
return -1;
}
return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
} }
int WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode) int WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)