VAD refactor: WebRtcVad_InitCore().
Impact only locally.
- Replaced for loops with memset().
- Added guard against NULL pointer.
- Removed mode as input parameter (never really used).
- Updated unit tests.
- Made struct member init_flag "int".
- Updated function description.
- Updated Copyright notes with 2012.
- Removed some lint warnings.
TESTS=vad_unittests, audioproc_unittest
Review URL: https://webrtc-codereview.appspot.com/369005
git-svn-id: http://webrtc.googlecode.com/svn/trunk@1543 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
567b99be5f
commit
2a4dcd7d15
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -61,6 +61,9 @@ static const int16_t kMaxSpeechFrames = 6;
|
||||
// Minimum standard deviation for both speech and noise.
|
||||
static const int16_t kMinStd = 384;
|
||||
|
||||
// Constants in WebRtcVad_InitCore().
|
||||
// Default aggressiveness mode.
|
||||
static const short kDefaultMode = 0;
|
||||
static const int kInitCheck = 42;
|
||||
|
||||
// Calculates the probabilities for both speech and background noise using
|
||||
@ -469,65 +472,58 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
|
||||
return vadflag;
|
||||
}
|
||||
|
||||
// Initialize VAD
|
||||
int WebRtcVad_InitCore(VadInstT *inst, short mode)
|
||||
{
|
||||
int i;
|
||||
// Initialize the VAD. Set aggressiveness mode to default value.
|
||||
int WebRtcVad_InitCore(VadInstT* self) {
|
||||
int i;
|
||||
|
||||
// Initialization of struct
|
||||
inst->vad = 1;
|
||||
inst->frame_counter = 0;
|
||||
inst->over_hang = 0;
|
||||
inst->num_of_speech = 0;
|
||||
if (self == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialization of downsampling filter state
|
||||
inst->downsampling_filter_states[0] = 0;
|
||||
inst->downsampling_filter_states[1] = 0;
|
||||
inst->downsampling_filter_states[2] = 0;
|
||||
inst->downsampling_filter_states[3] = 0;
|
||||
// Initialization of general struct variables.
|
||||
self->vad = 1; // Speech active (=1).
|
||||
self->frame_counter = 0;
|
||||
self->over_hang = 0;
|
||||
self->num_of_speech = 0;
|
||||
|
||||
// Read initial PDF parameters
|
||||
for (i = 0; i < NUM_TABLE_VALUES; i++)
|
||||
{
|
||||
inst->noise_means[i] = kNoiseDataMeans[i];
|
||||
inst->speech_means[i] = kSpeechDataMeans[i];
|
||||
inst->noise_stds[i] = kNoiseDataStds[i];
|
||||
inst->speech_stds[i] = kSpeechDataStds[i];
|
||||
}
|
||||
// Initialization of downsampling filter state.
|
||||
memset(self->downsampling_filter_states, 0,
|
||||
sizeof(self->downsampling_filter_states));
|
||||
|
||||
// Index and Minimum value vectors are initialized
|
||||
for (i = 0; i < 16 * NUM_CHANNELS; i++)
|
||||
{
|
||||
inst->low_value_vector[i] = 10000;
|
||||
inst->index_vector[i] = 0;
|
||||
}
|
||||
// Read initial PDF parameters.
|
||||
for (i = 0; i < NUM_TABLE_VALUES; i++) {
|
||||
self->noise_means[i] = kNoiseDataMeans[i];
|
||||
self->speech_means[i] = kSpeechDataMeans[i];
|
||||
self->noise_stds[i] = kNoiseDataStds[i];
|
||||
self->speech_stds[i] = kSpeechDataStds[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 5; i++)
|
||||
{
|
||||
inst->upper_state[i] = 0;
|
||||
inst->lower_state[i] = 0;
|
||||
}
|
||||
// Initialize Index and Minimum value vectors.
|
||||
for (i = 0; i < 16 * NUM_CHANNELS; i++) {
|
||||
self->low_value_vector[i] = 10000;
|
||||
self->index_vector[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
inst->hp_filter_state[i] = 0;
|
||||
}
|
||||
// Initialize splitting filter states.
|
||||
memset(self->upper_state, 0, sizeof(self->upper_state));
|
||||
memset(self->lower_state, 0, sizeof(self->lower_state));
|
||||
|
||||
// Init mean value memory, for FindMin function
|
||||
inst->mean_value[0] = 1600;
|
||||
inst->mean_value[1] = 1600;
|
||||
inst->mean_value[2] = 1600;
|
||||
inst->mean_value[3] = 1600;
|
||||
inst->mean_value[4] = 1600;
|
||||
inst->mean_value[5] = 1600;
|
||||
// Initialize high pass filter states.
|
||||
memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
|
||||
|
||||
if (WebRtcVad_set_mode_core(inst, mode) != 0) {
|
||||
return -1;
|
||||
}
|
||||
// Initialize mean value memory, for WebRtcVad_FindMinimum().
|
||||
for (i = 0; i < NUM_CHANNELS; i++) {
|
||||
self->mean_value[i] = 1600;
|
||||
}
|
||||
|
||||
inst->init_flag = kInitCheck;
|
||||
// Set aggressiveness mode to default (=|kDefaultMode|).
|
||||
if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
self->init_flag = kInitCheck;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Set aggressiveness mode
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -45,27 +45,18 @@ typedef struct VadInstT_
|
||||
WebRtc_Word16 individual[3];
|
||||
WebRtc_Word16 total[3];
|
||||
|
||||
short init_flag;
|
||||
int init_flag;
|
||||
|
||||
} VadInstT;
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcVad_InitCore(...)
|
||||
*
|
||||
* This function initializes a VAD instance
|
||||
*
|
||||
* Input:
|
||||
* - inst : Instance that should be initialized
|
||||
* - mode : Aggressiveness degree
|
||||
* 0 (High quality) - 3 (Highly aggressive)
|
||||
*
|
||||
* Output:
|
||||
* - inst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcVad_InitCore(VadInstT* inst, short mode);
|
||||
// Initializes the core VAD component. The default aggressiveness mode is
|
||||
// controlled by |kDefaultMode| in vad_core.c.
|
||||
//
|
||||
// - self [i/o] : Instance that should be initialized
|
||||
//
|
||||
// returns : 0 (OK), -1 (NULL pointer in or if the default mode can't be
|
||||
// set)
|
||||
int WebRtcVad_InitCore(VadInstT* self);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcVad_set_mode_core(...)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -21,16 +21,16 @@ extern "C" {
|
||||
namespace {
|
||||
|
||||
TEST_F(VadTest, InitCore) {
|
||||
// Test WebRtcVad_InitCore().
|
||||
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
|
||||
|
||||
// TODO(bjornv): Add NULL pointer check if we take care of it in
|
||||
// vad_core.c
|
||||
// NULL pointer test.
|
||||
EXPECT_EQ(-1, WebRtcVad_InitCore(NULL));
|
||||
|
||||
// Test WebRtcVad_InitCore().
|
||||
// Verify return = 0 for all modes.
|
||||
for (size_t j = 0; j < kModesSize; ++j) {
|
||||
EXPECT_EQ(0, WebRtcVad_InitCore(self, kModes[j]));
|
||||
}
|
||||
// Verify return = 0 for non-NULL pointer.
|
||||
EXPECT_EQ(0, WebRtcVad_InitCore(self));
|
||||
// Verify init_flag is set.
|
||||
EXPECT_EQ(42, self->init_flag);
|
||||
|
||||
free(self);
|
||||
}
|
||||
@ -41,11 +41,12 @@ TEST_F(VadTest, set_mode_core) {
|
||||
// TODO(bjornv): Add NULL pointer check if we take care of it in
|
||||
// vad_core.c
|
||||
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
// Test WebRtcVad_set_mode_core().
|
||||
// Invalid modes should return -1.
|
||||
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, -1));
|
||||
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self, (short) kModesSize));
|
||||
EXPECT_EQ(-1, WebRtcVad_set_mode_core(self,
|
||||
static_cast<int16_t>(kModesSize)));
|
||||
// Valid modes should return 0.
|
||||
for (size_t j = 0; j < kModesSize; ++j) {
|
||||
EXPECT_EQ(0, WebRtcVad_set_mode_core(self, kModes[j]));
|
||||
@ -64,7 +65,7 @@ TEST_F(VadTest, CalcVad) {
|
||||
// Test WebRtcVad_CalcVadXXkhz()
|
||||
// Verify that all zeros in gives VAD = 0 out.
|
||||
memset(speech, 0, sizeof(speech));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
EXPECT_EQ(0, WebRtcVad_CalcVad8khz(self, speech, kFrameLengths[j]));
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -44,7 +44,7 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
}
|
||||
|
||||
int frame_length_index = 0;
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
EXPECT_EQ(kReference[frame_length_index],
|
||||
@ -61,7 +61,7 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
|
||||
// Verify that all zeros in gives kOffsetVector out.
|
||||
memset(speech, 0, sizeof(speech));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
|
||||
@ -79,7 +79,7 @@ TEST_F(VadTest, vad_filterbank) {
|
||||
}
|
||||
for (size_t j = 0; j < kFrameLengthsSize; ++j) {
|
||||
if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) {
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
EXPECT_EQ(0, WebRtcVad_CalculateFeatures(self, speech, kFrameLengths[j],
|
||||
features));
|
||||
for (int k = 0; k < NUM_CHANNELS; ++k) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -23,7 +23,7 @@ extern "C" {
|
||||
namespace {
|
||||
|
||||
TEST_F(VadTest, vad_sp) {
|
||||
VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT));
|
||||
VadInstT* self = reinterpret_cast<VadInstT*>(malloc(sizeof(VadInstT)));
|
||||
int16_t zeros[kMaxFrameLength] = { 0 };
|
||||
int32_t state[2] = { 0 };
|
||||
int16_t data_in[kMaxFrameLength];
|
||||
@ -44,18 +44,20 @@ TEST_F(VadTest, vad_sp) {
|
||||
data_in[i] = (i * i);
|
||||
}
|
||||
// Input values all zeros, expect all zeros out.
|
||||
WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
|
||||
WebRtcVad_Downsampling(zeros, data_out, state,
|
||||
static_cast<int>(kMaxFrameLength));
|
||||
EXPECT_EQ(0, state[0]);
|
||||
EXPECT_EQ(0, state[1]);
|
||||
for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
|
||||
EXPECT_EQ(0, data_out[i]);
|
||||
}
|
||||
// Make a simple non-zero data test.
|
||||
WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
|
||||
WebRtcVad_Downsampling(data_in, data_out, state,
|
||||
static_cast<int>(kMaxFrameLength));
|
||||
EXPECT_EQ(207, state[0]);
|
||||
EXPECT_EQ(2270, state[1]);
|
||||
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self, 0));
|
||||
ASSERT_EQ(0, WebRtcVad_InitCore(self));
|
||||
// TODO(bjornv): Replace this part of the test with taking values from an
|
||||
// array and calculate the reference value here. Make sure the values are not
|
||||
// ordered.
|
||||
|
@ -101,16 +101,9 @@ WebRtc_Word16 WebRtcVad_Free(VadInst *vad_inst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcVad_Init(VadInst *vad_inst)
|
||||
{
|
||||
short mode = 0; // Default high quality
|
||||
|
||||
if (vad_inst == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
return WebRtcVad_InitCore((VadInstT*)vad_inst, mode);
|
||||
int WebRtcVad_Init(VadInst* handle) {
|
||||
// Initialize the core VAD component.
|
||||
return WebRtcVad_InitCore((VadInstT*) handle);
|
||||
}
|
||||
|
||||
int WebRtcVad_set_mode(VadInst *vad_inst, WebRtc_Word16 mode)
|
||||
|
Loading…
Reference in New Issue
Block a user