diff --git a/src/common_audio/vad/vad.gypi b/src/common_audio/vad/vad.gypi index 4b12db0c2..0890605e9 100644 --- a/src/common_audio/vad/vad.gypi +++ b/src/common_audio/vad/vad.gypi @@ -50,6 +50,10 @@ ], 'sources': [ 'vad_unittest.cc', + 'vad_filterbank_unittest.cc', + 'vad_gmm_unittest.cc', + 'vad_sp_unittest.cc', + 'vad_unittest.h', ], }, # vad_unittests ], # targets diff --git a/src/common_audio/vad/vad_filterbank_unittest.cc b/src/common_audio/vad/vad_filterbank_unittest.cc new file mode 100644 index 000000000..6f4dc1e79 --- /dev/null +++ b/src/common_audio/vad/vad_filterbank_unittest.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> + +#include "gtest/gtest.h" +#include "typedefs.h" +#include "vad_unittest.h" + +extern "C" { +#include "vad_core.h" +#include "vad_defines.h" +#include "vad_filterbank.h" +} + +namespace { + +enum { kNumValidFrameLengths = 3 }; + +TEST_F(VadTest, vad_filterbank) { + VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT)); + static const int16_t kReference[kNumValidFrameLengths] = { 15, 11, 11 }; + static const int16_t kReferencePowers[kNumValidFrameLengths * NUM_CHANNELS] = + { + 1213, 759, 587, 462, 434, 272, + 1479, 1385, 1291, 1200, 1103, 1099, + 1732, 1692, 1681, 1629, 1436, 1436 + }; + static const int16_t kOffsetVector[NUM_CHANNELS] = { + 368, 368, 272, 176, 176, 176 }; + int16_t data_out[NUM_CHANNELS]; + + // Construct a speech signal that will trigger the VAD in all modes. It is + // known that (i * i) will wrap around, but that doesn't matter in this case.
+ int16_t speech[kMaxFrameLength]; + for (int16_t i = 0; i < kMaxFrameLength; ++i) { + speech[i] = (i * i); + } + + int frame_length_index = 0; + ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); + for (size_t j = 0; j < kFrameLengthsSize; ++j) { + if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { + EXPECT_EQ(kReference[frame_length_index], + WebRtcVad_get_features(self, speech, kFrameLengths[j], + data_out)); + for (int k = 0; k < NUM_CHANNELS; ++k) { + EXPECT_EQ(kReferencePowers[k + frame_length_index * NUM_CHANNELS], + data_out[k]); + } + frame_length_index++; + } + } + EXPECT_EQ(kNumValidFrameLengths, frame_length_index); + + // Verify that all zeros in gives kOffsetVector out. + memset(speech, 0, sizeof(speech)); + ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); + for (size_t j = 0; j < kFrameLengthsSize; ++j) { + if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { + EXPECT_EQ(0, WebRtcVad_get_features(self, speech, kFrameLengths[j], + data_out)); + for (int k = 0; k < NUM_CHANNELS; ++k) { + EXPECT_EQ(kOffsetVector[k], data_out[k]); + } + } + } + + // Verify that all ones in gives kOffsetVector out. Any other constant input + // will have a small impact in the sub bands. + for (int16_t i = 0; i < kMaxFrameLength; ++i) { + speech[i] = 1; + } + for (size_t j = 0; j < kFrameLengthsSize; ++j) { + if (ValidRatesAndFrameLengths(8000, kFrameLengths[j])) { + ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); + EXPECT_EQ(0, WebRtcVad_get_features(self, speech, kFrameLengths[j], + data_out)); + for (int k = 0; k < NUM_CHANNELS; ++k) { + EXPECT_EQ(kOffsetVector[k], data_out[k]); + } + } + } + + free(self); +} +} // namespace diff --git a/src/common_audio/vad/vad_gmm_unittest.cc b/src/common_audio/vad/vad_gmm_unittest.cc new file mode 100644 index 000000000..205435adb --- /dev/null +++ b/src/common_audio/vad/vad_gmm_unittest.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "gtest/gtest.h" +#include "typedefs.h" +#include "vad_unittest.h" + +extern "C" { +#include "vad_gmm.h" +} + +namespace { + +TEST_F(VadTest, vad_gmm) { + int16_t delta = 0; + // Input value at mean. + EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(0, 0, 128, &delta)); + EXPECT_EQ(0, delta); + EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(16, 128, 128, &delta)); + EXPECT_EQ(0, delta); + EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(-16, -128, 128, &delta)); + EXPECT_EQ(0, delta); + + // Largest possible input to give non-zero probability. + EXPECT_EQ(1024, WebRtcVad_GaussianProbability(59, 0, 128, &delta)); + EXPECT_EQ(7552, delta); + EXPECT_EQ(1024, WebRtcVad_GaussianProbability(75, 128, 128, &delta)); + EXPECT_EQ(7552, delta); + EXPECT_EQ(1024, WebRtcVad_GaussianProbability(-75, -128, 128, &delta)); + EXPECT_EQ(-7552, delta); + + // Too large input, should give zero probability. + EXPECT_EQ(0, WebRtcVad_GaussianProbability(105, 0, 128, &delta)); + EXPECT_EQ(13440, delta); +} +} // namespace diff --git a/src/common_audio/vad/vad_sp.h b/src/common_audio/vad/vad_sp.h index 95c3b4c89..9e8b204e0 100644 --- a/src/common_audio/vad/vad_sp.h +++ b/src/common_audio/vad/vad_sp.h @@ -37,6 +37,8 @@ void WebRtcVad_Downsampling(int16_t* signal_in, // Updates and returns the smoothed feature minimum. As minimum we use the // median of the five smallest feature values in a 100 frames long window. +// As long as |handle->frame_counter| is zero, that is, we haven't received any +// "valid" data, FindMinimum() outputs the default value of 1600. // // Inputs: // - feature_value : New feature value to update with. 
diff --git a/src/common_audio/vad/vad_sp_unittest.cc b/src/common_audio/vad/vad_sp_unittest.cc new file mode 100644 index 000000000..4520712ea --- /dev/null +++ b/src/common_audio/vad/vad_sp_unittest.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> + +#include "gtest/gtest.h" +#include "typedefs.h" +#include "vad_unittest.h" + +extern "C" { +#include "vad_core.h" +#include "vad_defines.h" +#include "vad_sp.h" +} + +namespace { + +TEST_F(VadTest, vad_sp) { + VadInstT* self = (VadInstT*) malloc(sizeof(VadInstT)); + int16_t zeros[kMaxFrameLength] = { 0 }; + int32_t state[2] = { 0 }; + int16_t data_in[kMaxFrameLength]; + int16_t data_out[kMaxFrameLength]; + + // We expect the first value to be 1600 as long as |frame_counter| is zero, + // which is true for the first iteration. + static const int16_t kReferenceMin[32] = { + 1600, 720, 509, 512, 532, 552, 570, 588, + 606, 624, 642, 659, 675, 691, 707, 723, + 1600, 544, 502, 522, 542, 561, 579, 597, + 615, 633, 651, 667, 683, 699, 715, 731 + }; + + // Construct a speech signal that will trigger the VAD in all modes. It is + // known that (i * i) will wrap around, but that doesn't matter in this case. + for (int16_t i = 0; i < kMaxFrameLength; ++i) { + data_in[i] = (i * i); + } + // Input values all zeros, expect all zeros out. + WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength); + EXPECT_EQ(0, state[0]); + EXPECT_EQ(0, state[1]); + for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) { + EXPECT_EQ(0, data_out[i]); + } + // Make a simple non-zero data test.
+ WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength); + EXPECT_EQ(207, state[0]); + EXPECT_EQ(2270, state[1]); + + ASSERT_EQ(0, WebRtcVad_InitCore(self, 0)); + // TODO(bjornv): Replace this part of the test with taking values from an + // array and calculate the reference value here. Make sure the values are not + // ordered. + for (int16_t i = 0; i < 16; ++i) { + int16_t value = 500 * (i + 1); + for (int j = 0; j < NUM_CHANNELS; ++j) { + // Use values both above and below initialized value. + EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(self, value, j)); + EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(self, 12000, j)); + } + self->frame_counter++; + } + + free(self); +} +} // namespace diff --git a/src/common_audio/vad/vad_unittest.cc b/src/common_audio/vad/vad_unittest.cc index 54a397a30..5d86c1078 100644 --- a/src/common_audio/vad/vad_unittest.cc +++ b/src/common_audio/vad/vad_unittest.cc @@ -8,36 +8,22 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include <stddef.h> // size_t +#include "vad_unittest.h" + #include <stdlib.h> #include "gtest/gtest.h" #include "typedefs.h" #include "webrtc_vad.h" -// TODO(bjornv): Move the internal unit tests to separate files. -extern "C" { -#include "vad_core.h" -#include "vad_gmm.h" -#include "vad_sp.h" -} +VadTest::VadTest() {} -namespace webrtc { -namespace { -const int16_t kModes[] = { 0, 1, 2, 3 }; -const size_t kModesSize = sizeof(kModes) / sizeof(*kModes); +void VadTest::SetUp() {} -// Rates we support. -const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 }; -const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates); -// Frame lengths we support. -const int16_t kMaxFrameLength = 960; -const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640, - kMaxFrameLength }; -const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths); +void VadTest::TearDown() {} // Returns true if the rate and frame length combination is valid.
-bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length) { +bool VadTest::ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length) { if (rate == 8000) { if (frame_length == 80 || frame_length == 160 || frame_length == 240) { return true; @@ -59,21 +45,7 @@ bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length) { return false; } -class VadTest : public ::testing::Test { - protected: - VadTest(); - virtual void SetUp(); - virtual void TearDown(); -}; - -VadTest::VadTest() { -} - -void VadTest::SetUp() { -} - -void VadTest::TearDown() { -} +namespace { TEST_F(VadTest, ApiTest) { // This API test runs through the APIs for all possible valid and invalid @@ -160,75 +132,6 @@ TEST_F(VadTest, ApiTest) { EXPECT_EQ(0, WebRtcVad_Free(handle)); } -TEST_F(VadTest, GMMTests) { - int16_t delta = 0; - // Input value at mean. - EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(0, 0, 128, &delta)); - EXPECT_EQ(0, delta); - EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(16, 128, 128, &delta)); - EXPECT_EQ(0, delta); - EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(-16, -128, 128, &delta)); - EXPECT_EQ(0, delta); - - // Largest possible input to give non-zero probability. - EXPECT_EQ(1024, WebRtcVad_GaussianProbability(59, 0, 128, &delta)); - EXPECT_EQ(7552, delta); - EXPECT_EQ(1024, WebRtcVad_GaussianProbability(75, 128, 128, &delta)); - EXPECT_EQ(7552, delta); - EXPECT_EQ(1024, WebRtcVad_GaussianProbability(-75, -128, 128, &delta)); - EXPECT_EQ(-7552, delta); - - // Too large input, should give zero probability. 
- EXPECT_EQ(0, WebRtcVad_GaussianProbability(105, 0, 128, &delta)); - EXPECT_EQ(13440, delta); -} - -TEST_F(VadTest, SPTests) { - VadInstT* handle = (VadInstT*) malloc(sizeof(VadInstT)); - int16_t zeros[kMaxFrameLength] = { 0 }; - int32_t state[2] = { 0 }; - int16_t data_in[kMaxFrameLength]; - int16_t data_out[kMaxFrameLength]; - - const int16_t kReferenceMin[32] = { - 1600, 720, 509, 512, 532, 552, 570, 588, - 606, 624, 642, 659, 675, 691, 707, 723, - 1600, 544, 502, 522, 542, 561, 579, 597, - 615, 633, 651, 667, 683, 699, 715, 731 - }; - - // Construct a speech signal that will trigger the VAD in all modes. It is - // known that (i * i) will wrap around, but that doesn't matter in this case. - for (int16_t i = 0; i < kMaxFrameLength; ++i) { - data_in[i] = (i * i); - } - // Input values all zeros, expect all zeros out. - WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength); - EXPECT_EQ(0, state[0]); - EXPECT_EQ(0, state[1]); - for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) { - EXPECT_EQ(0, data_out[i]); - } - // Make a simple non-zero data test. - WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength); - EXPECT_EQ(207, state[0]); - EXPECT_EQ(2270, state[1]); - - ASSERT_EQ(0, WebRtcVad_InitCore(handle, 0)); - for (int16_t i = 0; i < 16; ++i) { - int16_t value = 500 * (i + 1); - for (int j = 0; j < NUM_CHANNELS; ++j) { - // Use values both above and below initialized value. - EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j)); - EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(handle, 12000, j)); - } - handle->frame_counter++; - } - - free(handle); -} - // TODO(bjornv): Add a process test, run on file. } // namespace -} // namespace webrtc diff --git a/src/common_audio/vad/vad_unittest.h b/src/common_audio/vad/vad_unittest.h new file mode 100644 index 000000000..87852e84a --- /dev/null +++ b/src/common_audio/vad/vad_unittest.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H +#define WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H + +#include <stddef.h> // size_t + +#include "gtest/gtest.h" +#include "typedefs.h" + +namespace { + +// Modes we support. +const int16_t kModes[] = { 0, 1, 2, 3 }; +const size_t kModesSize = sizeof(kModes) / sizeof(*kModes); + +// Rates we support. +const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 }; +const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates); + +// Frame lengths we support. +const int16_t kMaxFrameLength = 960; +const int16_t kFrameLengths[] = { 80, 120, 160, 240, 320, 480, 640, + kMaxFrameLength }; +const size_t kFrameLengthsSize = sizeof(kFrameLengths) / sizeof(*kFrameLengths); + +} // namespace + +class VadTest : public ::testing::Test { + protected: + VadTest(); + virtual void SetUp(); + virtual void TearDown(); + + // Returns true if the rate and frame length combination is valid. + bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length); +}; + +#endif // WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H