diff --git a/webrtc/common_audio/vad/include/vad.h b/webrtc/common_audio/vad/include/vad.h
index f1d12123f..1944f9dc5 100644
--- a/webrtc/common_audio/vad/include/vad.h
+++ b/webrtc/common_audio/vad/include/vad.h
@@ -33,9 +33,9 @@ class Vad {
 
   virtual ~Vad();
 
-  enum Activity VoiceActivity(const int16_t* audio,
-                              size_t num_samples,
-                              int sample_rate_hz);
+  virtual Activity VoiceActivity(const int16_t* audio,
+                                 size_t num_samples,
+                                 int sample_rate_hz);
 
  private:
   VadInst* handle_;
diff --git a/webrtc/common_audio/vad/mock/mock_vad.h b/webrtc/common_audio/vad/mock/mock_vad.h
index f1d8c226b..7a7de0fa7 100644
--- a/webrtc/common_audio/vad/mock/mock_vad.h
+++ b/webrtc/common_audio/vad/mock/mock_vad.h
@@ -19,7 +19,7 @@ namespace webrtc {
 
 class MockVad : public Vad {
  public:
-  explicit MockVad(enum Aggressiveness mode) {}
+  explicit MockVad(enum Aggressiveness mode) : Vad(mode) {}
   virtual ~MockVad() { Die(); }
   MOCK_METHOD0(Die, void());
diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn
index fb6120f89..7207c8bcd 100644
--- a/webrtc/modules/audio_coding/BUILD.gn
+++ b/webrtc/modules/audio_coding/BUILD.gn
@@ -131,8 +131,10 @@ config("cng_config") {
 
 source_set("cng") {
   sources = [
+    "codecs/cng/audio_encoder_cng.cc",
     "codecs/cng/cng_helpfuns.c",
     "codecs/cng/cng_helpfuns.h",
+    "codecs/cng/include/audio_encoder_cng.h",
     "codecs/cng/include/webrtc_cng.h",
     "codecs/cng/webrtc_cng.c",
   ]
diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
new file mode 100644
index 000000000..864dc8aaf
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h"
+
+#include
+
+namespace webrtc {
+
+AudioEncoderCng::Config::Config()
+    : sample_rate_hz(8000),
+      num_channels(1),
+      payload_type(13),
+      speech_encoder(NULL),
+      vad_mode(Vad::kVadNormal),
+      sid_frame_interval_ms(100),
+      num_cng_coefficients(8),
+      vad(NULL) {
+}
+
+bool AudioEncoderCng::Config::IsOk() const {
+  if (sample_rate_hz != 8000 && sample_rate_hz != 16000)
+    return false;
+  if (num_channels != 1)
+    return false;
+  if (!speech_encoder)
+    return false;
+  if (num_channels != speech_encoder->num_channels())
+    return false;
+  if (sid_frame_interval_ms < speech_encoder->Max10MsFramesInAPacket() * 10)
+    return false;
+  if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
+      num_cng_coefficients <= 0)
+    return false;
+  return true;
+}
+
+AudioEncoderCng::AudioEncoderCng(const Config& config)
+    : speech_encoder_(config.speech_encoder),
+      sample_rate_hz_(config.sample_rate_hz),
+      num_channels_(config.num_channels),
+      cng_payload_type_(config.payload_type),
+      num_cng_coefficients_(config.num_cng_coefficients),
+      first_timestamp_in_buffer_(0),
+      frames_in_buffer_(0),
+      last_frame_active_(true),
+      vad_(new Vad(config.vad_mode)) {
+  if (config.vad) {
+    // Replace default Vad object with user-provided one.
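+    // Note that AudioEncoderCng takes ownership of the injected Vad object
+    // (see the comment for Config::vad in audio_encoder_cng.h).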
+    vad_.reset(config.vad);
+  }
+  CHECK(config.IsOk()) << "Invalid configuration.";
+  CNG_enc_inst* cng_inst;
+  CHECK_EQ(WebRtcCng_CreateEnc(&cng_inst), 0) << "WebRtcCng_CreateEnc failed.";
+  cng_inst_.reset(cng_inst);  // Transfer ownership to scoped_ptr.
+  CHECK_EQ(WebRtcCng_InitEnc(cng_inst_.get(), sample_rate_hz_,
+                             config.sid_frame_interval_ms,
+                             config.num_cng_coefficients),
+           0)
+      << "WebRtcCng_InitEnc failed";
+}
+
+AudioEncoderCng::~AudioEncoderCng() {
+}
+
+int AudioEncoderCng::sample_rate_hz() const {
+  return sample_rate_hz_;
+}
+
+int AudioEncoderCng::num_channels() const {
+  return num_channels_;
+}
+
+int AudioEncoderCng::Num10MsFramesInNextPacket() const {
+  return speech_encoder_->Num10MsFramesInNextPacket();
+}
+
+int AudioEncoderCng::Max10MsFramesInAPacket() const {
+  return speech_encoder_->Max10MsFramesInAPacket();
+}
+
+bool AudioEncoderCng::EncodeInternal(uint32_t timestamp,
+                                     const int16_t* audio,
+                                     size_t max_encoded_bytes,
+                                     uint8_t* encoded,
+                                     size_t* encoded_bytes,
+                                     EncodedInfo* info) {
+  DCHECK_GE(max_encoded_bytes, static_cast<size_t>(num_cng_coefficients_ + 1));
+  if (max_encoded_bytes < static_cast<size_t>(num_cng_coefficients_ + 1)) {
+    return false;
+  }
+  *encoded_bytes = 0;
+  const int num_samples = sample_rate_hz() / 100 * num_channels();
+  if (speech_buffer_.empty()) {
+    CHECK_EQ(frames_in_buffer_, 0);
+    first_timestamp_in_buffer_ = timestamp;
+  }
+  for (int i = 0; i < num_samples; ++i) {
+    speech_buffer_.push_back(audio[i]);
+  }
+  ++frames_in_buffer_;
+  if (frames_in_buffer_ < speech_encoder_->Num10MsFramesInNextPacket()) {
+    return true;
+  }
+  CHECK_LE(frames_in_buffer_, 6)
+      << "Frame size cannot be larger than 60 ms when using VAD/CNG.";
+  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
+  CHECK_EQ(speech_buffer_.size(),
+           static_cast<size_t>(frames_in_buffer_) * samples_per_10ms_frame);
+
+  // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
+  // following split sizes:
+  // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
+  // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
+  int blocks_in_first_vad_call =
+      (frames_in_buffer_ > 3 ? 3 : frames_in_buffer_);
+  if (frames_in_buffer_ == 4)
+    blocks_in_first_vad_call = 2;
+  const int blocks_in_second_vad_call =
+      frames_in_buffer_ - blocks_in_first_vad_call;
+  CHECK_GE(blocks_in_second_vad_call, 0);
+
+  // Check if all of the buffer is passive speech. Start with checking the
+  // first block.
+  Vad::Activity activity = vad_->VoiceActivity(
+      &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
+      sample_rate_hz_);
+  if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
+    // Only check the second block if the first was passive.
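+    // (When the first block is active, the whole frame is encoded as active
+    // speech below, so no second VAD call is needed.)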
+    activity = vad_->VoiceActivity(
+        &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
+        samples_per_10ms_frame * blocks_in_second_vad_call, sample_rate_hz_);
+  }
+  DCHECK_NE(activity, Vad::kError);
+
+  bool return_val = true;
+  switch (activity) {
+    case Vad::kPassive: {
+      return_val = EncodePassive(encoded, encoded_bytes);
+      info->encoded_timestamp = first_timestamp_in_buffer_;
+      info->payload_type = cng_payload_type_;
+      last_frame_active_ = false;
+      break;
+    }
+    case Vad::kActive: {
+      return_val =
+          EncodeActive(max_encoded_bytes, encoded, encoded_bytes, info);
+      last_frame_active_ = true;
+      break;
+    }
+    case Vad::kError: {
+      return_val = false;
+      break;
+    }
+  }
+
+  speech_buffer_.clear();
+  frames_in_buffer_ = 0;
+  return return_val;
+}
+
+bool AudioEncoderCng::EncodePassive(uint8_t* encoded, size_t* encoded_bytes) {
+  bool force_sid = last_frame_active_;
+  bool output_produced = false;
+  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
+  for (int i = 0; i < frames_in_buffer_; ++i) {
+    int16_t encoded_bytes_tmp = 0;
+    if (WebRtcCng_Encode(cng_inst_.get(),
+                         &speech_buffer_[i * samples_per_10ms_frame],
+                         static_cast<int16_t>(samples_per_10ms_frame), encoded,
+                         &encoded_bytes_tmp, force_sid) < 0)
+      return false;
+    if (encoded_bytes_tmp > 0) {
+      CHECK(!output_produced);
+      *encoded_bytes = static_cast<size_t>(encoded_bytes_tmp);
+      output_produced = true;
+      force_sid = false;
+    }
+    CHECK(!force_sid) << "SID frame not produced despite being forced.";
+  }
+  return true;
+}
+
+bool AudioEncoderCng::EncodeActive(size_t max_encoded_bytes,
+                                   uint8_t* encoded,
+                                   size_t* encoded_bytes,
+                                   EncodedInfo* info) {
+  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
+  for (int i = 0; i < frames_in_buffer_; ++i) {
+    if (!speech_encoder_->Encode(first_timestamp_in_buffer_,
+                                 &speech_buffer_[i * samples_per_10ms_frame],
+                                 samples_per_10ms_frame, max_encoded_bytes,
+                                 encoded, encoded_bytes, info))
+      return false;
+    if (i < frames_in_buffer_ - 1) {
+      CHECK_EQ(*encoded_bytes, 0u) << "Encoder delivered data too early.";
+    }
+  }
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
new file mode 100644
index 000000000..6262fd20c
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/common_audio/vad/mock/mock_vad.h"
+#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h"
+#include "webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+using ::testing::Return;
+using ::testing::_;
+using ::testing::SetArgPointee;
+using ::testing::InSequence;
+
+namespace webrtc {
+
+namespace {
+static const size_t kMaxEncodedBytes = 1000;
+static const size_t kMaxNumSamples = 48 * 10 * 2;  // 10 ms @ 48 kHz stereo.
+static const size_t kMockReturnEncodedBytes = 17;
+static const int kCngPayloadType = 18;
+}
+
+class AudioEncoderCngTest : public ::testing::Test {
+ protected:
+  AudioEncoderCngTest()
+      : mock_vad_(new MockVad(Vad::kVadNormal)),
+        timestamp_(4711),
+        num_audio_samples_10ms_(0),
+        encoded_bytes_(0) {
+    memset(encoded_, 0, kMaxEncodedBytes);
+    memset(audio_, 0, kMaxNumSamples * 2);
+    config_.speech_encoder = &mock_encoder_;
+    EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(1));
+    // Let the AudioEncoderCng object use a MockVad instead of its internally
+    // created Vad object.
+    config_.vad = mock_vad_;
+    config_.payload_type = kCngPayloadType;
+  }
+
+  virtual void TearDown() OVERRIDE {
+    EXPECT_CALL(*mock_vad_, Die()).Times(1);
+    cng_.reset();
+    // Don't expect the cng_ object to delete the AudioEncoder object. But it
+    // will be deleted with the test fixture. This is why we explicitly delete
+    // the cng_ object above, and set expectations on mock_encoder_ afterwards.
+    EXPECT_CALL(mock_encoder_, Die()).Times(1);
+  }
+
+  void CreateCng() {
+    // The config_ parameters may be changed by the TEST_Fs up until CreateCng()
+    // is called, thus we cannot use the values until now.
+    num_audio_samples_10ms_ = 10 * config_.sample_rate_hz / 1000;
+    ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples);
+    EXPECT_CALL(mock_encoder_, sample_rate_hz())
+        .WillRepeatedly(Return(config_.sample_rate_hz));
+    // Max10MsFramesInAPacket() is just used to verify that the SID frame period
+    // is not too small. The return value does not matter that much, as long as
+    // it is smaller than 10.
+    EXPECT_CALL(mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(1));
+    cng_.reset(new AudioEncoderCng(config_));
+  }
+
+  void Encode() {
+    ASSERT_TRUE(cng_) << "Must call CreateCng() first.";
+    memset(&encoded_info_, 0, sizeof(encoded_info_));
+    encoded_bytes_ = 0;
+    ASSERT_TRUE(cng_->Encode(timestamp_, audio_, num_audio_samples_10ms_,
+                             kMaxEncodedBytes, encoded_, &encoded_bytes_,
+                             &encoded_info_));
+    timestamp_ += num_audio_samples_10ms_;
+  }
+
+  // Verifies that the cng_ object waits until it has collected
+  // |blocks_per_frame| blocks of audio, and then dispatches all of them to
+  // the underlying codec (speech or cng).
+  void CheckBlockGrouping(int blocks_per_frame, bool active_speech) {
+    EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+    CreateCng();
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+        .WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive));
+
+    // Don't expect any calls to the encoder yet.
+    EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);
+    for (int i = 0; i < blocks_per_frame - 1; ++i) {
+      Encode();
+      EXPECT_EQ(0u, encoded_bytes_);
+    }
+    if (active_speech) {
+      // Now expect |blocks_per_frame| calls to the encoder in sequence.
+      // Let the speech codec mock return true and set the number of encoded
+      // bytes to |kMockReturnEncodedBytes|.
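+      // Only the final call in the sequence reports a non-zero encoded size;
+      // the earlier calls set the encoded size to zero.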
+      InSequence s;
+      for (int j = 0; j < blocks_per_frame - 1; ++j) {
+        EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+            .WillOnce(DoAll(SetArgPointee<4>(0), Return(true)));
+      }
+      EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+          .WillOnce(
+              DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true)));
+    }
+    Encode();
+    if (active_speech) {
+      EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_);
+    } else {
+      EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients + 1),
+                encoded_bytes_);
+    }
+  }
+
+  // Verifies that the audio is partitioned into larger blocks before calling
+  // the VAD.
+  void CheckVadInputSize(int input_frame_size_ms,
+                         int expected_first_block_size_ms,
+                         int expected_second_block_size_ms) {
+    const int blocks_per_frame = input_frame_size_ms / 10;
+
+    EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+
+    // Expect nothing to happen before the last block is sent to cng_.
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0);
+    for (int i = 0; i < blocks_per_frame - 1; ++i) {
+      Encode();
+    }
+
+    // Let the VAD decision be passive, since an active decision may lead to
+    // early termination of the decision loop.
+    const int sample_rate_hz = config_.sample_rate_hz;
+    InSequence s;
+    EXPECT_CALL(
+        *mock_vad_,
+        VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz / 1000,
+                      sample_rate_hz)).WillOnce(Return(Vad::kPassive));
+    if (expected_second_block_size_ms > 0) {
+      EXPECT_CALL(*mock_vad_,
+                  VoiceActivity(
+                      _, expected_second_block_size_ms * sample_rate_hz / 1000,
+                      sample_rate_hz)).WillOnce(Return(Vad::kPassive));
+    }
+
+    // With this call to Encode(), |mock_vad_| should be called according to the
+    // above expectations.
+    Encode();
+  }
+
+  // Tests a frame with both active and passive speech. Returns true if the
+  // decision was active speech, false if it was passive.
+  bool CheckMixedActivePassive(Vad::Activity first_type,
+                               Vad::Activity second_type) {
+    // Set the speech encoder frame size to 60 ms, to ensure that the VAD will
+    // be called twice.
+    const int blocks_per_frame = 6;
+    EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+        .WillRepeatedly(Return(blocks_per_frame));
+    InSequence s;
+    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+        .WillOnce(Return(first_type));
+    if (first_type == Vad::kPassive) {
+      // Expect a second call to the VAD only if the first frame was passive.
+      EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+          .WillOnce(Return(second_type));
+    }
+    encoded_info_.payload_type = 0;
+    for (int i = 0; i < blocks_per_frame; ++i) {
+      Encode();
+    }
+    return encoded_info_.payload_type != kCngPayloadType;
+  }
+
+  AudioEncoderCng::Config config_;
+  scoped_ptr<AudioEncoderCng> cng_;
+  MockAudioEncoder mock_encoder_;
+  MockVad* mock_vad_;  // Ownership is transferred to |cng_|.
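+  // Rolling RTP timestamp fed to Encode(); advanced by one 10 ms block of
+  // samples after every call.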
+  uint32_t timestamp_;
+  int16_t audio_[kMaxNumSamples];
+  size_t num_audio_samples_10ms_;
+  uint8_t encoded_[kMaxEncodedBytes];
+  size_t encoded_bytes_;
+  AudioEncoder::EncodedInfo encoded_info_;
+};
+
+TEST_F(AudioEncoderCngTest, CreateAndDestroy) {
+  CreateCng();
+}
+
+TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) {
+  CreateCng();
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(17));
+  EXPECT_EQ(17, cng_->Num10MsFramesInNextPacket());
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCallsVad) {
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(1));
+  CreateCng();
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  Encode();
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) {
+  CheckBlockGrouping(1, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) {
+  CheckBlockGrouping(2, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) {
+  CheckBlockGrouping(3, false);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) {
+  CheckBlockGrouping(1, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) {
+  CheckBlockGrouping(2, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) {
+  CheckBlockGrouping(3, true);
+}
+
+TEST_F(AudioEncoderCngTest, EncodePassive) {
+  const int kBlocksPerFrame = 3;
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(kBlocksPerFrame));
+  CreateCng();
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillRepeatedly(Return(Vad::kPassive));
+  // Expect no calls at all to the speech encoder mock.
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);
+  uint32_t expected_timestamp = timestamp_;
+  for (int i = 0; i < 100; ++i) {
+    Encode();
+    // Check if it was time to call the cng encoder. This is done once every
+    // |kBlocksPerFrame| calls.
+    if ((i + 1) % kBlocksPerFrame == 0) {
+      // Now check if a SID interval has elapsed.
+      if ((i % (config_.sid_frame_interval_ms / 10)) < kBlocksPerFrame) {
+        // If so, verify that we got a CNG encoding.
+        EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+        EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1,
+                  encoded_bytes_);
+        EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp);
+      }
+      expected_timestamp += kBlocksPerFrame * num_audio_samples_10ms_;
+    } else {
+      // Otherwise, expect no output.
+      EXPECT_EQ(0u, encoded_bytes_);
+    }
+  }
+}
+
+// Verifies that the correct action is taken for frames with both active and
+// passive speech.
+TEST_F(AudioEncoderCngTest, MixedActivePassive) {
+  CreateCng();
+
+  // All of the frame is active speech.
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+      .Times(6)
+      .WillRepeatedly(Return(true));
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
+
+  // First half of the frame is active speech.
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+      .Times(6)
+      .WillRepeatedly(Return(true));
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
+
+  // Second half of the frame is active speech.
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+      .Times(6)
+      .WillRepeatedly(Return(true));
+  EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
+
+  // All of the frame is passive speech. Expect no calls to |mock_encoder_|.
+  EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive));
+}
+
+// These tests verify that the audio is partitioned into larger blocks before
+// calling the VAD.
+// The parameters for CheckVadInputSize are:
+// CheckVadInputSize(frame_size, expected_first_block_size,
+//                   expected_second_block_size);
+TEST_F(AudioEncoderCngTest, VadInputSize10Ms) {
+  CreateCng();
+  CheckVadInputSize(10, 10, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize20Ms) {
+  CreateCng();
+  CheckVadInputSize(20, 20, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize30Ms) {
+  CreateCng();
+  CheckVadInputSize(30, 30, 0);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize40Ms) {
+  CreateCng();
+  CheckVadInputSize(40, 20, 20);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize50Ms) {
+  CreateCng();
+  CheckVadInputSize(50, 30, 20);
+}
+TEST_F(AudioEncoderCngTest, VadInputSize60Ms) {
+  CreateCng();
+  CheckVadInputSize(60, 30, 30);
+}
+
+// Verifies that the EncodedInfo struct pointer passed to
+// AudioEncoderCng::Encode is propagated to the Encode call to the underlying
+// speech encoder.
+TEST_F(AudioEncoderCngTest, VerifyEncoderInfoPropagation) {
+  CreateCng();
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, &encoded_info_))
+      .WillOnce(Return(true));
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1));
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kActive));
+  Encode();
+}
+
+// Verifies that the correct payload type is set when CNG is encoded.
+TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) {
+  CreateCng();
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1));
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  encoded_info_.payload_type = 0;
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+}
+
+// Verifies that a SID frame is encoded immediately as the signal changes from
+// active speech to passive.
+TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) {
+  CreateCng();
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(1));
+  // Start with encoding noise.
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .Times(2)
+      .WillRepeatedly(Return(Vad::kPassive));
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+  EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1,
+            encoded_bytes_);
+  // Encode again, and make sure we got no frame at all (since the SID frame
+  // period is 100 ms by default).
+  Encode();
+  EXPECT_EQ(0u, encoded_bytes_);
+
+  // Now encode active speech.
+  encoded_info_.payload_type = 0;
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kActive));
+  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
+      .WillOnce(DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true)));
+  Encode();
+  EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_);
+
+  // Go back to noise again, and verify that a SID frame is emitted.
+  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
+      .WillOnce(Return(Vad::kPassive));
+  Encode();
+  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
+  EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1,
+            encoded_bytes_);
+}
+
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// This test fixture tests various error conditions that make the
+// AudioEncoderCng die via CHECKs.
+class AudioEncoderCngDeathTest : public AudioEncoderCngTest {
+ protected:
+  AudioEncoderCngDeathTest() : AudioEncoderCngTest() {
+    // Don't provide a Vad mock object, since it will leak when the test dies.
+    config_.vad = NULL;
+    EXPECT_CALL(*mock_vad_, Die()).Times(1);
+    delete mock_vad_;
+    mock_vad_ = NULL;
+  }
+
+  // Override AudioEncoderCngTest::TearDown, since that one expects a call to
+  // the destructor of |mock_vad_|. In this case, that object is already
+  // deleted.
+  virtual void TearDown() OVERRIDE {
+    cng_.reset();
+    // Don't expect the cng_ object to delete the AudioEncoder object. But it
+    // will be deleted with the test fixture. This is why we explicitly delete
+    // the cng_ object above, and set expectations on mock_encoder_ afterwards.
+    EXPECT_CALL(mock_encoder_, Die()).Times(1);
+  }
+};
+
+TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) {
+  CreateCng();
+  num_audio_samples_10ms_ *= 2;  // 20 ms frame.
+  EXPECT_DEATH(Encode(), "");
+  num_audio_samples_10ms_ = 0;  // Zero samples.
+  EXPECT_DEATH(Encode(), "");
+}
+
+TEST_F(AudioEncoderCngDeathTest, WrongSampleRates) {
+  config_.sample_rate_hz = 32000;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.sample_rate_hz = 48000;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.sample_rate_hz = 0;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.sample_rate_hz = -8000;
+  // Don't use CreateCng() here, since the built-in sanity checks will prevent
+  // the test from reaching the expected point-of-death.
+  EXPECT_DEATH(cng_.reset(new AudioEncoderCng(config_)),
+               "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficients) {
+  config_.num_cng_coefficients = -1;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.num_cng_coefficients = 0;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.num_cng_coefficients = 13;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) {
+  config_.speech_encoder = NULL;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, Stereo) {
+  EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(2));
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+  config_.num_channels = 2;
+  EXPECT_DEATH(CreateCng(), "Invalid configuration");
+}
+
+TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) {
+  CreateCng();
+  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
+      .WillRepeatedly(Return(7));
+  for (int i = 0; i < 6; ++i)
+    Encode();
+  EXPECT_DEATH(Encode(),
+               "Frame size cannot be larger than 60 ms when using VAD/CNG.");
+}
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/codecs/cng/cng.gypi b/webrtc/modules/audio_coding/codecs/cng/cng.gypi
index 3ad7dd320..ccc4f9506 100644
--- a/webrtc/modules/audio_coding/codecs/cng/cng.gypi
+++ b/webrtc/modules/audio_coding/codecs/cng/cng.gypi
@@ -25,7 +25,9 @@
         ],
       },
       'sources': [
+        'include/audio_encoder_cng.h',
        'include/webrtc_cng.h',
+        'audio_encoder_cng.cc',
        'webrtc_cng.c',
        'cng_helpfuns.c',
        'cng_helpfuns.h',
diff --git a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
new file mode 100644
index 000000000..c74441372
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
+
+#include <vector>
+
+#include "webrtc/common_audio/vad/include/vad.h"
+#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
+#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+class Vad;
+
+class AudioEncoderCng : public AudioEncoder {
+ public:
+  struct Config {
+    Config();
+    bool IsOk() const;
+
+    int sample_rate_hz;
+    int num_channels;
+    int payload_type;
+    // Caller keeps ownership of the AudioEncoder object.
+    AudioEncoder* speech_encoder;
+    Vad::Aggressiveness vad_mode;
+    int sid_frame_interval_ms;
+    int num_cng_coefficients;
+    // The Vad pointer is mainly for testing. If a NULL pointer is passed, the
+    // AudioEncoderCng creates (and destroys) a Vad object internally. If an
+    // object is passed, the AudioEncoderCng assumes ownership of the Vad
+    // object.
+    Vad* vad;
+  };
+
+  explicit AudioEncoderCng(const Config& config);
+
+  virtual ~AudioEncoderCng();
+
+  virtual int sample_rate_hz() const OVERRIDE;
+  virtual int num_channels() const OVERRIDE;
+  virtual int Num10MsFramesInNextPacket() const OVERRIDE;
+  virtual int Max10MsFramesInAPacket() const OVERRIDE;
+
+ protected:
+  virtual bool EncodeInternal(uint32_t timestamp,
+                              const int16_t* audio,
+                              size_t max_encoded_bytes,
+                              uint8_t* encoded,
+                              size_t* encoded_bytes,
+                              EncodedInfo* info) OVERRIDE;
+
+ private:
+  // Deleter for use with scoped_ptr. E.g., use as
+  //   scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;
+  struct CngInstDeleter {
+    inline void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); }
+  };
+
+  bool EncodePassive(uint8_t* encoded, size_t* encoded_bytes);
+
+  bool EncodeActive(size_t max_encoded_bytes,
+                    uint8_t* encoded,
+                    size_t* encoded_bytes,
+                    EncodedInfo* info);
+
+  AudioEncoder* speech_encoder_;
+  const int sample_rate_hz_;
+  const int num_channels_;
+  const int cng_payload_type_;
+  const int num_cng_coefficients_;
+  std::vector<int16_t> speech_buffer_;
+  uint32_t first_timestamp_in_buffer_;
+  int frames_in_buffer_;
+  bool last_frame_active_;
+  scoped_ptr<Vad> vad_;
+  scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
diff --git a/webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h b/webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
new file mode 100644
index 000000000..fe2ace95b
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
+
+#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
+
+#include "testing/gmock/include/gmock/gmock.h"
+
+namespace webrtc {
+
+class MockAudioEncoder : public AudioEncoder {
+ public:
+  virtual ~MockAudioEncoder() { Die(); }
+  MOCK_METHOD0(Die, void());
+  MOCK_CONST_METHOD0(sample_rate_hz, int());
+  MOCK_CONST_METHOD0(num_channels, int());
+  MOCK_CONST_METHOD0(Num10MsFramesInNextPacket, int());
+  MOCK_CONST_METHOD0(Max10MsFramesInAPacket, int());
+  // Note, we explicitly chose not to create a mock for the Encode method.
+  MOCK_METHOD6(EncodeInternal,
+               bool(uint32_t timestamp,
+                    const int16_t* audio,
+                    size_t max_encoded_bytes,
+                    uint8_t* encoded,
+                    size_t* encoded_bytes,
+                    EncodedInfo* info));
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp
index 79c84f95c..f129d359f 100644
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@@ -102,6 +102,7 @@
         '<(webrtc_root)/test/test.gyp:test_support_main',
       ],
       'sources': [
+        'audio_coding/codecs/cng/audio_encoder_cng_unittest.cc',
        'audio_coding/main/acm2/acm_opus_unittest.cc',
        'audio_coding/main/acm2/acm_receiver_unittest.cc',
        'audio_coding/main/acm2/acm_receiver_unittest_oldapi.cc',
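
For illustration, a minimal sketch of how the new class might be driven, based on the Config struct and the Encode() call exercised in the unit test above. The speech encoder, the audio buffer, and the helper function name are placeholders for this example and are not part of the patch:

#include <vector>

#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h"

// Sketch only: |speech_encoder| stands in for any AudioEncoder implementation,
// and |audio| for a stream of 8 kHz mono samples fed in 10 ms blocks.
void RunCngEncoder(webrtc::AudioEncoder* speech_encoder,
                   const std::vector<int16_t>& audio) {
  webrtc::AudioEncoderCng::Config config;  // Defaults: 8 kHz, mono, PT 13.
  config.speech_encoder = speech_encoder;  // Caller keeps ownership.
  config.vad_mode = webrtc::Vad::kVadNormal;
  webrtc::AudioEncoderCng cng(config);

  const size_t kSamplesPer10Ms = 80;  // 10 ms at 8 kHz mono.
  uint8_t packet[1000];
  webrtc::AudioEncoder::EncodedInfo info;
  uint32_t timestamp = 0;
  for (size_t i = 0; i + kSamplesPer10Ms <= audio.size();
       i += kSamplesPer10Ms) {
    size_t packet_bytes = 0;
    cng.Encode(timestamp, &audio[i], kSamplesPer10Ms, sizeof(packet), packet,
               &packet_bytes, &info);
    timestamp += kSamplesPer10Ms;
    // packet_bytes is non-zero only once a full speech-encoder frame has been
    // buffered; passive frames come out as CNG/SID packets carrying
    // config.payload_type, active frames as the speech encoder's payload.
  }
}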