Make an AudioEncoder subclass for comfort noise
BUG=3926 R=bjornv@webrtc.org, kjellander@webrtc.org, kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/31129004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7857 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -131,8 +131,10 @@ config("cng_config") { | ||||
|  | ||||
| source_set("cng") { | ||||
|   sources = [ | ||||
|     "codecs/cng/audio_encoder_cng.cc", | ||||
|     "codecs/cng/cng_helpfuns.c", | ||||
|     "codecs/cng/cng_helpfuns.h", | ||||
|     "codecs/cng/include/audio_encoder_cng.h", | ||||
|     "codecs/cng/include/webrtc_cng.h", | ||||
|     "codecs/cng/webrtc_cng.c", | ||||
|   ] | ||||
|   | ||||
							
								
								
									
										209
									
								
								webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										209
									
								
								webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,209 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" | ||||
|  | ||||
| #include <limits> | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // Default configuration: 8 kHz mono, payload type 13, normal VAD | ||||
| // aggressiveness, a SID frame interval of 100 ms and 8 CNG coefficients. | ||||
| // |speech_encoder| has no usable default and must be set by the caller before | ||||
| // IsOk() can return true. A NULL |vad| makes the encoder create its own Vad. | ||||
| AudioEncoderCng::Config::Config() | ||||
|     : sample_rate_hz(8000), | ||||
|       num_channels(1), | ||||
|       payload_type(13), | ||||
|       speech_encoder(NULL), | ||||
|       vad_mode(Vad::kVadNormal), | ||||
|       sid_frame_interval_ms(100), | ||||
|       num_cng_coefficients(8), | ||||
|       vad(NULL) {} | ||||
|  | ||||
| // Returns true if this configuration can be used to construct an | ||||
| // AudioEncoderCng: 8 or 16 kHz, mono, a speech encoder that is also mono, a | ||||
| // SID interval no shorter than the speech encoder's largest packet, and a | ||||
| // coefficient count in [1, WEBRTC_CNG_MAX_LPC_ORDER]. | ||||
| bool AudioEncoderCng::Config::IsOk() const { | ||||
|   const bool rate_supported = | ||||
|       (sample_rate_hz == 8000 || sample_rate_hz == 16000); | ||||
|   if (!rate_supported || num_channels != 1) | ||||
|     return false; | ||||
|   // The speech encoder is only queried once it is known to be non-NULL. | ||||
|   if (speech_encoder == NULL || | ||||
|       speech_encoder->num_channels() != num_channels) | ||||
|     return false; | ||||
|   // Max10MsFramesInAPacket() counts 10 ms blocks; convert to milliseconds. | ||||
|   if (speech_encoder->Max10MsFramesInAPacket() * 10 > sid_frame_interval_ms) | ||||
|     return false; | ||||
|   return num_cng_coefficients > 0 && | ||||
|          num_cng_coefficients <= WEBRTC_CNG_MAX_LPC_ORDER; | ||||
| } | ||||
|  | ||||
| // Builds a comfort-noise encoder around |config.speech_encoder|. | ||||
| // | ||||
| // The speech encoder pointer is stored as-is. If |config.vad| is non-NULL this | ||||
| // object takes ownership of it; otherwise a Vad is created from | ||||
| // |config.vad_mode|. Dies (CHECK) if |config| is not IsOk(), or if the CNG | ||||
| // encoder instance cannot be created or initialized. | ||||
| AudioEncoderCng::AudioEncoderCng(const Config& config) | ||||
|     : speech_encoder_(config.speech_encoder), | ||||
|       sample_rate_hz_(config.sample_rate_hz), | ||||
|       num_channels_(config.num_channels), | ||||
|       cng_payload_type_(config.payload_type), | ||||
|       num_cng_coefficients_(config.num_cng_coefficients), | ||||
|       first_timestamp_in_buffer_(0), | ||||
|       frames_in_buffer_(0), | ||||
|       // Start as "active" so that the first passive packet forces a SID frame. | ||||
|       last_frame_active_(true), | ||||
|       // Only allocate a default Vad when the caller did not provide one, | ||||
|       // instead of creating one and throwing it away immediately. | ||||
|       vad_(config.vad ? config.vad : new Vad(config.vad_mode)) { | ||||
|   CHECK(config.IsOk()) << "Invalid configuration."; | ||||
|   CNG_enc_inst* cng_inst = NULL; | ||||
|   CHECK_EQ(WebRtcCng_CreateEnc(&cng_inst), 0) << "WebRtcCng_CreateEnc failed."; | ||||
|   cng_inst_.reset(cng_inst);  // Transfer ownership to scoped_ptr. | ||||
|   CHECK_EQ(WebRtcCng_InitEnc(cng_inst_.get(), sample_rate_hz_, | ||||
|                              config.sid_frame_interval_ms, | ||||
|                              config.num_cng_coefficients), | ||||
|            0) | ||||
|       << "WebRtcCng_InitEnc failed"; | ||||
| } | ||||
|  | ||||
| // Empty on purpose: no explicit cleanup is performed here. | ||||
| AudioEncoderCng::~AudioEncoderCng() { | ||||
| } | ||||
|  | ||||
| // Returns the sample rate (Hz) taken from the configuration at construction. | ||||
| int AudioEncoderCng::sample_rate_hz() const { | ||||
|   return sample_rate_hz_; | ||||
| } | ||||
|  | ||||
| // Returns the channel count taken from the configuration at construction. | ||||
| int AudioEncoderCng::num_channels() const { | ||||
|   return num_channels_; | ||||
| } | ||||
|  | ||||
| // Forwards the query to the wrapped speech encoder, whose packet size decides | ||||
| // how many 10 ms blocks are buffered before anything is encoded. | ||||
| int AudioEncoderCng::Num10MsFramesInNextPacket() const { | ||||
|   return speech_encoder_->Num10MsFramesInNextPacket(); | ||||
| } | ||||
|  | ||||
| // Forwards the query to the wrapped speech encoder. | ||||
| int AudioEncoderCng::Max10MsFramesInAPacket() const { | ||||
|   return speech_encoder_->Max10MsFramesInAPacket(); | ||||
| } | ||||
|  | ||||
| // Buffers one 10 ms block of audio and, once the speech encoder's packet size | ||||
| // has been reached, runs the VAD over the buffered audio and encodes it either | ||||
| // as speech or as comfort noise. | ||||
| // | ||||
| // |audio| must hold sample_rate_hz() / 100 * num_channels() samples. | ||||
| // |*encoded_bytes| is 0 while audio is still being collected. Returns false if | ||||
| // |max_encoded_bytes| is smaller than num_cng_coefficients_ + 1, if the VAD | ||||
| // reports an error, or if the selected encoding step fails. | ||||
| bool AudioEncoderCng::EncodeInternal(uint32_t timestamp, | ||||
|                                      const int16_t* audio, | ||||
|                                      size_t max_encoded_bytes, | ||||
|                                      uint8_t* encoded, | ||||
|                                      size_t* encoded_bytes, | ||||
|                                      EncodedInfo* info) { | ||||
|   DCHECK_GE(max_encoded_bytes, static_cast<size_t>(num_cng_coefficients_ + 1)); | ||||
|   const size_t min_output_bytes = | ||||
|       static_cast<size_t>(num_cng_coefficients_ + 1); | ||||
|   if (max_encoded_bytes < min_output_bytes) | ||||
|     return false; | ||||
|   *encoded_bytes = 0; | ||||
|  | ||||
|   // The first block of a new packet determines the packet's timestamp. | ||||
|   if (speech_buffer_.empty()) { | ||||
|     CHECK_EQ(frames_in_buffer_, 0); | ||||
|     first_timestamp_in_buffer_ = timestamp; | ||||
|   } | ||||
|  | ||||
|   // Append the incoming 10 ms block to the buffer. | ||||
|   const int num_samples = sample_rate_hz() / 100 * num_channels(); | ||||
|   speech_buffer_.insert(speech_buffer_.end(), audio, audio + num_samples); | ||||
|   ++frames_in_buffer_; | ||||
|   if (frames_in_buffer_ < speech_encoder_->Num10MsFramesInNextPacket()) { | ||||
|     // Not enough audio for a full packet yet. | ||||
|     return true; | ||||
|   } | ||||
|   CHECK_LE(frames_in_buffer_, 6) | ||||
|       << "Frame size cannot be larger than 60 ms when using VAD/CNG."; | ||||
|   const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000; | ||||
|   CHECK_EQ(speech_buffer_.size(), | ||||
|            static_cast<size_t>(frames_in_buffer_) * samples_per_10ms_frame); | ||||
|  | ||||
|   // Group several 10 ms blocks per VAD call. Call VAD once or twice using the | ||||
|   // following split sizes: | ||||
|   // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms; | ||||
|   // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms. | ||||
|   int blocks_in_first_vad_call; | ||||
|   if (frames_in_buffer_ == 4) { | ||||
|     blocks_in_first_vad_call = 2; | ||||
|   } else if (frames_in_buffer_ > 3) { | ||||
|     blocks_in_first_vad_call = 3; | ||||
|   } else { | ||||
|     blocks_in_first_vad_call = frames_in_buffer_; | ||||
|   } | ||||
|   const int blocks_in_second_vad_call = | ||||
|       frames_in_buffer_ - blocks_in_first_vad_call; | ||||
|   CHECK_GE(blocks_in_second_vad_call, 0); | ||||
|  | ||||
|   // The packet counts as passive only if every VAD call says so; the second | ||||
|   // part is therefore examined only when the first part was passive. | ||||
|   const size_t first_part_samples = | ||||
|       samples_per_10ms_frame * blocks_in_first_vad_call; | ||||
|   Vad::Activity activity = vad_->VoiceActivity( | ||||
|       &speech_buffer_[0], first_part_samples, sample_rate_hz_); | ||||
|   if (blocks_in_second_vad_call > 0 && activity == Vad::kPassive) { | ||||
|     activity = vad_->VoiceActivity( | ||||
|         &speech_buffer_[first_part_samples], | ||||
|         samples_per_10ms_frame * blocks_in_second_vad_call, sample_rate_hz_); | ||||
|   } | ||||
|   DCHECK_NE(activity, Vad::kError); | ||||
|  | ||||
|   bool return_val = true; | ||||
|   if (activity == Vad::kActive) { | ||||
|     return_val = | ||||
|         EncodeActive(max_encoded_bytes, encoded, encoded_bytes, info); | ||||
|     last_frame_active_ = true; | ||||
|   } else if (activity == Vad::kPassive) { | ||||
|     return_val = EncodePassive(encoded, encoded_bytes); | ||||
|     info->encoded_timestamp = first_timestamp_in_buffer_; | ||||
|     info->payload_type = cng_payload_type_; | ||||
|     last_frame_active_ = false; | ||||
|   } else if (activity == Vad::kError) { | ||||
|     return_val = false; | ||||
|   } | ||||
|  | ||||
|   // The buffered audio has been consumed (or dropped on error); start over. | ||||
|   speech_buffer_.clear(); | ||||
|   frames_in_buffer_ = 0; | ||||
|   return return_val; | ||||
| } | ||||
|  | ||||
| // Feeds every buffered 10 ms block to the CNG encoder. At most one of the | ||||
| // blocks may produce a SID frame, whose size is stored in |*encoded_bytes|. | ||||
| // A SID frame is forced on the first block when the previous packet was | ||||
| // active speech. Returns false if the CNG encoder reports an error. | ||||
| bool AudioEncoderCng::EncodePassive(uint8_t* encoded, size_t* encoded_bytes) { | ||||
|   const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000; | ||||
|   bool output_produced = false; | ||||
|   bool force_sid = last_frame_active_; | ||||
|   for (int block = 0; block < frames_in_buffer_; ++block) { | ||||
|     int16_t sid_bytes = 0; | ||||
|     const int status = WebRtcCng_Encode( | ||||
|         cng_inst_.get(), &speech_buffer_[block * samples_per_10ms_frame], | ||||
|         static_cast<int16_t>(samples_per_10ms_frame), encoded, &sid_bytes, | ||||
|         force_sid); | ||||
|     if (status < 0) | ||||
|       return false; | ||||
|     if (sid_bytes > 0) { | ||||
|       // Only a single SID frame is allowed per packet. | ||||
|       CHECK(!output_produced); | ||||
|       *encoded_bytes = static_cast<size_t>(sid_bytes); | ||||
|       output_produced = true; | ||||
|       force_sid = false; | ||||
|     } | ||||
|     CHECK(!force_sid) << "SID frame not produced despite being forced."; | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // Runs every buffered 10 ms block through the speech encoder. Only the last | ||||
| // block of the packet may produce output. Returns false as soon as the | ||||
| // speech encoder reports a failure. | ||||
| bool AudioEncoderCng::EncodeActive(size_t max_encoded_bytes, | ||||
|                                    uint8_t* encoded, | ||||
|                                    size_t* encoded_bytes, | ||||
|                                    EncodedInfo* info) { | ||||
|   const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000; | ||||
|   const int last_block = frames_in_buffer_ - 1; | ||||
|   for (int block = 0; block < frames_in_buffer_; ++block) { | ||||
|     // NOTE(review): every block is submitted with the timestamp of the first | ||||
|     // block in the buffer -- confirm this is what the speech encoder expects. | ||||
|     const bool encode_ok = speech_encoder_->Encode( | ||||
|         first_timestamp_in_buffer_, | ||||
|         &speech_buffer_[block * samples_per_10ms_frame], | ||||
|         samples_per_10ms_frame, max_encoded_bytes, encoded, encoded_bytes, | ||||
|         info); | ||||
|     if (!encode_ok) | ||||
|       return false; | ||||
|     if (block != last_block) { | ||||
|       CHECK_EQ(*encoded_bytes, 0u) << "Encoder delivered data too early."; | ||||
|     } | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
| @@ -0,0 +1,465 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "testing/gtest/include/gtest/gtest.h" | ||||
| #include "webrtc/common_audio/vad/mock/mock_vad.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h" | ||||
| #include "webrtc/system_wrappers/interface/scoped_ptr.h" | ||||
|  | ||||
| using ::testing::Return; | ||||
| using ::testing::_; | ||||
| using ::testing::SetArgPointee; | ||||
| using ::testing::InSequence; | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| namespace { | ||||
| // Capacity of the output buffer handed to the encoder under test. | ||||
| const size_t kMaxEncodedBytes = 1000; | ||||
| // Capacity of the input buffer: 10 ms @ 48 kHz stereo. | ||||
| const size_t kMaxNumSamples = 48 * 10 * 2; | ||||
| // Byte count the mock speech encoder reports for a finished packet. | ||||
| const size_t kMockReturnEncodedBytes = 17; | ||||
| // Payload type configured for comfort noise in these tests. | ||||
| const int kCngPayloadType = 18; | ||||
| }  // namespace | ||||
|  | ||||
| // Test fixture that connects an AudioEncoderCng to a mock speech encoder and | ||||
| // a mock VAD, and provides helpers for feeding it 10 ms blocks of audio. | ||||
| class AudioEncoderCngTest : public ::testing::Test { | ||||
|  protected: | ||||
|   // Zeroes the audio and output buffers and prepares |config_| with the mock | ||||
|   // speech encoder, the mock VAD and |kCngPayloadType|. The encoder under | ||||
|   // test is not created until CreateCng() is called. | ||||
|   AudioEncoderCngTest() | ||||
|       : mock_vad_(new MockVad(Vad::kVadNormal)), | ||||
|         timestamp_(4711), | ||||
|         num_audio_samples_10ms_(0), | ||||
|         encoded_bytes_(0) { | ||||
|     memset(encoded_, 0, kMaxEncodedBytes); | ||||
|     memset(audio_, 0, kMaxNumSamples * 2); | ||||
|     config_.speech_encoder = &mock_encoder_; | ||||
|     EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(1)); | ||||
|     // Let the AudioEncoderCng object use a MockVad instead of its internally | ||||
|     // created Vad object. | ||||
|     config_.vad = mock_vad_; | ||||
|     config_.payload_type = kCngPayloadType; | ||||
|   } | ||||
|  | ||||
|   // Destroys |cng_| and expects that to destroy the mock VAD it owns. | ||||
|   virtual void TearDown() OVERRIDE { | ||||
|     EXPECT_CALL(*mock_vad_, Die()).Times(1); | ||||
|     cng_.reset(); | ||||
|     // Don't expect the cng_ object to delete the AudioEncoder object. But it | ||||
|     // will be deleted with the test fixture. This is why we explicitly delete | ||||
|     // the cng_ object above, and set expectations on mock_encoder_ afterwards. | ||||
|     EXPECT_CALL(mock_encoder_, Die()).Times(1); | ||||
|   } | ||||
|  | ||||
|   // Creates the AudioEncoderCng under test from the current |config_|. | ||||
|   void CreateCng() { | ||||
|     // The config_ parameters may be changed by the TEST_Fs up until CreateCng() | ||||
|     // is called, thus we cannot use the values until now. | ||||
|     num_audio_samples_10ms_ = 10 * config_.sample_rate_hz / 1000; | ||||
|     ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples); | ||||
|     EXPECT_CALL(mock_encoder_, sample_rate_hz()) | ||||
|         .WillRepeatedly(Return(config_.sample_rate_hz)); | ||||
|     // Max10MsFramesInAPacket() is just used to verify that the SID frame period | ||||
|     // is not too small. The return value does not matter that much, as long as | ||||
|     // it is smaller than 10. | ||||
|     EXPECT_CALL(mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(1)); | ||||
|     cng_.reset(new AudioEncoderCng(config_)); | ||||
|   } | ||||
|  | ||||
|   // Feeds one block of |num_audio_samples_10ms_| samples from |audio_| to | ||||
|   // |cng_| and advances |timestamp_| by the same amount. The results are left | ||||
|   // in |encoded_|, |encoded_bytes_| and |encoded_info_|. | ||||
|   void Encode() { | ||||
|     ASSERT_TRUE(cng_) << "Must call CreateCng() first."; | ||||
|     memset(&encoded_info_, 0, sizeof(encoded_info_)); | ||||
|     encoded_bytes_ = 0; | ||||
|     ASSERT_TRUE(cng_->Encode(timestamp_, audio_, num_audio_samples_10ms_, | ||||
|                              kMaxEncodedBytes, encoded_, &encoded_bytes_, | ||||
|                              &encoded_info_)); | ||||
|     timestamp_ += num_audio_samples_10ms_; | ||||
|   } | ||||
|  | ||||
|   // Verifies that the cng_ object waits until it has collected | ||||
|   // |blocks_per_frame| blocks of audio, and then dispatches all of them to | ||||
|   // the underlying codec (speech or cng). | ||||
|   void CheckBlockGrouping(int blocks_per_frame, bool active_speech) { | ||||
|     EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|         .WillRepeatedly(Return(blocks_per_frame)); | ||||
|     CreateCng(); | ||||
|     EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|         .WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive)); | ||||
|  | ||||
|     // Don't expect any calls to the encoder yet. | ||||
|     EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0); | ||||
|     for (int i = 0; i < blocks_per_frame - 1; ++i) { | ||||
|       Encode(); | ||||
|       EXPECT_EQ(0u, encoded_bytes_); | ||||
|     } | ||||
|     if (active_speech) { | ||||
|       // Now expect |blocks_per_frame| calls to the encoder in sequence. | ||||
|       // Let the speech codec mock return true and set the number of encoded | ||||
|       // bytes to |kMockReturnEncodedBytes|. | ||||
|       InSequence s; | ||||
|       for (int j = 0; j < blocks_per_frame - 1; ++j) { | ||||
|         EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|             .WillOnce(DoAll(SetArgPointee<4>(0), Return(true))); | ||||
|       } | ||||
|       EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|           .WillOnce( | ||||
|               DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true))); | ||||
|     } | ||||
|     Encode(); | ||||
|     if (active_speech) { | ||||
|       EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_); | ||||
|     } else { | ||||
|       // A SID frame holds one byte more than the number of coefficients. | ||||
|       EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients + 1), | ||||
|                 encoded_bytes_); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Verifies that the audio is partitioned into larger blocks before calling | ||||
|   // the VAD. | ||||
|   void CheckVadInputSize(int input_frame_size_ms, | ||||
|                          int expected_first_block_size_ms, | ||||
|                          int expected_second_block_size_ms) { | ||||
|     const int blocks_per_frame = input_frame_size_ms / 10; | ||||
|  | ||||
|     EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|         .WillRepeatedly(Return(blocks_per_frame)); | ||||
|  | ||||
|     // Expect nothing to happen before the last block is sent to cng_. | ||||
|     EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0); | ||||
|     for (int i = 0; i < blocks_per_frame - 1; ++i) { | ||||
|       Encode(); | ||||
|     } | ||||
|  | ||||
|     // Let the VAD decision be passive, since an active decision may lead to | ||||
|     // early termination of the decision loop. | ||||
|     const int sample_rate_hz = config_.sample_rate_hz; | ||||
|     InSequence s; | ||||
|     EXPECT_CALL( | ||||
|         *mock_vad_, | ||||
|         VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz / 1000, | ||||
|                       sample_rate_hz)).WillOnce(Return(Vad::kPassive)); | ||||
|     if (expected_second_block_size_ms > 0) { | ||||
|       EXPECT_CALL(*mock_vad_, | ||||
|                   VoiceActivity( | ||||
|                       _, expected_second_block_size_ms * sample_rate_hz / 1000, | ||||
|                       sample_rate_hz)).WillOnce(Return(Vad::kPassive)); | ||||
|     } | ||||
|  | ||||
|     // With this call to Encode(), |mock_vad_| should be called according to the | ||||
|     // above expectations. | ||||
|     Encode(); | ||||
|   } | ||||
|  | ||||
|   // Tests a frame with both active and passive speech. Returns true if the | ||||
|   // decision was active speech, false if it was passive. | ||||
|   bool CheckMixedActivePassive(Vad::Activity first_type, | ||||
|                                Vad::Activity second_type) { | ||||
|     // Set the speech encoder frame size to 60 ms, to ensure that the VAD will | ||||
|     // be called twice. | ||||
|     const int blocks_per_frame = 6; | ||||
|     EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|         .WillRepeatedly(Return(blocks_per_frame)); | ||||
|     InSequence s; | ||||
|     EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|         .WillOnce(Return(first_type)); | ||||
|     if (first_type == Vad::kPassive) { | ||||
|       // Expect a second call to the VAD only if the first frame was passive. | ||||
|       EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|           .WillOnce(Return(second_type)); | ||||
|     } | ||||
|     encoded_info_.payload_type = 0; | ||||
|     for (int i = 0; i < blocks_per_frame; ++i) { | ||||
|       Encode(); | ||||
|     } | ||||
|     // The CNG payload type is only written when the frame was passive. | ||||
|     return encoded_info_.payload_type != kCngPayloadType; | ||||
|   } | ||||
|  | ||||
|   AudioEncoderCng::Config config_; | ||||
|   scoped_ptr<AudioEncoderCng> cng_; | ||||
|   MockAudioEncoder mock_encoder_; | ||||
|   MockVad* mock_vad_;  // Ownership is transferred to |cng_|. | ||||
|   uint32_t timestamp_; | ||||
|   int16_t audio_[kMaxNumSamples]; | ||||
|   size_t num_audio_samples_10ms_; | ||||
|   uint8_t encoded_[kMaxEncodedBytes]; | ||||
|   size_t encoded_bytes_; | ||||
|   AudioEncoder::EncodedInfo encoded_info_; | ||||
| }; | ||||
|  | ||||
| // Smoke test: the encoder can be created; it is destroyed in TearDown(). | ||||
| TEST_F(AudioEncoderCngTest, CreateAndDestroy) { | ||||
|   CreateCng(); | ||||
| } | ||||
|  | ||||
| // Verifies that Num10MsFramesInNextPacket() returns the value reported by the | ||||
| // speech encoder. | ||||
| TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) { | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(17)); | ||||
|   EXPECT_EQ(17, cng_->Num10MsFramesInNextPacket()); | ||||
| } | ||||
|  | ||||
| // Verifies that encoding a one-block packet results in a call to the VAD. | ||||
| TEST_F(AudioEncoderCngTest, EncodeCallsVad) { | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|       .WillRepeatedly(Return(1)); | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillOnce(Return(Vad::kPassive)); | ||||
|   Encode(); | ||||
| } | ||||
|  | ||||
| // The following tests check the block grouping for packets of 1, 2 and 3 | ||||
| // blocks, first with passive and then with active speech. The parameters for | ||||
| // CheckBlockGrouping are (blocks_per_frame, active_speech). | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) { | ||||
|   CheckBlockGrouping(1, false); | ||||
| } | ||||
|  | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) { | ||||
|   CheckBlockGrouping(2, false); | ||||
| } | ||||
|  | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) { | ||||
|   CheckBlockGrouping(3, false); | ||||
| } | ||||
|  | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) { | ||||
|   CheckBlockGrouping(1, true); | ||||
| } | ||||
|  | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) { | ||||
|   CheckBlockGrouping(2, true); | ||||
| } | ||||
|  | ||||
| TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) { | ||||
|   CheckBlockGrouping(3, true); | ||||
| } | ||||
|  | ||||
| // Verifies that continuous passive speech never reaches the speech encoder, | ||||
| // and that the packets checked at SID intervals carry the CNG payload type, | ||||
| // the SID frame size and the timestamp of the first block in the packet. | ||||
| TEST_F(AudioEncoderCngTest, EncodePassive) { | ||||
|   const int kBlocksPerFrame = 3; | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|       .WillRepeatedly(Return(kBlocksPerFrame)); | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillRepeatedly(Return(Vad::kPassive)); | ||||
|   // Expect no calls at all to the speech encoder mock. | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0); | ||||
|   uint32_t expected_timestamp = timestamp_; | ||||
|   for (int i = 0; i < 100; ++i) { | ||||
|     Encode(); | ||||
|     // Check if it was time to call the cng encoder. This is done once every | ||||
|     // |kBlocksPerFrame| calls. | ||||
|     if ((i + 1) % kBlocksPerFrame == 0) { | ||||
|       // Now check if a SID interval has elapsed. | ||||
|       if ((i % (config_.sid_frame_interval_ms / 10)) < kBlocksPerFrame) { | ||||
|         // If so, verify that we got a CNG encoding. | ||||
|         EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); | ||||
|         EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, | ||||
|                   encoded_bytes_); | ||||
|         EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp); | ||||
|       } | ||||
|       // The next packet starts |kBlocksPerFrame| blocks later. | ||||
|       expected_timestamp += kBlocksPerFrame * num_audio_samples_10ms_; | ||||
|     } else { | ||||
|       // Otherwise, expect no output. | ||||
|       EXPECT_EQ(0u, encoded_bytes_); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Verifies that the correct action is taken for frames with both active and | ||||
| // passive speech. A frame is expected to be encoded as speech (six calls to | ||||
| // the speech encoder, one per 10 ms block) if either VAD call reports | ||||
| // activity, and as comfort noise only if both are passive. | ||||
| TEST_F(AudioEncoderCngTest, MixedActivePassive) { | ||||
|   CreateCng(); | ||||
|  | ||||
|   // All of the frame is active speech. | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|       .Times(6) | ||||
|       .WillRepeatedly(Return(true)); | ||||
|   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive)); | ||||
|  | ||||
|   // First half of the frame is active speech. | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|       .Times(6) | ||||
|       .WillRepeatedly(Return(true)); | ||||
|   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive)); | ||||
|  | ||||
|   // Second half of the frame is active speech. | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|       .Times(6) | ||||
|       .WillRepeatedly(Return(true)); | ||||
|   EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive)); | ||||
|  | ||||
|   // All of the frame is passive speech. Expect no calls to |mock_encoder_|. | ||||
|   EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive)); | ||||
| } | ||||
|  | ||||
| // These tests verify that the audio is partitioned into larger blocks before | ||||
| // calling the VAD. | ||||
| // The parameters for CheckVadInputSize are: | ||||
| // CheckVadInputSize(frame_size, expected_first_block_size, | ||||
| //                   expected_second_block_size); | ||||
| // All sizes are in milliseconds; a second block size of 0 means that no | ||||
| // second VAD call is expected. | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize10Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(10, 10, 0); | ||||
| } | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize20Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(20, 20, 0); | ||||
| } | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize30Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(30, 30, 0); | ||||
| } | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize40Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(40, 20, 20); | ||||
| } | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize50Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(50, 30, 20); | ||||
| } | ||||
| TEST_F(AudioEncoderCngTest, VadInputSize60Ms) { | ||||
|   CreateCng(); | ||||
|   CheckVadInputSize(60, 30, 30); | ||||
| } | ||||
|  | ||||
| // Verifies that the EncodedInfo struct pointer passed to | ||||
| // AudioEncoderCng::Encode is propagated to the Encode call to the underlying | ||||
| // speech encoder. | ||||
| TEST_F(AudioEncoderCngTest, VerifyEncoderInfoPropagation) { | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, &encoded_info_)) | ||||
|       .WillOnce(Return(true)); | ||||
|   // One block per packet and an active VAD decision route the call to the | ||||
|   // speech encoder. | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1)); | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillOnce(Return(Vad::kActive)); | ||||
|   Encode(); | ||||
| } | ||||
|  | ||||
| // Verifies that the correct payload type is set when CNG is encoded. | ||||
| TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) { | ||||
|   CreateCng(); | ||||
|   // A passive one-block packet must not reach the speech encoder. | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0); | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1)); | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillOnce(Return(Vad::kPassive)); | ||||
|   encoded_info_.payload_type = 0; | ||||
|   Encode(); | ||||
|   EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); | ||||
| } | ||||
|  | ||||
| // Verifies that a SID frame is encoded immediately as the signal changes from | ||||
| // active speech to passive. | ||||
| TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) { | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|       .WillRepeatedly(Return(1)); | ||||
|   // Start with encoding noise. | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .Times(2) | ||||
|       .WillRepeatedly(Return(Vad::kPassive)); | ||||
|   // The very first passive packet yields a SID frame. | ||||
|   Encode(); | ||||
|   EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); | ||||
|   EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, | ||||
|             encoded_bytes_); | ||||
|   // Encode again, and make sure we got no frame at all (since the SID frame | ||||
|   // period is 100 ms by default). | ||||
|   Encode(); | ||||
|   EXPECT_EQ(0u, encoded_bytes_); | ||||
|  | ||||
|   // Now encode active speech. | ||||
|   encoded_info_.payload_type = 0; | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillOnce(Return(Vad::kActive)); | ||||
|   EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)) | ||||
|       .WillOnce(DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true))); | ||||
|   Encode(); | ||||
|   EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_); | ||||
|  | ||||
|   // Go back to noise again, and verify that a SID frame is emitted. | ||||
|   EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) | ||||
|       .WillOnce(Return(Vad::kPassive)); | ||||
|   Encode(); | ||||
|   EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); | ||||
|   EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, | ||||
|             encoded_bytes_); | ||||
| } | ||||
|  | ||||
| #if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) | ||||
|  | ||||
| // This test fixture tests various error conditions that makes the | ||||
| // AudioEncoderCng die via CHECKs. | ||||
| class AudioEncoderCngDeathTest : public AudioEncoderCngTest { | ||||
|  protected: | ||||
|   // Removes the mock VAD set up by the base fixture, so that the encoder under | ||||
|   // test creates its own Vad. | ||||
|   AudioEncoderCngDeathTest() : AudioEncoderCngTest() { | ||||
|     // Don't provide a Vad mock object, since it will leak when the test dies. | ||||
|     config_.vad = NULL; | ||||
|     EXPECT_CALL(*mock_vad_, Die()).Times(1); | ||||
|     delete mock_vad_; | ||||
|     mock_vad_ = NULL; | ||||
|   } | ||||
|  | ||||
|   // Override AudioEncoderCngTest::TearDown, since that one expects a call to | ||||
|   // the destructor of |mock_vad_|. In this case, that object is already | ||||
|   // deleted. | ||||
|   virtual void TearDown() OVERRIDE { | ||||
|     cng_.reset(); | ||||
|     // Don't expect the cng_ object to delete the AudioEncoder object. But it | ||||
|     // will be deleted with the test fixture. This is why we explicitly delete | ||||
|     // the cng_ object above, and set expectations on mock_encoder_ afterwards. | ||||
|     EXPECT_CALL(mock_encoder_, Die()).Times(1); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Expects Encode() to die when the number of input samples does not | ||||
| // correspond to exactly one 10 ms block. | ||||
| TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) { | ||||
|   CreateCng(); | ||||
|   num_audio_samples_10ms_ *= 2;  // 20 ms frame. | ||||
|   EXPECT_DEATH(Encode(), ""); | ||||
|   num_audio_samples_10ms_ = 0;  // Zero samples. | ||||
|   EXPECT_DEATH(Encode(), ""); | ||||
| } | ||||
|  | ||||
| // Expects construction to die for sample rates other than 8000 and 16000 Hz. | ||||
| TEST_F(AudioEncoderCngDeathTest, WrongSampleRates) { | ||||
|   config_.sample_rate_hz = 32000; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.sample_rate_hz = 48000; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.sample_rate_hz = 0; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.sample_rate_hz = -8000; | ||||
|   // Don't use CreateCng() here, since the built-in sanity checks will prevent | ||||
|   // the test from reaching the expected point-of-death. | ||||
|   EXPECT_DEATH(cng_.reset(new AudioEncoderCng(config_)), | ||||
|                "Invalid configuration"); | ||||
| } | ||||
|  | ||||
| // Expects construction to die for coefficient counts of -1, 0 and 13, which | ||||
| // the test treats as outside the accepted range. | ||||
| TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficients) { | ||||
|   config_.num_cng_coefficients = -1; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.num_cng_coefficients = 0; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.num_cng_coefficients = 13; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
| } | ||||
|  | ||||
| // Expects construction to die when no speech encoder is configured. | ||||
| TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) { | ||||
|   config_.speech_encoder = NULL; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
| } | ||||
|  | ||||
| // Expects construction to die for a stereo speech encoder, both when the | ||||
| // configuration still says mono and when it is set to stereo as well. | ||||
| TEST_F(AudioEncoderCngDeathTest, Stereo) { | ||||
|   EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(2)); | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
|   config_.num_channels = 2; | ||||
|   EXPECT_DEATH(CreateCng(), "Invalid configuration"); | ||||
| } | ||||
|  | ||||
| // Expects Encode() to die once a speech encoder that asks for 7 blocks | ||||
| // (70 ms) per packet has received its seventh block. | ||||
| TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) { | ||||
|   CreateCng(); | ||||
|   EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) | ||||
|       .WillRepeatedly(Return(7)); | ||||
|   // The first six blocks are only buffered. | ||||
|   for (int i = 0; i < 6; ++i) | ||||
|     Encode(); | ||||
|   EXPECT_DEATH(Encode(), | ||||
|                "Frame size cannot be larger than 60 ms when using VAD/CNG."); | ||||
| } | ||||
|  | ||||
| #endif  // GTEST_HAS_DEATH_TEST | ||||
|  | ||||
| }  // namespace webrtc | ||||
| @@ -25,7 +25,9 @@ | ||||
|         ], | ||||
|       }, | ||||
|       'sources': [ | ||||
|         'include/audio_encoder_cng.h', | ||||
|         'include/webrtc_cng.h', | ||||
|         'audio_encoder_cng.cc', | ||||
|         'webrtc_cng.c', | ||||
|         'cng_helpfuns.c', | ||||
|         'cng_helpfuns.h', | ||||
|   | ||||
| @@ -0,0 +1,91 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ | ||||
|  | ||||
| #include <vector> | ||||
|  | ||||
| #include "webrtc/common_audio/vad/include/vad.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/audio_encoder.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" | ||||
| #include "webrtc/system_wrappers/interface/scoped_ptr.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| class Vad; | ||||
|  | ||||
| // AudioEncoder implementation for comfort noise (CNG). It is configured with | ||||
| // a caller-owned speech encoder and holds a Vad and a CNG encoder instance. | ||||
| // NOTE(review): how these are combined is defined in audio_encoder_cng.cc; | ||||
| // presumably the Vad decision selects between EncodeActive() and | ||||
| // EncodePassive(). Confirm there. | ||||
| class AudioEncoderCng : public AudioEncoder { | ||||
|  public: | ||||
|   // Construction parameters for AudioEncoderCng. | ||||
|   struct Config { | ||||
|     // Fills in default values; the defaults are set in audio_encoder_cng.cc | ||||
|     // (e.g. 8000 Hz, 1 channel, payload type 13). | ||||
|     Config(); | ||||
|     // Reports whether this configuration is acceptable. NOTE(review): the | ||||
|     // exact checks live in the .cc file; confirm before relying on them. | ||||
|     bool IsOk() const; | ||||
|  | ||||
|     int sample_rate_hz; | ||||
|     int num_channels; | ||||
|     // NOTE(review): presumably the payload type used for CNG packets (the | ||||
|     // class keeps a cng_payload_type_ member); confirm in the .cc file. | ||||
|     int payload_type; | ||||
|     // Caller keeps ownership of the AudioEncoder object. | ||||
|     AudioEncoder* speech_encoder; | ||||
|     // Aggressiveness setting for the Vad. | ||||
|     Vad::Aggressiveness vad_mode; | ||||
|     // NOTE(review): presumably the spacing between SID frames, in | ||||
|     // milliseconds; confirm in the .cc file. | ||||
|     int sid_frame_interval_ms; | ||||
|     int num_cng_coefficients; | ||||
|     // The Vad pointer is mainly for testing. If a NULL pointer is passed, the | ||||
|     // AudioEncoderCng creates (and destroys) a Vad object internally. If an | ||||
|     // object is passed, the AudioEncoderCng assumes ownership of the Vad | ||||
|     // object. | ||||
|     Vad* vad; | ||||
|   }; | ||||
|  | ||||
|   // Builds the encoder from |config|. See the comments in Config for which | ||||
|   // pointers are owned by the caller and which are taken over. | ||||
|   explicit AudioEncoderCng(const Config& config); | ||||
|  | ||||
|   virtual ~AudioEncoderCng(); | ||||
|  | ||||
|   // Overrides of the AudioEncoder query methods. | ||||
|   virtual int sample_rate_hz() const OVERRIDE; | ||||
|   virtual int num_channels() const OVERRIDE; | ||||
|   virtual int Num10MsFramesInNextPacket() const OVERRIDE; | ||||
|   virtual int Max10MsFramesInAPacket() const OVERRIDE; | ||||
|  | ||||
|  protected: | ||||
|   // Override of the AudioEncoder encoding hook. NOTE(review): the parameter | ||||
|   // contract is defined by AudioEncoder in audio_encoder.h; see that header. | ||||
|   virtual bool EncodeInternal(uint32_t timestamp, | ||||
|                               const int16_t* audio, | ||||
|                               size_t max_encoded_bytes, | ||||
|                               uint8_t* encoded, | ||||
|                               size_t* encoded_bytes, | ||||
|                               EncodedInfo* info) OVERRIDE; | ||||
|  | ||||
|  private: | ||||
|   // Deleter for use with scoped_ptr. E.g., use as | ||||
|   //   scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_; | ||||
|   // It releases the instance through WebRtcCng_FreeEnc(). | ||||
|   struct CngInstDeleter { | ||||
|     inline void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); } | ||||
|   }; | ||||
|  | ||||
|   // Helpers for EncodeInternal(). NOTE(review): presumably "passive" is the | ||||
|   // comfort-noise path and "active" the speech path; confirm in the .cc file. | ||||
|   bool EncodePassive(uint8_t* encoded, size_t* encoded_bytes); | ||||
|  | ||||
|   bool EncodeActive(size_t max_encoded_bytes, | ||||
|                     uint8_t* encoded, | ||||
|                     size_t* encoded_bytes, | ||||
|                     EncodedInfo* info); | ||||
|  | ||||
|   // NOTE(review): presumably the pointer from Config::speech_encoder, which | ||||
|   // the caller owns; confirm in the constructor. | ||||
|   AudioEncoder* speech_encoder_; | ||||
|   const int sample_rate_hz_; | ||||
|   const int num_channels_; | ||||
|   const int cng_payload_type_; | ||||
|   const int num_cng_coefficients_; | ||||
|   // Buffering state. NOTE(review): presumably input audio is collected here | ||||
|   // until a whole packet can be produced; confirm in the .cc file. | ||||
|   std::vector<int16_t> speech_buffer_; | ||||
|   uint32_t first_timestamp_in_buffer_; | ||||
|   int frames_in_buffer_; | ||||
|   bool last_frame_active_; | ||||
|   // Owned by this object; destroyed together with it. | ||||
|   scoped_ptr<Vad> vad_; | ||||
|   // Owned by this object; released through CngInstDeleter. | ||||
|   scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
| #endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ | ||||
							
								
								
									
										40
									
								
								webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_ | ||||
|  | ||||
| #include "webrtc/modules/audio_coding/codecs/audio_encoder.h" | ||||
|  | ||||
| #include "testing/gmock/include/gmock/gmock.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // gmock-based mock of the AudioEncoder interface for use in unit tests. | ||||
| class MockAudioEncoder : public AudioEncoder { | ||||
|  public: | ||||
|   // The destructor calls the mocked Die() method, which lets a test set an | ||||
|   // expectation on when this object is destroyed. | ||||
|   virtual ~MockAudioEncoder() { Die(); } | ||||
|   MOCK_METHOD0(Die, void()); | ||||
|   // Mocked query methods of AudioEncoder. | ||||
|   MOCK_CONST_METHOD0(sample_rate_hz, int()); | ||||
|   MOCK_CONST_METHOD0(num_channels, int()); | ||||
|   MOCK_CONST_METHOD0(Num10MsFramesInNextPacket, int()); | ||||
|   MOCK_CONST_METHOD0(Max10MsFramesInAPacket, int()); | ||||
|   // Note, we explicitly chose not to create a mock for the Encode method. | ||||
|   // Only the EncodeInternal hook below is mocked. | ||||
|   MOCK_METHOD6(EncodeInternal, | ||||
|                bool(uint32_t timestamp, | ||||
|                     const int16_t* audio, | ||||
|                     size_t max_encoded_bytes, | ||||
|                     uint8_t* encoded, | ||||
|                     size_t* encoded_bytes, | ||||
|                     EncodedInfo* info)); | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_ | ||||
		Reference in New Issue
	
	Block a user
	 henrik.lundin@webrtc.org
					henrik.lundin@webrtc.org