diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
index 58fd24f53..9b23607f0 100644
--- a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
@@ -50,8 +50,6 @@ AudioEncoderCng::AudioEncoderCng(const Config& config)
     : speech_encoder_(config.speech_encoder),
       cng_payload_type_(config.payload_type),
       num_cng_coefficients_(config.num_cng_coefficients),
-      first_timestamp_in_buffer_(0),
-      frames_in_buffer_(0),
       last_frame_active_(true),
       vad_(new Vad(config.vad_mode)) {
   if (config.vad) {
@@ -115,35 +113,31 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   CHECK_GE(max_encoded_bytes, static_cast<size_t>(num_cng_coefficients_ + 1));
-  const int num_samples = SampleRateHz() / 100 * NumChannels();
-  if (speech_buffer_.empty()) {
-    CHECK_EQ(frames_in_buffer_, 0);
-    first_timestamp_in_buffer_ = rtp_timestamp;
-  }
-  for (int i = 0; i < num_samples; ++i) {
+  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
+  CHECK_EQ(speech_buffer_.size(),
+           rtp_timestamps_.size() * samples_per_10ms_frame);
+  rtp_timestamps_.push_back(rtp_timestamp);
+  for (size_t i = 0; i < samples_per_10ms_frame; ++i) {
     speech_buffer_.push_back(audio[i]);
   }
-  ++frames_in_buffer_;
-  if (frames_in_buffer_ < speech_encoder_->Num10MsFramesInNextPacket()) {
+  const int frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
+  if (rtp_timestamps_.size() < static_cast<size_t>(frames_to_encode)) {
     return EncodedInfo();
   }
-  CHECK_LE(frames_in_buffer_ * 10, kMaxFrameSizeMs)
+  CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
       << "Frame size cannot be larger than " << kMaxFrameSizeMs
       << " ms when using VAD/CNG.";
-  const size_t samples_per_10ms_frame = 10 * SampleRateHz() / 1000;
-  CHECK_EQ(speech_buffer_.size(),
-           static_cast<size_t>(frames_in_buffer_) * samples_per_10ms_frame);
 
   // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
   // following split sizes:
   // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
   // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
   int blocks_in_first_vad_call =
-      (frames_in_buffer_ > 3 ? 3 : frames_in_buffer_);
-  if (frames_in_buffer_ == 4)
+      (frames_to_encode > 3 ? 3 : frames_to_encode);
+  if (frames_to_encode == 4)
     blocks_in_first_vad_call = 2;
   const int blocks_in_second_vad_call =
-      frames_in_buffer_ - blocks_in_first_vad_call;
+      frames_to_encode - blocks_in_first_vad_call;
   CHECK_GE(blocks_in_second_vad_call, 0);
 
   // Check if all of the buffer is passive speech. Start with checking the first
@@ -161,12 +155,12 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
   EncodedInfo info;
   switch (activity) {
     case Vad::kPassive: {
-      info = EncodePassive(max_encoded_bytes, encoded);
+      info = EncodePassive(frames_to_encode, max_encoded_bytes, encoded);
       last_frame_active_ = false;
       break;
     }
     case Vad::kActive: {
-      info = EncodeActive(max_encoded_bytes, encoded);
+      info = EncodeActive(frames_to_encode, max_encoded_bytes, encoded);
       last_frame_active_ = true;
       break;
     }
@@ -176,20 +170,24 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
     }
   }
 
-  speech_buffer_.clear();
-  frames_in_buffer_ = 0;
+  speech_buffer_.erase(
+      speech_buffer_.begin(),
+      speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
+  rtp_timestamps_.erase(rtp_timestamps_.begin(),
+                        rtp_timestamps_.begin() + frames_to_encode);
   return info;
 }
 
 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
+    int frames_to_encode,
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   bool force_sid = last_frame_active_;
   bool output_produced = false;
   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
-  CHECK_GE(max_encoded_bytes, frames_in_buffer_ * samples_per_10ms_frame);
+  CHECK_GE(max_encoded_bytes, frames_to_encode * samples_per_10ms_frame);
   AudioEncoder::EncodedInfo info;
-  for (int i = 0; i < frames_in_buffer_; ++i) {
+  for (int i = 0; i < frames_to_encode; ++i) {
     int16_t encoded_bytes_tmp = 0;
     CHECK_GE(WebRtcCng_Encode(cng_inst_.get(),
                               &speech_buffer_[i * samples_per_10ms_frame],
@@ -202,7 +200,7 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
       force_sid = false;
     }
   }
-  info.encoded_timestamp = first_timestamp_in_buffer_;
+  info.encoded_timestamp = rtp_timestamps_.front();
   info.payload_type = cng_payload_type_;
   info.send_even_if_empty = true;
   info.speech = false;
@@ -210,15 +208,18 @@
 }
 
 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(
+    int frames_to_encode,
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
   AudioEncoder::EncodedInfo info;
-  for (int i = 0; i < frames_in_buffer_; ++i) {
+  for (int i = 0; i < frames_to_encode; ++i) {
     info = speech_encoder_->Encode(
-        first_timestamp_in_buffer_, &speech_buffer_[i * samples_per_10ms_frame],
+        rtp_timestamps_.front(), &speech_buffer_[i * samples_per_10ms_frame],
         samples_per_10ms_frame, max_encoded_bytes, encoded);
-    if (i < frames_in_buffer_ - 1) {
+    if (i == frames_to_encode - 1) {
+      CHECK_GT(info.encoded_bytes, 0u) << "Encoder didn't deliver data.";
+    } else {
       CHECK_EQ(info.encoded_bytes, 0u) << "Encoder delivered data too early.";
     }
   }
diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
index a31f0deb1..8135b9871 100644
--- a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
@@ -80,6 +80,21 @@ class AudioEncoderCngTest : public ::testing::Test {
     timestamp_ += num_audio_samples_10ms_;
   }
 
+  // Expect |num_calls| calls to the encoder, all successful. The last call
+  // claims to have encoded |kMockMaxEncodedBytes| bytes, and all the preceding
+  // ones 0 bytes.
+  void ExpectEncodeCalls(int num_calls) {
+    InSequence s;
+    AudioEncoder::EncodedInfo info;
+    for (int j = 0; j < num_calls - 1; ++j) {
+      EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
+          .WillOnce(Return(info));
+    }
+    info.encoded_bytes = kMockReturnEncodedBytes;
+    EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
+        .WillOnce(Return(info));
+  }
+
   // Verifies that the cng_ object waits until it has collected
   // |blocks_per_frame| blocks of audio, and then dispatches all of them to
   // the underlying codec (speech or cng).
@@ -96,20 +111,8 @@
       Encode();
       EXPECT_EQ(0u, encoded_info_.encoded_bytes);
     }
-    if (active_speech) {
-      // Now expect |blocks_per_frame| calls to the encoder in sequence.
-      // Let the speech codec mock return true and set the number of encoded
-      // bytes to |kMockReturnEncodedBytes|.
-      InSequence s;
-      AudioEncoder::EncodedInfo info;
-      for (int j = 0; j < blocks_per_frame - 1; ++j) {
-        EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-            .WillOnce(Return(info));
-      }
-      info.encoded_bytes = kMockReturnEncodedBytes;
-      EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-          .WillOnce(Return(info));
-    }
+    if (active_speech)
+      ExpectEncodeCalls(blocks_per_frame);
     Encode();
     if (active_speech) {
       EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);
@@ -283,23 +286,17 @@ TEST_F(AudioEncoderCngTest, MixedActivePassive) {
   CreateCng();
 
   // All of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
   EXPECT_TRUE(encoded_info_.speech);
 
   // First half of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
   EXPECT_TRUE(encoded_info_.speech);
 
   // Second half of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
   EXPECT_TRUE(encoded_info_.speech);
 
diff --git a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
index 831758b8d..094b73074 100644
--- a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
+++ b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
@@ -66,16 +66,19 @@ class AudioEncoderCng final : public AudioEncoder {
     inline void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); }
   };
 
-  EncodedInfo EncodePassive(size_t max_encoded_bytes, uint8_t* encoded);
-  EncodedInfo EncodeActive(size_t max_encoded_bytes, uint8_t* encoded);
+  EncodedInfo EncodePassive(int frames_to_encode,
+                            size_t max_encoded_bytes,
+                            uint8_t* encoded);
+  EncodedInfo EncodeActive(int frames_to_encode,
+                           size_t max_encoded_bytes,
+                           uint8_t* encoded);
   size_t SamplesPer10msFrame() const;
 
   AudioEncoder* speech_encoder_;
   const int cng_payload_type_;
   const int num_cng_coefficients_;
   std::vector<int16_t> speech_buffer_;
-  uint32_t first_timestamp_in_buffer_;
-  int frames_in_buffer_;
+  std::vector<uint32_t> rtp_timestamps_;
   bool last_frame_active_;
   rtc::scoped_ptr<Vad> vad_;
   rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;