diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
index 58fd24f53..9b23607f0 100644
--- a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
@@ -50,8 +50,6 @@ AudioEncoderCng::AudioEncoderCng(const Config& config)
     : speech_encoder_(config.speech_encoder),
       cng_payload_type_(config.payload_type),
       num_cng_coefficients_(config.num_cng_coefficients),
-      first_timestamp_in_buffer_(0),
-      frames_in_buffer_(0),
       last_frame_active_(true),
       vad_(new Vad(config.vad_mode)) {
   if (config.vad) {
@@ -115,35 +113,31 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   CHECK_GE(max_encoded_bytes, static_cast<size_t>(num_cng_coefficients_ + 1));
-  const int num_samples = SampleRateHz() / 100 * NumChannels();
-  if (speech_buffer_.empty()) {
-    CHECK_EQ(frames_in_buffer_, 0);
-    first_timestamp_in_buffer_ = rtp_timestamp;
-  }
-  for (int i = 0; i < num_samples; ++i) {
+  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
+  CHECK_EQ(speech_buffer_.size(),
+           rtp_timestamps_.size() * samples_per_10ms_frame);
+  rtp_timestamps_.push_back(rtp_timestamp);
+  for (size_t i = 0; i < samples_per_10ms_frame; ++i) {
     speech_buffer_.push_back(audio[i]);
   }
-  ++frames_in_buffer_;
-  if (frames_in_buffer_ < speech_encoder_->Num10MsFramesInNextPacket()) {
+  const int frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
+  if (rtp_timestamps_.size() < static_cast<size_t>(frames_to_encode)) {
     return EncodedInfo();
   }
-  CHECK_LE(frames_in_buffer_ * 10, kMaxFrameSizeMs)
+  CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
       << "Frame size cannot be larger than " << kMaxFrameSizeMs
       << " ms when using VAD/CNG.";
-  const size_t samples_per_10ms_frame = 10 * SampleRateHz() / 1000;
-  CHECK_EQ(speech_buffer_.size(),
-           static_cast<size_t>(frames_in_buffer_) * samples_per_10ms_frame);
 
   // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
   // following split sizes:
   // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
   // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
   int blocks_in_first_vad_call =
-      (frames_in_buffer_ > 3 ? 3 : frames_in_buffer_);
-  if (frames_in_buffer_ == 4)
+      (frames_to_encode > 3 ? 3 : frames_to_encode);
+  if (frames_to_encode == 4)
     blocks_in_first_vad_call = 2;
   const int blocks_in_second_vad_call =
-      frames_in_buffer_ - blocks_in_first_vad_call;
+      frames_to_encode - blocks_in_first_vad_call;
   CHECK_GE(blocks_in_second_vad_call, 0);
 
   // Check if all of the buffer is passive speech. Start with checking the first
@@ -161,12 +155,12 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
   EncodedInfo info;
   switch (activity) {
     case Vad::kPassive: {
-      info = EncodePassive(max_encoded_bytes, encoded);
+      info = EncodePassive(frames_to_encode, max_encoded_bytes, encoded);
       last_frame_active_ = false;
       break;
     }
     case Vad::kActive: {
-      info = EncodeActive(max_encoded_bytes, encoded);
+      info = EncodeActive(frames_to_encode, max_encoded_bytes, encoded);
       last_frame_active_ = true;
       break;
     }
@@ -176,20 +170,24 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal(
     }
   }
 
-  speech_buffer_.clear();
-  frames_in_buffer_ = 0;
+  speech_buffer_.erase(
+      speech_buffer_.begin(),
+      speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
+  rtp_timestamps_.erase(rtp_timestamps_.begin(),
+                        rtp_timestamps_.begin() + frames_to_encode);
   return info;
 }
 
 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
+    int frames_to_encode,
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   bool force_sid = last_frame_active_;
   bool output_produced = false;
   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
-  CHECK_GE(max_encoded_bytes, frames_in_buffer_ * samples_per_10ms_frame);
+  CHECK_GE(max_encoded_bytes, frames_to_encode * samples_per_10ms_frame);
   AudioEncoder::EncodedInfo info;
-  for (int i = 0; i < frames_in_buffer_; ++i) {
+  for (int i = 0; i < frames_to_encode; ++i) {
     int16_t encoded_bytes_tmp = 0;
     CHECK_GE(WebRtcCng_Encode(cng_inst_.get(),
                               &speech_buffer_[i * samples_per_10ms_frame],
@@ -202,7 +200,7 @@ AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
       force_sid = false;
     }
   }
-  info.encoded_timestamp = first_timestamp_in_buffer_;
+  info.encoded_timestamp = rtp_timestamps_.front();
   info.payload_type = cng_payload_type_;
   info.send_even_if_empty = true;
   info.speech = false;
@@ -210,15 +208,18 @@
 }
 
 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(
+    int frames_to_encode,
     size_t max_encoded_bytes,
     uint8_t* encoded) {
   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
   AudioEncoder::EncodedInfo info;
-  for (int i = 0; i < frames_in_buffer_; ++i) {
+  for (int i = 0; i < frames_to_encode; ++i) {
     info = speech_encoder_->Encode(
-        first_timestamp_in_buffer_, &speech_buffer_[i * samples_per_10ms_frame],
+        rtp_timestamps_.front(), &speech_buffer_[i * samples_per_10ms_frame],
         samples_per_10ms_frame, max_encoded_bytes, encoded);
-    if (i < frames_in_buffer_ - 1) {
+    if (i == frames_to_encode - 1) {
+      CHECK_GT(info.encoded_bytes, 0u) << "Encoder didn't deliver data.";
+    } else {
       CHECK_EQ(info.encoded_bytes, 0u) << "Encoder delivered data too early.";
     }
   }
diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
index a31f0deb1..8135b9871 100644
--- a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc
@@ -80,6 +80,21 @@ class AudioEncoderCngTest : public ::testing::Test {
     timestamp_ += num_audio_samples_10ms_;
   }
 
+  // Expect |num_calls| calls to the encoder, all successful. The last call
+  // claims to have encoded |kMockMaxEncodedBytes| bytes, and all the preceding
+  // ones 0 bytes.
+  void ExpectEncodeCalls(int num_calls) {
+    InSequence s;
+    AudioEncoder::EncodedInfo info;
+    for (int j = 0; j < num_calls - 1; ++j) {
+      EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
+          .WillOnce(Return(info));
+    }
+    info.encoded_bytes = kMockReturnEncodedBytes;
+    EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
+        .WillOnce(Return(info));
+  }
+
   // Verifies that the cng_ object waits until it has collected
   // |blocks_per_frame| blocks of audio, and then dispatches all of them to
   // the underlying codec (speech or cng).
@@ -96,20 +111,8 @@
       Encode();
       EXPECT_EQ(0u, encoded_info_.encoded_bytes);
     }
-    if (active_speech) {
-      // Now expect |blocks_per_frame| calls to the encoder in sequence.
-      // Let the speech codec mock return true and set the number of encoded
-      // bytes to |kMockReturnEncodedBytes|.
-      InSequence s;
-      AudioEncoder::EncodedInfo info;
-      for (int j = 0; j < blocks_per_frame - 1; ++j) {
-        EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-            .WillOnce(Return(info));
-      }
-      info.encoded_bytes = kMockReturnEncodedBytes;
-      EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-          .WillOnce(Return(info));
-    }
+    if (active_speech)
+      ExpectEncodeCalls(blocks_per_frame);
     Encode();
     if (active_speech) {
       EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);
@@ -283,23 +286,17 @@ TEST_F(AudioEncoderCngTest, MixedActivePassive) {
   CreateCng();
 
   // All of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
   EXPECT_TRUE(encoded_info_.speech);
 
   // First half of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
   EXPECT_TRUE(encoded_info_.speech);
 
   // Second half of the frame is active speech.
-  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(AudioEncoder::EncodedInfo()));
+  ExpectEncodeCalls(6);
   EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
   EXPECT_TRUE(encoded_info_.speech);
 
diff --git a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
index 831758b8d..094b73074 100644
--- a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
+++ b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h
@@ -66,16 +66,19 @@ class AudioEncoderCng final : public AudioEncoder {
     inline void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); }
   };
 
-  EncodedInfo EncodePassive(size_t max_encoded_bytes, uint8_t* encoded);
-  EncodedInfo EncodeActive(size_t max_encoded_bytes, uint8_t* encoded);
+  EncodedInfo EncodePassive(int frames_to_encode,
+                            size_t max_encoded_bytes,
+                            uint8_t* encoded);
+  EncodedInfo EncodeActive(int frames_to_encode,
+                           size_t max_encoded_bytes,
+                           uint8_t* encoded);
   size_t SamplesPer10msFrame() const;
 
   AudioEncoder* speech_encoder_;
   const int cng_payload_type_;
   const int num_cng_coefficients_;
   std::vector<int16_t> speech_buffer_;
-  uint32_t first_timestamp_in_buffer_;
-  int frames_in_buffer_;
+  std::vector<uint32_t> rtp_timestamps_;
   bool last_frame_active_;
   rtc::scoped_ptr<Vad> vad_;
   rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;