Fix a bug where network freeze during CNG causes delay

Wrote a new NetEq unit test to test a network freeze during comfort noise playout. The network freezes and resumes during the silence period, and then resumes speech. It was verified that the delay increased due to the freeze, and this CL contains a fix for that problem. BUG=2995 R=turaj@webrtc.org Review URL: https://webrtc-codereview.appspot.com/9849004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@5701 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-03-14 12:40:05 +00:00 · 2014-03-14 12:40:05 +00:00 · 24779fe7cc
commit 24779fe7cc
parent 367000fa8d
2 changed files with 142 additions and 17 deletions
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
@ -874,9 +874,10 @@ int NetEqImpl::GetDecision(Operations* operation,
    // Because of timestamp peculiarities, we have to "manually" disallow using
    // a CNG packet with the same timestamp as the one that was last played.
    // This can happen when using redundancy and will cause the timing to shift.
-    while (header &&
-        decoder_database_->IsComfortNoise(header->payloadType) &&
-        end_timestamp >= header->timestamp) {
+    while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
+           (end_timestamp >= header->timestamp ||
+            end_timestamp + decision_logic_->generated_noise_samples() >
+                header->timestamp)) {
      // Don't use this packet, discard it.
      if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
        assert(false);  // Must be ok by design.
--- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
@ -219,7 +219,12 @@ class NetEqDecodingTest : public ::testing::Test {
                const std::set<uint16_t>& drop_seq_numbers,
                bool expect_seq_no_wrap, bool expect_timestamp_wrap);

-  void LongCngWithClockDrift(double drift_factor);
+  void LongCngWithClockDrift(double drift_factor,
+                             double network_freeze_ms,
+                             bool pull_audio_during_freeze,
+                             int delay_tolerance_ms,
+                             int max_time_to_speech_ms);
+
  void DuplicateCng();

  NetEq* neteq_;
@ -698,7 +703,11 @@ TEST_F(NetEqDecodingTest,
  EXPECT_EQ(110946, network_stats.clockdrift_ppm);
 }

-void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
+void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
+                                              double network_freeze_ms,
+                                              bool pull_audio_during_freeze,
+                                              int delay_tolerance_ms,
+                                              int max_time_to_speech_ms) {
  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  const int kFrameSizeMs = 30;
@ -706,6 +715,8 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
  const int kPayloadBytes = kSamples * 2;
  double next_input_time_ms = 0.0;
  double t_ms;
+  int out_len;
+  int num_channels;
  NetEqOutputType type;

  // Insert speech for 5 seconds.
@ -723,8 +734,6 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
      next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
    }
    // Pull out data once.
-    int out_len;
-    int num_channels;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
@ -751,8 +760,6 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
      next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor;
    }
    // Pull out data once.
-    int out_len;
-    int num_channels;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
@ -760,7 +767,49 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {

  EXPECT_EQ(kOutputCNG, type);

+  if (network_freeze_ms > 0) {
+    // First keep pulling audio for |network_freeze_ms| without inserting
+    // any data, then insert CNG data corresponding to |network_freeze_ms|
+    // without pulling any output audio.
+    const double loop_end_time = t_ms + network_freeze_ms;
+    for (; t_ms < loop_end_time; t_ms += 10) {
+      // Pull out data once.
+      ASSERT_EQ(0,
+                neteq_->GetAudio(
+                    kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
+      ASSERT_EQ(kBlockSize16kHz, out_len);
+      EXPECT_EQ(kOutputCNG, type);
+    }
+    bool pull_once = pull_audio_during_freeze;
+    // If |pull_once| is true, GetAudio will be called once half-way through
+    // the network recovery period.
+    double pull_time_ms = (t_ms + next_input_time_ms) / 2;
+    while (next_input_time_ms <= t_ms) {
+      if (pull_once && next_input_time_ms >= pull_time_ms) {
+        pull_once = false;
+        // Pull out data once.
+        ASSERT_EQ(
+            0,
+            neteq_->GetAudio(
+                kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
+        ASSERT_EQ(kBlockSize16kHz, out_len);
+        EXPECT_EQ(kOutputCNG, type);
+        t_ms += 10;
+      }
+      // Insert one CNG frame each 100 ms.
+      uint8_t payload[kPayloadBytes];
+      int payload_len;
+      WebRtcRTPHeader rtp_info;
+      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
+      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
+      ++seq_no;
+      timestamp += kCngPeriodSamples;
+      next_input_time_ms += kCngPeriodMs * drift_factor;
+    }
+  }
+
  // Insert speech again until output type is speech.
+  double speech_restart_time_ms = t_ms;
  while (type != kOutputNormal) {
    // Each turn in this for loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
@ -771,11 +820,9 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
      ++seq_no;
      timestamp += kSamples;
-      next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
+      next_input_time_ms += kFrameSizeMs * drift_factor;
    }
    // Pull out data once.
-    int out_len;
-    int num_channels;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
@ -783,23 +830,100 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor) {
    t_ms += 10;
  }

+  // Check that the speech starts again within reasonable time.
+  double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
+  EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
  int32_t delay_after = timestamp - neteq_->PlayoutTimestamp();
  // Compare delay before and after, and make sure it differs less than 20 ms.
-  EXPECT_LE(delay_after, delay_before + 20 * 16);
-  EXPECT_GE(delay_after, delay_before - 20 * 16);
+  EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
+  EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
 }

 TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(LongCngWithNegativeClockDrift)) {
  // Apply a clock drift of -25 ms / s (sender faster than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
-  LongCngWithClockDrift(kDriftFactor);
+  const double kNetworkFreezeTimeMs = 0.0;
+  const bool kGetAudioDuringFreezeRecovery = false;
+  const int kDelayToleranceMs = 20;
+  const int kMaxTimeToSpeechMs = 100;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
 }

-// TODO(hlundin): Re-enable this test and fix the issues to make it pass.
 TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(LongCngWithPositiveClockDrift)) {
  // Apply a clock drift of +25 ms / s (sender slower than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
-  LongCngWithClockDrift(kDriftFactor);
+  const double kNetworkFreezeTimeMs = 0.0;
+  const bool kGetAudioDuringFreezeRecovery = false;
+  const int kDelayToleranceMs = 20;
+  const int kMaxTimeToSpeechMs = 100;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
+}
+
+TEST_F(NetEqDecodingTest,
+       DISABLED_ON_ANDROID(LongCngWithNegativeClockDriftNetworkFreeze)) {
+  // Apply a clock drift of -25 ms / s (sender faster than receiver).
+  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
+  const double kNetworkFreezeTimeMs = 5000.0;
+  const bool kGetAudioDuringFreezeRecovery = false;
+  const int kDelayToleranceMs = 50;
+  const int kMaxTimeToSpeechMs = 200;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
+}
+
+TEST_F(NetEqDecodingTest,
+       DISABLED_ON_ANDROID(LongCngWithPositiveClockDriftNetworkFreeze)) {
+  // Apply a clock drift of +25 ms / s (sender slower than receiver).
+  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
+  const double kNetworkFreezeTimeMs = 5000.0;
+  const bool kGetAudioDuringFreezeRecovery = false;
+  const int kDelayToleranceMs = 20;
+  const int kMaxTimeToSpeechMs = 100;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
+}
+
+TEST_F(
+    NetEqDecodingTest,
+    DISABLED_ON_ANDROID(LongCngWithPositiveClockDriftNetworkFreezeExtraPull)) {
+  // Apply a clock drift of +25 ms / s (sender slower than receiver).
+  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
+  const double kNetworkFreezeTimeMs = 5000.0;
+  const bool kGetAudioDuringFreezeRecovery = true;
+  const int kDelayToleranceMs = 20;
+  const int kMaxTimeToSpeechMs = 100;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
+}
+
+TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(LongCngWithoutClockDrift)) {
+  const double kDriftFactor = 1.0;  // No drift.
+  const double kNetworkFreezeTimeMs = 0.0;
+  const bool kGetAudioDuringFreezeRecovery = false;
+  const int kDelayToleranceMs = 10;
+  const int kMaxTimeToSpeechMs = 50;
+  LongCngWithClockDrift(kDriftFactor,
+                        kNetworkFreezeTimeMs,
+                        kGetAudioDuringFreezeRecovery,
+                        kDelayToleranceMs,
+                        kMaxTimeToSpeechMs);
 }

 TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(UnknownPayloadType)) {