Fix for glitches in ACM when switching desired output sample rate

The problem was that if the output sample rate is changed from one where no resampling is needed to one that requires resampling, the first output from the resampler will contain an onset period. The solution provided in this CL is to keep a copy of the last output frame in ACM; if the resampler is engaged, it is primed with this old frame before resampling the current frame. BUG=3919 R=bjornv@webrtc.org, turaj@webrtc.org Review URL: https://webrtc-codereview.appspot.com/27729004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7479 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-10-21 06:54:23 +00:00
parent a8c0edd29f
commit 913f7b8d5e
3 changed files with 62 additions and 57 deletions
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@@ -122,11 +122,14 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
      last_audio_decoder_(-1),  // Invalid value.
      previous_audio_activity_(AudioFrame::kVadPassive),
      current_sample_rate_hz_(config.neteq_config.sample_rate_hz),
+      audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
+      last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
      nack_(),
      nack_enabled_(false),
      neteq_(NetEq::Create(config.neteq_config)),
      vad_enabled_(true),
      clock_(config.clock),
+      resampled_last_output_frame_(true),
      av_sync_(false),
      initial_delay_manager_(),
      missing_packets_sync_stream_(),
@@ -143,6 +146,9 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
    neteq_->EnableVad();
  else
    neteq_->DisableVad();
+
+  memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
+  memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
 }

 AcmReceiver::~AcmReceiver() {
@@ -342,7 +348,6 @@ int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,

 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
  enum NetEqOutputType type;
-  int16_t* ptr_audio_buffer = audio_frame->data_;
  int samples_per_channel;
  int num_channels;
  bool return_silence = false;
@@ -359,18 +364,6 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
      initial_delay_manager_->LatePackets(timestamp_now,
                                          late_packets_sync_stream_.get());
    }
-
-    if (!return_silence) {
-      // This is our initial guess regarding whether a resampling will be
-      // required. It is based on previous sample rate of netEq. Most often,
-      // this is a correct guess, however, in case that incoming payload changes
-      // the resampling might might be needed. By doing so, we avoid an
-      // unnecessary memcpy().
-      if (desired_freq_hz != -1 &&
-          current_sample_rate_hz_ != desired_freq_hz) {
-        ptr_audio_buffer = audio_buffer_;
-      }
-    }
  }

  // If |late_packets_sync_stream_| is allocated then we have been in AV-sync
@@ -381,17 +374,19 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
      return 0;
  }

+  // Accessing members, take the lock.
+  CriticalSectionScoped lock(crit_sect_.get());
+
+  // Always write the output to |audio_buffer_| first.
  if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
-                       ptr_audio_buffer,
+                       audio_buffer_.get(),
                       &samples_per_channel,
-                       &num_channels, &type) != NetEq::kOK) {
+                       &num_channels,
+                       &type) != NetEq::kOK) {
    LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "NetEq Failed.";
    return -1;
  }

-  // Accessing members, take the lock.
-  CriticalSectionScoped lock(crit_sect_.get());
-
  // Update NACK.
  int decoded_sequence_num = 0;
  uint32_t decoded_timestamp = 0;
@@ -409,45 +404,53 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
  bool need_resampling = (desired_freq_hz != -1) &&
      (current_sample_rate_hz_ != desired_freq_hz);

-  if (ptr_audio_buffer == audio_buffer_) {
-    // Data is written to local buffer.
-    if (need_resampling) {
-      samples_per_channel =
-          resampler_.Resample10Msec(audio_buffer_,
-                                    current_sample_rate_hz_,
-                                    desired_freq_hz,
-                                    num_channels,
-                                    AudioFrame::kMaxDataSizeSamples,
-                                    audio_frame->data_);
-      if (samples_per_channel < 0) {
-        LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "Resampler Failed.";
-        return -1;
-      }
-    } else {
-      // We might end up here ONLY if codec is changed.
-      memcpy(audio_frame->data_, audio_buffer_, samples_per_channel *
-             num_channels * sizeof(int16_t));
-    }
-  } else {
-    // Data is written into |audio_frame|.
-    if (need_resampling) {
-      // We might end up here ONLY if codec is changed.
-      samples_per_channel =
-          resampler_.Resample10Msec(audio_frame->data_,
-                                    current_sample_rate_hz_,
-                                    desired_freq_hz,
-                                    num_channels,
-                                    AudioFrame::kMaxDataSizeSamples,
-                                    audio_buffer_);
-      if (samples_per_channel < 0) {
-        LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio") << "Resampler Failed.";
-        return -1;
-      }
-      memcpy(audio_frame->data_, audio_buffer_, samples_per_channel *
-             num_channels * sizeof(int16_t));
+  if (need_resampling && !resampled_last_output_frame_) {
+    // Prime the resampler with the last frame.
+    int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
+    samples_per_channel =
+        resampler_.Resample10Msec(last_audio_buffer_.get(),
+                                  current_sample_rate_hz_,
+                                  desired_freq_hz,
+                                  num_channels,
+                                  AudioFrame::kMaxDataSizeSamples,
+                                  temp_output);
+    if (samples_per_channel < 0) {
+      LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio")
+          << "Resampling last_audio_buffer_ failed.";
+      return -1;
    }
  }

+  // The audio in |audio_buffer_| is transferred to |audio_frame| below, either
+  // through resampling, or through straight memcpy.
+  // TODO(henrik.lundin) Glitches in the output may appear if the output rate
+  // from NetEq changes. See WebRTC issue 3923.
+  if (need_resampling) {
+    samples_per_channel =
+        resampler_.Resample10Msec(audio_buffer_.get(),
+                                  current_sample_rate_hz_,
+                                  desired_freq_hz,
+                                  num_channels,
+                                  AudioFrame::kMaxDataSizeSamples,
+                                  audio_frame->data_);
+    if (samples_per_channel < 0) {
+      LOG_FERR0(LS_ERROR, "AcmReceiver::GetAudio")
+          << "Resampling audio_buffer_ failed.";
+      return -1;
+    }
+    resampled_last_output_frame_ = true;
+  } else {
+    resampled_last_output_frame_ = false;
+    // No resampling is needed; copy |audio_buffer_| straight to |audio_frame|.
+    memcpy(audio_frame->data_,
+           audio_buffer_.get(),
+           samples_per_channel * num_channels * sizeof(int16_t));
+  }
+
+  // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
+  // for next time.
+  audio_buffer_.swap(last_audio_buffer_);
+
  audio_frame->num_channels_ = num_channels;
  audio_frame->samples_per_channel_ = samples_per_channel;
  audio_frame->sample_rate_hz_ = samples_per_channel * 100;
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.h
@@ -334,7 +334,8 @@ class AcmReceiver {
  ACMResampler resampler_ GUARDED_BY(crit_sect_);
  // Used in GetAudio, declared as member to avoid allocating every 10ms.
  // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
-  int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
+  scoped_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
+  scoped_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
  scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
  bool nack_enabled_ GUARDED_BY(crit_sect_);
  CallStatistics call_stats_ GUARDED_BY(crit_sect_);
@@ -342,6 +343,7 @@ class AcmReceiver {
  Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
  bool vad_enabled_;
  Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
+  bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);

  // Indicates if a non-zero initial delay is set, and the receiver is in
  // AV-sync mode.
--- a/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
+++ b/webrtc/modules/audio_coding/main/acm2/audio_coding_module_unittest_oldapi.cc
@@ -1034,7 +1034,7 @@ TEST_F(AcmSwitchingOutputFrequencyOldApi, TestWithoutToggling) {
  Run(16000, 16000, 1000);
 }

-TEST_F(AcmSwitchingOutputFrequencyOldApi, DISABLED_Toggle16KhzTo32Khz) {
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo32Khz) {
  Run(16000, 32000, 1000);
 }

@@ -1042,7 +1042,7 @@ TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle32KhzTo16Khz) {
  Run(32000, 16000, 1000);
 }

-TEST_F(AcmSwitchingOutputFrequencyOldApi, DISABLED_Toggle16KhzTo8Khz) {
+TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo8Khz) {
  Run(16000, 8000, 1000);
 }