Implement initial delay. This CL allows clients of VoE to set an initial delay. Playout of audio is delayed, and the extra playout delay is maintained during the call, while packets are buffered (in NetEq) to achieve the desired delay. ACM will play out silence (zeros). The initial delay has to be set before any packet is pushed into ACM.

TEST=An ACM unit test is added; a manual integration test is also written.
Review URL: https://webrtc-codereview.appspot.com/1097009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3506 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
turaj@webrtc.org 2013-02-12 21:42:18 +00:00
parent e6e344a7dc
commit 6388c3e2fd
24 changed files with 839 additions and 85 deletions

View File

@ -940,6 +940,22 @@ class AudioCodingModule: public Module {
//
virtual WebRtc_Word32 NetworkStatistics(
ACMNetworkStatistics& network_statistics) const = 0;
//
// Set an initial delay for playout.
// An initial delay yields ACM playout silence until equivalent of |delay_ms|
// audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio
// from NetEq in its regular fashion, and the given delay is maintained as
// "minimum playout delay."
//
// Input:
// -delay_ms : delay in milliseconds.
//
// Return values:
// -1 if failed to set the delay.
// 0 if delay is set successfully.
//
virtual int SetInitialPlayoutDelay(int delay_ms) = 0;
};
} // namespace webrtc

View File

@ -45,7 +45,10 @@ ACMNetEQ::ACMNetEQ()
master_slave_info_(NULL),
previous_audio_activity_(AudioFrame::kVadUnknown),
extra_delay_(0),
callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
min_of_max_num_packets_(0),
min_of_buffer_size_bytes_(0),
per_packet_overhead_bytes_(0) {
for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) {
is_initialized_[n] = false;
ptr_vadinst_[n] = NULL;
@ -215,6 +218,7 @@ WebRtc_Word16 ACMNetEQ::AllocatePacketBufferByIdxSafe(
const WebRtc_Word16 idx) {
int max_num_packets;
int buffer_size_in_bytes;
int per_packet_overhead_bytes;
if (!is_initialized_[idx]) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
@ -223,12 +227,23 @@ WebRtc_Word16 ACMNetEQ::AllocatePacketBufferByIdxSafe(
}
if (WebRtcNetEQ_GetRecommendedBufferSize(inst_[idx], used_codecs,
num_codecs,
kTCPLargeJitter,
kTCPXLargeJitter,
&max_num_packets,
&buffer_size_in_bytes) != 0) {
&buffer_size_in_bytes,
&per_packet_overhead_bytes) != 0) {
LogError("GetRecommendedBufferSize", idx);
return -1;
}
if (idx == 0) {
min_of_buffer_size_bytes_ = buffer_size_in_bytes;
min_of_max_num_packets_ = max_num_packets;
per_packet_overhead_bytes_ = per_packet_overhead_bytes;
} else {
min_of_buffer_size_bytes_ = std::min(min_of_buffer_size_bytes_,
buffer_size_in_bytes);
min_of_max_num_packets_ = std::min(min_of_max_num_packets_,
max_num_packets);
}
if (neteq_packet_buffer_[idx] != NULL) {
free(neteq_packet_buffer_[idx]);
neteq_packet_buffer_[idx] = NULL;

View File

@ -272,6 +272,12 @@ class ACMNetEQ {
WebRtc_Word16 AddSlave(const WebRtcNetEQDecoder* used_codecs,
WebRtc_Word16 num_codecs);
// Reports the packet-buffer limits recorded when the NetEq packet buffers
// were allocated: the minimum (across master/slave NetEq instances) of the
// maximum packet count and of the buffer size in bytes, plus the per-packet
// overhead in bytes. Values are 0 until a buffer has been allocated.
void BufferSpec(int& num_packets, int& size_bytes, int& overhead_bytes) {
  num_packets = min_of_max_num_packets_;
  size_bytes = min_of_buffer_size_bytes_;
  overhead_bytes = per_packet_overhead_bytes_;
}
private:
//
// RTPPack()
@ -339,6 +345,11 @@ class ACMNetEQ {
WebRtc_Word32 extra_delay_;
CriticalSectionWrapper* callback_crit_sect_;
// Minimum of "max number of packets," among all NetEq instances.
int min_of_max_num_packets_;
// Minimum of buffer-size among all NetEq instances.
int min_of_buffer_size_bytes_;
int per_packet_overhead_bytes_;
};
} // namespace webrtc

View File

@ -145,8 +145,25 @@
'../test/TimedTrace.cc',
'../test/TwoWayCommunication.cc',
'../test/utility.cc',
'../test/initial_delay_unittest.cc',
],
},
{
'target_name': 'delay_test',
'type': 'executable',
'dependencies': [
'audio_coding_module',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/test/test.gyp:test_support_main',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
'<(DEPTH)/third_party/google-gflags/google-gflags.gyp:google-gflags',
],
'sources': [
'../test/delay_test.cc',
'../test/Channel.cc',
'../test/PCMFile.cc',
],
}, # delay_test
{
'target_name': 'audio_coding_unittests',
'type': 'executable',

View File

@ -138,7 +138,15 @@ AudioCodingModuleImpl::AudioCodingModuleImpl(const WebRtc_Word32 id)
last_detected_tone_(kACMToneEnd),
callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
secondary_send_codec_inst_(),
secondary_encoder_(NULL) {
secondary_encoder_(NULL),
initial_delay_ms_(0),
num_packets_accumulated_(0),
num_bytes_accumulated_(0),
accumulated_audio_ms_(0),
first_payload_received_(false),
last_incoming_send_timestamp_(0),
track_neteq_buffer_(false),
playout_ts_(0) {
// Nullify send codec memory, set payload type and set codec name to
// invalid values.
@ -1612,6 +1620,14 @@ WebRtc_Word32 AudioCodingModuleImpl::InitializeReceiver() {
// Initialize receiver, resets codec database etc.
WebRtc_Word32 AudioCodingModuleImpl::InitializeReceiverSafe() {
initial_delay_ms_ = 0;
num_packets_accumulated_ = 0;
num_bytes_accumulated_ = 0;
accumulated_audio_ms_ = 0;
first_payload_received_ = 0;;
last_incoming_send_timestamp_ = 0;
track_neteq_buffer_ = false;
playout_ts_ = 0;
// If the receiver is already initialized then we want to destroy any
// existing decoders. After a call to this function, we should have a clean
// start-up.
@ -1953,10 +1969,12 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
"IncomingPacket() Error, payload-length cannot be negative");
return -1;
}
{
// Store the payload Type. This will be used to retrieve "received codec"
// and "received frequency."
CriticalSectionScoped lock(acm_crit_sect_);
WebRtc_UWord8 my_payload_type;
// Check if this is an RED payload.
@ -1984,9 +2002,32 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
break;
}
}
// Codec is changed, there might be a jump in timestamp, therefore,
// we have to reset some variables that track NetEq buffer.
if (track_neteq_buffer_) {
last_incoming_send_timestamp_ = rtp_info.header.timestamp;
}
}
last_recv_audio_codec_pltype_ = my_payload_type;
}
if (track_neteq_buffer_) {
const int in_sample_rate_khz =
(ACMCodecDB::database_[current_receive_codec_idx_].plfreq / 1000);
if (first_payload_received_) {
if (rtp_info.header.timestamp > last_incoming_send_timestamp_) {
accumulated_audio_ms_ += (rtp_info.header.timestamp -
last_incoming_send_timestamp_) / in_sample_rate_khz;
}
} else {
first_payload_received_ = true;
}
num_packets_accumulated_++;
last_incoming_send_timestamp_ = rtp_info.header.timestamp;
playout_ts_ = static_cast<uint32_t>(
rtp_info.header.timestamp - static_cast<uint32_t>(
initial_delay_ms_ * in_sample_rate_khz));
}
}
// Split the payload for stereo packets, so that first half of payload
@ -2000,6 +2041,9 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
memcpy(payload, incoming_payload, payload_length);
codecs_[current_receive_codec_idx_]->SplitStereoPacket(payload, &length);
rtp_header.type.Audio.channel = 2;
if (track_neteq_buffer_)
num_bytes_accumulated_ += length / 2; // Per neteq, half is inserted
// into master and half to slave.
// Insert packet into NetEQ.
return neteq_.RecIn(payload, length, rtp_header);
} else {
@ -2008,6 +2052,8 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
return 0;
}
} else {
if (track_neteq_buffer_)
num_bytes_accumulated_ += payload_length;
return neteq_.RecIn(incoming_payload, payload_length, rtp_header);
}
}
@ -2084,11 +2130,14 @@ int AudioCodingModuleImpl::InitStereoSlave() {
// Minimum playout delay (Used for lip-sync).
WebRtc_Word32 AudioCodingModuleImpl::SetMinimumPlayoutDelay(
const WebRtc_Word32 time_ms) {
if ((time_ms < 0) || (time_ms > 1000)) {
if ((time_ms < 0) || (time_ms > 10000)) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Delay must be in the range of 0-1000 milliseconds.");
"Delay must be in the range of 0-10000 milliseconds.");
return -1;
}
// Don't let the extra delay modified while accumulating buffers in NetEq.
if (track_neteq_buffer_ && first_payload_received_)
return 0;
return neteq_.SetExtraDelay(time_ms);
}
@ -2177,6 +2226,9 @@ WebRtc_Word32 AudioCodingModuleImpl::PlayoutData10Ms(
const WebRtc_Word32 desired_freq_hz, AudioFrame& audio_frame) {
bool stereo_mode;
if (GetSilence(desired_freq_hz, &audio_frame))
return 0; // Silence is generated, return.
// RecOut always returns 10 ms.
if (neteq_.RecOut(audio_frame_) != 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
@ -2612,7 +2664,12 @@ WebRtc_Word32 AudioCodingModuleImpl::PlayoutTimestamp(
WebRtc_UWord32& timestamp) {
WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
"PlayoutTimestamp()");
return neteq_.PlayoutTimestamp(timestamp);
if (track_neteq_buffer_) {
timestamp = playout_ts_;
return 0;
} else {
return neteq_.PlayoutTimestamp(timestamp);
}
}
bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
@ -2757,4 +2814,74 @@ void AudioCodingModuleImpl::ResetFragmentation(int vector_size) {
static_cast<WebRtc_UWord16>(vector_size);
}
// Sets an initial playout delay of |delay_ms| (0-10000 ms). Playout of
// silence is maintained until the equivalent amount of audio has been
// buffered in NetEq; the delay is then kept as a minimum playout delay.
// Must be called before the first payload is received; returns -1 on
// failure (out-of-range delay or too late in the call), 0 on success.
int AudioCodingModuleImpl::SetInitialPlayoutDelay(int delay_ms) {
  if (delay_ms < 0 || delay_ms > 10000) {
    return -1;  // Outside the supported 0-10000 ms range.
  }
  CriticalSectionScoped lock(acm_crit_sect_);
  // Receiver should be initialized before this call processed.
  if (!receiver_initialized_) {
    InitializeReceiverSafe();
  }
  if (first_payload_received_) {
    // Too late for this API. Only works before a call is started.
    return -1;
  }
  initial_delay_ms_ = delay_ms;
  // From here on IncomingPacket()/GetSilence() track how much audio has
  // accumulated in NetEq.
  track_neteq_buffer_ = true;
  // Maintain the delay after the buffering phase as a minimum playout delay.
  return neteq_.SetExtraDelay(delay_ms);
}
// Fills |frame| with 10 ms of silence while an initial playout delay is
// pending, i.e. while less than |initial_delay_ms_| worth of audio has
// accumulated in NetEq. Returns true if silence was generated (caller should
// skip NetEq playout), false when regular playout should resume.
bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz,
                                       AudioFrame* frame) {
  CriticalSectionScoped lock(acm_crit_sect_);
  // No initial delay configured, or enough audio already buffered: stop
  // tracking the NetEq buffer and resume normal playout.
  if (initial_delay_ms_ == 0 || accumulated_audio_ms_ >= initial_delay_ms_) {
    track_neteq_buffer_ = false;
    return false;
  }
  // We stop accumulating packets, if the number of packets or the total size
  // exceeds a threshold (90% of NetEq's capacity), since NetEq would start
  // discarding packets beyond that point.
  int max_num_packets;
  int buffer_size_bytes;
  int per_payload_overhead_bytes;
  neteq_.BufferSpec(max_num_packets, buffer_size_bytes,
                    per_payload_overhead_bytes);
  int total_bytes_accumulated = num_bytes_accumulated_ +
      num_packets_accumulated_ * per_payload_overhead_bytes;
  if (num_packets_accumulated_ > max_num_packets * 0.9 ||
      total_bytes_accumulated > buffer_size_bytes * 0.9) {
    // Fix: log the computed total (payload + overhead), matching the format
    // string, instead of the raw payload byte count.
    WEBRTC_TRACE(webrtc::kTraceWarning, webrtc::kTraceAudioCoding, id_,
                 "GetSilence: Initial delay couldn't be achieved."
                 " num_packets_accumulated=%d, total_bytes_accumulated=%d",
                 num_packets_accumulated_, total_bytes_accumulated);
    track_neteq_buffer_ = false;
    return false;
  }
  // Choose the output sample rate: the caller's request, else the sample rate
  // of the current receive codec, else NetEq's current rate.
  if (desired_sample_rate_hz > 0) {
    frame->sample_rate_hz_ = desired_sample_rate_hz;
  } else {
    frame->sample_rate_hz_ = 0;
    if (current_receive_codec_idx_ >= 0) {
      frame->sample_rate_hz_ =
          ACMCodecDB::database_[current_receive_codec_idx_].plfreq;
    } else {
      // No payload received yet, use the default sampling rate of NetEq.
      frame->sample_rate_hz_ = neteq_.CurrentSampFreqHz();
    }
  }
  frame->num_channels_ = expected_channels_;
  frame->samples_per_channel_ = frame->sample_rate_hz_ / 100;  // Always 10 ms.
  frame->speech_type_ = AudioFrame::kCNG;
  frame->vad_activity_ = AudioFrame::kVadPassive;
  frame->energy_ = 0;
  int samples = frame->samples_per_channel_ * frame->num_channels_;
  memset(frame->data_, 0, samples * sizeof(int16_t));
  return true;
}
} // namespace webrtc

View File

@ -282,6 +282,14 @@ class AudioCodingModuleImpl : public AudioCodingModule {
int PreprocessToAddData(const AudioFrame& in_frame,
const AudioFrame** ptr_out);
// Set initial playout delay.
// -delay_ms: delay in millisecond.
//
// Return value:
// -1: if cannot set the delay.
// 0: if delay set successfully.
int SetInitialPlayoutDelay(int delay_ms);
private:
// Change required states after starting to receive the codec corresponding
// to |index|.
@ -302,6 +310,8 @@ class AudioCodingModuleImpl : public AudioCodingModule {
void ResetFragmentation(int vector_size);
bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame);
AudioPacketizationCallback* packetization_callback_;
WebRtc_Word32 id_;
WebRtc_UWord32 last_timestamp_;
@ -375,6 +385,16 @@ class AudioCodingModuleImpl : public AudioCodingModule {
AudioFrame preprocess_frame_;
CodecInst secondary_send_codec_inst_;
scoped_ptr<ACMGenericCodec> secondary_encoder_;
// Initial delay.
int initial_delay_ms_;
int num_packets_accumulated_;
int num_bytes_accumulated_;
int accumulated_audio_ms_;
int first_payload_received_;
uint32_t last_incoming_send_timestamp_;
bool track_neteq_buffer_;
uint32_t playout_ts_;
};
} // namespace webrtc

View File

@ -0,0 +1,268 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include <math.h>
#include <cassert>
#include <iostream>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "testsupport/fileutils.h"
#include "webrtc/common_types.h"
#include "webrtc/engine_configurations.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
#include "webrtc/modules/audio_coding/main/test/Channel.h"
#include "webrtc/modules/audio_coding/main/test/PCMFile.h"
#include "webrtc/modules/audio_coding/main/test/utility.h"
#include "webrtc/system_wrappers/interface/event_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
DEFINE_string(codec, "isac", "Codec Name");
DEFINE_int32(sample_rate_hz, 16000, "Sampling rate in Hertz.");
DEFINE_int32(num_channels, 1, "Number of Channels.");
DEFINE_string(input_file, "", "Input file, PCM16 32 kHz, optional.");
DEFINE_int32(delay, 0, "Delay in millisecond.");
DEFINE_int32(init_delay, 0, "Initial delay in millisecond.");
DEFINE_bool(dtx, false, "Enable DTX at the sender side.");
namespace webrtc {
namespace {
struct CodecConfig {
char name[50];
int sample_rate_hz;
int num_channels;
};
struct AcmConfig {
bool dtx;
bool fec;
};
struct Config {
CodecConfig codec;
AcmConfig acm;
bool packet_loss;
};
}
// Manual test harness measuring end-to-end audio delay through two ACM
// instances connected by a lossless in-process channel (A sends, B receives),
// optionally with an initial playout delay (--init_delay) and/or a minimum
// playout delay (--delay). Prints NetEq waiting-time statistics and a
// timestamp-based running average delay while writing B's playout to a file.
class DelayTest {
 public:
  DelayTest()
      : acm_a_(NULL),
        acm_b_(NULL),
        channel_a2b_(NULL),
        test_cntr_(0),
        encoding_sample_rate_hz_(8000) {
  }

  ~DelayTest() {}

  // Releases the ACMs and the channel created in SetUp().
  void TearDown() {
    if(acm_a_ != NULL) {
      AudioCodingModule::Destroy(acm_a_);
      acm_a_ = NULL;
    }
    if(acm_b_ != NULL) {
      AudioCodingModule::Destroy(acm_b_);
      acm_b_ = NULL;
    }
    if(channel_a2b_ != NULL) {
      delete channel_a2b_;
      channel_a2b_ = NULL;
    }
  }

  // Opens the input file, creates both ACMs, applies the delay flags on the
  // receiving side and registers receive codecs (skipping multi-channel
  // codecs except Opus forced to mono, 48 kHz CN, and telephone-event).
  void SetUp() {
    test_cntr_ = 0;
    std::string file_name =
        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
    if (FLAGS_input_file.size() > 0)
      file_name = FLAGS_input_file;
    in_file_a_.Open(file_name, 32000, "rb");
    acm_a_ = AudioCodingModule::Create(0);
    acm_b_ = AudioCodingModule::Create(1);
    acm_a_->InitializeReceiver();
    acm_b_->InitializeReceiver();
    // Initial delay must be set before any packet arrives (ACM requirement).
    if (FLAGS_init_delay > 0) {
      ASSERT_EQ(0, acm_b_->SetInitialPlayoutDelay(FLAGS_init_delay));
    }
    if (FLAGS_delay > 0) {
      ASSERT_EQ(0, acm_b_->SetMinimumPlayoutDelay(FLAGS_delay));
    }
    WebRtc_UWord8 num_encoders = acm_a_->NumberOfCodecs();
    CodecInst my_codec_param;
    for(int n = 0; n < num_encoders; n++) {
      acm_b_->Codec(n, my_codec_param);
      if (STR_CASE_CMP(my_codec_param.plname, "opus") == 0)
        my_codec_param.channels = 1;
      else if (my_codec_param.channels > 1)
        continue;
      if (STR_CASE_CMP(my_codec_param.plname, "CN") == 0 &&
          my_codec_param.plfreq == 48000)
        continue;
      if (STR_CASE_CMP(my_codec_param.plname, "telephone-event") == 0)
        continue;
      acm_b_->RegisterReceiveCodec(my_codec_param);
    }
    // Create and connect the channel
    channel_a2b_ = new Channel;
    acm_a_->RegisterTransportCallback(channel_a2b_);
    channel_a2b_->RegisterReceiverACM(acm_b_);
  }

  // Runs each of the |num_tests| configurations for |duration_sec| seconds.
  void Perform(const Config* config, size_t num_tests, int duration_sec,
               const char* output_prefix) {
    for (size_t n = 0; n < num_tests; ++n) {
      ApplyConfig(config[n]);
      Run(duration_sec, output_prefix);
    }
  }

 private:
  // Prints the configuration banner, then applies codec, ACM and channel
  // settings for one test run.
  void ApplyConfig(const Config& config) {
    printf("====================================\n");
    printf("Test %d \n"
           "Codec: %s, %d kHz, %d channel(s)\n"
           "ACM: DTX %s, FEC %s\n"
           "Channel: %s\n",
           ++test_cntr_,
           config.codec.name, config.codec.sample_rate_hz,
           config.codec.num_channels, config.acm.dtx ? "on" : "off",
           config.acm.fec ? "on" : "off",
           config.packet_loss ? "with packet-loss" : "no packet-loss");
    SendCodec(config.codec);
    ConfigAcm(config.acm);
    ConfigChannel(config.packet_loss);
  }

  // Registers the send codec on side A and records its sample rate, which is
  // later used to convert timestamp differences to seconds.
  void SendCodec(const CodecConfig& config) {
    CodecInst my_codec_param;
    ASSERT_EQ(0, AudioCodingModule::Codec(config.name, my_codec_param,
                                          config.sample_rate_hz,
                                          config.num_channels));
    encoding_sample_rate_hz_ = my_codec_param.plfreq;
    ASSERT_EQ(0, acm_a_->RegisterSendCodec(my_codec_param));
  }

  void ConfigAcm(const AcmConfig& config) {
    ASSERT_EQ(0, acm_a_->SetVAD(config.dtx, config.dtx, VADAggr));
    ASSERT_EQ(0, acm_a_->SetFECStatus(config.fec));
  }

  void ConfigChannel(bool packet_loss) {
    channel_a2b_->SetFECTestWithPacketLoss(packet_loss);
  }

  // Opens the output PCM file, named after the codec and delay flags.
  void OpenOutFile(const char* output_id) {
    std::stringstream file_stream;
    file_stream << "delay_test_" << FLAGS_codec << "_"
                << FLAGS_sample_rate_hz << "Hz" << "_"
                << FLAGS_init_delay << "ms_"
                << FLAGS_delay << "ms.pcm";
    std::cout << "Output file: " << file_stream.str() << std::endl <<std::endl;
    std::string file_name = webrtc::test::OutputPath() + file_stream.str();
    out_file_b_.Open(file_name.c_str(), 32000, "wb");
  }

  // Main loop: feeds 10 ms frames from the input file through A to B,
  // records B's playout, and tracks the delay as the difference between the
  // last received timestamp and B's playout timestamp.
  void Run(int duration_sec, const char* output_prefix) {
    OpenOutFile(output_prefix);
    AudioFrame audio_frame;
    uint32_t out_freq_hz_b = out_file_b_.SamplingFrequency();

    int num_frames = 0;
    int in_file_frames = 0;
    uint32_t playout_ts;
    uint32_t received_ts;
    double average_delay = 0;
    double inst_delay_sec = 0;
    while(num_frames < (duration_sec * 100)) {
      if (in_file_a_.EndOfFile()) {
        in_file_a_.Rewind();
      }

      // Print delay information every 64 frames (num_frames & 0x3F), i.e.
      // every 640 ms of audio.
      if ((num_frames & 0x3F) == 0x3F) {
        ACMNetworkStatistics statistics;
        acm_b_->NetworkStatistics(statistics);
        fprintf(stdout, "delay: min=%3d max=%3d mean=%3d median=%3d"
                " ts-based average = %6.3f, "
                "curr buff-lev = %4u opt buff-lev = %4u \n",
                statistics.minWaitingTimeMs,
                statistics.maxWaitingTimeMs,
                statistics.meanWaitingTimeMs,
                statistics.medianWaitingTimeMs,
                average_delay,
                statistics.currentBufferSize,
                statistics.preferredBufferSize);
        fflush(stdout);
      }

      in_file_a_.Read10MsData(audio_frame);
      ASSERT_EQ(0, acm_a_->Add10MsData(audio_frame));
      ASSERT_LE(0, acm_a_->Process());
      ASSERT_EQ(0, acm_b_->PlayoutData10Ms(out_freq_hz_b, audio_frame));
      out_file_b_.Write10MsData(audio_frame.data_,
                                audio_frame.samples_per_channel_ *
                                audio_frame.num_channels_);
      acm_b_->PlayoutTimestamp(playout_ts);
      received_ts = channel_a2b_->LastInTimestamp();
      inst_delay_sec = static_cast<uint32_t>(received_ts - playout_ts) /
          static_cast<double>(encoding_sample_rate_hz_);

      // Exponential moving average; skip the first frames to let it settle.
      if (num_frames > 10)
        average_delay = 0.95 * average_delay + 0.05 * inst_delay_sec;

      ++num_frames;
      ++in_file_frames;
    }
    out_file_b_.Close();
  }

  AudioCodingModule* acm_a_;
  AudioCodingModule* acm_b_;
  Channel* channel_a2b_;
  PCMFile in_file_a_;
  PCMFile out_file_b_;
  int test_cntr_;
  int encoding_sample_rate_hz_;
};
} // namespace webrtc
// Entry point: parses flags, builds a single test configuration from them,
// and runs the delay test for 240 seconds.
int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  webrtc::Config config;

  // Fix: copy with explicit truncation and NUL-termination instead of
  // strcpy(), so a long --codec flag cannot overflow the fixed-size buffer.
  strncpy(config.codec.name, FLAGS_codec.c_str(),
          sizeof(config.codec.name) - 1);
  config.codec.name[sizeof(config.codec.name) - 1] = '\0';
  config.codec.sample_rate_hz = FLAGS_sample_rate_hz;
  config.codec.num_channels = FLAGS_num_channels;
  config.acm.dtx = FLAGS_dtx;
  config.acm.fec = false;
  config.packet_loss = false;

  webrtc::DelayTest delay_test;
  delay_test.SetUp();
  delay_test.Perform(&config, 1, 240, "delay_test");
  delay_test.TearDown();
  return 0;
}

View File

@ -0,0 +1,170 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include <math.h>
#include <cassert>
#include <iostream>
#include "gtest/gtest.h"
#include "testsupport/fileutils.h"
#include "webrtc/common_types.h"
#include "webrtc/engine_configurations.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
#include "webrtc/modules/audio_coding/main/test/Channel.h"
#include "webrtc/modules/audio_coding/main/test/PCMFile.h"
#include "webrtc/modules/audio_coding/main/test/utility.h"
#include "webrtc/system_wrappers/interface/event_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
namespace {
// Computes the root-mean-square of all samples (across channels) in |frame|.
// Used by the tests to detect when playout transitions from silence to audio.
double FrameRms(AudioFrame& frame) {
  int samples = frame.num_channels_ * frame.samples_per_channel_;
  // Fix: guard against an empty frame, which would otherwise divide by zero.
  if (samples <= 0)
    return 0;
  double rms = 0;
  for (int n = 0; n < samples; ++n)
    rms += frame.data_[n] * frame.data_[n];
  rms /= samples;
  rms = sqrt(rms);
  return rms;
}
}
// Integration test fixture: verifies that when an initial playout delay is
// set on the receiving ACM, playout is silent for approximately that long
// (within one 100 ms tolerance) before real audio comes through.
class InitialPlayoutDelayTest : public ::testing::Test {
 protected:
  InitialPlayoutDelayTest()
      : acm_a_(NULL),
        acm_b_(NULL),
        channel_a2b_(NULL) {
  }

  ~InitialPlayoutDelayTest() {}

  // Releases the ACMs and the channel created in SetUp().
  void TearDown() {
    if(acm_a_ != NULL) {
      AudioCodingModule::Destroy(acm_a_);
      acm_a_ = NULL;
    }
    if(acm_b_ != NULL) {
      AudioCodingModule::Destroy(acm_b_);
      acm_b_ = NULL;
    }
    if(channel_a2b_ != NULL) {
      delete channel_a2b_;
      channel_a2b_ = NULL;
    }
  }

  // Creates sender (A) and receiver (B) ACMs, registers all L16 variants on
  // the receiver, and connects A to B through an in-process channel.
  void SetUp() {
    acm_a_ = AudioCodingModule::Create(0);
    acm_b_ = AudioCodingModule::Create(1);

    acm_b_->InitializeReceiver();
    acm_a_->InitializeReceiver();

    // Register all L16 codecs in receiver.
    CodecInst codec;
    const int kFsHz[3] = {8000, 16000, 32000};
    const int kChannels[2] = {1, 2};
    for (int n = 0; n < 3; ++n) {
      for (int k = 0; k < 2; ++k) {
        AudioCodingModule::Codec("L16", codec, kFsHz[n], kChannels[k]);
        acm_b_->RegisterReceiveCodec(codec);
      }
    }

    // Create and connect the channel
    channel_a2b_ = new Channel;
    acm_a_->RegisterTransportCallback(channel_a2b_);
    channel_a2b_->RegisterReceiverACM(acm_b_);
  }

  // Sends constant-amplitude (kAmp) 10 ms frames with |codec| and counts the
  // frames played out before the output RMS exceeds kAmp / 2, i.e. the length
  // of the initial silence. Asserts the silence lasts at least
  // |initial_delay_ms| and at most 100 ms longer.
  void Run(CodecInst codec, int initial_delay_ms) {
    AudioFrame in_audio_frame;
    AudioFrame out_audio_frame;
    int num_frames = 0;
    const int kAmp = 10000;
    in_audio_frame.sample_rate_hz_ = codec.plfreq;
    in_audio_frame.num_channels_ = codec.channels;
    in_audio_frame.samples_per_channel_ = codec.plfreq / 100;  // 10 ms.
    int samples = in_audio_frame.num_channels_ *
        in_audio_frame.samples_per_channel_;
    for (int n = 0; n < samples; ++n) {
      in_audio_frame.data_[n] = kAmp;
    }
    uint32_t timestamp = 0;
    double rms = 0;
    acm_a_->RegisterSendCodec(codec);
    acm_b_->SetInitialPlayoutDelay(initial_delay_ms);
    while(rms < kAmp / 2) {
      in_audio_frame.timestamp_ = timestamp;
      timestamp += in_audio_frame.samples_per_channel_;
      ASSERT_EQ(0, acm_a_->Add10MsData(in_audio_frame));
      ASSERT_LE(0, acm_a_->Process());
      ASSERT_EQ(0, acm_b_->PlayoutData10Ms(codec.plfreq, out_audio_frame));
      rms = FrameRms(out_audio_frame);
      ++num_frames;
    }
    // Each loop iteration corresponds to 10 ms of playout.
    ASSERT_GE(num_frames * 10, initial_delay_ms);
    ASSERT_LE(num_frames * 10, initial_delay_ms + 100);
  }

  AudioCodingModule* acm_a_;
  AudioCodingModule* acm_b_;
  Channel* channel_a2b_;
};
// Exercise the initial-delay behavior over L16 at narrowband, wideband and
// super-wideband rates, mono and stereo. Super-wideband uses a shorter delay
// because NetEq's buffer cannot hold 3 s of PCM16 at 32 kHz.
TEST_F( InitialPlayoutDelayTest, NbMono) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 8000, 1);
  Run(codec, 3000);
}

TEST_F( InitialPlayoutDelayTest, WbMono) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 16000, 1);
  Run(codec, 3000);
}

TEST_F( InitialPlayoutDelayTest, SwbMono) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 32000, 1);
  Run(codec, 2000);  // NetEq buffer is not sufficiently large for 3 sec of
                     // PCM16 super-wideband.
}

TEST_F( InitialPlayoutDelayTest, NbStereo) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 8000, 2);
  Run(codec, 3000);
}

TEST_F( InitialPlayoutDelayTest, WbStereo) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 16000, 2);
  Run(codec, 3000);
}

TEST_F( InitialPlayoutDelayTest, SwbStereo) {
  CodecInst codec;
  AudioCodingModule::Codec("L16", codec, 32000, 2);
  Run(codec, 2000);  // NetEq buffer is not sufficiently large for 3 sec of
                     // PCM16 super-wideband.
}
} // namespace webrtc

View File

@ -216,7 +216,7 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
streamingMode);
if (tempvar > 0)
{
inst->optBufLevel = (WebRtc_UWord16) tempvar;
inst->optBufLevel = tempvar;
if (streamingMode != 0)
{
@ -238,7 +238,7 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */
/* Enforce upper limit; 75% of maxBufLen */
inst->optBufLevel = (WebRtc_UWord16) WEBRTC_SPL_MIN( inst->optBufLevel,
inst->optBufLevel = WEBRTC_SPL_MIN( inst->optBufLevel,
(maxBufLen >> 1) + (maxBufLen >> 2) ); /* 1/2 + 1/4 = 75% */
}
else
@ -575,9 +575,8 @@ int WebRtcNetEQ_BufferLevelFilter(WebRtc_Word32 curSizeMs8, AutomodeInst_t *inst
*
* levelFiltFact is in Q8
*/
inst->buffLevelFilt = (WebRtc_UWord16) (WEBRTC_SPL_RSHIFT_W32(
WEBRTC_SPL_MUL_16_U16(inst->levelFiltFact, inst->buffLevelFilt), 8)
+ WEBRTC_SPL_MUL_16_16(256 - inst->levelFiltFact, curSizeFrames));
inst->buffLevelFilt = ((inst->levelFiltFact * inst->buffLevelFilt) >> 8) +
(256 - inst->levelFiltFact) * curSizeFrames;
}
/* Account for time-scale operations (accelerate and pre-emptive expand) */
@ -589,7 +588,7 @@ int WebRtcNetEQ_BufferLevelFilter(WebRtc_Word32 curSizeMs8, AutomodeInst_t *inst
* from samples to packets in Q8. Make sure that the filtered value is
* non-negative.
*/
inst->buffLevelFilt = (WebRtc_UWord16) WEBRTC_SPL_MAX( inst->buffLevelFilt -
inst->buffLevelFilt = WEBRTC_SPL_MAX( inst->buffLevelFilt -
WebRtcSpl_DivW32W16(
WEBRTC_SPL_LSHIFT_W32(inst->sampleMemory, 8), /* sampleMemory in Q8 */
inst->packetSpeechLenSamp ), /* divide by packetSpeechLenSamp */

View File

@ -65,14 +65,14 @@ typedef struct
/* Filtered current buffer level */
WebRtc_UWord16 levelFiltFact; /* filter forgetting factor in Q8 */
WebRtc_UWord16 buffLevelFilt; /* filtered buffer level in Q8 */
int buffLevelFilt; /* filtered buffer level in Q8 */
/* Inter-arrival time (iat) statistics */
WebRtc_Word32 iatProb[MAX_IAT + 1]; /* iat probabilities in Q30 */
WebRtc_Word16 iatProbFact; /* iat forgetting factor in Q15 */
WebRtc_UWord32 packetIatCountSamp; /* time (in timestamps) elapsed since last
packet arrival, based on RecOut calls */
WebRtc_UWord16 optBufLevel; /* current optimal buffer level in Q8 */
int optBufLevel; /* current optimal buffer level in Q8 */
/* Packet related information */
WebRtc_Word16 packetSpeechLenSamp; /* speech samples per incoming packet */

View File

@ -38,11 +38,11 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
int currentDelayMs;
WebRtc_Word32 currSizeSamples = cur_size;
WebRtc_Word16 extraDelayPacketsQ8 = 0;
int extraDelayPacketsQ8 = 0;
/* Avoid overflow if the buffer size should be really large (cur_size is limited 256ms) */
WebRtc_Word32 curr_sizeQ7 = WEBRTC_SPL_LSHIFT_W32(cur_size, 4);
WebRtc_UWord16 level_limit_hi, level_limit_lo;
int level_limit_hi, level_limit_lo;
inst->Automode_inst.prevTimeScale &= (prevPlayMode == MODE_SUCCESS_ACCELERATE
|| prevPlayMode == MODE_LOWEN_ACCELERATE || prevPlayMode == MODE_SUCCESS_PREEMPTIVE
@ -167,10 +167,11 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
if (inst->Automode_inst.extraDelayMs > 0 && inst->Automode_inst.packetSpeechLenSamp
> 0)
{
extraDelayPacketsQ8 = WebRtcSpl_DivW32W16ResW16(
(WEBRTC_SPL_MUL(inst->Automode_inst.extraDelayMs, 8 * fs_mult) << 8),
inst->Automode_inst.packetSpeechLenSamp);
/* (extra delay in samples in Q8) */
extraDelayPacketsQ8 =
((inst->Automode_inst.extraDelayMs * 8 * fs_mult) << 8) /
inst->Automode_inst.packetSpeechLenSamp;
}
/* Check if needed packet is available */
@ -256,10 +257,10 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
* higher than 4 times the optimal level.
*/
WebRtc_Word32 diffTS = (inst->uw32_CNGplayedTS + targetTS) - availableTS;
if (diffTS >= 0
|| (WEBRTC_SPL_MUL_16_16_RSFT( inst->Automode_inst.optBufLevel
+ extraDelayPacketsQ8,
inst->Automode_inst.packetSpeechLenSamp, 6) < currSizeSamples))
int val = ((inst->Automode_inst.optBufLevel +
extraDelayPacketsQ8) *
inst->Automode_inst.packetSpeechLenSamp) >> 6;
if (diffTS >= 0 || val < currSizeSamples)
{
/* it is time to play this new packet */
return BUFSTATS_DO_NORMAL;

View File

@ -177,7 +177,8 @@ int WebRtcNetEQ_AssignSize(int *sizeinbytes);
int WebRtcNetEQ_Assign(void **inst, void *NETEQ_inst_Addr);
int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecoder *codec,
int noOfCodecs, enum WebRtcNetEQNetworkType nwType,
int *MaxNoOfPackets, int *sizeinbytes);
int *MaxNoOfPackets, int *sizeinbytes,
int* per_packet_overhead_bytes);
int WebRtcNetEQ_AssignBuffer(void *inst, int MaxNoOfPackets, void *NETEQ_Buffer_Addr,
int sizeinbytes);

View File

@ -93,6 +93,7 @@
'dependencies': [
'NetEq',
'NetEqTestTools',
'neteq_unittest_tools',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/test/test.gyp:test_support_main',
],

View File

@ -577,7 +577,9 @@ void WebRtcNetEQ_IncrementWaitingTimes(PacketBuf_t *buffer_inst) {
}
int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
int noOfCodecs, int *maxBytes, int *maxSlots)
int noOfCodecs, int *maxBytes,
int *maxSlots,
int* per_slot_overhead_bytes)
{
int i;
int ok = 0;
@ -794,5 +796,6 @@ int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
/* Add the extra size per slot to the memory count */
*maxBytes += w16_tmp * (*maxSlots);
*per_slot_overhead_bytes = w16_tmp;
return ok;
}

View File

@ -237,12 +237,15 @@ void WebRtcNetEQ_IncrementWaitingTimes(PacketBuf_t *buffer_inst);
* Output:
* - maxBytes : Recommended buffer memory size in bytes
* - maxSlots : Recommended number of slots in buffer
* - per_slot_overhead_bytes : overhead in bytes for each slot in buffer.
*
* Return value : 0 - Ok
* <0 - Error
*/
int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
int noOfCodecs, int *maxBytes, int *maxSlots);
int noOfCodecs, int *maxBytes,
int *maxSlots,
int* per_slot_overhead_bytes);
#endif /* PACKET_BUFFER_H */

View File

@ -127,7 +127,7 @@ int NETEQTEST_NetEQClass::init(WebRtc_UWord16 fs)
int NETEQTEST_NetEQClass::assignBuffer(enum WebRtcNetEQDecoder *usedCodec, int noOfCodecs, WebRtcNetEQNetworkType nwType)
{
int numPackets, memSize, ret;
int numPackets, memSize, ret, overhead_bytes;
if (!_inst)
{
@ -149,7 +149,9 @@ int NETEQTEST_NetEQClass::assignBuffer(enum WebRtcNetEQDecoder *usedCodec, int n
}
}
ret = WebRtcNetEQ_GetRecommendedBufferSize(_inst, usedCodec, noOfCodecs, nwType, &numPackets, &memSize);
ret = WebRtcNetEQ_GetRecommendedBufferSize(_inst, usedCodec, noOfCodecs,
nwType, &numPackets, &memSize,
&overhead_bytes);
if (ret != 0)
{

View File

@ -1611,6 +1611,7 @@ int doAPItest() {
WebRtc_UWord32 timestamp;
int memorySize;
int ok;
int overhead_bytes;
printf("API-test:\n\n");
@ -1623,7 +1624,7 @@ int doAPItest() {
CHECK_MINUS_ONE(WebRtcNetEQ_Assign(&inst, NULL))
// printf("WARNING: Test of WebRtcNetEQ_Assign() is disabled due to a bug.\n");
usedCodec=kDecoderPCMu;
CHECK_MINUS_ONE(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter, &NetEqBufferMaxPackets, &BufferSizeInBytes))
CHECK_MINUS_ONE(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter, &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes))
CHECK_MINUS_ONE(WebRtcNetEQ_AssignBuffer(inst, NetEqBufferMaxPackets, NetEqPacketBuffer, BufferSizeInBytes))
CHECK_MINUS_ONE(WebRtcNetEQ_Init(inst, 8000))
@ -1661,7 +1662,7 @@ int doAPItest() {
/* GetRecommendedBufferSize with wrong codec */
usedCodec=kDecoderReservedStart;
ok = WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes);
ok = WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes);
if((ok!=-1) || ((ok==-1)&&(WebRtcNetEQ_GetErrorCode(inst)!=-CODEC_DB_UNKNOWN_CODEC))){
printf("WebRtcNetEQ_GetRecommendedBufferSize() did not return proper error code for wrong codec.\n");
printf("return value = %d; error code = %d\n", ok, WebRtcNetEQ_GetErrorCode(inst));
@ -1670,13 +1671,13 @@ int doAPItest() {
/* GetRecommendedBufferSize with wrong network type */
usedCodec = kDecoderPCMu;
ok=WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, (enum WebRtcNetEQNetworkType) 4711 , &NetEqBufferMaxPackets, &BufferSizeInBytes);
ok=WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, (enum WebRtcNetEQNetworkType) 4711 , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes);
if ((ok!=-1) || ((ok==-1)&&(WebRtcNetEQ_GetErrorCode(inst)!=-FAULTY_NETWORK_TYPE))) {
printf("WebRtcNetEQ_GetRecommendedBufferSize() did not return proper error code for wrong network type.\n");
printf("return value = %d; error code = %d\n", ok, WebRtcNetEQ_GetErrorCode(inst));
//RESET_ERROR(inst)
}
CHECK_ZERO(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes))
CHECK_ZERO(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes))
/* try to do RecIn before assigning the packet buffer */
/* makeRTPheader(rtp_data, NETEQ_CODEC_AVT_PT, 17,4711, 1235412312);

View File

@ -307,7 +307,8 @@ int WebRtcNetEQ_Assign(void **inst, void *NETEQ_inst_Addr)
int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecoder *codec,
int noOfCodecs, enum WebRtcNetEQNetworkType nwType,
int *MaxNoOfPackets, int *sizeinbytes)
int *MaxNoOfPackets, int *sizeinbytes,
int* per_packet_overhead_bytes)
{
int ok = 0;
int multiplier;
@ -315,7 +316,9 @@ int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecod
if (NetEqMainInst == NULL) return (-1);
*MaxNoOfPackets = 0;
*sizeinbytes = 0;
ok = WebRtcNetEQ_GetDefaultCodecSettings(codec, noOfCodecs, sizeinbytes, MaxNoOfPackets);
ok = WebRtcNetEQ_GetDefaultCodecSettings(codec, noOfCodecs, sizeinbytes,
MaxNoOfPackets,
per_packet_overhead_bytes);
if (ok != 0)
{
NetEqMainInst->ErrorCode = -ok;
@ -339,7 +342,7 @@ int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecod
}
else if (nwType == kTCPXLargeJitter)
{
multiplier = 20;
multiplier = 12;
}
else
{
@ -514,7 +517,7 @@ int WebRtcNetEQ_SetExtraDelay(void *inst, int DelayInMs)
{
MainInst_t *NetEqMainInst = (MainInst_t*) inst;
if (NetEqMainInst == NULL) return (-1);
if ((DelayInMs < 0) || (DelayInMs > 1000))
if ((DelayInMs < 0) || (DelayInMs > 10000))
{
NetEqMainInst->ErrorCode = -FAULTY_DELAYVALUE;
return (-1);

View File

@ -12,6 +12,8 @@
* This file includes unit tests for NetEQ.
*/
#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h"
#include <stdlib.h>
#include <string.h> // memset
@ -20,15 +22,14 @@
#include <vector>
#include "gtest/gtest.h"
#include "modules/audio_coding/neteq/interface/webrtc_neteq.h"
#include "modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h"
#include "modules/audio_coding/neteq/interface/webrtc_neteq_internal.h"
#include "modules/audio_coding/neteq/test/NETEQTEST_CodecClass.h"
#include "modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h"
#include "modules/audio_coding/neteq/test/NETEQTEST_RTPpacket.h"
#include "testsupport/fileutils.h"
#include "typedefs.h" // NOLINT(build/include)
#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h"
#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h"
#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_CodecClass.h"
#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h"
#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_RTPpacket.h"
#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h"
#include "webrtc/typedefs.h"
namespace webrtc {
@ -651,4 +652,45 @@ TEST_F(NetEqDecodingTest, NoInputDataStereo) {
free(ms_info);
}
// Verifies that WebRtcNetEQ_SetExtraDelay() makes NetEq accumulate (and then
// maintain) roughly |kExtraDelayMs| of buffered audio: packets are pushed in
// one frame at a time while audio is pulled out, and the reported buffer size
// is periodically checked against a tolerance band around the target.
TEST_F(NetEqDecodingTest, TestExtraDelay) {
  static const int kNumFrames = 120000;  // Needed for convergence.
  int frame_index = 0;
  // 30 ms frames at 16 kHz; PCM16 payload is 2 bytes per sample.
  static const int kFrameSizeSamples = 30 * 16;
  static const int kPayloadBytes = kFrameSizeSamples * 2;
  test::InputAudioFile input_file(
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
  int16_t input[kFrameSizeSamples];
  // Buffers of NetEq cannot accommodate larger delays for PCM16.
  static const int kExtraDelayMs = 3200;
  ASSERT_EQ(0, WebRtcNetEQ_SetExtraDelay(neteq_inst_->instance(),
                                         kExtraDelayMs));
  for (int i = 0; i < kNumFrames; ++i) {
    ASSERT_TRUE(input_file.Read(kFrameSizeSamples, input));
    WebRtcNetEQ_RTPInfo rtp_info;
    // Sequence number and timestamp advance by one frame per iteration.
    PopulateRtpInfo(frame_index, frame_index * kFrameSizeSamples, &rtp_info);
    uint8_t* payload = reinterpret_cast<uint8_t*>(input);
    ASSERT_EQ(0,
              WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
                                         &rtp_info,
                                         payload,
                                         kPayloadBytes, 0));
    ++frame_index;
    // Pull out data.
    // Three 10-ms pulls per 30-ms inserted frame keeps input/output balanced.
    for (int j = 0; j < 3; ++j) {
      ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_));
    }
    // Every 100 frames, check that the current buffer size stays within an
    // expected band while the delay builds up and after it has converged.
    if (i % 100 == 0) {
      WebRtcNetEQ_NetworkStatistics network_stats;
      ASSERT_EQ(0, WebRtcNetEQ_GetNetworkStatistics(neteq_inst_->instance(),
                                                    &network_stats));
      // NOTE(review): 0.083, 210 and 255 appear to model the expected ramp-up
      // rate and tolerance of the buffered delay (ms) as a function of the
      // frame count — derivation not visible here; confirm against NetEq's
      // delay-buildup behavior. std::min truncates the double result to int.
      const int expected_lower_limit =
          std::min(i * 0.083 - 210, 0.9 * network_stats.preferredBufferSize);
      EXPECT_GE(network_stats.currentBufferSize, expected_lower_limit);
      const int expected_upper_limit =
          std::min(i * 0.083 + 255, 1.2 * network_stats.preferredBufferSize);
      EXPECT_LE(network_stats.currentBufferSize, expected_upper_limit);
    }
  }
}
} // namespace

View File

@ -8,23 +8,23 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "channel.h"
#include "webrtc/voice_engine/channel.h"
#include "audio_device.h"
#include "audio_frame_operations.h"
#include "audio_processing.h"
#include "critical_section_wrapper.h"
#include "logging.h"
#include "output_mixer.h"
#include "process_thread.h"
#include "rtp_dump.h"
#include "statistics.h"
#include "trace.h"
#include "transmit_mixer.h"
#include "utility.h"
#include "voe_base.h"
#include "voe_external_media.h"
#include "voe_rtp_rtcp.h"
#include "webrtc/modules/audio_device/include/audio_device.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/modules/utility/interface/process_thread.h"
#include "webrtc/modules/utility/interface/rtp_dump.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/include/voe_rtp_rtcp.h"
#include "webrtc/voice_engine/output_mixer.h"
#include "webrtc/voice_engine/statistics.h"
#include "webrtc/voice_engine/transmit_mixer.h"
#include "webrtc/voice_engine/utility.h"
#if defined(_WIN32)
#include <Qos.h>
@ -6095,6 +6095,29 @@ Channel::GetDelayEstimate(int& delayMs) const
return 0;
}
// Sets an initial playout delay on this channel's ACM. The delay must lie
// within the engine-wide [min, max] playout-delay bounds; ACM plays silence
// until the equivalent amount of audio has been buffered.
// Returns 0 on success, -1 on invalid argument or ACM failure (the error is
// recorded in the engine statistics).
int Channel::SetInitialPlayoutDelay(int delay_ms)
{
    WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId,_channelId),
                 "Channel::SetInitialPlayoutDelay()");
    // Reject values outside the allowed playout-delay range.
    const bool out_of_range =
        (delay_ms < kVoiceEngineMinMinPlayoutDelayMs) ||
        (delay_ms > kVoiceEngineMaxMinPlayoutDelayMs);
    if (out_of_range)
    {
        _engineStatisticsPtr->SetLastError(
            VE_INVALID_ARGUMENT, kTraceError,
            "SetInitialPlayoutDelay() invalid min delay");
        return -1;
    }
    // Delegate to the audio coding module; it owns the actual delay logic.
    if (_audioCodingModule.SetInitialPlayoutDelay(delay_ms) != 0)
    {
        _engineStatisticsPtr->SetLastError(
            VE_AUDIO_CODING_MODULE_ERROR, kTraceError,
            "SetInitialPlayoutDelay() failed to set min playout delay");
        return -1;
    }
    return 0;
}
int
Channel::SetMinimumPlayoutDelay(int delayMs)
{

View File

@ -11,24 +11,24 @@
#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
#define WEBRTC_VOICE_ENGINE_CHANNEL_H
#include "audio_coding_module.h"
#include "audio_conference_mixer_defines.h"
#include "common_types.h"
#include "dtmf_inband.h"
#include "dtmf_inband_queue.h"
#include "file_player.h"
#include "file_recorder.h"
#include "level_indicator.h"
#include "resampler.h"
#include "rtp_rtcp.h"
#include "scoped_ptr.h"
#include "shared_data.h"
#include "voe_audio_processing.h"
#include "voe_network.h"
#include "voice_engine_defines.h"
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h"
#include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp.h"
#include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/voice_engine/dtmf_inband.h"
#include "webrtc/voice_engine/dtmf_inband_queue.h"
#include "webrtc/voice_engine/include/voe_audio_processing.h"
#include "webrtc/voice_engine/include/voe_network.h"
#include "webrtc/voice_engine/level_indicator.h"
#include "webrtc/voice_engine/shared_data.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
#ifndef WEBRTC_EXTERNAL_TRANSPORT
#include "udp_transport.h"
#include "webrtc/modules/udp_transport/interface/udp_transport.h"
#endif
#ifdef WEBRTC_SRTP
#include "SrtpModule.h"
@ -254,6 +254,7 @@ public:
// VoEVideoSync
int GetDelayEstimate(int& delayMs) const;
int SetInitialPlayoutDelay(int delay_ms);
int SetMinimumPlayoutDelay(int delayMs);
int GetPlayoutTimestamp(unsigned int& timestamp);
int SetInitTimestamp(unsigned int timestamp);

View File

@ -33,7 +33,7 @@
#ifndef WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_H
#define WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_H
#include "common_types.h"
#include "webrtc/common_types.h"
namespace webrtc {
@ -60,6 +60,10 @@ public:
// Sets an additional delay for the playout jitter buffer.
virtual int SetMinimumPlayoutDelay(int channel, int delayMs) = 0;
// Sets an initial delay for the playout jitter buffer. The playout of the
// audio is delayed by |delay_ms| milliseconds.
virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0;
// Gets the sum of the algorithmic delay, jitter buffer delay, and the
// playout buffer delay for a specified |channel|.
virtual int GetDelayEstimate(int channel, int& delayMs) = 0;

View File

@ -10,11 +10,11 @@
#include "voe_video_sync_impl.h"
#include "channel.h"
#include "critical_section_wrapper.h"
#include "trace.h"
#include "voe_errors.h"
#include "voice_engine_impl.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/channel.h"
#include "webrtc/voice_engine/include/voe_errors.h"
#include "webrtc/voice_engine/voice_engine_impl.h"
namespace webrtc {
@ -144,6 +144,30 @@ int VoEVideoSyncImpl::SetMinimumPlayoutDelay(int channel,int delayMs)
return channelPtr->SetMinimumPlayoutDelay(delayMs);
}
int VoEVideoSyncImpl::SetInitialPlayoutDelay(int channel, int delay_ms)
{
WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
"SetInitialPlayoutDelay(channel=%d, delay_ms=%d)",
channel, delay_ms);
ANDROID_NOT_SUPPORTED(_shared->statistics());
IPHONE_NOT_SUPPORTED(_shared->statistics());
if (!_shared->statistics().Initialized())
{
_shared->SetLastError(VE_NOT_INITED, kTraceError);
return -1;
}
voe::ScopedChannel sc(_shared->channel_manager(), channel);
voe::Channel* channel_ptr = sc.ChannelPtr();
if (channel_ptr == NULL)
{
_shared->SetLastError(VE_CHANNEL_NOT_VALID, kTraceError,
"SetInitialPlayoutDelay() failed to locate channel");
return -1;
}
return channel_ptr->SetInitialPlayoutDelay(delay_ms);
}
int VoEVideoSyncImpl::GetDelayEstimate(int channel, int& delayMs)
{
WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),

View File

@ -11,9 +11,9 @@
#ifndef WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_IMPL_H
#define WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_IMPL_H
#include "voe_video_sync.h"
#include "webrtc/voice_engine/include/voe_video_sync.h"
#include "shared_data.h"
#include "webrtc/voice_engine/shared_data.h"
namespace webrtc {
@ -24,6 +24,8 @@ public:
virtual int SetMinimumPlayoutDelay(int channel, int delayMs);
virtual int SetInitialPlayoutDelay(int channel, int delay_ms);
virtual int GetDelayEstimate(int channel, int& delayMs);
virtual int SetInitTimestamp(int channel, unsigned int timestamp);