From 6388c3e2fdfc91b3648fb7d408a14ddb25e41cd1 Mon Sep 17 00:00:00 2001
From: "turaj@webrtc.org"
 <turaj@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Tue, 12 Feb 2013 21:42:18 +0000
Subject: [PATCH] Implement initial delay. This CL allows clients of VoE to set
 an initial delay. Playout of audio is delayed and the extra playout delay is
 maintained during the call. While packets are buffered (in NetEq) to achieve
 the desired delay, ACM will play out silence (zeros). Initial delay has to be
 set before any packet is pushed into ACM.

TEST=ACM unit test is added, also a manual integration test is written.
Review URL: https://webrtc-codereview.appspot.com/1097009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3506 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../main/interface/audio_coding_module.h      |  16 ++
 .../audio_coding/main/source/acm_neteq.cc     |  21 +-
 .../audio_coding/main/source/acm_neteq.h      |  11 +
 .../main/source/audio_coding_module.gypi      |  17 ++
 .../main/source/audio_coding_module_impl.cc   | 135 ++++++++-
 .../main/source/audio_coding_module_impl.h    |  20 ++
 .../audio_coding/main/test/delay_test.cc      | 268 ++++++++++++++++++
 .../main/test/initial_delay_unittest.cc       | 170 +++++++++++
 webrtc/modules/audio_coding/neteq/automode.c  |  11 +-
 webrtc/modules/audio_coding/neteq/automode.h  |   4 +-
 .../audio_coding/neteq/bufstats_decision.c    |  19 +-
 .../neteq/interface/webrtc_neteq.h            |   3 +-
 webrtc/modules/audio_coding/neteq/neteq.gypi  |   1 +
 .../audio_coding/neteq/packet_buffer.c        |   5 +-
 .../audio_coding/neteq/packet_buffer.h        |   5 +-
 .../neteq/test/NETEQTEST_NetEQClass.cc        |   6 +-
 .../audio_coding/neteq/test/NetEqRTPplay.cc   |   9 +-
 .../modules/audio_coding/neteq/webrtc_neteq.c |  11 +-
 .../neteq/webrtc_neteq_unittest.cc            |  58 +++-
 webrtc/voice_engine/channel.cc                |  55 ++--
 webrtc/voice_engine/channel.h                 |  33 +--
 webrtc/voice_engine/include/voe_video_sync.h  |   6 +-
 webrtc/voice_engine/voe_video_sync_impl.cc    |  34 ++-
 webrtc/voice_engine/voe_video_sync_impl.h     |   6 +-
 24 files changed, 839 insertions(+), 85 deletions(-)
 create mode 100644 webrtc/modules/audio_coding/main/test/delay_test.cc
 create mode 100644 webrtc/modules/audio_coding/main/test/initial_delay_unittest.cc

diff --git a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
index 8679b362c..236c07721 100644
--- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
+++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
@@ -940,6 +940,22 @@ class AudioCodingModule: public Module {
   //
   virtual WebRtc_Word32 NetworkStatistics(
       ACMNetworkStatistics& network_statistics) const = 0;
+
+  //
+  // Set an initial delay for playout.
+  // With an initial delay set, ACM plays out silence until the equivalent of
+  // |delay_ms| of audio payload has accumulated in the NetEq jitter buffer.
+  // Thereafter, ACM pulls audio from NetEq in its regular fashion, and the
+  // given delay is maintained as the "minimum playout delay."
+  //
+  // Input:
+  //   -delay_ms           : delay in milliseconds.
+  //
+  // Return values:
+  //   -1 if failed to set the delay.
+  //    0 if delay is set successfully.
+  //
+  virtual int SetInitialPlayoutDelay(int delay_ms) = 0;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.cc b/webrtc/modules/audio_coding/main/source/acm_neteq.cc
index 1f900c080..3bfc6e47a 100644
--- a/webrtc/modules/audio_coding/main/source/acm_neteq.cc
+++ b/webrtc/modules/audio_coding/main/source/acm_neteq.cc
@@ -45,7 +45,10 @@ ACMNetEQ::ACMNetEQ()
       master_slave_info_(NULL),
       previous_audio_activity_(AudioFrame::kVadUnknown),
       extra_delay_(0),
-      callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
+      callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
+      min_of_max_num_packets_(0),
+      min_of_buffer_size_bytes_(0),
+      per_packet_overhead_bytes_(0) {
   for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) {
     is_initialized_[n] = false;
     ptr_vadinst_[n] = NULL;
@@ -215,6 +218,7 @@ WebRtc_Word16 ACMNetEQ::AllocatePacketBufferByIdxSafe(
     const WebRtc_Word16 idx) {
   int max_num_packets;
   int buffer_size_in_bytes;
+  int per_packet_overhead_bytes;
 
   if (!is_initialized_[idx]) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
@@ -223,12 +227,23 @@ WebRtc_Word16 ACMNetEQ::AllocatePacketBufferByIdxSafe(
   }
   if (WebRtcNetEQ_GetRecommendedBufferSize(inst_[idx], used_codecs,
                                            num_codecs,
-                                           kTCPLargeJitter,
+                                           kTCPXLargeJitter,
                                            &max_num_packets,
-                                           &buffer_size_in_bytes) != 0) {
+                                           &buffer_size_in_bytes,
+                                           &per_packet_overhead_bytes) != 0) {
     LogError("GetRecommendedBufferSize", idx);
     return -1;
   }
+  if (idx == 0) {
+    min_of_buffer_size_bytes_ = buffer_size_in_bytes;
+    min_of_max_num_packets_ = max_num_packets;
+    per_packet_overhead_bytes_ = per_packet_overhead_bytes;
+  } else {
+    min_of_buffer_size_bytes_ = std::min(min_of_buffer_size_bytes_,
+                                        buffer_size_in_bytes);
+    min_of_max_num_packets_ = std::min(min_of_max_num_packets_,
+                                       max_num_packets);
+  }
   if (neteq_packet_buffer_[idx] != NULL) {
     free(neteq_packet_buffer_[idx]);
     neteq_packet_buffer_[idx] = NULL;
diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.h b/webrtc/modules/audio_coding/main/source/acm_neteq.h
index 1ee43096a..06a11486c 100644
--- a/webrtc/modules/audio_coding/main/source/acm_neteq.h
+++ b/webrtc/modules/audio_coding/main/source/acm_neteq.h
@@ -272,6 +272,12 @@ class ACMNetEQ {
   WebRtc_Word16 AddSlave(const WebRtcNetEQDecoder* used_codecs,
                          WebRtc_Word16 num_codecs);
 
+  void BufferSpec(int& num_packets, int& size_bytes, int& overhead_bytes) {
+    num_packets = min_of_max_num_packets_;
+    size_bytes = min_of_buffer_size_bytes_;
+    overhead_bytes = per_packet_overhead_bytes_;
+  }
+
  private:
   //
   // RTPPack()
@@ -339,6 +345,11 @@ class ACMNetEQ {
   WebRtc_Word32 extra_delay_;
 
   CriticalSectionWrapper* callback_crit_sect_;
+  // Minimum of "max number of packets," among all NetEq instances.
+  int min_of_max_num_packets_;
+  // Minimum of buffer-size among all NetEq instances.
+  int min_of_buffer_size_bytes_;
+  int per_packet_overhead_bytes_;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
index aba15eff1..56d595832 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
@@ -145,8 +145,25 @@
              '../test/TimedTrace.cc',
              '../test/TwoWayCommunication.cc',
              '../test/utility.cc',
+             '../test/initial_delay_unittest.cc',
           ],
         },
+        {
+          'target_name': 'delay_test',
+          'type': 'executable',
+          'dependencies': [
+            'audio_coding_module',
+            '<(DEPTH)/testing/gtest.gyp:gtest',
+            '<(webrtc_root)/test/test.gyp:test_support_main',
+            '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
+            '<(DEPTH)/third_party/google-gflags/google-gflags.gyp:google-gflags',
+          ],
+          'sources': [
+             '../test/delay_test.cc',
+             '../test/Channel.cc',
+             '../test/PCMFile.cc',
+           ],
+        }, # delay_test
         {
           'target_name': 'audio_coding_unittests',
           'type': 'executable',
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
index 2ce5abb68..99761c027 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
@@ -138,7 +138,15 @@ AudioCodingModuleImpl::AudioCodingModuleImpl(const WebRtc_Word32 id)
       last_detected_tone_(kACMToneEnd),
       callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
       secondary_send_codec_inst_(),
-      secondary_encoder_(NULL) {
+      secondary_encoder_(NULL),
+      initial_delay_ms_(0),
+      num_packets_accumulated_(0),
+      num_bytes_accumulated_(0),
+      accumulated_audio_ms_(0),
+      first_payload_received_(false),
+      last_incoming_send_timestamp_(0),
+      track_neteq_buffer_(false),
+      playout_ts_(0) {
 
   // Nullify send codec memory, set payload type and set codec name to
   // invalid values.
@@ -1612,6 +1620,14 @@ WebRtc_Word32 AudioCodingModuleImpl::InitializeReceiver() {
 
 // Initialize receiver, resets codec database etc.
 WebRtc_Word32 AudioCodingModuleImpl::InitializeReceiverSafe() {
+  initial_delay_ms_ = 0;
+  num_packets_accumulated_ = 0;
+  num_bytes_accumulated_ = 0;
+  accumulated_audio_ms_ = 0;
+  first_payload_received_ = false;
+  last_incoming_send_timestamp_ = 0;
+  track_neteq_buffer_ = false;
+  playout_ts_ = 0;
   // If the receiver is already initialized then we want to destroy any
   // existing decoders. After a call to this function, we should have a clean
   // start-up.
@@ -1953,10 +1969,12 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
                  "IncomingPacket() Error, payload-length cannot be negative");
     return -1;
   }
+
   {
     // Store the payload Type. This will be used to retrieve "received codec"
     // and "received frequency."
     CriticalSectionScoped lock(acm_crit_sect_);
+
     WebRtc_UWord8 my_payload_type;
 
     // Check if this is an RED payload.
@@ -1984,9 +2002,32 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
             break;
           }
         }
+        // Codec is changed, there might be a jump in timestamp, therefore,
+        // we have to reset some variables that track NetEq buffer.
+        if (track_neteq_buffer_) {
+          last_incoming_send_timestamp_ = rtp_info.header.timestamp;
+        }
       }
       last_recv_audio_codec_pltype_ = my_payload_type;
     }
+
+    if (track_neteq_buffer_) {
+      const int in_sample_rate_khz =
+          (ACMCodecDB::database_[current_receive_codec_idx_].plfreq / 1000);
+      if (first_payload_received_) {
+        if (rtp_info.header.timestamp > last_incoming_send_timestamp_) {
+          accumulated_audio_ms_ += (rtp_info.header.timestamp -
+              last_incoming_send_timestamp_) / in_sample_rate_khz;
+        }
+      } else {
+        first_payload_received_ = true;
+      }
+      num_packets_accumulated_++;
+      last_incoming_send_timestamp_ = rtp_info.header.timestamp;
+      playout_ts_ = static_cast<uint32_t>(
+          rtp_info.header.timestamp - static_cast<uint32_t>(
+              initial_delay_ms_ * in_sample_rate_khz));
+    }
   }
 
   // Split the payload for stereo packets, so that first half of payload
@@ -2000,6 +2041,9 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
       memcpy(payload, incoming_payload, payload_length);
       codecs_[current_receive_codec_idx_]->SplitStereoPacket(payload, &length);
       rtp_header.type.Audio.channel = 2;
+      if (track_neteq_buffer_)
+        num_bytes_accumulated_ += length / 2;  // Per neteq, half is inserted
+                                               // into master and half to slave.
       // Insert packet into NetEQ.
       return neteq_.RecIn(payload, length, rtp_header);
     } else {
@@ -2008,6 +2052,8 @@ WebRtc_Word32 AudioCodingModuleImpl::IncomingPacket(
       return 0;
     }
   } else {
+    if (track_neteq_buffer_)
+      num_bytes_accumulated_ += payload_length;
     return neteq_.RecIn(incoming_payload, payload_length, rtp_header);
   }
 }
@@ -2084,11 +2130,14 @@ int AudioCodingModuleImpl::InitStereoSlave() {
 // Minimum playout delay (Used for lip-sync).
 WebRtc_Word32 AudioCodingModuleImpl::SetMinimumPlayoutDelay(
     const WebRtc_Word32 time_ms) {
-  if ((time_ms < 0) || (time_ms > 1000)) {
+  if ((time_ms < 0) || (time_ms > 10000)) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                 "Delay must be in the range of 0-1000 milliseconds.");
+                 "Delay must be in the range of 0-10000 milliseconds.");
     return -1;
   }
+  // Don't let the extra delay be modified while accumulating buffers in NetEq.
+  if (track_neteq_buffer_ && first_payload_received_)
+    return 0;
   return neteq_.SetExtraDelay(time_ms);
 }
 
@@ -2177,6 +2226,9 @@ WebRtc_Word32 AudioCodingModuleImpl::PlayoutData10Ms(
     const WebRtc_Word32 desired_freq_hz, AudioFrame& audio_frame) {
   bool stereo_mode;
 
+  if (GetSilence(desired_freq_hz, &audio_frame))
+     return 0;  // Silence is generated, return.
+
   // RecOut always returns 10 ms.
   if (neteq_.RecOut(audio_frame_) != 0) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
@@ -2612,7 +2664,12 @@ WebRtc_Word32 AudioCodingModuleImpl::PlayoutTimestamp(
     WebRtc_UWord32& timestamp) {
   WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
                "PlayoutTimestamp()");
-  return neteq_.PlayoutTimestamp(timestamp);
+  if (track_neteq_buffer_) {
+    timestamp = playout_ts_;
+    return 0;
+  } else {
+    return neteq_.PlayoutTimestamp(timestamp);
+  }
 }
 
 bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
@@ -2757,4 +2814,74 @@ void AudioCodingModuleImpl::ResetFragmentation(int vector_size) {
       static_cast<WebRtc_UWord16>(vector_size);
 }
 
+int AudioCodingModuleImpl::SetInitialPlayoutDelay(int delay_ms) {
+  if (delay_ms < 0 || delay_ms > 10000) {
+    return -1;
+  }
+
+  CriticalSectionScoped lock(acm_crit_sect_);
+
+  // Receiver should be initialized before this call is processed.
+  if (!receiver_initialized_) {
+    InitializeReceiverSafe();
+  }
+
+  if (first_payload_received_) {
+    // Too late for this API. Only works before a call is started.
+    return -1;
+  }
+  initial_delay_ms_ = delay_ms;
+  track_neteq_buffer_ = true;
+  return neteq_.SetExtraDelay(delay_ms);
+}
+
+// Returns true (|frame| = 10 ms of zeros) while the initial delay builds up.
+bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz,
+                                       AudioFrame* frame) {
+  CriticalSectionScoped lock(acm_crit_sect_);
+  if (initial_delay_ms_ == 0 || accumulated_audio_ms_ >= initial_delay_ms_) {
+    track_neteq_buffer_ = false;
+    return false;
+  }
+
+  // Stop accumulating (and fall back to regular playout) once the packet count
+  // or the total byte count exceeds 90% of the NetEq buffer specification.
+  int max_num_packets;
+  int buffer_size_bytes;
+  int per_payload_overhead_bytes;
+  neteq_.BufferSpec(max_num_packets, buffer_size_bytes,
+                     per_payload_overhead_bytes);
+  int total_bytes_accumulated = num_bytes_accumulated_ +
+      num_packets_accumulated_ * per_payload_overhead_bytes;
+  if (num_packets_accumulated_ > max_num_packets * 0.9 ||
+      total_bytes_accumulated > buffer_size_bytes * 0.9) {
+    WEBRTC_TRACE(webrtc::kTraceWarning, webrtc::kTraceAudioCoding, id_,
+                 "GetSilence: Initial delay couldn't be achieved."
+                 " num_packets_accumulated=%d, total_bytes_accumulated=%d",
+                 num_packets_accumulated_, total_bytes_accumulated);
+    track_neteq_buffer_ = false;
+    return false;
+  }
+
+  if (desired_sample_rate_hz > 0) {
+    frame->sample_rate_hz_ = desired_sample_rate_hz;
+  } else {
+    if (current_receive_codec_idx_ >= 0) {
+      frame->sample_rate_hz_ =
+          ACMCodecDB::database_[current_receive_codec_idx_].plfreq;
+    } else {
+      // No payload received yet, use the default sampling rate of NetEq.
+      frame->sample_rate_hz_ = neteq_.CurrentSampFreqHz();
+    }
+  }
+  frame->num_channels_ = expected_channels_;
+  frame->samples_per_channel_ = frame->sample_rate_hz_ / 100;  // Always 10 ms.
+  frame->speech_type_ = AudioFrame::kCNG;
+  frame->vad_activity_ = AudioFrame::kVadPassive;
+  frame->energy_ = 0;
+  int samples = frame->samples_per_channel_ * frame->num_channels_;
+  memset(frame->data_, 0, samples * sizeof(int16_t));
+  return true;
+}
+
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
index a8950a64a..53ea4619c 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
@@ -282,6 +282,14 @@ class AudioCodingModuleImpl : public AudioCodingModule {
   int PreprocessToAddData(const AudioFrame& in_frame,
                           const AudioFrame** ptr_out);
 
+  // Set initial playout delay.
+  //  -delay_ms: delay in millisecond.
+  //
+  // Return value:
+  //  -1: if cannot set the delay.
+  //   0: if delay set successfully.
+  int SetInitialPlayoutDelay(int delay_ms);
+
  private:
   // Change required states after starting to receive the codec corresponding
   // to |index|.
@@ -302,6 +310,8 @@ class AudioCodingModuleImpl : public AudioCodingModule {
 
   void ResetFragmentation(int vector_size);
 
+  bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame);
+
   AudioPacketizationCallback* packetization_callback_;
   WebRtc_Word32 id_;
   WebRtc_UWord32 last_timestamp_;
@@ -375,6 +385,16 @@ class AudioCodingModuleImpl : public AudioCodingModule {
   AudioFrame preprocess_frame_;
   CodecInst secondary_send_codec_inst_;
   scoped_ptr<ACMGenericCodec> secondary_encoder_;
+
+  // Initial delay.
+  int initial_delay_ms_;
+  int num_packets_accumulated_;
+  int num_bytes_accumulated_;
+  int accumulated_audio_ms_;
+  bool first_payload_received_;
+  uint32_t last_incoming_send_timestamp_;
+  bool track_neteq_buffer_;
+  uint32_t playout_ts_;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/test/delay_test.cc b/webrtc/modules/audio_coding/main/test/delay_test.cc
new file mode 100644
index 000000000..2383b3447
--- /dev/null
+++ b/webrtc/modules/audio_coding/main/test/delay_test.cc
@@ -0,0 +1,268 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
+
+#include <math.h>
+
+#include <cassert>
+#include <iostream>
+
+#include "gflags/gflags.h"
+#include "gtest/gtest.h"
+#include "testsupport/fileutils.h"
+#include "webrtc/common_types.h"
+#include "webrtc/engine_configurations.h"
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
+#include "webrtc/modules/audio_coding/main/test/Channel.h"
+#include "webrtc/modules/audio_coding/main/test/PCMFile.h"
+#include "webrtc/modules/audio_coding/main/test/utility.h"
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+DEFINE_string(codec, "isac", "Codec Name");
+DEFINE_int32(sample_rate_hz, 16000, "Sampling rate in Hertz.");
+DEFINE_int32(num_channels, 1, "Number of Channels.");
+DEFINE_string(input_file, "", "Input file, PCM16 32 kHz, optional.");
+DEFINE_int32(delay, 0, "Delay in millisecond.");
+DEFINE_int32(init_delay, 0, "Initial delay in millisecond.");
+DEFINE_bool(dtx, false, "Enable DTX at the sender side.");
+
+namespace webrtc {
+
+namespace {
+
+struct CodecConfig {
+  char name[50];
+  int sample_rate_hz;
+  int num_channels;
+};
+
+struct AcmConfig {
+  bool dtx;
+  bool fec;
+};
+
+struct Config {
+  CodecConfig codec;
+  AcmConfig acm;
+  bool packet_loss;
+};
+
+}
+
+class DelayTest {
+ public:
+
+  DelayTest()
+     : acm_a_(NULL),
+       acm_b_(NULL),
+       channel_a2b_(NULL),
+       test_cntr_(0),
+       encoding_sample_rate_hz_(8000) {
+  }
+
+  ~DelayTest() {}
+
+  void TearDown() {
+    if(acm_a_ != NULL) {
+      AudioCodingModule::Destroy(acm_a_);
+      acm_a_ = NULL;
+    }
+    if(acm_b_ != NULL) {
+      AudioCodingModule::Destroy(acm_b_);
+      acm_b_ = NULL;
+    }
+    if(channel_a2b_ != NULL) {
+      delete channel_a2b_;
+      channel_a2b_ = NULL;
+    }
+  }
+
+  void SetUp() {
+    test_cntr_ = 0;
+    std::string file_name =
+        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+    if (FLAGS_input_file.size() > 0)
+      file_name = FLAGS_input_file;
+    in_file_a_.Open(file_name, 32000, "rb");
+    acm_a_ = AudioCodingModule::Create(0);
+    acm_b_ = AudioCodingModule::Create(1);
+    acm_a_->InitializeReceiver();
+    acm_b_->InitializeReceiver();
+    if (FLAGS_init_delay > 0) {
+      ASSERT_EQ(0, acm_b_->SetInitialPlayoutDelay(FLAGS_init_delay));
+    }
+
+    if (FLAGS_delay > 0) {
+      ASSERT_EQ(0, acm_b_->SetMinimumPlayoutDelay(FLAGS_delay));
+    }
+
+    WebRtc_UWord8 num_encoders = acm_a_->NumberOfCodecs();
+    CodecInst my_codec_param;
+    for(int n = 0; n < num_encoders; n++) {
+      acm_b_->Codec(n, my_codec_param);
+      if (STR_CASE_CMP(my_codec_param.plname, "opus") == 0)
+        my_codec_param.channels = 1;
+      else if  (my_codec_param.channels > 1)
+        continue;
+      if (STR_CASE_CMP(my_codec_param.plname, "CN") == 0 &&
+          my_codec_param.plfreq == 48000)
+          continue;
+      if (STR_CASE_CMP(my_codec_param.plname, "telephone-event") == 0)
+        continue;
+      acm_b_->RegisterReceiveCodec(my_codec_param);
+    }
+
+    // Create and connect the channel
+    channel_a2b_ = new Channel;
+    acm_a_->RegisterTransportCallback(channel_a2b_);
+    channel_a2b_->RegisterReceiverACM(acm_b_);
+  }
+
+  void Perform(const Config* config, size_t num_tests, int duration_sec,
+               const char* output_prefix) {
+    for (size_t n = 0; n < num_tests; ++n) {
+      ApplyConfig(config[n]);
+      Run(duration_sec, output_prefix);
+    }
+  }
+
+ private:
+
+  void ApplyConfig(const Config& config) {
+    printf("====================================\n");
+    printf("Test %d \n"
+        "Codec: %s, %d kHz, %d channel(s)\n"
+        "ACM: DTX %s, FEC %s\n"
+        "Channel: %s\n",
+        ++test_cntr_,
+        config.codec.name, config.codec.sample_rate_hz,
+        config.codec.num_channels, config.acm.dtx ? "on" : "off",
+        config.acm.fec ? "on" : "off",
+        config.packet_loss ? "with packet-loss" : "no packet-loss");
+    SendCodec(config.codec);
+    ConfigAcm(config.acm);
+    ConfigChannel(config.packet_loss);
+  }
+
+  void SendCodec(const CodecConfig& config) {
+    CodecInst my_codec_param;
+    ASSERT_EQ(0, AudioCodingModule::Codec(config.name, my_codec_param,
+                                          config.sample_rate_hz,
+                                          config.num_channels));
+    encoding_sample_rate_hz_ = my_codec_param.plfreq;
+    ASSERT_EQ(0, acm_a_->RegisterSendCodec(my_codec_param));
+  }
+
+  void ConfigAcm(const AcmConfig& config) {
+    ASSERT_EQ(0, acm_a_->SetVAD(config.dtx, config.dtx, VADAggr));
+    ASSERT_EQ(0, acm_a_->SetFECStatus(config.fec));
+  }
+
+  void ConfigChannel(bool packet_loss) {
+    channel_a2b_->SetFECTestWithPacketLoss(packet_loss);
+  }
+
+  void OpenOutFile(const char* output_id) {
+    std::stringstream file_stream;
+    file_stream << "delay_test_" << FLAGS_codec << "_"
+        << FLAGS_sample_rate_hz << "Hz" << "_"
+        << FLAGS_init_delay << "ms_"
+        << FLAGS_delay << "ms.pcm";
+    std::cout << "Output file: " << file_stream.str() << std::endl <<std::endl;
+    std::string file_name = webrtc::test::OutputPath() + file_stream.str();
+    out_file_b_.Open(file_name.c_str(), 32000, "wb");
+  }
+
+  void Run(int duration_sec, const char* output_prefix) {
+    OpenOutFile(output_prefix);
+    AudioFrame audio_frame;
+    uint32_t out_freq_hz_b = out_file_b_.SamplingFrequency();
+
+    int num_frames = 0;
+    int in_file_frames = 0;
+    uint32_t playout_ts;
+    uint32_t received_ts;
+    double average_delay = 0;
+    double inst_delay_sec = 0;
+    while(num_frames < (duration_sec * 100)) {
+      if (in_file_a_.EndOfFile()) {
+        in_file_a_.Rewind();
+      }
+
+      // Print delay information every 64 frames.
+      if ((num_frames & 0x3F) == 0x3F) {
+        ACMNetworkStatistics statistics;
+        acm_b_->NetworkStatistics(statistics);
+        fprintf(stdout, "delay: min=%3d  max=%3d  mean=%3d  median=%3d"
+                " ts-based average = %6.3f, "
+                "curr buff-lev = %4u opt buff-lev = %4u \n",
+                statistics.minWaitingTimeMs,
+                statistics.maxWaitingTimeMs,
+                statistics.meanWaitingTimeMs,
+                statistics.medianWaitingTimeMs,
+                average_delay,
+                statistics.currentBufferSize,
+                statistics.preferredBufferSize);
+        fflush(stdout);
+      }
+
+      in_file_a_.Read10MsData(audio_frame);
+      ASSERT_EQ(0, acm_a_->Add10MsData(audio_frame));
+      ASSERT_LE(0, acm_a_->Process());
+      ASSERT_EQ(0, acm_b_->PlayoutData10Ms(out_freq_hz_b, audio_frame));
+      out_file_b_.Write10MsData(audio_frame.data_,
+                                audio_frame.samples_per_channel_ *
+                                audio_frame.num_channels_);
+      acm_b_->PlayoutTimestamp(playout_ts);
+      received_ts = channel_a2b_->LastInTimestamp();
+      inst_delay_sec = static_cast<uint32_t>(received_ts - playout_ts) /
+          static_cast<double>(encoding_sample_rate_hz_);
+
+      if (num_frames > 10)
+        average_delay = 0.95 * average_delay + 0.05 * inst_delay_sec;
+
+      ++num_frames;
+      ++in_file_frames;
+    }
+    out_file_b_.Close();
+  }
+
+  AudioCodingModule* acm_a_;
+  AudioCodingModule* acm_b_;
+
+  Channel* channel_a2b_;
+
+  PCMFile in_file_a_;
+  PCMFile out_file_b_;
+  int test_cntr_;
+  int encoding_sample_rate_hz_;
+};
+
+} // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  webrtc::Config config = {};  // Zeroed: |codec.name| stays NUL-terminated.
+  strncpy(config.codec.name, FLAGS_codec.c_str(),
+          sizeof(config.codec.name) - 1);
+  config.codec.sample_rate_hz = FLAGS_sample_rate_hz;
+  config.codec.num_channels = FLAGS_num_channels;
+  config.acm.dtx = FLAGS_dtx;
+  config.acm.fec = false;
+  config.packet_loss = false;
+
+  webrtc::DelayTest delay_test;
+  delay_test.SetUp();
+  delay_test.Perform(&config, 1, 240, "delay_test");
+  delay_test.TearDown();
+}
diff --git a/webrtc/modules/audio_coding/main/test/initial_delay_unittest.cc b/webrtc/modules/audio_coding/main/test/initial_delay_unittest.cc
new file mode 100644
index 000000000..af720c3a0
--- /dev/null
+++ b/webrtc/modules/audio_coding/main/test/initial_delay_unittest.cc
@@ -0,0 +1,170 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
+
+#include <math.h>
+
+#include <cassert>
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "testsupport/fileutils.h"
+#include "webrtc/common_types.h"
+#include "webrtc/engine_configurations.h"
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
+#include "webrtc/modules/audio_coding/main/test/Channel.h"
+#include "webrtc/modules/audio_coding/main/test/PCMFile.h"
+#include "webrtc/modules/audio_coding/main/test/utility.h"
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+namespace {
+double FrameRms(AudioFrame& frame) {
+  int samples = frame.num_channels_ * frame.samples_per_channel_;
+  double rms = 0;
+  for (int n = 0; n < samples; ++n)
+    rms += frame.data_[n] * frame.data_[n];
+  rms /= samples;
+  rms = sqrt(rms);
+  return rms;
+}
+
+}
+
+class InitialPlayoutDelayTest  : public ::testing::Test {
+ protected:
+
+  InitialPlayoutDelayTest()
+     : acm_a_(NULL),
+       acm_b_(NULL),
+       channel_a2b_(NULL) {
+  }
+
+  ~InitialPlayoutDelayTest() {}
+
+  void TearDown() {
+    if(acm_a_ != NULL) {
+      AudioCodingModule::Destroy(acm_a_);
+      acm_a_ = NULL;
+    }
+    if(acm_b_ != NULL) {
+      AudioCodingModule::Destroy(acm_b_);
+      acm_b_ = NULL;
+    }
+    if(channel_a2b_ != NULL) {
+      delete channel_a2b_;
+      channel_a2b_ = NULL;
+    }
+  }
+
+  void SetUp() {
+    acm_a_ = AudioCodingModule::Create(0);
+    acm_b_ = AudioCodingModule::Create(1);
+
+    acm_b_->InitializeReceiver();
+    acm_a_->InitializeReceiver();
+
+    // Register all L16 codecs in receiver.
+    CodecInst codec;
+    const int kFsHz[3] = {8000, 16000, 32000};
+    const int kChannels[2] = {1, 2};
+    for (int n = 0; n < 3; ++n) {
+      for (int k = 0; k < 2; ++k) {
+        AudioCodingModule::Codec("L16", codec, kFsHz[n], kChannels[k]);
+        acm_b_->RegisterReceiveCodec(codec);
+      }
+    }
+
+    // Create and connect the channel
+    channel_a2b_ = new Channel;
+    acm_a_->RegisterTransportCallback(channel_a2b_);
+    channel_a2b_->RegisterReceiverACM(acm_b_);
+  }
+
+  void Run(CodecInst codec, int initial_delay_ms) {
+    AudioFrame in_audio_frame;
+    AudioFrame out_audio_frame;
+    int num_frames = 0;
+    const int kAmp = 10000;
+    in_audio_frame.sample_rate_hz_ = codec.plfreq;
+    in_audio_frame.num_channels_ = codec.channels;
+    in_audio_frame.samples_per_channel_ = codec.plfreq / 100;  // 10 ms.
+    int samples = in_audio_frame.num_channels_ *
+        in_audio_frame.samples_per_channel_;
+    for (int n = 0; n < samples; ++n) {
+      in_audio_frame.data_[n] = kAmp;
+    }
+
+    uint32_t timestamp = 0;
+    double rms = 0;
+    acm_a_->RegisterSendCodec(codec);
+    acm_b_->SetInitialPlayoutDelay(initial_delay_ms);
+    while(rms < kAmp / 2) {
+      in_audio_frame.timestamp_ = timestamp;
+      timestamp += in_audio_frame.samples_per_channel_;
+      ASSERT_EQ(0, acm_a_->Add10MsData(in_audio_frame));
+      ASSERT_LE(0, acm_a_->Process());
+      ASSERT_EQ(0, acm_b_->PlayoutData10Ms(codec.plfreq, out_audio_frame));
+      rms = FrameRms(out_audio_frame);
+      ++num_frames;
+    }
+
+    ASSERT_GE(num_frames * 10, initial_delay_ms);
+    ASSERT_LE(num_frames * 10, initial_delay_ms + 100);
+  }
+
+  AudioCodingModule* acm_a_;
+  AudioCodingModule* acm_b_;
+  Channel* channel_a2b_;
+};
+
+
+TEST_F( InitialPlayoutDelayTest, NbMono) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 8000, 1);
+  Run(codec, 3000);
+}
+
+TEST_F( InitialPlayoutDelayTest, WbMono) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 16000, 1);
+  Run(codec, 3000);
+}
+
+TEST_F( InitialPlayoutDelayTest, SwbMono) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 32000, 1);
+  Run(codec, 2000);  // NetEq buffer is not sufficiently large for 3 sec of
+                     // PCM16 super-wideband.
+}
+
+TEST_F( InitialPlayoutDelayTest, NbStereo) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 8000, 2);
+  Run(codec, 3000);
+}
+
+TEST_F( InitialPlayoutDelayTest, WbStereo) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 16000, 2);
+  Run(codec, 3000);
+}
+
+TEST_F( InitialPlayoutDelayTest, SwbStereo) {
+  CodecInst codec;
+  AudioCodingModule::Codec("L16", codec, 32000, 2);
+  Run(codec, 2000);  // NetEq buffer is not sufficiently large for 3 sec of
+                     // PCM16 super-wideband.
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq/automode.c b/webrtc/modules/audio_coding/neteq/automode.c
index d8d56c692..78933cc1b 100644
--- a/webrtc/modules/audio_coding/neteq/automode.c
+++ b/webrtc/modules/audio_coding/neteq/automode.c
@@ -216,7 +216,7 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
             streamingMode);
         if (tempvar > 0)
         {
-            inst->optBufLevel = (WebRtc_UWord16) tempvar;
+            inst->optBufLevel = tempvar;
 
             if (streamingMode != 0)
             {
@@ -238,7 +238,7 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
             maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */
 
             /* Enforce upper limit; 75% of maxBufLen */
-            inst->optBufLevel = (WebRtc_UWord16) WEBRTC_SPL_MIN( inst->optBufLevel,
+            inst->optBufLevel = WEBRTC_SPL_MIN( inst->optBufLevel,
                 (maxBufLen >> 1) + (maxBufLen >> 2) ); /* 1/2 + 1/4 = 75% */
         }
         else
@@ -575,9 +575,8 @@ int WebRtcNetEQ_BufferLevelFilter(WebRtc_Word32 curSizeMs8, AutomodeInst_t *inst
          *
          * levelFiltFact is in Q8
          */
-        inst->buffLevelFilt = (WebRtc_UWord16) (WEBRTC_SPL_RSHIFT_W32(
-            WEBRTC_SPL_MUL_16_U16(inst->levelFiltFact, inst->buffLevelFilt), 8)
-            + WEBRTC_SPL_MUL_16_16(256 - inst->levelFiltFact, curSizeFrames));
+        inst->buffLevelFilt = ((inst->levelFiltFact * inst->buffLevelFilt) >> 8) +
+            (256 - inst->levelFiltFact) * curSizeFrames;
     }
 
     /* Account for time-scale operations (accelerate and pre-emptive expand) */
@@ -589,7 +588,7 @@ int WebRtcNetEQ_BufferLevelFilter(WebRtc_Word32 curSizeMs8, AutomodeInst_t *inst
          * from samples to packets in Q8. Make sure that the filtered value is
          * non-negative.
          */
-        inst->buffLevelFilt = (WebRtc_UWord16) WEBRTC_SPL_MAX( inst->buffLevelFilt -
+        inst->buffLevelFilt = WEBRTC_SPL_MAX( inst->buffLevelFilt -
             WebRtcSpl_DivW32W16(
                 WEBRTC_SPL_LSHIFT_W32(inst->sampleMemory, 8), /* sampleMemory in Q8 */
                 inst->packetSpeechLenSamp ), /* divide by packetSpeechLenSamp */
diff --git a/webrtc/modules/audio_coding/neteq/automode.h b/webrtc/modules/audio_coding/neteq/automode.h
index dbd09cf9a..2e6b514b4 100644
--- a/webrtc/modules/audio_coding/neteq/automode.h
+++ b/webrtc/modules/audio_coding/neteq/automode.h
@@ -65,14 +65,14 @@ typedef struct
 
     /* Filtered current buffer level */
     WebRtc_UWord16 levelFiltFact; /* filter forgetting factor in Q8 */
-    WebRtc_UWord16 buffLevelFilt; /* filtered buffer level in Q8 */
+    int buffLevelFilt; /* filtered buffer level in Q8 */
 
     /* Inter-arrival time (iat) statistics */
     WebRtc_Word32 iatProb[MAX_IAT + 1]; /* iat probabilities in Q30 */
     WebRtc_Word16 iatProbFact; /* iat forgetting factor in Q15 */
     WebRtc_UWord32 packetIatCountSamp; /* time (in timestamps) elapsed since last
      packet arrival, based on RecOut calls */
-    WebRtc_UWord16 optBufLevel; /* current optimal buffer level in Q8 */
+    int optBufLevel; /* current optimal buffer level in Q8 */
 
     /* Packet related information */
     WebRtc_Word16 packetSpeechLenSamp; /* speech samples per incoming packet */
diff --git a/webrtc/modules/audio_coding/neteq/bufstats_decision.c b/webrtc/modules/audio_coding/neteq/bufstats_decision.c
index 3d37e1732..cf7c0b085 100644
--- a/webrtc/modules/audio_coding/neteq/bufstats_decision.c
+++ b/webrtc/modules/audio_coding/neteq/bufstats_decision.c
@@ -38,11 +38,11 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
 
     int currentDelayMs;
     WebRtc_Word32 currSizeSamples = cur_size;
-    WebRtc_Word16 extraDelayPacketsQ8 = 0;
+    int extraDelayPacketsQ8 = 0;
 
     /* Avoid overflow if the buffer size should be really large (cur_size is limited 256ms) */
     WebRtc_Word32 curr_sizeQ7 = WEBRTC_SPL_LSHIFT_W32(cur_size, 4);
-    WebRtc_UWord16 level_limit_hi, level_limit_lo;
+    int level_limit_hi, level_limit_lo;
 
     inst->Automode_inst.prevTimeScale &= (prevPlayMode == MODE_SUCCESS_ACCELERATE
         || prevPlayMode == MODE_LOWEN_ACCELERATE || prevPlayMode == MODE_SUCCESS_PREEMPTIVE
@@ -167,10 +167,11 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
         if (inst->Automode_inst.extraDelayMs > 0 && inst->Automode_inst.packetSpeechLenSamp
             > 0)
         {
-            extraDelayPacketsQ8 = WebRtcSpl_DivW32W16ResW16(
-                (WEBRTC_SPL_MUL(inst->Automode_inst.extraDelayMs, 8 * fs_mult) << 8),
-                inst->Automode_inst.packetSpeechLenSamp);
+
             /* (extra delay in samples in Q8) */
+            extraDelayPacketsQ8 =
+                ((inst->Automode_inst.extraDelayMs * 8 * fs_mult) << 8) /
+                inst->Automode_inst.packetSpeechLenSamp;
         }
 
         /* Check if needed packet is available */
@@ -256,10 +257,10 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
                  * higher than 4 times the optimal level.
                  */
                 WebRtc_Word32 diffTS = (inst->uw32_CNGplayedTS + targetTS) - availableTS;
-                if (diffTS >= 0
-                    || (WEBRTC_SPL_MUL_16_16_RSFT( inst->Automode_inst.optBufLevel
-                        + extraDelayPacketsQ8,
-                        inst->Automode_inst.packetSpeechLenSamp, 6) < currSizeSamples))
+                int val = ((inst->Automode_inst.optBufLevel +
+                    extraDelayPacketsQ8) *
+                    inst->Automode_inst.packetSpeechLenSamp) >> 6;
+                if (diffTS >= 0 || val < currSizeSamples)
                 {
                     /* it is time to play this new packet */
                     return BUFSTATS_DO_NORMAL;
diff --git a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h
index 3d5181e7e..10adfa011 100644
--- a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h
+++ b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h
@@ -177,7 +177,8 @@ int WebRtcNetEQ_AssignSize(int *sizeinbytes);
 int WebRtcNetEQ_Assign(void **inst, void *NETEQ_inst_Addr);
 int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecoder *codec,
                                          int noOfCodecs, enum WebRtcNetEQNetworkType nwType,
-                                         int *MaxNoOfPackets, int *sizeinbytes);
+                                         int *MaxNoOfPackets, int *sizeinbytes,
+                                         int* per_packet_overhead_bytes);
 int WebRtcNetEQ_AssignBuffer(void *inst, int MaxNoOfPackets, void *NETEQ_Buffer_Addr,
                              int sizeinbytes);
 
diff --git a/webrtc/modules/audio_coding/neteq/neteq.gypi b/webrtc/modules/audio_coding/neteq/neteq.gypi
index fcdb1b59a..a447e0786 100644
--- a/webrtc/modules/audio_coding/neteq/neteq.gypi
+++ b/webrtc/modules/audio_coding/neteq/neteq.gypi
@@ -93,6 +93,7 @@
           'dependencies': [
             'NetEq',
             'NetEqTestTools',
+            'neteq_unittest_tools',
             '<(DEPTH)/testing/gtest.gyp:gtest',
             '<(webrtc_root)/test/test.gyp:test_support_main',
           ],
diff --git a/webrtc/modules/audio_coding/neteq/packet_buffer.c b/webrtc/modules/audio_coding/neteq/packet_buffer.c
index bb2d08e29..c51805e11 100644
--- a/webrtc/modules/audio_coding/neteq/packet_buffer.c
+++ b/webrtc/modules/audio_coding/neteq/packet_buffer.c
@@ -577,7 +577,9 @@ void WebRtcNetEQ_IncrementWaitingTimes(PacketBuf_t *buffer_inst) {
 }
 
 int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
-                                        int noOfCodecs, int *maxBytes, int *maxSlots)
+                                        int noOfCodecs, int *maxBytes,
+                                        int *maxSlots,
+                                        int* per_slot_overhead_bytes)
 {
     int i;
     int ok = 0;
@@ -794,5 +796,6 @@ int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
     /* Add the extra size per slot to the memory count */
     *maxBytes += w16_tmp * (*maxSlots);
 
+    *per_slot_overhead_bytes = w16_tmp;
     return ok;
 }
diff --git a/webrtc/modules/audio_coding/neteq/packet_buffer.h b/webrtc/modules/audio_coding/neteq/packet_buffer.h
index 44e070af4..1aa435fa6 100644
--- a/webrtc/modules/audio_coding/neteq/packet_buffer.h
+++ b/webrtc/modules/audio_coding/neteq/packet_buffer.h
@@ -237,12 +237,15 @@ void WebRtcNetEQ_IncrementWaitingTimes(PacketBuf_t *buffer_inst);
  * Output:
  *		- maxBytes	    : Recommended buffer memory size in bytes
  *      - maxSlots      : Recommended number of slots in buffer
+ *      - per_slot_overhead_bytes : overhead in bytes for each slot in buffer.
  *
  * Return value			:  0 - Ok
  *						  <0 - Error
  */
 
 int WebRtcNetEQ_GetDefaultCodecSettings(const enum WebRtcNetEQDecoder *codecID,
-                                        int noOfCodecs, int *maxBytes, int *maxSlots);
+                                        int noOfCodecs, int *maxBytes,
+                                        int *maxSlots,
+                                        int* per_slot_overhead_bytes);
 
 #endif /* PACKET_BUFFER_H */
diff --git a/webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc b/webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
index 0d8be0027..d175c7643 100644
--- a/webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
+++ b/webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
@@ -127,7 +127,7 @@ int NETEQTEST_NetEQClass::init(WebRtc_UWord16 fs)
 
 int NETEQTEST_NetEQClass::assignBuffer(enum WebRtcNetEQDecoder *usedCodec, int noOfCodecs, WebRtcNetEQNetworkType nwType)
 {
-    int numPackets, memSize, ret;
+    int numPackets, memSize, ret, overhead_bytes;
 
     if (!_inst)
     {
@@ -149,7 +149,9 @@ int NETEQTEST_NetEQClass::assignBuffer(enum WebRtcNetEQDecoder *usedCodec, int n
         }
     }
 
-    ret = WebRtcNetEQ_GetRecommendedBufferSize(_inst, usedCodec, noOfCodecs, nwType, &numPackets, &memSize);
+    ret = WebRtcNetEQ_GetRecommendedBufferSize(_inst, usedCodec, noOfCodecs,
+                                               nwType, &numPackets, &memSize,
+                                               &overhead_bytes);
 
     if (ret != 0)
     {
diff --git a/webrtc/modules/audio_coding/neteq/test/NetEqRTPplay.cc b/webrtc/modules/audio_coding/neteq/test/NetEqRTPplay.cc
index 5c6ff5eea..c8b742d02 100644
--- a/webrtc/modules/audio_coding/neteq/test/NetEqRTPplay.cc
+++ b/webrtc/modules/audio_coding/neteq/test/NetEqRTPplay.cc
@@ -1611,6 +1611,7 @@ int doAPItest() {
     WebRtc_UWord32 timestamp;
     int memorySize;
     int ok;
+    int overhead_bytes;
 
     printf("API-test:\n\n");
 
@@ -1623,7 +1624,7 @@ int doAPItest() {
     CHECK_MINUS_ONE(WebRtcNetEQ_Assign(&inst, NULL))
 //  printf("WARNING: Test of WebRtcNetEQ_Assign() is disabled due to a bug.\n");
     usedCodec=kDecoderPCMu;
-    CHECK_MINUS_ONE(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter,  &NetEqBufferMaxPackets, &BufferSizeInBytes))
+    CHECK_MINUS_ONE(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter,  &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes))
     CHECK_MINUS_ONE(WebRtcNetEQ_AssignBuffer(inst, NetEqBufferMaxPackets, NetEqPacketBuffer, BufferSizeInBytes))
 
     CHECK_MINUS_ONE(WebRtcNetEQ_Init(inst, 8000))
@@ -1661,7 +1662,7 @@ int doAPItest() {
 
     /* GetRecommendedBufferSize with wrong codec */
     usedCodec=kDecoderReservedStart;
-    ok = WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes);
+    ok = WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes);
     if((ok!=-1) || ((ok==-1)&&(WebRtcNetEQ_GetErrorCode(inst)!=-CODEC_DB_UNKNOWN_CODEC))){
         printf("WebRtcNetEQ_GetRecommendedBufferSize() did not return proper error code for wrong codec.\n");
         printf("return value = %d; error code = %d\n", ok, WebRtcNetEQ_GetErrorCode(inst));
@@ -1670,13 +1671,13 @@ int doAPItest() {
 
     /* GetRecommendedBufferSize with wrong network type */
     usedCodec = kDecoderPCMu;
-    ok=WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, (enum WebRtcNetEQNetworkType) 4711 , &NetEqBufferMaxPackets, &BufferSizeInBytes);
+    ok=WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, (enum WebRtcNetEQNetworkType) 4711 , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes);
     if ((ok!=-1) || ((ok==-1)&&(WebRtcNetEQ_GetErrorCode(inst)!=-FAULTY_NETWORK_TYPE))) {
         printf("WebRtcNetEQ_GetRecommendedBufferSize() did not return proper error code for wrong network type.\n");
         printf("return value = %d; error code = %d\n", ok, WebRtcNetEQ_GetErrorCode(inst));
         //RESET_ERROR(inst)
     }
-    CHECK_ZERO(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes))
+    CHECK_ZERO(WebRtcNetEQ_GetRecommendedBufferSize(inst, &usedCodec, 1, kTCPLargeJitter , &NetEqBufferMaxPackets, &BufferSizeInBytes, &overhead_bytes))
 
     /* try to do RecIn before assigning the packet buffer */
 /*  makeRTPheader(rtp_data, NETEQ_CODEC_AVT_PT, 17,4711, 1235412312);
diff --git a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
index d13902d26..477b0d0d6 100644
--- a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
+++ b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
@@ -307,7 +307,8 @@ int WebRtcNetEQ_Assign(void **inst, void *NETEQ_inst_Addr)
 
 int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecoder *codec,
                                          int noOfCodecs, enum WebRtcNetEQNetworkType nwType,
-                                         int *MaxNoOfPackets, int *sizeinbytes)
+                                         int *MaxNoOfPackets, int *sizeinbytes,
+                                         int* per_packet_overhead_bytes)
 {
     int ok = 0;
     int multiplier;
@@ -315,7 +316,9 @@ int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecod
     if (NetEqMainInst == NULL) return (-1);
     *MaxNoOfPackets = 0;
     *sizeinbytes = 0;
-    ok = WebRtcNetEQ_GetDefaultCodecSettings(codec, noOfCodecs, sizeinbytes, MaxNoOfPackets);
+    ok = WebRtcNetEQ_GetDefaultCodecSettings(codec, noOfCodecs, sizeinbytes,
+                                             MaxNoOfPackets,
+                                             per_packet_overhead_bytes);
     if (ok != 0)
     {
         NetEqMainInst->ErrorCode = -ok;
@@ -339,7 +342,7 @@ int WebRtcNetEQ_GetRecommendedBufferSize(void *inst, const enum WebRtcNetEQDecod
     }
     else if (nwType == kTCPXLargeJitter)
     {
-        multiplier = 20;
+        multiplier = 12;
     }
     else
     {
@@ -514,7 +517,7 @@ int WebRtcNetEQ_SetExtraDelay(void *inst, int DelayInMs)
 {
     MainInst_t *NetEqMainInst = (MainInst_t*) inst;
     if (NetEqMainInst == NULL) return (-1);
-    if ((DelayInMs < 0) || (DelayInMs > 1000))
+    if ((DelayInMs < 0) || (DelayInMs > 10000))
     {
         NetEqMainInst->ErrorCode = -FAULTY_DELAYVALUE;
         return (-1);
diff --git a/webrtc/modules/audio_coding/neteq/webrtc_neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
index 8b9478874..7f51c665d 100644
--- a/webrtc/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
@@ -12,6 +12,8 @@
  * This file includes unit tests for NetEQ.
  */
 
+#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq.h"
+
 #include <stdlib.h>
 #include <string.h>  // memset
 
@@ -20,15 +22,14 @@
 #include <vector>
 
 #include "gtest/gtest.h"
-
-#include "modules/audio_coding/neteq/interface/webrtc_neteq.h"
-#include "modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h"
-#include "modules/audio_coding/neteq/interface/webrtc_neteq_internal.h"
-#include "modules/audio_coding/neteq/test/NETEQTEST_CodecClass.h"
-#include "modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h"
-#include "modules/audio_coding/neteq/test/NETEQTEST_RTPpacket.h"
 #include "testsupport/fileutils.h"
-#include "typedefs.h"  // NOLINT(build/include)
+#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h"
+#include "webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h"
+#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_CodecClass.h"
+#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h"
+#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_RTPpacket.h"
+#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h"
+#include "webrtc/typedefs.h"
 
 namespace webrtc {
 
@@ -651,4 +652,45 @@ TEST_F(NetEqDecodingTest, NoInputDataStereo) {
   free(ms_info);
 }
 
+TEST_F(NetEqDecodingTest, TestExtraDelay) {  // Buffer level with extra delay.
+  static const int kNumFrames = 120000;  // Needed for convergence.
+  int frame_index = 0;  // Index handed to PopulateRtpInfo() for each packet.
+  static const int kFrameSizeSamples = 30 * 16;  // Presumably 30 ms at 16 kHz.
+  static const int kPayloadBytes = kFrameSizeSamples * 2;  // int16_t samples.
+  test::InputAudioFile input_file(
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
+  int16_t input[kFrameSizeSamples];
+  // Buffers of NetEq cannot accommodate larger delays for PCM16.
+  static const int kExtraDelayMs = 3200;
+  ASSERT_EQ(0, WebRtcNetEQ_SetExtraDelay(neteq_inst_->instance(),
+                                         kExtraDelayMs));
+  for (int i = 0; i < kNumFrames; ++i) {
+    ASSERT_TRUE(input_file.Read(kFrameSizeSamples, input));
+    WebRtcNetEQ_RTPInfo rtp_info;
+    PopulateRtpInfo(frame_index, frame_index * kFrameSizeSamples, &rtp_info);
+    uint8_t* payload = reinterpret_cast<uint8_t*>(input);  // Raw sample bytes.
+    ASSERT_EQ(0,
+              WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
+                                         &rtp_info,
+                                         payload,
+                                         kPayloadBytes, 0));
+    ++frame_index;
+    // Pull out data: three recOut() calls for every inserted packet.
+    for (int j = 0; j < 3; ++j) {
+      ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_));
+    }
+    if (i % 100 == 0) {  // Check the buffer statistics every 100th packet.
+      WebRtcNetEQ_NetworkStatistics network_stats;
+      ASSERT_EQ(0, WebRtcNetEQ_GetNetworkStatistics(neteq_inst_->instance(),
+                                                    &network_stats));
+      const int expected_lower_limit =  // Double result is converted to int.
+          std::min(i * 0.083 - 210, 0.9 * network_stats.preferredBufferSize);
+      EXPECT_GE(network_stats.currentBufferSize, expected_lower_limit);
+      const int expected_upper_limit =  // Double result is converted to int.
+          std::min(i * 0.083 + 255, 1.2 * network_stats.preferredBufferSize);
+      EXPECT_LE(network_stats.currentBufferSize, expected_upper_limit);
+    }
+  }
+}
+
 }  // namespace
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index dd0423f67..88256dee7 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -8,23 +8,23 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "channel.h"
+#include "webrtc/voice_engine/channel.h"
 
-#include "audio_device.h"
-#include "audio_frame_operations.h"
-#include "audio_processing.h"
-#include "critical_section_wrapper.h"
-#include "logging.h"
-#include "output_mixer.h"
-#include "process_thread.h"
-#include "rtp_dump.h"
-#include "statistics.h"
-#include "trace.h"
-#include "transmit_mixer.h"
-#include "utility.h"
-#include "voe_base.h"
-#include "voe_external_media.h"
-#include "voe_rtp_rtcp.h"
+#include "webrtc/modules/audio_device/include/audio_device.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/utility/interface/audio_frame_operations.h"
+#include "webrtc/modules/utility/interface/process_thread.h"
+#include "webrtc/modules/utility/interface/rtp_dump.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/voice_engine/include/voe_external_media.h"
+#include "webrtc/voice_engine/include/voe_rtp_rtcp.h"
+#include "webrtc/voice_engine/output_mixer.h"
+#include "webrtc/voice_engine/statistics.h"
+#include "webrtc/voice_engine/transmit_mixer.h"
+#include "webrtc/voice_engine/utility.h"
 
 #if defined(_WIN32)
 #include <Qos.h>
@@ -6095,6 +6095,29 @@ Channel::GetDelayEstimate(int& delayMs) const
     return 0;
 }
 
+// Range-checks |delay_ms| and forwards it to the ACM. Returns 0, or -1 on error.
+int Channel::SetInitialPlayoutDelay(int delay_ms)
+{
+  WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId,_channelId),
+               "Channel::SetInitialPlayoutDelay()");
+  if ((delay_ms < kVoiceEngineMinMinPlayoutDelayMs) ||
+      (delay_ms > kVoiceEngineMaxMinPlayoutDelayMs))
+  {
+    _engineStatisticsPtr->SetLastError(
+        VE_INVALID_ARGUMENT, kTraceError,
+        "SetInitialPlayoutDelay() invalid initial delay");
+    return -1;
+  }
+  if (_audioCodingModule.SetInitialPlayoutDelay(delay_ms) != 0)
+  {
+    _engineStatisticsPtr->SetLastError(
+        VE_AUDIO_CODING_MODULE_ERROR, kTraceError,
+        "SetInitialPlayoutDelay() failed to set initial playout delay");
+    return -1;
+  }
+  return 0;
+}
+
 int
 Channel::SetMinimumPlayoutDelay(int delayMs)
 {
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 7e8f90865..7fce6b14a 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -11,24 +11,24 @@
 #ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
 #define WEBRTC_VOICE_ENGINE_CHANNEL_H
 
-#include "audio_coding_module.h"
-#include "audio_conference_mixer_defines.h"
-#include "common_types.h"
-#include "dtmf_inband.h"
-#include "dtmf_inband_queue.h"
-#include "file_player.h"
-#include "file_recorder.h"
-#include "level_indicator.h"
-#include "resampler.h"
-#include "rtp_rtcp.h"
-#include "scoped_ptr.h"
-#include "shared_data.h"
-#include "voe_audio_processing.h"
-#include "voe_network.h"
-#include "voice_engine_defines.h"
+#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/common_types.h"
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
+#include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h"
+#include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp.h"
+#include "webrtc/modules/utility/interface/file_player.h"
+#include "webrtc/modules/utility/interface/file_recorder.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/voice_engine/dtmf_inband.h"
+#include "webrtc/voice_engine/dtmf_inband_queue.h"
+#include "webrtc/voice_engine/include/voe_audio_processing.h"
+#include "webrtc/voice_engine/include/voe_network.h"
+#include "webrtc/voice_engine/level_indicator.h"
+#include "webrtc/voice_engine/shared_data.h"
+#include "webrtc/voice_engine/voice_engine_defines.h"
 
 #ifndef WEBRTC_EXTERNAL_TRANSPORT
-#include "udp_transport.h"
+#include "webrtc/modules/udp_transport/interface/udp_transport.h"
 #endif
 #ifdef WEBRTC_SRTP
 #include "SrtpModule.h"
@@ -254,6 +254,7 @@ public:
 
     // VoEVideoSync
     int GetDelayEstimate(int& delayMs) const;
+    int SetInitialPlayoutDelay(int delay_ms);
     int SetMinimumPlayoutDelay(int delayMs);
     int GetPlayoutTimestamp(unsigned int& timestamp);
     int SetInitTimestamp(unsigned int timestamp);
diff --git a/webrtc/voice_engine/include/voe_video_sync.h b/webrtc/voice_engine/include/voe_video_sync.h
index ac3b84a96..142303646 100644
--- a/webrtc/voice_engine/include/voe_video_sync.h
+++ b/webrtc/voice_engine/include/voe_video_sync.h
@@ -33,7 +33,7 @@
 #ifndef WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_H
 #define WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_H
 
-#include "common_types.h"
+#include "webrtc/common_types.h"
 
 namespace webrtc {
 
@@ -60,6 +60,10 @@ public:
     // Sets an additional delay for the playout jitter buffer.
     virtual int SetMinimumPlayoutDelay(int channel, int delayMs) = 0;
 
+    // Sets an initial delay for the playout jitter buffer. The playout of the
+    // audio is delayed by |delay_ms| milliseconds.
+    virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0;
+
     // Gets the sum of the algorithmic delay, jitter buffer delay, and the
     // playout buffer delay for a specified |channel|.
     virtual int GetDelayEstimate(int channel, int& delayMs) = 0;
diff --git a/webrtc/voice_engine/voe_video_sync_impl.cc b/webrtc/voice_engine/voe_video_sync_impl.cc
index 2a7ff7d89..b0910c318 100644
--- a/webrtc/voice_engine/voe_video_sync_impl.cc
+++ b/webrtc/voice_engine/voe_video_sync_impl.cc
@@ -10,11 +10,11 @@
 
 #include "voe_video_sync_impl.h"
 
-#include "channel.h"
-#include "critical_section_wrapper.h"
-#include "trace.h"
-#include "voe_errors.h"
-#include "voice_engine_impl.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/voice_engine/channel.h"
+#include "webrtc/voice_engine/include/voe_errors.h"
+#include "webrtc/voice_engine/voice_engine_impl.h"
 
 namespace webrtc {
 
@@ -144,6 +144,30 @@ int VoEVideoSyncImpl::SetMinimumPlayoutDelay(int channel,int delayMs)
     return channelPtr->SetMinimumPlayoutDelay(delayMs);
 }
 
+// Forwards |delay_ms| to the Channel identified by |channel|; -1 on failure.
+int VoEVideoSyncImpl::SetInitialPlayoutDelay(int channel, int delay_ms)
+{
+    WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
+                 "SetInitialPlayoutDelay(channel=%d, delay_ms=%d)",
+                 channel, delay_ms);
+    ANDROID_NOT_SUPPORTED(_shared->statistics());
+    IPHONE_NOT_SUPPORTED(_shared->statistics());
+    // Reject the call with VE_NOT_INITED when the engine is not initialized.
+    if (!_shared->statistics().Initialized())
+    {
+        _shared->SetLastError(VE_NOT_INITED, kTraceError);
+        return -1;
+    }
+    voe::ScopedChannel scoped_channel(_shared->channel_manager(), channel);
+    if (voe::Channel* target = scoped_channel.ChannelPtr())
+    {
+        return target->SetInitialPlayoutDelay(delay_ms);
+    }
+    _shared->SetLastError(VE_CHANNEL_NOT_VALID, kTraceError,
+        "SetInitialPlayoutDelay() failed to locate channel");
+    return -1;
+}
+
 int VoEVideoSyncImpl::GetDelayEstimate(int channel, int& delayMs)
 {
     WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
diff --git a/webrtc/voice_engine/voe_video_sync_impl.h b/webrtc/voice_engine/voe_video_sync_impl.h
index 1b75f05f7..86ce75cdb 100644
--- a/webrtc/voice_engine/voe_video_sync_impl.h
+++ b/webrtc/voice_engine/voe_video_sync_impl.h
@@ -11,9 +11,9 @@
 #ifndef WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_IMPL_H
 #define WEBRTC_VOICE_ENGINE_VOE_VIDEO_SYNC_IMPL_H
 
-#include "voe_video_sync.h"
+#include "webrtc/voice_engine/include/voe_video_sync.h"
 
-#include "shared_data.h"
+#include "webrtc/voice_engine/shared_data.h"
 
 namespace webrtc {
 
@@ -24,6 +24,8 @@ public:
 
     virtual int SetMinimumPlayoutDelay(int channel, int delayMs);
 
+    virtual int SetInitialPlayoutDelay(int channel, int delay_ms);
+
     virtual int GetDelayEstimate(int channel, int& delayMs);
 
     virtual int SetInitTimestamp(int channel, unsigned int timestamp);