API to control the target delay in the NetEq jitter buffer. NetEq maintains the given delay unless channel conditions require a higher delay.

TEST=unit-test, manual, trybots.
R=henrik.lundin@webrtc.org, henrika@webrtc.org, mflodman@webrtc.org, mikhal@webrtc.org, stefan@webrtc.org, tina.legrand@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/1384005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4087 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
turaj@webrtc.org 2013-05-22 20:39:43 +00:00
parent 561990fd73
commit e46c8d3875
19 changed files with 405 additions and 74 deletions

View File

@ -639,8 +639,9 @@ class AudioCodingModule: public Module {
const uint32_t timestamp = 0) = 0; const uint32_t timestamp = 0) = 0;
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// int32_t SetMinimumPlayoutDelay() // int SetMinimumPlayoutDelay()
// Set Minimum playout delay, used for lip-sync. // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
// such a delay unless channel conditions require a higher delay.
// //
// Input: // Input:
// -time_ms : minimum delay in milliseconds. // -time_ms : minimum delay in milliseconds.
@ -649,7 +650,15 @@ class AudioCodingModule: public Module {
// -1 if failed to set the delay, // -1 if failed to set the delay,
// 0 if the minimum delay is set. // 0 if the minimum delay is set.
// //
virtual int32_t SetMinimumPlayoutDelay(const int32_t time_ms) = 0; virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
//
// The shortest latency, in milliseconds, required by the jitter buffer. This
// is computed based on inter-arrival times and the playout mode of NetEq. The
// actual delay is the maximum of the least-required delay and the minimum
// delay specified by the SetMinimumPlayoutDelay() API.
//
virtual int LeastRequiredDelayMs() const = 0;
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// int32_t RegisterIncomingMessagesCallback() // int32_t RegisterIncomingMessagesCallback()
@ -945,8 +954,9 @@ class AudioCodingModule: public Module {
// Set an initial delay for playout. // Set an initial delay for playout.
// An initial delay yields ACM playout silence until equivalent of |delay_ms| // An initial delay yields ACM playout silence until equivalent of |delay_ms|
// audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio
// from NetEq in its regular fashion, and the given delay is maintained as // from NetEq in its regular fashion, and the given delay is maintained
// "minimum playout delay." // through out the call, unless channel conditions yield to a higher jitter
// buffer delay.
// //
// Input: // Input:
// -delay_ms : delay in milliseconds. // -delay_ms : delay in milliseconds.
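A minimal usage sketch of the new SetMinimumPlayoutDelay()/LeastRequiredDelayMs() pair declared in this header, assuming an AudioCodingModule that is already created and has a registered receive codec; the helper function and the 80 ms figure are illustrative, not part of the patch:

#include <algorithm>

#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"

// Hypothetical helper; error handling trimmed for brevity.
void ConfigureTargetDelay(webrtc::AudioCodingModule* acm) {
  // Ask NetEq to hold at least 80 ms of audio, e.g. for lip-sync.
  if (acm->SetMinimumPlayoutDelay(80) != 0) {
    return;  // Outside the accepted 0-10000 ms range, or NetEq not ready.
  }
  // The delay NetEq needs anyway, derived from packet inter-arrival jitter.
  int required_ms = acm->LeastRequiredDelayMs();
  // The effective target delay is the larger of the two values.
  int target_ms = std::max(required_ms, 80);
  (void)target_ms;
}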

View File

@ -44,12 +44,12 @@ ACMNetEQ::ACMNetEQ()
received_stereo_(false), received_stereo_(false),
master_slave_info_(NULL), master_slave_info_(NULL),
previous_audio_activity_(AudioFrame::kVadUnknown), previous_audio_activity_(AudioFrame::kVadUnknown),
extra_delay_(0),
callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()), callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
min_of_max_num_packets_(0), min_of_max_num_packets_(0),
min_of_buffer_size_bytes_(0), min_of_buffer_size_bytes_(0),
per_packet_overhead_bytes_(0), per_packet_overhead_bytes_(0),
av_sync_(false) { av_sync_(false),
minimum_delay_ms_(0) {
for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) { for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) {
is_initialized_[n] = false; is_initialized_[n] = false;
ptr_vadinst_[n] = NULL; ptr_vadinst_[n] = NULL;
@ -270,24 +270,6 @@ int16_t ACMNetEQ::AllocatePacketBufferByIdxSafe(
return 0; return 0;
} }
int32_t ACMNetEQ::SetExtraDelay(const int32_t delay_in_ms) {
CriticalSectionScoped lock(neteq_crit_sect_);
for (int16_t idx = 0; idx < num_slaves_ + 1; idx++) {
if (!is_initialized_[idx]) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"SetExtraDelay: NetEq is not initialized.");
return -1;
}
if (WebRtcNetEQ_SetExtraDelay(inst_[idx], delay_in_ms) < 0) {
LogError("SetExtraDelay", idx);
return -1;
}
}
extra_delay_ = delay_in_ms;
return 0;
}
int32_t ACMNetEQ::SetAVTPlayout(const bool enable) { int32_t ACMNetEQ::SetAVTPlayout(const bool enable) {
CriticalSectionScoped lock(neteq_crit_sect_); CriticalSectionScoped lock(neteq_crit_sect_);
if (avt_playout_ != enable) { if (avt_playout_ != enable) {
@ -1037,14 +1019,6 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs,
num_slaves_ = 1; num_slaves_ = 1;
is_initialized_[slave_idx] = true; is_initialized_[slave_idx] = true;
// Set Slave delay as all other instances.
if (WebRtcNetEQ_SetExtraDelay(inst_[slave_idx], extra_delay_) < 0) {
LogError("SetExtraDelay", slave_idx);
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"AddSlave: AddSlave Failed, Could not set delay");
return -1;
}
// Set AVT // Set AVT
if (WebRtcNetEQ_SetAVTPlayout(inst_[slave_idx], if (WebRtcNetEQ_SetAVTPlayout(inst_[slave_idx],
(avt_playout_) ? 1 : 0) < 0) { (avt_playout_) ? 1 : 0) < 0) {
@ -1093,8 +1067,13 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs,
"AddSlave: AddSlave Failed, Could not Set Playout Mode."); "AddSlave: AddSlave Failed, Could not Set Playout Mode.");
return -1; return -1;
} }
// Set AV-sync for the slave. // Set AV-sync for the slave.
WebRtcNetEQ_EnableAVSync(inst_[slave_idx], av_sync_ ? 1 : 0); WebRtcNetEQ_EnableAVSync(inst_[slave_idx], av_sync_ ? 1 : 0);
// Set minimum delay.
if (minimum_delay_ms_ > 0)
WebRtcNetEQ_SetMinimumDelay(inst_[slave_idx], minimum_delay_ms_);
} }
return 0; return 0;
@ -1119,4 +1098,23 @@ void ACMNetEQ::EnableAVSync(bool enable) {
} }
} }
int ACMNetEQ::SetMinimumDelay(int minimum_delay_ms) {
CriticalSectionScoped lock(neteq_crit_sect_);
for (int i = 0; i < num_slaves_ + 1; ++i) {
assert(is_initialized_[i]);
if (WebRtcNetEQ_SetMinimumDelay(inst_[i], minimum_delay_ms) < 0)
return -1;
}
minimum_delay_ms_ = minimum_delay_ms;
return 0;
}
int ACMNetEQ::LeastRequiredDelayMs() const {
CriticalSectionScoped lock(neteq_crit_sect_);
assert(is_initialized_[0]);
// Sufficient to query the master.
return WebRtcNetEQ_GetRequiredDelayMs(inst_[0]);
}
} // namespace webrtc } // namespace webrtc

View File

@ -129,18 +129,6 @@ class ACMNetEQ {
int32_t AllocatePacketBuffer(const WebRtcNetEQDecoder* used_codecs, int32_t AllocatePacketBuffer(const WebRtcNetEQDecoder* used_codecs,
int16_t num_codecs); int16_t num_codecs);
//
// SetExtraDelay()
// Sets a |delay_in_ms| milliseconds extra delay in NetEQ.
//
// Input:
// - delay_in_ms : Extra delay in milliseconds.
//
// Return value : 0 if ok.
// <0 if NetEQ returned an error.
//
int32_t SetExtraDelay(const int32_t delay_in_ms);
// //
// SetAVTPlayout() // SetAVTPlayout()
// Enable/disable playout of AVT payloads. // Enable/disable playout of AVT payloads.
@ -301,6 +289,20 @@ class ACMNetEQ {
// //
void EnableAVSync(bool enable); void EnableAVSync(bool enable);
//
// Set a minimum delay in NetEq. Unless channel conditions dictate a longer
// delay, the given delay is maintained by NetEq.
//
int SetMinimumDelay(int minimum_delay_ms);
//
// The shortest latency, in milliseconds, required by the jitter buffer. This
// is computed based on inter-arrival times and the playout mode of NetEq. The
// actual delay is the maximum of the least-required delay and the minimum
// delay specified by the SetMinimumPlayoutDelay() API.
//
int LeastRequiredDelayMs() const;
private: private:
// //
// RTPPack() // RTPPack()
@ -365,7 +367,6 @@ class ACMNetEQ {
bool received_stereo_; bool received_stereo_;
void* master_slave_info_; void* master_slave_info_;
AudioFrame::VADActivity previous_audio_activity_; AudioFrame::VADActivity previous_audio_activity_;
int32_t extra_delay_;
CriticalSectionWrapper* callback_crit_sect_; CriticalSectionWrapper* callback_crit_sect_;
// Minimum of "max number of packets," among all NetEq instances. // Minimum of "max number of packets," among all NetEq instances.
@ -376,6 +377,8 @@ class ACMNetEQ {
// Keep track of AV-sync. Just used to set the slave when a slave is added. // Keep track of AV-sync. Just used to set the slave when a slave is added.
bool av_sync_; bool av_sync_;
int minimum_delay_ms_;
}; };
} // namespace webrtc } // namespace webrtc

View File

@ -137,14 +137,15 @@
'../test/RTPFile.cc', '../test/RTPFile.cc',
'../test/SpatialAudio.cc', '../test/SpatialAudio.cc',
'../test/TestAllCodecs.cc', '../test/TestAllCodecs.cc',
'../test/target_delay_unittest.cc',
'../test/Tester.cc', '../test/Tester.cc',
'../test/TestFEC.cc', '../test/TestFEC.cc',
'../test/TestStereo.cc', '../test/TestStereo.cc',
'../test/TestVADDTX.cc', '../test/TestVADDTX.cc',
'../test/TimedTrace.cc', '../test/TimedTrace.cc',
'../test/TwoWayCommunication.cc', '../test/TwoWayCommunication.cc',
'../test/utility.cc',
'../test/initial_delay_unittest.cc', '../test/initial_delay_unittest.cc',
'../test/utility.cc',
], ],
}, },
{ {

View File

@ -2116,8 +2116,11 @@ int32_t AudioCodingModuleImpl::IncomingPacket(
if (av_sync_ || track_neteq_buffer_) { if (av_sync_ || track_neteq_buffer_) {
last_incoming_send_timestamp_ = rtp_info.header.timestamp; last_incoming_send_timestamp_ = rtp_info.header.timestamp;
first_payload_received_ = true;
} }
// Set the following regardless of whether we are tracking the NetEq buffer
// or are in AV-sync mode.
first_payload_received_ = true;
} }
return 0; return 0;
} }
@ -2192,8 +2195,7 @@ int AudioCodingModuleImpl::InitStereoSlave() {
} }
// Minimum playout delay (Used for lip-sync). // Minimum playout delay (Used for lip-sync).
int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay( int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) {
const int32_t time_ms) {
if ((time_ms < 0) || (time_ms > 10000)) { if ((time_ms < 0) || (time_ms > 10000)) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_, WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Delay must be in the range of 0-10000 milliseconds."); "Delay must be in the range of 0-10000 milliseconds.");
@ -2205,7 +2207,7 @@ int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay(
if (track_neteq_buffer_ && first_payload_received_) if (track_neteq_buffer_ && first_payload_received_)
return 0; return 0;
} }
return neteq_.SetExtraDelay(time_ms); return neteq_.SetMinimumDelay(time_ms);
} }
// Get Dtmf playout status. // Get Dtmf playout status.
@ -2937,7 +2939,7 @@ int AudioCodingModuleImpl::SetInitialPlayoutDelay(int delay_ms) {
} }
av_sync_ = true; av_sync_ = true;
neteq_.EnableAVSync(av_sync_); neteq_.EnableAVSync(av_sync_);
return neteq_.SetExtraDelay(delay_ms); return neteq_.SetMinimumDelay(delay_ms);
} }
bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz, bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz,
@ -3041,4 +3043,8 @@ void AudioCodingModuleImpl::UpdateBufferingSafe(const WebRtcRTPHeader& rtp_info,
initial_delay_ms_ * in_sample_rate_khz)); initial_delay_ms_ * in_sample_rate_khz));
} }
int AudioCodingModuleImpl::LeastRequiredDelayMs() const {
return std::max(neteq_.LeastRequiredDelayMs(), initial_delay_ms_);
}
} // namespace webrtc } // namespace webrtc

View File

@ -167,8 +167,17 @@ class AudioCodingModuleImpl : public AudioCodingModule {
const uint8_t payload_type, const uint8_t payload_type,
const uint32_t timestamp = 0); const uint32_t timestamp = 0);
// Minimum playout delay (used for lip-sync). // NetEq minimum playout delay (used for lip-sync). The actual target delay
int32_t SetMinimumPlayoutDelay(const int32_t time_ms); // is the max of |time_ms| and the required delay dictated by the channel.
int SetMinimumPlayoutDelay(int time_ms);
//
// The shortest latency, in milliseconds, required by the jitter buffer. This
// is computed based on inter-arrival times and the playout mode of NetEq. The
// actual delay is the maximum of the least-required delay and the minimum
// delay specified by the SetMinimumPlayoutDelay() API.
//
int LeastRequiredDelayMs() const;
// Configure Dtmf playout status i.e on/off playout the incoming outband Dtmf // Configure Dtmf playout status i.e on/off playout the incoming outband Dtmf
// tone. // tone.

View File

@ -0,0 +1,172 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "gtest/gtest.h"
#include "testsupport/fileutils.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/sleep.h"
namespace webrtc {
class TargetDelayTest : public ::testing::Test {
protected:
static const int kSampleRateHz = 16000;
static const int kNum10msPerFrame = 2;
static const int kFrameSizeSamples = 320; // 20 ms @ 16 kHz.
// payload-len = frame-samples * 2 bytes/sample.
static const int kPayloadLenBytes = 320 * 2;
// Inter-arrival time, in number of packets, over a jittery channel. A value
// of one means no jitter.
static const int kInterarrivalJitterPacket = 2;
TargetDelayTest()
: acm_(AudioCodingModule::Create(0)) {}
~TargetDelayTest() {
AudioCodingModule::Destroy(acm_);
}
void SetUp() {
EXPECT_TRUE(acm_ != NULL);
CodecInst codec;
ASSERT_EQ(0, AudioCodingModule::Codec("L16", &codec, kSampleRateHz, 1));
ASSERT_EQ(0, acm_->InitializeReceiver());
ASSERT_EQ(0, acm_->RegisterReceiveCodec(codec));
rtp_info_.header.payloadType = codec.pltype;
rtp_info_.header.timestamp = 0;
rtp_info_.header.ssrc = 0x12345678;
rtp_info_.header.markerBit = false;
rtp_info_.header.sequenceNumber = 0;
rtp_info_.type.Audio.channel = 1;
rtp_info_.type.Audio.isCNG = false;
rtp_info_.frameType = kAudioFrameSpeech;
}
void Push() {
rtp_info_.header.timestamp += kFrameSizeSamples;
rtp_info_.header.sequenceNumber++;
uint8_t payload[kPayloadLenBytes]; // Doesn't need to be initialized.
ASSERT_EQ(0, acm_->IncomingPacket(payload, kFrameSizeSamples * 2,
rtp_info_));
}
// Pull audio equivalent to the amount of audio in one RTP packet.
void Pull() {
AudioFrame frame;
for (int k = 0; k < kNum10msPerFrame; ++k) { // Pull one frame.
ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &frame));
// ASSERT_EQ generated an error here, so ASSERT_TRUE is used instead.
ASSERT_TRUE(kSampleRateHz == frame.sample_rate_hz_);
ASSERT_EQ(1, frame.num_channels_);
ASSERT_TRUE(kSampleRateHz / 100 == frame.samples_per_channel_);
}
}
void Run(bool clean) {
for (int n = 0; n < 10; ++n) {
for (int m = 0; m < 5; ++m) {
Push();
Pull();
}
if (!clean) {
for (int m = 0; m < 10; ++m) { // Long enough to trigger delay change.
Push();
for (int n = 0; n < kInterarrivalJitterPacket; ++n)
Pull();
}
}
}
}
int SetMinimumDelay(int delay_ms) {
return acm_->SetMinimumPlayoutDelay(delay_ms);
}
int GetCurrentOptimalDelayMs() {
ACMNetworkStatistics stats;
acm_->NetworkStatistics(&stats);
return stats.preferredBufferSize;
}
int RequiredDelay() {
return acm_->LeastRequiredDelayMs();
}
AudioCodingModule* acm_;
WebRtcRTPHeader rtp_info_;
};
TEST_F(TargetDelayTest, OutOfRangeInput) {
EXPECT_EQ(-1, SetMinimumDelay(-1));
EXPECT_EQ(-1, SetMinimumDelay(10001));
}
TEST_F(TargetDelayTest, NoTargetDelayBufferSizeChanges) {
for (int n = 0; n < 30; ++n) // Run enough iterations.
Run(true);
int clean_optimal_delay = GetCurrentOptimalDelayMs();
Run(false); // Run with jitter.
int jittery_optimal_delay = GetCurrentOptimalDelayMs();
EXPECT_GT(jittery_optimal_delay, clean_optimal_delay);
int required_delay = RequiredDelay();
EXPECT_GT(required_delay, 0);
EXPECT_NEAR(required_delay, jittery_optimal_delay, 1);
}
TEST_F(TargetDelayTest, WithTargetDelayBufferNotChanging) {
// A target delay that is one packet larger than jitter.
const int kTargetDelayMs = (kInterarrivalJitterPacket + 1) *
kNum10msPerFrame * 10;
ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
for (int n = 0; n < 30; ++n) // Run enough iterations to fill up the buffer.
Run(true);
int clean_optimal_delay = GetCurrentOptimalDelayMs();
EXPECT_EQ(kTargetDelayMs, clean_optimal_delay);
Run(false); // Run with jitter.
int jittery_optimal_delay = GetCurrentOptimalDelayMs();
EXPECT_EQ(jittery_optimal_delay, clean_optimal_delay);
}
TEST_F(TargetDelayTest, RequiredDelayAtCorrectRange) {
for (int n = 0; n < 30; ++n) // Run clean and store delay.
Run(true);
int clean_optimal_delay = GetCurrentOptimalDelayMs();
// A relatively large delay.
const int kTargetDelayMs = (kInterarrivalJitterPacket + 10) *
kNum10msPerFrame * 10;
ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
for (int n = 0; n < 300; ++n) // Run enough iterations to fill up the buffer.
Run(true);
Run(false); // Run with jitter.
int jittery_optimal_delay = GetCurrentOptimalDelayMs();
EXPECT_EQ(kTargetDelayMs, jittery_optimal_delay);
int required_delay = RequiredDelay();
// Check that |required_delay| is in the correct range.
EXPECT_GT(required_delay, 0);
EXPECT_GT(jittery_optimal_delay, required_delay);
EXPECT_GT(required_delay, clean_optimal_delay);
// A tighter check for the value of |required_delay|.
// The jitter forces a delay of
// |kInterarrivalJitterPacket * kNum10msPerFrame * 10| milliseconds. So we
// expect |required_delay| to be close to that.
EXPECT_NEAR(kInterarrivalJitterPacket * kNum10msPerFrame * 10,
required_delay, 1);
}
} // namespace webrtc

View File

@ -216,6 +216,14 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
streamingMode); streamingMode);
if (tempvar > 0) if (tempvar > 0)
{ {
int high_lim_delay;
/* Convert the minimum delay from milliseconds to packets in Q8.
* |fsHz| is the sampling rate in Hertz, and |inst->packetSpeechLenSamp|
* is the number of samples per packet (according to the last
* decoding).
*/
int32_t minimum_delay_q8 = ((inst->minimum_delay_ms *
(fsHz / 1000)) << 8) / inst->packetSpeechLenSamp;
inst->optBufLevel = tempvar; inst->optBufLevel = tempvar;
if (streamingMode != 0) if (streamingMode != 0)
@ -224,6 +232,13 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
inst->maxCSumIatQ8); inst->maxCSumIatQ8);
} }
/* The delay required by the channel, before the minimum-delay floor is
 * applied. */
inst->required_delay_q8 = inst->optBufLevel;
/* Maintain the target delay. */
inst->optBufLevel = WEBRTC_SPL_MAX(inst->optBufLevel,
minimum_delay_q8);
/*********/ /*********/
/* Limit */ /* Limit */
/*********/ /*********/
@ -238,8 +253,12 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen,
maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */ maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */
/* Enforce upper limit; 75% of maxBufLen */ /* Enforce upper limit; 75% of maxBufLen */
/* 1/2 + 1/4 = 75% */
high_lim_delay = (maxBufLen >> 1) + (maxBufLen >> 2);
inst->optBufLevel = WEBRTC_SPL_MIN(inst->optBufLevel, inst->optBufLevel = WEBRTC_SPL_MIN(inst->optBufLevel,
(maxBufLen >> 1) + (maxBufLen >> 2) ); /* 1/2 + 1/4 = 75% */ high_lim_delay);
inst->required_delay_q8 = WEBRTC_SPL_MIN(inst->required_delay_q8,
high_lim_delay);
} }
else else
{ {
@ -700,6 +719,7 @@ int WebRtcNetEQ_ResetAutomode(AutomodeInst_t *inst, int maxBufLenPackets)
*/ */
inst->optBufLevel = WEBRTC_SPL_MIN(4, inst->optBufLevel = WEBRTC_SPL_MIN(4,
(maxBufLenPackets >> 1) + (maxBufLenPackets >> 1)); /* 75% of maxBufLenPackets */ (maxBufLenPackets >> 1) + (maxBufLenPackets >> 1)); /* 75% of maxBufLenPackets */
inst->required_delay_q8 = inst->optBufLevel;
inst->levelFiltFact = 253; inst->levelFiltFact = 253;
/* /*
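A worked example of the milliseconds-to-Q8-packets conversion introduced above; the numbers (16 kHz sampling, 20 ms / 320-sample packets, 80 ms minimum delay) are illustrative and not taken from the patch:

#include <cstdint>

// Hypothetical helper mirroring the conversion in WebRtcNetEQ_UpdateIatStatistics().
int32_t MinimumDelayPacketsQ8(int minimum_delay_ms, int fsHz,
                              int packetSpeechLenSamp) {
  return ((minimum_delay_ms * (fsHz / 1000)) << 8) / packetSpeechLenSamp;
}
// MinimumDelayPacketsQ8(80, 16000, 320) == 1024, i.e. 4.0 packets in Q8,
// which corresponds to 4 * 20 ms = 80 ms.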

View File

@ -89,6 +89,12 @@ typedef struct
reached 0 */ reached 0 */
int16_t extraDelayMs; /* extra delay for sync with video */ int16_t extraDelayMs; /* extra delay for sync with video */
int minimum_delay_ms; /* Desired delay; NetEq maintains this amount of
delay unless jitter statistics suggest a higher value. */
int required_delay_q8; /* Smallest delay required. This is computed
according to inter-arrival times and the playout mode. It has the same unit
as |optBufLevel|. */
/* Peak-detection */ /* Peak-detection */
/* vector with the latest peak periods (peak spacing in samples) */ /* vector with the latest peak periods (peak spacing in samples) */
uint32_t peakPeriodSamp[NUM_PEAKS]; uint32_t peakPeriodSamp[NUM_PEAKS];

View File

@ -309,6 +309,19 @@ int WebRtcNetEQ_RecInSyncRTP(void* inst,
WebRtcNetEQ_RTPInfo* rtp_info, WebRtcNetEQ_RTPInfo* rtp_info,
uint32_t receive_timestamp); uint32_t receive_timestamp);
/*
* Set a minimum latency for the jitter buffer. The overall delay is the max of
* |minimum_delay_ms| and the latency that is internally computed based on the
* inter-arrival times.
*/
int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms);
/*
* Get the least required delay in milliseconds given inter-arrival times
* and playout mode.
*/
int WebRtcNetEQ_GetRequiredDelayMs(const void* inst);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -32,7 +32,9 @@ int WebRtcNetEQ_McuReset(MCUInst_t *inst)
inst->main_inst = NULL; inst->main_inst = NULL;
inst->one_desc = 0; inst->one_desc = 0;
inst->BufferStat_inst.Automode_inst.extraDelayMs = 0; inst->BufferStat_inst.Automode_inst.extraDelayMs = 0;
inst->BufferStat_inst.Automode_inst.minimum_delay_ms = 0;
inst->NetEqPlayoutMode = kPlayoutOn; inst->NetEqPlayoutMode = kPlayoutOn;
inst->av_sync = 0;
WebRtcNetEQ_DbReset(&inst->codec_DB_inst); WebRtcNetEQ_DbReset(&inst->codec_DB_inst);
memset(&inst->PayloadSplit_inst, 0, sizeof(SplitInfo_t)); memset(&inst->PayloadSplit_inst, 0, sizeof(SplitInfo_t));

View File

@ -437,6 +437,7 @@ int WebRtcNetEQ_Init(void *inst, uint16_t fs)
NetEqMainInst->MCUinst.first_packet = 1; NetEqMainInst->MCUinst.first_packet = 1;
NetEqMainInst->MCUinst.one_desc = 0; NetEqMainInst->MCUinst.one_desc = 0;
NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.extraDelayMs = 0; NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.extraDelayMs = 0;
NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms = 0;
NetEqMainInst->MCUinst.NoOfExpandCalls = 0; NetEqMainInst->MCUinst.NoOfExpandCalls = 0;
NetEqMainInst->MCUinst.fs = fs; NetEqMainInst->MCUinst.fs = fs;
@ -529,6 +530,19 @@ int WebRtcNetEQ_SetExtraDelay(void *inst, int DelayInMs)
return (0); return (0);
} }
int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms) {
MainInst_t *NetEqMainInst = (MainInst_t*) inst;
if (NetEqMainInst == NULL)
return -1;
if (minimum_delay_ms < 0 || minimum_delay_ms > 10000) {
NetEqMainInst->ErrorCode = -FAULTY_DELAYVALUE;
return -1;
}
NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms =
minimum_delay_ms;
return 0;
}
int WebRtcNetEQ_SetPlayoutMode(void *inst, enum WebRtcNetEQPlayoutMode playoutMode) int WebRtcNetEQ_SetPlayoutMode(void *inst, enum WebRtcNetEQPlayoutMode playoutMode)
{ {
MainInst_t *NetEqMainInst = (MainInst_t*) inst; MainInst_t *NetEqMainInst = (MainInst_t*) inst;
@ -1213,7 +1227,7 @@ int WebRtcNetEQ_GetNetworkStatistics(void *inst, WebRtcNetEQ_NetworkStatistics *
/* Get optimal buffer size */ /* Get optimal buffer size */
/***************************/ /***************************/
if (NetEqMainInst->MCUinst.fs != 0 && NetEqMainInst->MCUinst.fs <= WEBRTC_SPL_WORD16_MAX) if (NetEqMainInst->MCUinst.fs != 0)
{ {
/* preferredBufferSize = Bopt * packSizeSamples / (fs/1000) */ /* preferredBufferSize = Bopt * packSizeSamples / (fs/1000) */
stats->preferredBufferSize stats->preferredBufferSize
@ -1693,3 +1707,25 @@ int WebRtcNetEQ_RecInSyncRTP(void* inst, WebRtcNetEQ_RTPInfo* rtp_info,
} }
return SYNC_PAYLOAD_LEN_BYTES; return SYNC_PAYLOAD_LEN_BYTES;
} }
int WebRtcNetEQ_GetRequiredDelayMs(const void* inst) {
const MainInst_t* NetEqMainInst = (MainInst_t*)inst;
const AutomodeInst_t* auto_mode = (NetEqMainInst == NULL) ? NULL :
&NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst;
/* Instance sanity */
if (NetEqMainInst == NULL || auto_mode == NULL)
return 0;
if (NetEqMainInst->MCUinst.fs == 0)
return 0; // Sampling rate not initialized.
/* |required_delay_q8| has the unit of packets in Q8 domain, therefore,
* the corresponding delay is
* required_delay_ms = (1000 * required_delay_q8 * samples_per_packet /
* sample_rate_hz) / 256;
*/
return (auto_mode->required_delay_q8 *
((auto_mode->packetSpeechLenSamp * 1000) / NetEqMainInst->MCUinst.fs) +
128) >> 8;
}
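As a hedged numeric check of the reverse conversion above (values illustrative, not from the patch): with required_delay_q8 = 1024, packetSpeechLenSamp = 320 and fs = 16000, the expression evaluates to (1024 * ((320 * 1000) / 16000) + 128) >> 8 = (20480 + 128) >> 8 = 80 ms. The added 128 rounds to the nearest millisecond before the Q8 shift, matching the 80 ms minimum delay from the earlier example.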

View File

@ -29,12 +29,14 @@ struct ViESyncDelay {
extra_video_delay_ms = 0; extra_video_delay_ms = 0;
last_video_delay_ms = 0; last_video_delay_ms = 0;
extra_audio_delay_ms = 0; extra_audio_delay_ms = 0;
last_audio_delay_ms = 0;
network_delay = 120; network_delay = 120;
} }
int extra_video_delay_ms; int extra_video_delay_ms;
int last_video_delay_ms; int last_video_delay_ms;
int extra_audio_delay_ms; int extra_audio_delay_ms;
int last_audio_delay_ms;
int network_delay; int network_delay;
}; };
@ -87,9 +89,9 @@ bool StreamSynchronization::ComputeRelativeDelay(
bool StreamSynchronization::ComputeDelays(int relative_delay_ms, bool StreamSynchronization::ComputeDelays(int relative_delay_ms,
int current_audio_delay_ms, int current_audio_delay_ms,
int* extra_audio_delay_ms, int* total_audio_delay_target_ms,
int* total_video_delay_target_ms) { int* total_video_delay_target_ms) {
assert(extra_audio_delay_ms && total_video_delay_target_ms); assert(total_audio_delay_target_ms && total_video_delay_target_ms);
int current_video_delay_ms = *total_video_delay_target_ms; int current_video_delay_ms = *total_video_delay_target_ms;
WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
@ -173,17 +175,26 @@ bool StreamSynchronization::ComputeDelays(int relative_delay_ms,
new_video_delay_ms = new_video_delay_ms =
std::min(new_video_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs); std::min(new_video_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs);
// Make sure that audio is never below our target. int new_audio_delay_ms;
channel_delay_->extra_audio_delay_ms = if (channel_delay_->extra_audio_delay_ms > base_target_delay_ms_) {
std::max(base_target_delay_ms_, channel_delay_->extra_audio_delay_ms); new_audio_delay_ms = channel_delay_->extra_audio_delay_ms;
} else {
// No change to the audio delay. We are changing the video delay, and we
// only allow changing one at a time.
new_audio_delay_ms = channel_delay_->last_audio_delay_ms;
}
// Make sure that we don't go below the extra audio delay.
new_audio_delay_ms = std::max(
new_audio_delay_ms, channel_delay_->extra_audio_delay_ms);
// Verify we don't go above the maximum allowed audio delay. // Verify we don't go above the maximum allowed audio delay.
channel_delay_->extra_audio_delay_ms = std::min( new_audio_delay_ms =
channel_delay_->extra_audio_delay_ms, std::min(new_audio_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs);
base_target_delay_ms_ + kMaxDeltaDelayMs);
// Remember our last video delay. // Remember our last audio and video delays.
channel_delay_->last_video_delay_ms = new_video_delay_ms; channel_delay_->last_video_delay_ms = new_video_delay_ms;
channel_delay_->last_audio_delay_ms = new_audio_delay_ms;
WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
"Sync video delay %d ms for video channel and audio delay %d for audio " "Sync video delay %d ms for video channel and audio delay %d for audio "
@ -192,8 +203,8 @@ bool StreamSynchronization::ComputeDelays(int relative_delay_ms,
audio_channel_id_); audio_channel_id_);
// Return values. // Return values.
*extra_audio_delay_ms = channel_delay_->extra_audio_delay_ms;
*total_video_delay_target_ms = new_video_delay_ms; *total_video_delay_target_ms = new_video_delay_ms;
*total_audio_delay_target_ms = new_audio_delay_ms;
return true; return true;
} }
@ -201,6 +212,8 @@ void StreamSynchronization::SetTargetBufferingDelay(int target_delay_ms) {
// Initial extra delay for audio (accounting for existing extra delay). // Initial extra delay for audio (accounting for existing extra delay).
channel_delay_->extra_audio_delay_ms += channel_delay_->extra_audio_delay_ms +=
target_delay_ms - base_target_delay_ms_; target_delay_ms - base_target_delay_ms_;
channel_delay_->last_audio_delay_ms +=
target_delay_ms - base_target_delay_ms_;
// The video delay is compared to the last value (and how much we can update // The video delay is compared to the last value (and how much we can update
// is limited by that as well). // is limited by that as well).

View File

@ -153,21 +153,24 @@ int32_t ViESyncModule::Process() {
TRACE_COUNTER1("webrtc", "SyncCurrentAudioDelay", TRACE_COUNTER1("webrtc", "SyncCurrentAudioDelay",
audio_jitter_buffer_delay_ms); audio_jitter_buffer_delay_ms);
TRACE_COUNTER1("webrtc", "SyncRelativeDelay", relative_delay_ms); TRACE_COUNTER1("webrtc", "SyncRelativeDelay", relative_delay_ms);
int extra_audio_delay_ms = 0; int total_audio_delay_target_ms = 0;
// Calculate the necessary extra audio delay and desired total video // Calculate the necessary extra audio delay and desired total video
// delay to get the streams in sync. // delay to get the streams in sync.
int current_audio_delay = audio_jitter_buffer_delay_ms +
playout_buffer_delay_ms;
if (!sync_->ComputeDelays(relative_delay_ms, if (!sync_->ComputeDelays(relative_delay_ms,
audio_jitter_buffer_delay_ms, current_audio_delay,
&extra_audio_delay_ms, &total_audio_delay_target_ms,
&total_video_delay_target_ms)) { &total_video_delay_target_ms)) {
return 0; return 0;
} }
TRACE_COUNTER1("webrtc", "SyncExtraAudioDelayTarget", extra_audio_delay_ms); TRACE_COUNTER1("webrtc", "SyncTotalAudioDelayTarget",
total_audio_delay_target_ms);
TRACE_COUNTER1("webrtc", "SyncTotalVideoDelayTarget", TRACE_COUNTER1("webrtc", "SyncTotalVideoDelayTarget",
total_video_delay_target_ms); total_video_delay_target_ms);
if (voe_sync_interface_->SetMinimumPlayoutDelay( if (voe_sync_interface_->SetMinimumPlayoutDelay(
voe_channel_id_, extra_audio_delay_ms) == -1) { voe_channel_id_, total_audio_delay_target_ms) == -1) {
WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, vie_channel_->Id(), WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, vie_channel_->Id(),
"Error setting voice delay"); "Error setting voice delay");
} }

View File

@ -950,6 +950,7 @@ Channel::Channel(int32_t channelId,
_countDeadDetections(0), _countDeadDetections(0),
_outputSpeechType(AudioFrame::kNormalSpeech), _outputSpeechType(AudioFrame::kNormalSpeech),
_average_jitter_buffer_delay_us(0), _average_jitter_buffer_delay_us(0),
least_required_delay_ms_(0),
_previousTimestamp(0), _previousTimestamp(0),
_recPacketDelayMs(20), _recPacketDelayMs(20),
_RxVadDetection(false), _RxVadDetection(false),
@ -5092,6 +5093,9 @@ void Channel::UpdatePacketDelay(uint32_t rtp_timestamp,
return; return;
} }
// Update the least required delay.
least_required_delay_ms_ = _audioCodingModule.LeastRequiredDelayMs();
if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) { if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) {
// Even though the actual sampling rate for G.722 audio is // Even though the actual sampling rate for G.722 audio is
// 16,000 Hz, the RTP clock rate for the G722 payload format is // 16,000 Hz, the RTP clock rate for the G722 payload format is

View File

@ -205,6 +205,7 @@ public:
// VoEVideoSync // VoEVideoSync
bool GetDelayEstimate(int* jitter_buffer_delay_ms, bool GetDelayEstimate(int* jitter_buffer_delay_ms,
int* playout_buffer_delay_ms) const; int* playout_buffer_delay_ms) const;
int least_required_delay_ms() const { return least_required_delay_ms_; }
int SetInitialPlayoutDelay(int delay_ms); int SetInitialPlayoutDelay(int delay_ms);
int SetMinimumPlayoutDelay(int delayMs); int SetMinimumPlayoutDelay(int delayMs);
int GetPlayoutTimestamp(unsigned int& timestamp); int GetPlayoutTimestamp(unsigned int& timestamp);
@ -536,6 +537,7 @@ private:
AudioFrame::SpeechType _outputSpeechType; AudioFrame::SpeechType _outputSpeechType;
// VoEVideoSync // VoEVideoSync
uint32_t _average_jitter_buffer_delay_us; uint32_t _average_jitter_buffer_delay_us;
int least_required_delay_ms_;
uint32_t _previousTimestamp; uint32_t _previousTimestamp;
uint16_t _recPacketDelayMs; uint16_t _recPacketDelayMs;
// VoEAudioProcessing // VoEAudioProcessing

View File

@ -57,11 +57,18 @@ public:
// Gets the current sound card buffer size (playout delay). // Gets the current sound card buffer size (playout delay).
virtual int GetPlayoutBufferSize(int& buffer_ms) = 0; virtual int GetPlayoutBufferSize(int& buffer_ms) = 0;
// Sets an additional delay for the playout jitter buffer. // Sets a minimum target delay for the jitter buffer. This delay is
// maintained by the jitter buffer, unless channel conditions (jitter in
// inter-arrival times) dictate a higher required delay. The overall
// jitter buffer delay is the maximum of |delay_ms| and the latency that NetEq
// computes based on inter-arrival times and its playout mode.
virtual int SetMinimumPlayoutDelay(int channel, int delay_ms) = 0; virtual int SetMinimumPlayoutDelay(int channel, int delay_ms) = 0;
// Sets an initial delay for the playout jitter buffer. The playout of the // Sets an initial delay for the playout jitter buffer. The playout of the
// audio is delayed by |delay_ms| in millisecond. // audio is delayed by |delay_ms| in milliseconds. Thereafter, the delay is
// maintained, unless NetEq's internal mechanism requires a higher latency.
// Such a latency is computed based on inter-arrival times and NetEq's
// playout mode.
virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0; virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0;
// Gets the |jitter_buffer_delay_ms| (including the algorithmic delay), and // Gets the |jitter_buffer_delay_ms| (including the algorithmic delay), and
@ -70,6 +77,12 @@ public:
int* jitter_buffer_delay_ms, int* jitter_buffer_delay_ms,
int* playout_buffer_delay_ms) = 0; int* playout_buffer_delay_ms) = 0;
// Returns the least required jitter buffer delay. This is computed by the
// jitter buffer based on the inter-arrival times of RTP packets and the
// playout mode. NetEq maintains this latency unless a higher value is
// requested by calling SetMinimumPlayoutDelay().
virtual int GetLeastRequiredDelayMs(int channel) const = 0;
// Manual initialization of the RTP timestamp. // Manual initialization of the RTP timestamp.
virtual int SetInitTimestamp(int channel, unsigned int timestamp) = 0; virtual int SetInitTimestamp(int channel, unsigned int timestamp) = 0;
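A sketch of how a sync client might combine the two calls, in the spirit of the ViESyncModule::Process() change below; the helper name and the assumption that the interface class is VoEVideoSync are mine, not part of the patch:

// Hypothetical caller; |voe_sync| and |channel| are assumed to exist already.
void ApplyAudioDelayTarget(webrtc::VoEVideoSync* voe_sync, int channel,
                           int total_audio_delay_target_ms) {
  // Delay NetEq already requires because of inter-arrival jitter; requesting
  // less than this has no effect, since NetEq takes the maximum of the two.
  int least_required_ms = voe_sync->GetLeastRequiredDelayMs(channel);
  (void)least_required_ms;
  if (voe_sync->SetMinimumPlayoutDelay(channel,
                                       total_audio_delay_target_ms) == -1) {
    // Channel not valid or VoiceEngine not initialized.
  }
}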

View File

@ -237,6 +237,24 @@ int VoEVideoSyncImpl::GetRtpRtcp(int channel, RtpRtcp* &rtpRtcpModule)
return channelPtr->GetRtpRtcp(rtpRtcpModule); return channelPtr->GetRtpRtcp(rtpRtcpModule);
} }
int VoEVideoSyncImpl::GetLeastRequiredDelayMs(int channel) const {
WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
"GetLeastRequiredDelayMS(channel=%d)", channel);
IPHONE_NOT_SUPPORTED(_shared->statistics());
if (!_shared->statistics().Initialized()) {
_shared->SetLastError(VE_NOT_INITED, kTraceError);
return -1;
}
voe::ScopedChannel sc(_shared->channel_manager(), channel);
voe::Channel* channel_ptr = sc.ChannelPtr();
if (channel_ptr == NULL) {
_shared->SetLastError(VE_CHANNEL_NOT_VALID, kTraceError,
"GetLeastRequiredDelayMs() failed to locate channel");
return -1;
}
return channel_ptr->least_required_delay_ms();
}
#endif // #ifdef WEBRTC_VOICE_ENGINE_VIDEO_SYNC_API #endif // #ifdef WEBRTC_VOICE_ENGINE_VIDEO_SYNC_API

View File

@ -30,6 +30,8 @@ public:
int* jitter_buffer_delay_ms, int* jitter_buffer_delay_ms,
int* playout_buffer_delay_ms); int* playout_buffer_delay_ms);
virtual int GetLeastRequiredDelayMs(int channel) const;
virtual int SetInitTimestamp(int channel, unsigned int timestamp); virtual int SetInitTimestamp(int channel, unsigned int timestamp);
virtual int SetInitSequenceNumber(int channel, short sequenceNumber); virtual int SetInitSequenceNumber(int channel, short sequenceNumber);