diff --git a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h index a2e7efef4..c3bbc9b77 100644 --- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h +++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h @@ -639,8 +639,9 @@ class AudioCodingModule: public Module { const uint32_t timestamp = 0) = 0; /////////////////////////////////////////////////////////////////////////// - // int32_t SetMinimumPlayoutDelay() - // Set Minimum playout delay, used for lip-sync. + // int SetMinimumPlayoutDelay() + // Set a minimum for the playout delay, used for lip-sync. NetEq maintains + // such a delay unless channel condition yields to a higher delay. // // Input: // -time_ms : minimum delay in milliseconds. @@ -649,7 +650,15 @@ class AudioCodingModule: public Module { // -1 if failed to set the delay, // 0 if the minimum delay is set. // - virtual int32_t SetMinimumPlayoutDelay(const int32_t time_ms) = 0; + virtual int SetMinimumPlayoutDelay(int time_ms) = 0; + + // + // The shortest latency, in milliseconds, required by jitter buffer. This + // is computed based on inter-arrival times and playout mode of NetEq. The + // actual delay is the maximum of least-required-delay and the minimum-delay + // specified by SetMinimumPlayoutDelay() API. + // + virtual int LeastRequiredDelayMs() const = 0; /////////////////////////////////////////////////////////////////////////// // int32_t RegisterIncomingMessagesCallback() @@ -945,8 +954,9 @@ class AudioCodingModule: public Module { // Set an initial delay for playout. // An initial delay yields ACM playout silence until equivalent of |delay_ms| // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio - // from NetEq in its regular fashion, and the given delay is maintained as - // "minimum playout delay." 
+ // from NetEq in its regular fashion, and the given delay is maintained + // throughout the call, unless channel conditions yield to a higher jitter + // buffer delay. // // Input: // -delay_ms : delay in milliseconds. diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.cc b/webrtc/modules/audio_coding/main/source/acm_neteq.cc index f6b64d710..f2eafd71b 100644 --- a/webrtc/modules/audio_coding/main/source/acm_neteq.cc +++ b/webrtc/modules/audio_coding/main/source/acm_neteq.cc @@ -44,12 +44,12 @@ ACMNetEQ::ACMNetEQ() received_stereo_(false), master_slave_info_(NULL), previous_audio_activity_(AudioFrame::kVadUnknown), - extra_delay_(0), callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()), min_of_max_num_packets_(0), min_of_buffer_size_bytes_(0), per_packet_overhead_bytes_(0), - av_sync_(false) { + av_sync_(false), + minimum_delay_ms_(0) { for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) { is_initialized_[n] = false; ptr_vadinst_[n] = NULL; @@ -270,24 +270,6 @@ int16_t ACMNetEQ::AllocatePacketBufferByIdxSafe( return 0; } -int32_t ACMNetEQ::SetExtraDelay(const int32_t delay_in_ms) { - CriticalSectionScoped lock(neteq_crit_sect_); - - for (int16_t idx = 0; idx < num_slaves_ + 1; idx++) { - if (!is_initialized_[idx]) { - WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_, - "SetExtraDelay: NetEq is not initialized."); - return -1; - } - if (WebRtcNetEQ_SetExtraDelay(inst_[idx], delay_in_ms) < 0) { - LogError("SetExtraDelay", idx); - return -1; - } - } - extra_delay_ = delay_in_ms; - return 0; -} - int32_t ACMNetEQ::SetAVTPlayout(const bool enable) { CriticalSectionScoped lock(neteq_crit_sect_); if (avt_playout_ != enable) { @@ -1037,14 +1019,6 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs, num_slaves_ = 1; is_initialized_[slave_idx] = true; - // Set Slave delay as all other instances. 
- if (WebRtcNetEQ_SetExtraDelay(inst_[slave_idx], extra_delay_) < 0) { - LogError("SetExtraDelay", slave_idx); - WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_, - "AddSlave: AddSlave Failed, Could not set delay"); - return -1; - } - // Set AVT if (WebRtcNetEQ_SetAVTPlayout(inst_[slave_idx], (avt_playout_) ? 1 : 0) < 0) { @@ -1093,8 +1067,13 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs, "AddSlave: AddSlave Failed, Could not Set Playout Mode."); return -1; } + // Set AV-sync for the slave. WebRtcNetEQ_EnableAVSync(inst_[slave_idx], av_sync_ ? 1 : 0); + + // Set minimum delay. + if (minimum_delay_ms_ > 0) + WebRtcNetEQ_SetMinimumDelay(inst_[slave_idx], minimum_delay_ms_); } return 0; @@ -1119,4 +1098,23 @@ void ACMNetEQ::EnableAVSync(bool enable) { } } +int ACMNetEQ::SetMinimumDelay(int minimum_delay_ms) { + CriticalSectionScoped lock(neteq_crit_sect_); + for (int i = 0; i < num_slaves_ + 1; ++i) { + assert(is_initialized_[i]); + if (WebRtcNetEQ_SetMinimumDelay(inst_[i], minimum_delay_ms) < 0) + return -1; + } + minimum_delay_ms_ = minimum_delay_ms; + return 0; +} + +int ACMNetEQ::LeastRequiredDelayMs() const { + CriticalSectionScoped lock(neteq_crit_sect_); + assert(is_initialized_[0]); + + // Sufficient to query the master. + return WebRtcNetEQ_GetRequiredDelayMs(inst_[0]); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.h b/webrtc/modules/audio_coding/main/source/acm_neteq.h index ed815443b..e70ac246d 100644 --- a/webrtc/modules/audio_coding/main/source/acm_neteq.h +++ b/webrtc/modules/audio_coding/main/source/acm_neteq.h @@ -129,18 +129,6 @@ class ACMNetEQ { int32_t AllocatePacketBuffer(const WebRtcNetEQDecoder* used_codecs, int16_t num_codecs); - // - // SetExtraDelay() - // Sets a |delay_in_ms| milliseconds extra delay in NetEQ. - // - // Input: - // - delay_in_ms : Extra delay in milliseconds. - // - // Return value : 0 if ok. - // <0 if NetEQ returned an error. 
- // - int32_t SetExtraDelay(const int32_t delay_in_ms); - // // SetAVTPlayout() // Enable/disable playout of AVT payloads. @@ -301,6 +289,20 @@ class ACMNetEQ { // void EnableAVSync(bool enable); + // + // Set a minimum delay in NetEq. Unless channel condition dictates a longer + // delay, the given delay is maintained by NetEq. + // + int SetMinimumDelay(int minimum_delay_ms); + + // + // The shortest latency, in milliseconds, required by jitter buffer. This + // is computed based on inter-arrival times and playout mode of NetEq. The + // actual delay is the maximum of least-required-delay and the minimum-delay + // specified by SetMinimumPlayoutDelay() API. + // + int LeastRequiredDelayMs() const ; + private: // // RTPPack() @@ -365,7 +367,6 @@ class ACMNetEQ { bool received_stereo_; void* master_slave_info_; AudioFrame::VADActivity previous_audio_activity_; - int32_t extra_delay_; CriticalSectionWrapper* callback_crit_sect_; // Minimum of "max number of packets," among all NetEq instances. @@ -376,6 +377,8 @@ class ACMNetEQ { // Keep track of AV-sync. Just used to set the slave when a slave is added. 
bool av_sync_; + + int minimum_delay_ms_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi index 753291ba3..e6ba5009b 100644 --- a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi +++ b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi @@ -137,14 +137,15 @@ '../test/RTPFile.cc', '../test/SpatialAudio.cc', '../test/TestAllCodecs.cc', + '../test/target_delay_unittest.cc', '../test/Tester.cc', '../test/TestFEC.cc', '../test/TestStereo.cc', '../test/TestVADDTX.cc', '../test/TimedTrace.cc', '../test/TwoWayCommunication.cc', - '../test/utility.cc', '../test/initial_delay_unittest.cc', + '../test/utility.cc', ], }, { diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc index be9befc4c..5eb631af0 100644 --- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc +++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc @@ -2116,8 +2116,11 @@ int32_t AudioCodingModuleImpl::IncomingPacket( if (av_sync_ || track_neteq_buffer_) { last_incoming_send_timestamp_ = rtp_info.header.timestamp; - first_payload_received_ = true; } + + // Set the following regardless of tracking NetEq buffer or being in + // AV-sync mode. + first_payload_received_ = true; } return 0; } @@ -2192,8 +2195,7 @@ int AudioCodingModuleImpl::InitStereoSlave() { } // Minimum playout delay (Used for lip-sync). 
-int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay( - const int32_t time_ms) { +int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) { if ((time_ms < 0) || (time_ms > 10000)) { WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_, "Delay must be in the range of 0-10000 milliseconds."); @@ -2205,7 +2207,7 @@ int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay( if (track_neteq_buffer_ && first_payload_received_) return 0; } - return neteq_.SetExtraDelay(time_ms); + return neteq_.SetMinimumDelay(time_ms); } // Get Dtmf playout status. @@ -2937,7 +2939,7 @@ int AudioCodingModuleImpl::SetInitialPlayoutDelay(int delay_ms) { } av_sync_ = true; neteq_.EnableAVSync(av_sync_); - return neteq_.SetExtraDelay(delay_ms); + return neteq_.SetMinimumDelay(delay_ms); } bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz, @@ -3041,4 +3043,8 @@ void AudioCodingModuleImpl::UpdateBufferingSafe(const WebRtcRTPHeader& rtp_info, initial_delay_ms_ * in_sample_rate_khz)); } +int AudioCodingModuleImpl::LeastRequiredDelayMs() const { + return std::max(neteq_.LeastRequiredDelayMs(), initial_delay_ms_); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h index fe1564dd3..a0ae01434 100644 --- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h +++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h @@ -167,8 +167,17 @@ class AudioCodingModuleImpl : public AudioCodingModule { const uint8_t payload_type, const uint32_t timestamp = 0); - // Minimum playout delay (used for lip-sync). - int32_t SetMinimumPlayoutDelay(const int32_t time_ms); + // NetEq minimum playout delay (used for lip-sync). The actual target delay + // is the max of |time_ms| and the required delay dictated by the channel. 
+ int SetMinimumPlayoutDelay(int time_ms); + + // + // The shortest latency, in milliseconds, required by jitter buffer. This + // is computed based on inter-arrival times and playout mode of NetEq. The + // actual delay is the maximum of least-required-delay and the minimum-delay + // specified by SetMinimumPlayoutDelay() API. + // + int LeastRequiredDelayMs() const ; // Configure Dtmf playout status i.e on/off playout the incoming outband Dtmf // tone. diff --git a/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc b/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc new file mode 100644 index 000000000..0ae25294c --- /dev/null +++ b/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "gtest/gtest.h" +#include "testsupport/fileutils.h" +#include "webrtc/common_types.h" +#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/sleep.h" + +namespace webrtc { +class TargetDelayTest : public ::testing::Test { + protected: + static const int kSampleRateHz = 16000; + static const int kNum10msPerFrame = 2; + static const int kFrameSizeSamples = 320; // 20 ms @ 16 kHz. + // payload-len = frame-samples * 2 bytes/sample. + static const int kPayloadLenBytes = 320 * 2; + // Inter-arrival time in number of packets in a jittery channel. One is no + // jitter. 
+ static const int kInterarrivalJitterPacket = 2; + + TargetDelayTest() + : acm_(AudioCodingModule::Create(0)) {} + + ~TargetDelayTest() { + AudioCodingModule::Destroy(acm_); + } + + void SetUp() { + EXPECT_TRUE(acm_ != NULL); + + CodecInst codec; + ASSERT_EQ(0, AudioCodingModule::Codec("L16", &codec, kSampleRateHz, 1)); + ASSERT_EQ(0, acm_->InitializeReceiver()); + ASSERT_EQ(0, acm_->RegisterReceiveCodec(codec)); + + rtp_info_.header.payloadType = codec.pltype; + rtp_info_.header.timestamp = 0; + rtp_info_.header.ssrc = 0x12345678; + rtp_info_.header.markerBit = false; + rtp_info_.header.sequenceNumber = 0; + rtp_info_.type.Audio.channel = 1; + rtp_info_.type.Audio.isCNG = false; + rtp_info_.frameType = kAudioFrameSpeech; + } + + void Push() { + rtp_info_.header.timestamp += kFrameSizeSamples; + rtp_info_.header.sequenceNumber++; + uint8_t payload[kPayloadLenBytes]; // Doesn't need to be initialized. + ASSERT_EQ(0, acm_->IncomingPacket(payload, kFrameSizeSamples * 2, + rtp_info_)); + } + + // Pull audio equivalent to the amount of audio in one RTP packet. + void Pull() { + AudioFrame frame; + for (int k = 0; k < kNum10msPerFrame; ++k) { // Pull one frame. + ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &frame)); + // Had to use ASSERT_TRUE, ASSERT_EQ generated error. + ASSERT_TRUE(kSampleRateHz == frame.sample_rate_hz_); + ASSERT_EQ(1, frame.num_channels_); + ASSERT_TRUE(kSampleRateHz / 100 == frame.samples_per_channel_); + } + } + + void Run(bool clean) { + for (int n = 0; n < 10; ++n) { + for (int m = 0; m < 5; ++m) { + Push(); + Pull(); + } + + if (!clean) { + for (int m = 0; m < 10; ++m) { // Long enough to trigger delay change. 
+ Push(); + for (int n = 0; n < kInterarrivalJitterPacket; ++n) + Pull(); + } + } + } + } + + int SetMinimumDelay(int delay_ms) { + return acm_->SetMinimumPlayoutDelay(delay_ms); + } + + int GetCurrentOptimalDelayMs() { + ACMNetworkStatistics stats; + acm_->NetworkStatistics(&stats); + return stats.preferredBufferSize; + } + + int RequiredDelay() { + return acm_->LeastRequiredDelayMs(); + } + + AudioCodingModule* acm_; + WebRtcRTPHeader rtp_info_; +}; + +TEST_F(TargetDelayTest, OutOfRangeInput) { + EXPECT_EQ(-1, SetMinimumDelay(-1)); + EXPECT_EQ(-1, SetMinimumDelay(10001)); +} + +TEST_F(TargetDelayTest, NoTargetDelayBufferSizeChanges) { + for (int n = 0; n < 30; ++n) // Run enough iterations. + Run(true); + int clean_optimal_delay = GetCurrentOptimalDelayMs(); + Run(false); // Run with jitter. + int jittery_optimal_delay = GetCurrentOptimalDelayMs(); + EXPECT_GT(jittery_optimal_delay, clean_optimal_delay); + int required_delay = RequiredDelay(); + EXPECT_GT(required_delay, 0); + EXPECT_NEAR(required_delay, jittery_optimal_delay, 1); +} + +TEST_F(TargetDelayTest, WithTargetDelayBufferNotChanging) { + // A target delay that is one packet larger than jitter. + const int kTargetDelayMs = (kInterarrivalJitterPacket + 1) * + kNum10msPerFrame * 10; + ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs)); + for (int n = 0; n < 30; ++n) // Run enough iterations to fill up the buffer. + Run(true); + int clean_optimal_delay = GetCurrentOptimalDelayMs(); + EXPECT_EQ(kTargetDelayMs, clean_optimal_delay); + Run(false); // Run with jitter. + int jittery_optimal_delay = GetCurrentOptimalDelayMs(); + EXPECT_EQ(jittery_optimal_delay, clean_optimal_delay); +} + +TEST_F(TargetDelayTest, RequiredDelayAtCorrectRange) { + for (int n = 0; n < 30; ++n) // Run clean and store delay. + Run(true); + int clean_optimal_delay = GetCurrentOptimalDelayMs(); + + // A relatively large delay. 
+ const int kTargetDelayMs = (kInterarrivalJitterPacket + 10) * + kNum10msPerFrame * 10; + ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs)); + for (int n = 0; n < 300; ++n) // Run enough iterations to fill up the buffer. + Run(true); + Run(false); // Run with jitter. + + int jittery_optimal_delay = GetCurrentOptimalDelayMs(); + EXPECT_EQ(kTargetDelayMs, jittery_optimal_delay); + + int required_delay = RequiredDelay(); + + // Checking |required_delay| is in correct range. + EXPECT_GT(required_delay, 0); + EXPECT_GT(jittery_optimal_delay, required_delay); + EXPECT_GT(required_delay, clean_optimal_delay); + + // A tighter check for the value of |required_delay|. + // The jitter forces a delay of + // |kInterarrivalJitterPacket * kNum10msPerFrame * 10| milliseconds. So we + // expect |required_delay| to be close to that. + EXPECT_NEAR(kInterarrivalJitterPacket * kNum10msPerFrame * 10, + required_delay, 1); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/automode.c b/webrtc/modules/audio_coding/neteq/automode.c index edee98e84..ea6fa8d76 100644 --- a/webrtc/modules/audio_coding/neteq/automode.c +++ b/webrtc/modules/audio_coding/neteq/automode.c @@ -216,6 +216,14 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen, streamingMode); if (tempvar > 0) { + int high_lim_delay; + /* Convert the minimum delay from milliseconds to packets in Q8. + * |fsHz| is sampling rate in Hertz, and |inst->packetSpeechLenSamp| + * is the number of samples per packet (according to the last + * decoding). + */ + int32_t minimum_delay_q8 = ((inst->minimum_delay_ms * + (fsHz / 1000)) << 8) / inst->packetSpeechLenSamp; inst->optBufLevel = tempvar; if (streamingMode != 0) @@ -224,6 +232,13 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen, inst->maxCSumIatQ8); } + /* The required delay. */ + inst->required_delay_q8 = inst->optBufLevel; + + /* Maintain the target delay. */ 
+ inst->optBufLevel = WEBRTC_SPL_MAX(inst->optBufLevel, + minimum_delay_q8); + /*********/ /* Limit */ /*********/ @@ -238,8 +253,12 @@ int WebRtcNetEQ_UpdateIatStatistics(AutomodeInst_t *inst, int maxBufLen, maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */ /* Enforce upper limit; 75% of maxBufLen */ - inst->optBufLevel = WEBRTC_SPL_MIN( inst->optBufLevel, - (maxBufLen >> 1) + (maxBufLen >> 2) ); /* 1/2 + 1/4 = 75% */ + /* 1/2 + 1/4 = 75% */ + high_lim_delay = (maxBufLen >> 1) + (maxBufLen >> 2); + inst->optBufLevel = WEBRTC_SPL_MIN(inst->optBufLevel, + high_lim_delay); + inst->required_delay_q8 = WEBRTC_SPL_MIN(inst->required_delay_q8, + high_lim_delay); } else { @@ -700,6 +719,7 @@ int WebRtcNetEQ_ResetAutomode(AutomodeInst_t *inst, int maxBufLenPackets) */ inst->optBufLevel = WEBRTC_SPL_MIN(4, (maxBufLenPackets >> 1) + (maxBufLenPackets >> 1)); /* 75% of maxBufLenPackets */ + inst->required_delay_q8 = inst->optBufLevel; inst->levelFiltFact = 253; /* diff --git a/webrtc/modules/audio_coding/neteq/automode.h b/webrtc/modules/audio_coding/neteq/automode.h index 5996a5120..49878c08a 100644 --- a/webrtc/modules/audio_coding/neteq/automode.h +++ b/webrtc/modules/audio_coding/neteq/automode.h @@ -89,6 +89,12 @@ typedef struct reached 0 */ int16_t extraDelayMs; /* extra delay for sync with video */ + int minimum_delay_ms; /* Desired delay, NetEq maintains this amount of + delay unless jitter statistics suggests a higher value. */ + int required_delay_q8; /* Smallest delay required. This is computed + according to inter-arrival time and playout mode. It has the same unit + as |optBufLevel|. 
*/ + /* Peak-detection */ /* vector with the latest peak periods (peak spacing in samples) */ uint32_t peakPeriodSamp[NUM_PEAKS]; diff --git a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h index 4eefce069..021704c23 100644 --- a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h +++ b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h @@ -309,6 +309,19 @@ int WebRtcNetEQ_RecInSyncRTP(void* inst, WebRtcNetEQ_RTPInfo* rtp_info, uint32_t receive_timestamp); +/* + * Set a minimum latency for the jitter buffer. The overall delay is the max of + * |minimum_delay_ms| and the latency that is internally computed based on the + * inter-arrival times. + */ +int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms); + +/* + * Get the least required delay in milliseconds given inter-arrival times + * and playout mode. + */ +int WebRtcNetEQ_GetRequiredDelayMs(const void* inst); + #ifdef __cplusplus } #endif diff --git a/webrtc/modules/audio_coding/neteq/mcu_reset.c b/webrtc/modules/audio_coding/neteq/mcu_reset.c index 3aae4ce61..c8a4cd73a 100644 --- a/webrtc/modules/audio_coding/neteq/mcu_reset.c +++ b/webrtc/modules/audio_coding/neteq/mcu_reset.c @@ -32,7 +32,9 @@ int WebRtcNetEQ_McuReset(MCUInst_t *inst) inst->main_inst = NULL; inst->one_desc = 0; inst->BufferStat_inst.Automode_inst.extraDelayMs = 0; + inst->BufferStat_inst.Automode_inst.minimum_delay_ms = 0; inst->NetEqPlayoutMode = kPlayoutOn; + inst->av_sync = 0; WebRtcNetEQ_DbReset(&inst->codec_DB_inst); memset(&inst->PayloadSplit_inst, 0, sizeof(SplitInfo_t)); diff --git a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c index 31940c890..83479252f 100644 --- a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c +++ b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c @@ -437,6 +437,7 @@ int WebRtcNetEQ_Init(void *inst, uint16_t fs) 
NetEqMainInst->MCUinst.first_packet = 1; NetEqMainInst->MCUinst.one_desc = 0; NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.extraDelayMs = 0; + NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms = 0; NetEqMainInst->MCUinst.NoOfExpandCalls = 0; NetEqMainInst->MCUinst.fs = fs; @@ -529,6 +530,19 @@ int WebRtcNetEQ_SetExtraDelay(void *inst, int DelayInMs) return (0); } +int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms) { + MainInst_t *NetEqMainInst = (MainInst_t*) inst; + if (NetEqMainInst == NULL) + return -1; + if (minimum_delay_ms < 0 || minimum_delay_ms > 10000) { + NetEqMainInst->ErrorCode = -FAULTY_DELAYVALUE; + return -1; + } + NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms = + minimum_delay_ms; + return 0; +} + int WebRtcNetEQ_SetPlayoutMode(void *inst, enum WebRtcNetEQPlayoutMode playoutMode) { MainInst_t *NetEqMainInst = (MainInst_t*) inst; @@ -1213,7 +1227,7 @@ int WebRtcNetEQ_GetNetworkStatistics(void *inst, WebRtcNetEQ_NetworkStatistics * /* Get optimal buffer size */ /***************************/ - if (NetEqMainInst->MCUinst.fs != 0 && NetEqMainInst->MCUinst.fs <= WEBRTC_SPL_WORD16_MAX) + if (NetEqMainInst->MCUinst.fs != 0) { /* preferredBufferSize = Bopt * packSizeSamples / (fs/1000) */ stats->preferredBufferSize @@ -1693,3 +1707,25 @@ int WebRtcNetEQ_RecInSyncRTP(void* inst, WebRtcNetEQ_RTPInfo* rtp_info, } return SYNC_PAYLOAD_LEN_BYTES; } + +int WebRtcNetEQ_GetRequiredDelayMs(const void* inst) { + const MainInst_t* NetEqMainInst = (MainInst_t*)inst; + const AutomodeInst_t* auto_mode = (NetEqMainInst == NULL) ? NULL : + &NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst; + + /* Instance sanity */ + if (NetEqMainInst == NULL || auto_mode == NULL) + return 0; + + if (NetEqMainInst->MCUinst.fs == 0) + return 0; // Sampling rate not initialized. 
+ + /* |required_delay_q8| has the unit of packets in Q8 domain, therefore, + * the corresponding delay is + * required_delay_ms = (1000 * required_delay_q8 * samples_per_packet / + * sample_rate_hz) / 256; + */ + return (auto_mode->required_delay_q8 * + ((auto_mode->packetSpeechLenSamp * 1000) / NetEqMainInst->MCUinst.fs) + + 128) >> 8; +} diff --git a/webrtc/video_engine/stream_synchronization.cc b/webrtc/video_engine/stream_synchronization.cc index 6ad579ce6..9490d101f 100644 --- a/webrtc/video_engine/stream_synchronization.cc +++ b/webrtc/video_engine/stream_synchronization.cc @@ -29,12 +29,14 @@ struct ViESyncDelay { extra_video_delay_ms = 0; last_video_delay_ms = 0; extra_audio_delay_ms = 0; + last_audio_delay_ms = 0; network_delay = 120; } int extra_video_delay_ms; int last_video_delay_ms; int extra_audio_delay_ms; + int last_audio_delay_ms; int network_delay; }; @@ -87,9 +89,9 @@ bool StreamSynchronization::ComputeRelativeDelay( bool StreamSynchronization::ComputeDelays(int relative_delay_ms, int current_audio_delay_ms, - int* extra_audio_delay_ms, + int* total_audio_delay_target_ms, int* total_video_delay_target_ms) { - assert(extra_audio_delay_ms && total_video_delay_target_ms); + assert(total_audio_delay_target_ms && total_video_delay_target_ms); int current_video_delay_ms = *total_video_delay_target_ms; WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, @@ -173,17 +175,26 @@ bool StreamSynchronization::ComputeDelays(int relative_delay_ms, new_video_delay_ms = std::min(new_video_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs); - // Make sure that audio is never below our target. - channel_delay_->extra_audio_delay_ms = - std::max(base_target_delay_ms_, channel_delay_->extra_audio_delay_ms); + int new_audio_delay_ms; + if (channel_delay_->extra_audio_delay_ms > base_target_delay_ms_) { + new_audio_delay_ms = channel_delay_->extra_audio_delay_ms; + } else { + // No change to the audio delay. 
We are changing video and we only + // allow to change one at the time. + new_audio_delay_ms = channel_delay_->last_audio_delay_ms; + } + + // Make sure that we don't go below the extra audio delay. + new_audio_delay_ms = std::max( + new_audio_delay_ms, channel_delay_->extra_audio_delay_ms); // Verify we don't go above the maximum allowed audio delay. - channel_delay_->extra_audio_delay_ms = std::min( - channel_delay_->extra_audio_delay_ms, - base_target_delay_ms_ + kMaxDeltaDelayMs); + new_audio_delay_ms = + std::min(new_audio_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs); - // Remember our last video delay. + // Remember our last audio and video delays. channel_delay_->last_video_delay_ms = new_video_delay_ms; + channel_delay_->last_audio_delay_ms = new_audio_delay_ms; WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, "Sync video delay %d ms for video channel and audio delay %d for audio " @@ -192,8 +203,8 @@ bool StreamSynchronization::ComputeDelays(int relative_delay_ms, audio_channel_id_); // Return values. - *extra_audio_delay_ms = channel_delay_->extra_audio_delay_ms; *total_video_delay_target_ms = new_video_delay_ms; + *total_audio_delay_target_ms = new_audio_delay_ms; return true; } @@ -201,6 +212,8 @@ void StreamSynchronization::SetTargetBufferingDelay(int target_delay_ms) { // Initial extra delay for audio (accounting for existing extra delay). channel_delay_->extra_audio_delay_ms += target_delay_ms - base_target_delay_ms_; + channel_delay_->last_audio_delay_ms += + target_delay_ms - base_target_delay_ms_; // The video delay is compared to the last value (and how much we can update // is limited by that as well). 
diff --git a/webrtc/video_engine/vie_sync_module.cc b/webrtc/video_engine/vie_sync_module.cc index d0617d6a0..06d41965d 100644 --- a/webrtc/video_engine/vie_sync_module.cc +++ b/webrtc/video_engine/vie_sync_module.cc @@ -153,21 +153,24 @@ int32_t ViESyncModule::Process() { TRACE_COUNTER1("webrtc", "SyncCurrentAudioDelay", audio_jitter_buffer_delay_ms); TRACE_COUNTER1("webrtc", "SyncRelativeDelay", relative_delay_ms); - int extra_audio_delay_ms = 0; + int total_audio_delay_target_ms = 0; // Calculate the necessary extra audio delay and desired total video // delay to get the streams in sync. + int current_audio_delay = audio_jitter_buffer_delay_ms + + playout_buffer_delay_ms; if (!sync_->ComputeDelays(relative_delay_ms, - audio_jitter_buffer_delay_ms, - &extra_audio_delay_ms, + current_audio_delay, + &total_audio_delay_target_ms, &total_video_delay_target_ms)) { return 0; } - TRACE_COUNTER1("webrtc", "SyncExtraAudioDelayTarget", extra_audio_delay_ms); + TRACE_COUNTER1("webrtc", "SyncTotalAudioDelayTarget", + total_audio_delay_target_ms); TRACE_COUNTER1("webrtc", "SyncTotalVideoDelayTarget", total_video_delay_target_ms); if (voe_sync_interface_->SetMinimumPlayoutDelay( - voe_channel_id_, extra_audio_delay_ms) == -1) { + voe_channel_id_, total_audio_delay_target_ms) == -1) { WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, vie_channel_->Id(), "Error setting voice delay"); } diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 0728990c3..936ddd188 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -950,6 +950,7 @@ Channel::Channel(int32_t channelId, _countDeadDetections(0), _outputSpeechType(AudioFrame::kNormalSpeech), _average_jitter_buffer_delay_us(0), + least_required_delay_ms_(0), _previousTimestamp(0), _recPacketDelayMs(20), _RxVadDetection(false), @@ -5092,6 +5093,9 @@ void Channel::UpdatePacketDelay(uint32_t rtp_timestamp, return; } + // Update the least required delay. 
+ least_required_delay_ms_ = _audioCodingModule.LeastRequiredDelayMs(); + if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) { // Even though the actual sampling rate for G.722 audio is // 16,000 Hz, the RTP clock rate for the G722 payload format is diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index 391415620..1bf5e5124 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -205,6 +205,7 @@ public: // VoEVideoSync bool GetDelayEstimate(int* jitter_buffer_delay_ms, int* playout_buffer_delay_ms) const; + int least_required_delay_ms() const { return least_required_delay_ms_; } int SetInitialPlayoutDelay(int delay_ms); int SetMinimumPlayoutDelay(int delayMs); int GetPlayoutTimestamp(unsigned int& timestamp); @@ -536,6 +537,7 @@ private: AudioFrame::SpeechType _outputSpeechType; // VoEVideoSync uint32_t _average_jitter_buffer_delay_us; + int least_required_delay_ms_; uint32_t _previousTimestamp; uint16_t _recPacketDelayMs; // VoEAudioProcessing diff --git a/webrtc/voice_engine/include/voe_video_sync.h b/webrtc/voice_engine/include/voe_video_sync.h index 857422ee7..a3770eabb 100644 --- a/webrtc/voice_engine/include/voe_video_sync.h +++ b/webrtc/voice_engine/include/voe_video_sync.h @@ -57,11 +57,18 @@ public: // Gets the current sound card buffer size (playout delay). virtual int GetPlayoutBufferSize(int& buffer_ms) = 0; - // Sets an additional delay for the playout jitter buffer. + // Sets a minimum target delay for the jitter buffer. This delay is + // maintained by the jitter buffer, unless channel condition (jitter in + // inter-arrival times) dictates a higher required delay. The overall + // jitter buffer delay is max of |delay_ms| and the latency that NetEq + // computes based on inter-arrival times and its playout mode. virtual int SetMinimumPlayoutDelay(int channel, int delay_ms) = 0; // Sets an initial delay for the playout jitter buffer. 
The playout of the - // audio is delayed by |delay_ms| in millisecond. + // audio is delayed by |delay_ms| in milliseconds. Thereafter, the delay is + // maintained, unless NetEq's internal mechanism requires a higher latency. + // Such a latency is computed based on inter-arrival times and NetEq's + // playout mode. virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0; // Gets the |jitter_buffer_delay_ms| (including the algorithmic delay), and @@ -70,6 +77,12 @@ public: int* jitter_buffer_delay_ms, int* playout_buffer_delay_ms) = 0; + // Returns the least required jitter buffer delay. This is computed by the + // jitter buffer based on the inter-arrival time of RTP packets and + // playout mode. NetEq maintains this latency unless a higher value is + // requested by calling SetMinimumPlayoutDelay(). + virtual int GetLeastRequiredDelayMs(int channel) const = 0; + // Manual initialization of the RTP timestamp. virtual int SetInitTimestamp(int channel, unsigned int timestamp) = 0; diff --git a/webrtc/voice_engine/voe_video_sync_impl.cc b/webrtc/voice_engine/voe_video_sync_impl.cc index 8db2e688f..91c075035 100644 --- a/webrtc/voice_engine/voe_video_sync_impl.cc +++ b/webrtc/voice_engine/voe_video_sync_impl.cc @@ -237,6 +237,24 @@ int VoEVideoSyncImpl::GetRtpRtcp(int channel, RtpRtcp* &rtpRtcpModule) return channelPtr->GetRtpRtcp(rtpRtcpModule); } +int VoEVideoSyncImpl::GetLeastRequiredDelayMs(int channel) const { + WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1), + "GetLeastRequiredDelayMS(channel=%d)", channel); + IPHONE_NOT_SUPPORTED(_shared->statistics()); + + if (!_shared->statistics().Initialized()) { + _shared->SetLastError(VE_NOT_INITED, kTraceError); + return -1; + } + voe::ScopedChannel sc(_shared->channel_manager(), channel); + voe::Channel* channel_ptr = sc.ChannelPtr(); + if (channel_ptr == NULL) { + _shared->SetLastError(VE_CHANNEL_NOT_VALID, kTraceError, + "GetLeastRequiredDelayMs() failed to locate channel"); 
+ return -1; + } + return channel_ptr->least_required_delay_ms(); +} #endif // #ifdef WEBRTC_VOICE_ENGINE_VIDEO_SYNC_API diff --git a/webrtc/voice_engine/voe_video_sync_impl.h b/webrtc/voice_engine/voe_video_sync_impl.h index fafefd106..932c8cd62 100644 --- a/webrtc/voice_engine/voe_video_sync_impl.h +++ b/webrtc/voice_engine/voe_video_sync_impl.h @@ -30,6 +30,8 @@ public: int* jitter_buffer_delay_ms, int* playout_buffer_delay_ms); + virtual int GetLeastRequiredDelayMs(int channel) const; + virtual int SetInitTimestamp(int channel, unsigned int timestamp); virtual int SetInitSequenceNumber(int channel, short sequenceNumber);