Add interface to propagate audio capture timestamp to the renderer.

BUG=3111
R=andrew@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/12239004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6189 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-05-19 17:39:11 +00:00 · 2014-05-19 17:39:11 +00:00 · cb711f77d2
commit cb711f77d2
parent ebb467fdc8
15 changed files with 133 additions and 23 deletions
--- a/talk/app/webrtc/test/fakeaudiocapturemodule.cc
+++ b/talk/app/webrtc/test/fakeaudiocapturemodule.cc
@ -728,11 +728,22 @@ void FakeAudioCaptureModule::ReceiveFrameP() {
    }
    ResetRecBuffer();
    uint32_t nSamplesOut = 0;
+#ifdef USE_WEBRTC_DEV_BRANCH
+    uint32_t rtp_timestamp = 0;
+    int64_t ntp_time_ms = 0;
+    if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
+                                         kNumberOfChannels, kSamplesPerSecond,
+                                         rec_buffer_, nSamplesOut,
+                                         &rtp_timestamp, &ntp_time_ms) != 0) {
+      ASSERT(false);
+    }
+#else
    if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
                                         kNumberOfChannels, kSamplesPerSecond,
                                         rec_buffer_, nSamplesOut) != 0) {
      ASSERT(false);
    }
+#endif
    ASSERT(nSamplesOut == kNumberSamples);
  }
  // The SetBuffer() function ensures that after decoding, the audio buffer
--- a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc
+++ b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc
@ -84,13 +84,23 @@ class FakeAdmTest : public testing::Test,
                                   const uint8_t nChannels,
                                   const uint32_t samplesPerSec,
                                   void* audioSamples,
+#ifdef USE_WEBRTC_DEV_BRANCH
+                                   uint32_t& nSamplesOut,
+                                   uint32_t* rtp_timestamp,
+                                   int64_t* ntp_time_ms) {
+#else
                                   uint32_t& nSamplesOut) {
+#endif
    ++pull_iterations_;
    const uint32_t audio_buffer_size = nSamples * nBytesPerSample;
    const uint32_t bytes_out = RecordedDataReceived() ?
        CopyFromRecBuffer(audioSamples, audio_buffer_size):
        GenerateZeroBuffer(audioSamples, audio_buffer_size);
    nSamplesOut = bytes_out / nBytesPerSample;
+#ifdef USE_WEBRTC_DEV_BRANCH
+    *rtp_timestamp = 0;
+    *ntp_time_ms = 0;
+#endif
    return 0;
  }

--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@ -473,6 +473,12 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
  SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
  previous_audio_activity_ = audio_frame->vad_activity_;
  call_stats_.DecodedByNetEq(audio_frame->speech_type_);
+
+  // Computes the RTP timestamp of the first sample in |audio_frame| from
+  // |PlayoutTimestamp|, which is the timestamp of the last sample of
+  // |audio_frame|.
+  audio_frame->timestamp_ =
+      PlayoutTimestamp() - audio_frame->samples_per_channel_;
  return 0;
 }

--- a/webrtc/modules/audio_device/audio_device_buffer.cc
+++ b/webrtc/modules/audio_device/audio_device_buffer.cc
@ -548,13 +548,16 @@ int32_t AudioDeviceBuffer::RequestPlayoutData(uint32_t nSamples)
    if (_ptrCbAudioTransport)
    {
        uint32_t res(0);
-
+        uint32_t rtp_timestamp = 0;
+        int64_t ntp_time_ms = 0;
        res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples,
                                                     playBytesPerSample,
                                                     playChannels,
                                                     playSampleRate,
                                                     &_playBuffer[0],
-                                                     nSamplesOut);
+                                                     nSamplesOut,
+                                                     &rtp_timestamp,
+                                                     &ntp_time_ms);
        if (res != 0)
        {
            WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id, "NeedMorePlayData() failed");
--- a/webrtc/modules/audio_device/include/audio_device_defines.h
+++ b/webrtc/modules/audio_device/include/audio_device_defines.h
@ -63,14 +63,16 @@ public:
                                            const int32_t clockDrift,
                                            const uint32_t currentMicLevel,
                                            const bool keyPressed,
-                                            uint32_t& newMicLevel) = 0;   
+                                            uint32_t& newMicLevel) = 0;

    virtual int32_t NeedMorePlayData(const uint32_t nSamples,
                                     const uint8_t nBytesPerSample,
                                     const uint8_t nChannels,
                                     const uint32_t samplesPerSec,
                                     void* audioSamples,
-                                     uint32_t& nSamplesOut) = 0;
+                                     uint32_t& nSamplesOut,
+                                     uint32_t* rtp_timestamp,
+                                     int64_t* ntp_time_ms) = 0;

    // Method to pass captured data directly and unmixed to network channels.
    // |channel_ids| contains a list of VoE channels which are the
@ -125,7 +127,9 @@ public:
    // channel.
    virtual void PullRenderData(int bits_per_sample, int sample_rate,
                                int number_of_channels, int number_of_frames,
-                                void* audio_data) {}
+                                void* audio_data,
+                                uint32_t* rtp_timestamp,
+                                int64_t* ntp_time_ms) {}

 protected:
    virtual ~AudioTransport() {}
--- a/webrtc/modules/audio_device/test/audio_device_test_api.cc
+++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc
@ -116,7 +116,9 @@ class AudioTransportAPI: public AudioTransport {
      const uint8_t nChannels,
      const uint32_t sampleRate,
      void* audioSamples,
-      uint32_t& nSamplesOut) {
+      uint32_t& nSamplesOut,
+      uint32_t* rtp_timestamp,
+      int64_t* ntp_time_ms) {
    play_count_++;
    if (play_count_ % 100 == 0) {
      if (nChannels == 1) {
@ -149,7 +151,9 @@ class AudioTransportAPI: public AudioTransport {

  virtual void PullRenderData(int bits_per_sample, int sample_rate,
                              int number_of_channels, int number_of_frames,
-                              void* audio_data) {}
+                              void* audio_data,
+                              uint32_t* rtp_timestamp,
+                              int64_t* ntp_time_ms) {}
 private:
  uint32_t rec_count_;
  uint32_t play_count_;
--- a/webrtc/modules/audio_device/test/func_test_manager.cc
+++ b/webrtc/modules/audio_device/test/func_test_manager.cc
@ -292,7 +292,9 @@ int32_t AudioTransportImpl::NeedMorePlayData(
    const uint8_t nChannels,
    const uint32_t samplesPerSec,
    void* audioSamples,
-    uint32_t& nSamplesOut)
+    uint32_t& nSamplesOut,
+    uint32_t* rtp_timestamp,
+    int64_t* ntp_time_ms)
 {
    if (_fullDuplex)
    {
@ -551,7 +553,9 @@ void AudioTransportImpl::PushCaptureData(int voe_channel,
 void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate,
                                        int number_of_channels,
                                        int number_of_frames,
-                                        void* audio_data) {}
+                                        void* audio_data,
+                                        uint32_t* rtp_timestamp,
+                                        int64_t* ntp_time_ms) {}

 FuncTestManager::FuncTestManager() :
    _processThread(NULL),
--- a/webrtc/modules/audio_device/test/func_test_manager.h
+++ b/webrtc/modules/audio_device/test/func_test_manager.h
@ -118,7 +118,9 @@ public:
                                     const uint8_t nChannels,
                                     const uint32_t samplesPerSec,
                                     void* audioSamples,
-                                     uint32_t& nSamplesOut);
+                                     uint32_t& nSamplesOut,
+                                     uint32_t* rtp_timestamp,
+                                     int64_t* ntp_time_ms);

    virtual int OnDataAvailable(const int voe_channels[],
                                int number_of_voe_channels,
@ -138,7 +140,9 @@ public:

    virtual void PullRenderData(int bits_per_sample, int sample_rate,
                                int number_of_channels, int number_of_frames,
-                                void* audio_data);
+                                void* audio_data,
+                                uint32_t* rtp_timestamp,
+                                int64_t* ntp_time_ms);

    AudioTransportImpl(AudioDeviceModule* audioDevice);
    ~AudioTransportImpl();
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@ -684,7 +684,10 @@ class AudioFrame {
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
+  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
+  // Estimated capture time as NTP time in the local timebase, in milliseconds.
+  int64_t ntp_time_ms_;
  int16_t data_[kMaxDataSizeSamples];
  int samples_per_channel_;
  int sample_rate_hz_;
@ -705,6 +708,7 @@ class AudioFrame {
 inline AudioFrame::AudioFrame()
    : id_(-1),
      timestamp_(0),
+      ntp_time_ms_(0),
      data_(),
      samples_per_channel_(0),
      sample_rate_hz_(0),
--- a/webrtc/test/fake_audio_device.cc
+++ b/webrtc/test/fake_audio_device.cc
@ -121,13 +121,17 @@ void FakeAudioDevice::CaptureAudio() {
        samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms,
                                  kBufferSizeBytes / 2);
      uint32_t samples_out = 0;
+      uint32_t rtp_timestamp = 0;
+      int64_t ntp_time_ms = 0;
      EXPECT_EQ(0,
                audio_callback_->NeedMorePlayData(samples_needed,
                                                  2,
                                                  1,
                                                  kFrequencyHz,
                                                  playout_buffer_,
-                                                  samples_out));
+                                                  samples_out,
+                                                  &rtp_timestamp,
+                                                  &ntp_time_ms));
    }
  }
  tick_->Wait(WEBRTC_EVENT_INFINITE);
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@ -664,6 +664,25 @@ int32_t Channel::GetAudioFrame(int32_t id, AudioFrame& audioFrame)
    // Measure audio level (0-9)
    _outputAudioLevel.ComputeLevel(audioFrame);

+    // TODO(wu): Calculate capture NTP time based on RTP timestamp and RTCP SR.
+    audioFrame.ntp_time_ms_ = 0;
+
+    if (!first_frame_arrived_) {
+      first_frame_arrived_ = true;
+      capture_start_rtp_time_stamp_ = audioFrame.timestamp_;
+    } else {
+      // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received.
+      if (audioFrame.ntp_time_ms_ > 0) {
+        // Convert elapsed RTP ticks to ms (ticks / (Hz / 1000)) so that
+        // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_|
+        CriticalSectionScoped lock(ts_stats_lock_.get());
+        uint32_t elapsed_time_ms =
+            (audioFrame.timestamp_ - capture_start_rtp_time_stamp_) /
+            (audioFrame.sample_rate_hz_ / 1000);
+        capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms;
+      }
+    }
+
    return 0;
 }

@ -836,6 +855,10 @@ Channel::Channel(int32_t channelId,
    playout_delay_ms_(0),
    _numberOfDiscardedPackets(0),
    send_sequence_number_(0),
+    ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()),
+    first_frame_arrived_(false),
+    capture_start_rtp_time_stamp_(0),
+    capture_start_ntp_time_ms_(-1),
    _engineStatisticsPtr(NULL),
    _outputMixerPtr(NULL),
    _transmitMixerPtr(NULL),
@ -3371,7 +3394,7 @@ int Channel::GetRemoteRTCPReportBlocks(
 int
 Channel::GetRTPStatistics(CallStatistics& stats)
 {
-    // --- Part one of the final structure (four values)
+    // --- RtcpStatistics

    // The jitter statistics is updated for each received RTP packet and is
    // based on received packets.
@ -3398,7 +3421,7 @@ Channel::GetRTPStatistics(CallStatistics& stats)
                 stats.fractionLost, stats.cumulativeLost, stats.extendedMax,
                 stats.jitterSamples);

-    // --- Part two of the final structure (one value)
+    // --- RTT

    uint16_t RTT(0);
    RTCPMethod method = _rtpRtcpModule->RTCP();
@ -3441,7 +3464,7 @@ Channel::GetRTPStatistics(CallStatistics& stats)
                 VoEId(_instanceId, _channelId),
                 "GetRTPStatistics() => rttMs=%d", stats.rttMs);

-    // --- Part three of the final structure (four values)
+    // --- Data counters

    uint32_t bytesSent(0);
    uint32_t packetsSent(0);
@ -3473,6 +3496,11 @@ Channel::GetRTPStatistics(CallStatistics& stats)
                 stats.bytesSent, stats.packetsSent, stats.bytesReceived,
                 stats.packetsReceived);

+    // --- Timestamps
+    {
+      CriticalSectionScoped lock(ts_stats_lock_.get());
+      stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_;
+    }
    return 0;
 }

--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@ -540,6 +540,15 @@ private:
    uint16_t send_sequence_number_;
    uint8_t restored_packet_[kVoiceEngineMaxIpPacketSizeBytes];

+    scoped_ptr<CriticalSectionWrapper> ts_stats_lock_;
+
+    bool first_frame_arrived_;
+    // The rtp timestamp of the first played out audio frame.
+    uint32_t capture_start_rtp_time_stamp_;
+    // The capture ntp time (in local timebase) of the first played out audio
+    // frame.
+    int64_t capture_start_ntp_time_ms_;
+
    // uses
    Statistics* _engineStatisticsPtr;
    OutputMixer* _outputMixerPtr;
--- a/webrtc/voice_engine/include/voe_rtp_rtcp.h
+++ b/webrtc/voice_engine/include/voe_rtp_rtcp.h
@ -86,6 +86,9 @@ struct CallStatistics
    int packetsSent;
    int bytesReceived;
    int packetsReceived;
+    // The capture ntp time (in local timebase) of the first played out audio
+    // frame.
+    int64_t capture_start_ntp_time_ms_;
 };

 // See section 6.4.1 in http://www.ietf.org/rfc/rfc3550.txt for details.
--- a/webrtc/voice_engine/voe_base_impl.cc
+++ b/webrtc/voice_engine/voe_base_impl.cc
@ -148,7 +148,9 @@ int32_t VoEBaseImpl::NeedMorePlayData(
        uint8_t nChannels,
        uint32_t samplesPerSec,
        void* audioSamples,
-        uint32_t& nSamplesOut)
+        uint32_t& nSamplesOut,
+        uint32_t* rtp_timestamp,
+        int64_t* ntp_time_ms)
 {
  WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1),
               "VoEBaseImpl::NeedMorePlayData(nSamples=%u, "
@ -157,7 +159,8 @@ int32_t VoEBaseImpl::NeedMorePlayData(

  GetPlayoutData(static_cast<int>(samplesPerSec),
                 static_cast<int>(nChannels),
-                 static_cast<int>(nSamples), true, audioSamples);
+                 static_cast<int>(nSamples), true, audioSamples,
+                 rtp_timestamp, ntp_time_ms);

  nSamplesOut = _audioFrame.samples_per_channel_;

@ -233,12 +236,14 @@ void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data,

 void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate,
                                 int number_of_channels, int number_of_frames,
-                                 void* audio_data) {
+                                 void* audio_data,
+                                 uint32_t* rtp_timestamp,
+                                 int64_t* ntp_time_ms) {
  assert(bits_per_sample == 16);
  assert(number_of_frames == static_cast<int>(sample_rate / 100));

  GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false,
-                 audio_data);
+                 audio_data, rtp_timestamp, ntp_time_ms);
 }

 int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
@ -1081,7 +1086,9 @@ int VoEBaseImpl::ProcessRecordedDataWithAPM(

 void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels,
                                 int number_of_frames, bool feed_data_to_apm,
-                                 void* audio_data) {
+                                 void* audio_data,
+                                 uint32_t* rtp_timestamp,
+                                 int64_t* ntp_time_ms) {
  assert(_shared->output_mixer() != NULL);

  // TODO(andrew): if the device is running in mono, we should tell the mixer
@ -1102,6 +1109,9 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels,
  // Deliver audio (PCM) samples to the ADM
  memcpy(audio_data, _audioFrame.data_,
         sizeof(int16_t) * number_of_frames * number_of_channels);
+
+  *rtp_timestamp = _audioFrame.timestamp_;
+  *ntp_time_ms = _audioFrame.ntp_time_ms_;
 }

 }  // namespace webrtc
--- a/webrtc/voice_engine/voe_base_impl.h
+++ b/webrtc/voice_engine/voe_base_impl.h
@ -79,7 +79,9 @@ public:
                                     uint8_t nChannels,
                                     uint32_t samplesPerSec,
                                     void* audioSamples,
-                                     uint32_t& nSamplesOut);
+                                     uint32_t& nSamplesOut,
+                                     uint32_t* rtp_timestamp,
+                                     int64_t* ntp_time_ms);

    virtual int OnDataAvailable(const int voe_channels[],
                                int number_of_voe_channels,
@ -102,7 +104,9 @@ public:

    virtual void PullRenderData(int bits_per_sample, int sample_rate,
                                int number_of_channels, int number_of_frames,
-                                void* audio_data);
+                                void* audio_data,
+                                uint32_t* rtp_timestamp,
+                                int64_t* ntp_time_ms);

    // AudioDeviceObserver
    virtual void OnErrorIsReported(ErrorCode error);
@ -138,7 +142,9 @@ private:

    void GetPlayoutData(int sample_rate, int number_of_channels,
                        int number_of_frames, bool feed_data_to_apm,
-                        void* audio_data);
+                        void* audio_data,
+                        uint32_t* rtp_timestamp,
+                        int64_t* ntp_time_ms);

    int32_t AddBuildInfo(char* str) const;
    int32_t AddVoEVersion(char* str) const;