Add interface to propagate audio capture timestamp to the renderer.

BUG=3111
R=andrew@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/12239004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6189 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
wu@webrtc.org 2014-05-19 17:39:11 +00:00
parent ebb467fdc8
commit cb711f77d2
15 changed files with 133 additions and 23 deletions

View File

@ -728,11 +728,22 @@ void FakeAudioCaptureModule::ReceiveFrameP() {
}
ResetRecBuffer();
uint32_t nSamplesOut = 0;
#ifdef USE_WEBRTC_DEV_BRANCH
uint32_t rtp_timestamp = 0;
int64_t ntp_time_ms = 0;
if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
kNumberOfChannels, kSamplesPerSecond,
rec_buffer_, nSamplesOut,
&rtp_timestamp, &ntp_time_ms) != 0) {
ASSERT(false);
}
#else
if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
kNumberOfChannels, kSamplesPerSecond,
rec_buffer_, nSamplesOut) != 0) {
ASSERT(false);
}
#endif
ASSERT(nSamplesOut == kNumberSamples);
}
// The SetBuffer() function ensures that after decoding, the audio buffer

View File

@ -84,13 +84,23 @@ class FakeAdmTest : public testing::Test,
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
#ifdef USE_WEBRTC_DEV_BRANCH
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {
#else
uint32_t& nSamplesOut) {
#endif
++pull_iterations_;
const uint32_t audio_buffer_size = nSamples * nBytesPerSample;
const uint32_t bytes_out = RecordedDataReceived() ?
CopyFromRecBuffer(audioSamples, audio_buffer_size):
GenerateZeroBuffer(audioSamples, audio_buffer_size);
nSamplesOut = bytes_out / nBytesPerSample;
#ifdef USE_WEBRTC_DEV_BRANCH
*rtp_timestamp = 0;
*ntp_time_ms = 0;
#endif
return 0;
}

View File

@ -473,6 +473,12 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
previous_audio_activity_ = audio_frame->vad_activity_;
call_stats_.DecodedByNetEq(audio_frame->speech_type_);
// Computes the RTP timestamp of the first sample in |audio_frame| from
// |PlayoutTimestamp|, which is the timestamp of the last sample of
// |audio_frame|.
audio_frame->timestamp_ =
PlayoutTimestamp() - audio_frame->samples_per_channel_;
return 0;
}

View File

@ -548,13 +548,16 @@ int32_t AudioDeviceBuffer::RequestPlayoutData(uint32_t nSamples)
if (_ptrCbAudioTransport)
{
uint32_t res(0);
uint32_t rtp_timestamp = 0;
int64_t ntp_time_ms = 0;
res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples,
playBytesPerSample,
playChannels,
playSampleRate,
&_playBuffer[0],
nSamplesOut);
nSamplesOut,
&rtp_timestamp,
&ntp_time_ms);
if (res != 0)
{
WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id, "NeedMorePlayData() failed");

View File

@ -63,14 +63,16 @@ public:
const int32_t clockDrift,
const uint32_t currentMicLevel,
const bool keyPressed,
uint32_t& newMicLevel) = 0;
uint32_t& newMicLevel) = 0;
virtual int32_t NeedMorePlayData(const uint32_t nSamples,
const uint8_t nBytesPerSample,
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut) = 0;
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) = 0;
// Method to pass captured data directly and unmixed to network channels.
// |channel_ids| contains a list of VoE channels which are the
@ -125,7 +127,9 @@ public:
// channel.
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data) {}
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {}
protected:
virtual ~AudioTransport() {}

View File

@ -116,7 +116,9 @@ class AudioTransportAPI: public AudioTransport {
const uint8_t nChannels,
const uint32_t sampleRate,
void* audioSamples,
uint32_t& nSamplesOut) {
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {
play_count_++;
if (play_count_ % 100 == 0) {
if (nChannels == 1) {
@ -149,7 +151,9 @@ class AudioTransportAPI: public AudioTransport {
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data) {}
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {}
private:
uint32_t rec_count_;
uint32_t play_count_;

View File

@ -292,7 +292,9 @@ int32_t AudioTransportImpl::NeedMorePlayData(
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut)
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms)
{
if (_fullDuplex)
{
@ -551,7 +553,9 @@ void AudioTransportImpl::PushCaptureData(int voe_channel,
void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels,
int number_of_frames,
void* audio_data) {}
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {}
FuncTestManager::FuncTestManager() :
_processThread(NULL),

View File

@ -118,7 +118,9 @@ public:
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut);
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms);
virtual int OnDataAvailable(const int voe_channels[],
int number_of_voe_channels,
@ -138,7 +140,9 @@ public:
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data);
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms);
AudioTransportImpl(AudioDeviceModule* audioDevice);
~AudioTransportImpl();

View File

@ -684,7 +684,10 @@ class AudioFrame {
AudioFrame& operator-=(const AudioFrame& rhs);
int id_;
// RTP timestamp of the first sample in the AudioFrame.
uint32_t timestamp_;
// NTP time of the estimated capture time in local timebase in milliseconds.
int64_t ntp_time_ms_;
int16_t data_[kMaxDataSizeSamples];
int samples_per_channel_;
int sample_rate_hz_;
@ -705,6 +708,7 @@ class AudioFrame {
inline AudioFrame::AudioFrame()
: id_(-1),
timestamp_(0),
ntp_time_ms_(0),
data_(),
samples_per_channel_(0),
sample_rate_hz_(0),

View File

@ -121,13 +121,17 @@ void FakeAudioDevice::CaptureAudio() {
samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms,
kBufferSizeBytes / 2);
uint32_t samples_out = 0;
uint32_t rtp_timestamp = 0;
int64_t ntp_time_ms = 0;
EXPECT_EQ(0,
audio_callback_->NeedMorePlayData(samples_needed,
2,
1,
kFrequencyHz,
playout_buffer_,
samples_out));
samples_out,
&rtp_timestamp,
&ntp_time_ms));
}
}
tick_->Wait(WEBRTC_EVENT_INFINITE);

View File

@ -664,6 +664,25 @@ int32_t Channel::GetAudioFrame(int32_t id, AudioFrame& audioFrame)
// Measure audio level (0-9)
_outputAudioLevel.ComputeLevel(audioFrame);
// TODO(wu): Calculate capture NTP time based on RTP timestamp and RTCP SR.
// Until that is implemented |ntp_time_ms_| is forced to 0 here, so the
// branch below that derives |capture_start_ntp_time_ms_| is not taken yet.
audioFrame.ntp_time_ms_ = 0;
if (!first_frame_arrived_) {
  // Remember the RTP timestamp of the first frame; later frames measure
  // their elapsed time relative to it.
  first_frame_arrived_ = true;
  capture_start_rtp_time_stamp_ = audioFrame.timestamp_;
} else if (audioFrame.ntp_time_ms_ > 0 && audioFrame.sample_rate_hz_ > 0) {
  // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received.
  // Compute |capture_start_ntp_time_ms_| so that
  // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_|.
  //
  // Unsigned subtraction keeps the delta correct across a wrap of the 32-bit
  // RTP timestamp.
  const uint32_t elapsed_samples =
      audioFrame.timestamp_ - capture_start_rtp_time_stamp_;
  // Convert samples to milliseconds: samples * 1000 / rate. Multiplying
  // before dividing avoids truncation for rates that are not a multiple of
  // 1000 Hz; the 64-bit intermediate avoids overflow. (Dividing by
  // rate * 1000 instead would make the result ~0 for any realistic input.)
  // NOTE(review): assumes RTP timestamps tick at |sample_rate_hz_| - confirm.
  const int64_t elapsed_time_ms =
      static_cast<int64_t>(elapsed_samples) * 1000 /
      audioFrame.sample_rate_hz_;
  // Only the write to the shared statistic needs the lock.
  CriticalSectionScoped lock(ts_stats_lock_.get());
  capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms;
}
return 0;
}
@ -836,6 +855,10 @@ Channel::Channel(int32_t channelId,
playout_delay_ms_(0),
_numberOfDiscardedPackets(0),
send_sequence_number_(0),
ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()),
first_frame_arrived_(false),
capture_start_rtp_time_stamp_(0),
capture_start_ntp_time_ms_(-1),
_engineStatisticsPtr(NULL),
_outputMixerPtr(NULL),
_transmitMixerPtr(NULL),
@ -3371,7 +3394,7 @@ int Channel::GetRemoteRTCPReportBlocks(
int
Channel::GetRTPStatistics(CallStatistics& stats)
{
// --- Part one of the final structure (four values)
// --- RtcpStatistics
// The jitter statistics are updated for each received RTP packet and are
// based on received packets.
@ -3398,7 +3421,7 @@ Channel::GetRTPStatistics(CallStatistics& stats)
stats.fractionLost, stats.cumulativeLost, stats.extendedMax,
stats.jitterSamples);
// --- Part two of the final structure (one value)
// --- RTT
uint16_t RTT(0);
RTCPMethod method = _rtpRtcpModule->RTCP();
@ -3441,7 +3464,7 @@ Channel::GetRTPStatistics(CallStatistics& stats)
VoEId(_instanceId, _channelId),
"GetRTPStatistics() => rttMs=%d", stats.rttMs);
// --- Part three of the final structure (four values)
// --- Data counters
uint32_t bytesSent(0);
uint32_t packetsSent(0);
@ -3473,6 +3496,11 @@ Channel::GetRTPStatistics(CallStatistics& stats)
stats.bytesSent, stats.packetsSent, stats.bytesReceived,
stats.packetsReceived);
// --- Timestamps
{
CriticalSectionScoped lock(ts_stats_lock_.get());
stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_;
}
return 0;
}

View File

@ -540,6 +540,15 @@ private:
uint16_t send_sequence_number_;
uint8_t restored_packet_[kVoiceEngineMaxIpPacketSizeBytes];
scoped_ptr<CriticalSectionWrapper> ts_stats_lock_;
bool first_frame_arrived_;
// The rtp timestamp of the first played out audio frame.
uint32_t capture_start_rtp_time_stamp_;
// The capture ntp time (in local timebase) of the first played out audio
// frame.
int64_t capture_start_ntp_time_ms_;
// uses
Statistics* _engineStatisticsPtr;
OutputMixer* _outputMixerPtr;

View File

@ -86,6 +86,9 @@ struct CallStatistics
int packetsSent;
int bytesReceived;
int packetsReceived;
// The capture ntp time (in local timebase) of the first played out audio
// frame.
int64_t capture_start_ntp_time_ms_;
};
// See section 6.4.1 in http://www.ietf.org/rfc/rfc3550.txt for details.

View File

@ -148,7 +148,9 @@ int32_t VoEBaseImpl::NeedMorePlayData(
uint8_t nChannels,
uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut)
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms)
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1),
"VoEBaseImpl::NeedMorePlayData(nSamples=%u, "
@ -157,7 +159,8 @@ int32_t VoEBaseImpl::NeedMorePlayData(
GetPlayoutData(static_cast<int>(samplesPerSec),
static_cast<int>(nChannels),
static_cast<int>(nSamples), true, audioSamples);
static_cast<int>(nSamples), true, audioSamples,
rtp_timestamp, ntp_time_ms);
nSamplesOut = _audioFrame.samples_per_channel_;
@ -233,12 +236,14 @@ void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data,
void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data) {
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {
assert(bits_per_sample == 16);
assert(number_of_frames == static_cast<int>(sample_rate / 100));
GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false,
audio_data);
audio_data, rtp_timestamp, ntp_time_ms);
}
int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
@ -1081,7 +1086,9 @@ int VoEBaseImpl::ProcessRecordedDataWithAPM(
void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels,
int number_of_frames, bool feed_data_to_apm,
void* audio_data) {
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms) {
assert(_shared->output_mixer() != NULL);
// TODO(andrew): if the device is running in mono, we should tell the mixer
@ -1102,6 +1109,9 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels,
// Deliver audio (PCM) samples to the ADM
memcpy(audio_data, _audioFrame.data_,
sizeof(int16_t) * number_of_frames * number_of_channels);
*rtp_timestamp = _audioFrame.timestamp_;
*ntp_time_ms = _audioFrame.ntp_time_ms_;
}
} // namespace webrtc

View File

@ -79,7 +79,9 @@ public:
uint8_t nChannels,
uint32_t samplesPerSec,
void* audioSamples,
uint32_t& nSamplesOut);
uint32_t& nSamplesOut,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms);
virtual int OnDataAvailable(const int voe_channels[],
int number_of_voe_channels,
@ -102,7 +104,9 @@ public:
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
void* audio_data);
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms);
// AudioDeviceObserver
virtual void OnErrorIsReported(ErrorCode error);
@ -138,7 +142,9 @@ private:
void GetPlayoutData(int sample_rate, int number_of_channels,
int number_of_frames, bool feed_data_to_apm,
void* audio_data);
void* audio_data,
uint32_t* rtp_timestamp,
int64_t* ntp_time_ms);
int32_t AddBuildInfo(char* str) const;
int32_t AddVoEVersion(char* str) const;