Add new methods to AudioEncoder interface

The following three methods are added:
rtp_timestamp_rate_hz()
SetTargetBitrate()
SetProjectedPacketLossRate()

Default implementations are provided, and a few overrides are
implemented. AudioEncoderCopyRed and AudioEncoderCng propagate the new
methods to the underlying speech codec.

BUG=3926
COAUTHOR:kwiberg@webrtc.org

R=tina.legrand@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/34049004

Cr-Commit-Position: refs/heads/master@{#8171}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8171 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
henrik.lundin@webrtc.org 2015-01-27 18:24:45 +00:00
parent 5614cf16e7
commit 478cedc055
19 changed files with 175 additions and 30 deletions

View File

@ -18,4 +18,8 @@ AudioEncoder::EncodedInfo::EncodedInfo() : EncodedInfoLeaf() {
AudioEncoder::EncodedInfo::~EncodedInfo() {
}
// Default implementation: reports the encoder's own sample rate as the RTP
// timestamp rate. The method is virtual, so encoders whose RTP clock differs
// from their sample rate can override it.
int AudioEncoder::rtp_timestamp_rate_hz() const {
return sample_rate_hz();
}
} // namespace webrtc

View File

@ -58,7 +58,7 @@ class AudioEncoder {
// In case of error, false is returned, otherwise true. It is an error for the
// encoder to attempt to produce more than |max_encoded_bytes| bytes of
// output.
bool Encode(uint32_t timestamp,
bool Encode(uint32_t rtp_timestamp,
const int16_t* audio,
size_t num_samples_per_channel,
size_t max_encoded_bytes,
@ -66,11 +66,8 @@ class AudioEncoder {
EncodedInfo* info) {
CHECK_EQ(num_samples_per_channel,
static_cast<size_t>(sample_rate_hz() / 100));
bool ret = EncodeInternal(timestamp,
audio,
max_encoded_bytes,
encoded,
info);
bool ret =
EncodeInternal(rtp_timestamp, audio, max_encoded_bytes, encoded, info);
CHECK_LE(info->encoded_bytes, max_encoded_bytes);
return ret;
}
@ -80,6 +77,10 @@ class AudioEncoder {
virtual int sample_rate_hz() const = 0;
virtual int num_channels() const = 0;
// Returns the rate with which the RTP timestamps are updated. By default,
// this is the same as sample_rate_hz().
virtual int rtp_timestamp_rate_hz() const;
// Returns the number of 10 ms frames the encoder will put in the next
// packet. This value may only change when Encode() outputs a packet; i.e.,
// the encoder may vary the number of 10 ms frames from packet to packet, but
@ -91,8 +92,17 @@ class AudioEncoder {
// Num10MsFramesInNextPacket().
virtual int Max10MsFramesInAPacket() const = 0;
// Changes the target bitrate. The implementation is free to alter this value,
// e.g., if the desired value is outside the valid range.
// The default implementation is a no-op, so the request is silently ignored
// unless a subclass overrides this method.
virtual void SetTargetBitrate(int bits_per_second) {}
// Tells the implementation what the projected packet loss rate is. The rate
// is in the range [0.0, 1.0]. This rate is typically used to adjust channel
// coding efforts, such as FEC.
// The default implementation is a no-op, so the value is silently ignored
// unless a subclass overrides this method.
virtual void SetProjectedPacketLossRate(double fraction) {}
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -74,6 +74,10 @@ int AudioEncoderCng::sample_rate_hz() const {
return sample_rate_hz_;
}
// Reports the RTP timestamp rate of the underlying speech encoder; the CNG
// wrapper does not define a rate of its own.
int AudioEncoderCng::rtp_timestamp_rate_hz() const {
return speech_encoder_->rtp_timestamp_rate_hz();
}
int AudioEncoderCng::num_channels() const {
return num_channels_;
}
@ -86,7 +90,17 @@ int AudioEncoderCng::Max10MsFramesInAPacket() const {
return speech_encoder_->Max10MsFramesInAPacket();
}
bool AudioEncoderCng::EncodeInternal(uint32_t timestamp,
// Forwards the requested target bitrate, unmodified, to the underlying speech
// encoder.
void AudioEncoderCng::SetTargetBitrate(int bits_per_second) {
speech_encoder_->SetTargetBitrate(bits_per_second);
}
// Forwards the projected packet loss rate to the underlying speech encoder.
// |fraction| must lie in [0.0, 1.0]; this is enforced with DCHECKs only.
void AudioEncoderCng::SetProjectedPacketLossRate(double fraction) {
DCHECK_GE(fraction, 0.0);
DCHECK_LE(fraction, 1.0);
speech_encoder_->SetProjectedPacketLossRate(fraction);
}
bool AudioEncoderCng::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
@ -99,7 +113,7 @@ bool AudioEncoderCng::EncodeInternal(uint32_t timestamp,
const int num_samples = sample_rate_hz() / 100 * num_channels();
if (speech_buffer_.empty()) {
CHECK_EQ(frames_in_buffer_, 0);
first_timestamp_in_buffer_ = timestamp;
first_timestamp_in_buffer_ = rtp_timestamp;
}
for (int i = 0; i < num_samples; ++i) {
speech_buffer_.push_back(audio[i]);

View File

@ -196,6 +196,18 @@ TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) {
EXPECT_EQ(17, cng_->Num10MsFramesInNextPacket());
}
// Verifies that a SetTargetBitrate() call on the CNG encoder reaches the mock
// speech encoder with the same bitrate value.
TEST_F(AudioEncoderCngTest, CheckChangeBitratePropagation) {
CreateCng();
EXPECT_CALL(mock_encoder_, SetTargetBitrate(4711));
cng_->SetTargetBitrate(4711);
}
// Verifies that a SetProjectedPacketLossRate() call on the CNG encoder reaches
// the mock speech encoder with the same loss fraction.
TEST_F(AudioEncoderCngTest, CheckProjectedPacketLossRatePropagation) {
CreateCng();
EXPECT_CALL(mock_encoder_, SetProjectedPacketLossRate(0.5));
cng_->SetProjectedPacketLossRate(0.5);
}
TEST_F(AudioEncoderCngTest, EncodeCallsVad) {
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
.WillRepeatedly(Return(1));

View File

@ -49,11 +49,14 @@ class AudioEncoderCng : public AudioEncoder {
virtual int sample_rate_hz() const OVERRIDE;
virtual int num_channels() const OVERRIDE;
int rtp_timestamp_rate_hz() const override;
virtual int Num10MsFramesInNextPacket() const OVERRIDE;
virtual int Max10MsFramesInAPacket() const OVERRIDE;
void SetTargetBitrate(int bits_per_second) override;
void SetProjectedPacketLossRate(double fraction) override;
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -59,14 +59,14 @@ int AudioEncoderPcm::Max10MsFramesInAPacket() const {
return num_10ms_frames_per_packet_;
}
bool AudioEncoderPcm::EncodeInternal(uint32_t timestamp,
bool AudioEncoderPcm::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
EncodedInfo* info) {
const int num_samples = sample_rate_hz() / 100 * num_channels();
if (speech_buffer_.empty()) {
first_timestamp_in_buffer_ = timestamp;
first_timestamp_in_buffer_ = rtp_timestamp;
}
for (int i = 0; i < num_samples; ++i) {
speech_buffer_.push_back(audio[i]);

View File

@ -40,7 +40,7 @@ class AudioEncoderPcm : public AudioEncoder {
protected:
AudioEncoderPcm(const Config& config, int sample_rate_hz);
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -54,6 +54,11 @@ AudioEncoderG722::~AudioEncoderG722() {}
int AudioEncoderG722::sample_rate_hz() const {
return kSampleRateHz;
}
// Overrides the default (which would report sample_rate_hz()) because the RTP
// timestamp rate of G.722 is half of the rate at which the codec samples
// audio.
int AudioEncoderG722::rtp_timestamp_rate_hz() const {
// The RTP timestamp rate for G.722 is 8000 Hz, even though it is a 16 kHz
// codec.
return kSampleRateHz / 2;
}
int AudioEncoderG722::num_channels() const {
return num_channels_;
}
@ -64,7 +69,7 @@ int AudioEncoderG722::Max10MsFramesInAPacket() const {
return num_10ms_frames_per_packet_;
}
bool AudioEncoderG722::EncodeInternal(uint32_t timestamp,
bool AudioEncoderG722::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
@ -75,7 +80,7 @@ bool AudioEncoderG722::EncodeInternal(uint32_t timestamp,
static_cast<size_t>(samples_per_channel) / 2 * num_channels_);
if (num_10ms_frames_buffered_ == 0)
first_timestamp_in_buffer_ = timestamp;
first_timestamp_in_buffer_ = rtp_timestamp;
// Deinterleave samples and save them in each channel's buffer.
const int start = kSampleRateHz / 100 * num_10ms_frames_buffered_;

View File

@ -31,12 +31,13 @@ class AudioEncoderG722 : public AudioEncoder {
virtual ~AudioEncoderG722();
virtual int sample_rate_hz() const OVERRIDE;
int rtp_timestamp_rate_hz() const override;
virtual int num_channels() const OVERRIDE;
virtual int Num10MsFramesInNextPacket() const OVERRIDE;
virtual int Max10MsFramesInAPacket() const OVERRIDE;
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -52,7 +52,7 @@ int AudioEncoderIlbc::Max10MsFramesInAPacket() const {
return num_10ms_frames_per_packet_;
}
bool AudioEncoderIlbc::EncodeInternal(uint32_t timestamp,
bool AudioEncoderIlbc::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
@ -63,7 +63,7 @@ bool AudioEncoderIlbc::EncodeInternal(uint32_t timestamp,
// Save timestamp if starting a new packet.
if (num_10ms_frames_buffered_ == 0)
first_timestamp_in_buffer_ = timestamp;
first_timestamp_in_buffer_ = rtp_timestamp;
// Buffer input.
std::memcpy(input_buffer_ + kSampleRateHz / 100 * num_10ms_frames_buffered_,

View File

@ -35,7 +35,7 @@ class AudioEncoderIlbc : public AudioEncoder {
virtual int Max10MsFramesInAPacket() const OVERRIDE;
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -84,7 +84,7 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
protected:
// AudioEncoder protected method.
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -120,7 +120,7 @@ int AudioEncoderDecoderIsacT<T>::Max10MsFramesInAPacket() const {
}
template <typename T>
bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t timestamp,
bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
@ -128,7 +128,7 @@ bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t timestamp,
if (!packet_in_progress_) {
// Starting a new packet; remember the timestamp for later.
packet_in_progress_ = true;
packet_timestamp_ = timestamp;
packet_timestamp_ = rtp_timestamp;
}
int r;
{

View File

@ -25,6 +25,8 @@ class MockAudioEncoder : public AudioEncoder {
MOCK_CONST_METHOD0(num_channels, int());
MOCK_CONST_METHOD0(Num10MsFramesInNextPacket, int());
MOCK_CONST_METHOD0(Max10MsFramesInAPacket, int());
MOCK_METHOD1(SetTargetBitrate, void(int));
MOCK_METHOD1(SetProjectedPacketLossRate, void(double));
// Note, we explicitly chose not to create a mock for the Encode method.
MOCK_METHOD5(EncodeInternal,
bool(uint32_t timestamp,

View File

@ -10,12 +10,16 @@
#include "webrtc/modules/audio_coding/codecs/opus/interface/audio_encoder_opus.h"
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
namespace webrtc {
namespace {
const int kMinBitrateBps = 500;
const int kMaxBitrateBps = 512000;
// We always encode at 48 kHz.
const int kSampleRateHz = 48000;
@ -36,7 +40,8 @@ AudioEncoderOpus::Config::Config()
: frame_size_ms(20),
num_channels(1),
payload_type(120),
application(kVoip) {
application(kVoip),
bitrate_bps(64000) {
}
bool AudioEncoderOpus::Config::IsOk() const {
@ -44,6 +49,8 @@ bool AudioEncoderOpus::Config::IsOk() const {
return false;
if (num_channels <= 0)
return false;
if (bitrate_bps < kMinBitrateBps || bitrate_bps > kMaxBitrateBps)
return false;
return true;
}
@ -54,10 +61,12 @@ AudioEncoderOpus::AudioEncoderOpus(const Config& config)
payload_type_(config.payload_type),
application_(config.application),
samples_per_10ms_frame_(rtc::CheckedDivExact(kSampleRateHz, 100) *
num_channels_) {
num_channels_),
packet_loss_rate_(0.0) {
CHECK(config.IsOk());
input_buffer_.reserve(num_10ms_frames_per_packet_ * samples_per_10ms_frame_);
CHECK_EQ(0, WebRtcOpus_EncoderCreate(&inst_, num_channels_, application_));
SetTargetBitrate(config.bitrate_bps);
}
AudioEncoderOpus::~AudioEncoderOpus() {
@ -80,13 +89,67 @@ int AudioEncoderOpus::Max10MsFramesInAPacket() const {
return num_10ms_frames_per_packet_;
}
bool AudioEncoderOpus::EncodeInternal(uint32_t timestamp,
// Applies a new target bitrate to the Opus encoder instance. The requested
// value is first clamped to [kMinBitrateBps, kMaxBitrateBps], so out-of-range
// requests are adjusted rather than rejected. A non-zero return value from
// WebRtcOpus_SetBitRate() is treated as fatal (CHECK).
void AudioEncoderOpus::SetTargetBitrate(int bits_per_second) {
CHECK_EQ(WebRtcOpus_SetBitRate(
inst_, std::max(std::min(bits_per_second, kMaxBitrateBps),
kMinBitrateBps)),
0);
}
void AudioEncoderOpus::SetProjectedPacketLossRate(double fraction) {
DCHECK_GE(fraction, 0.0);
DCHECK_LE(fraction, 1.0);
// Optimize the loss rate to configure Opus. Basically, optimized loss rate is
// the input loss rate rounded down to various levels, because a robustly good
// audio quality is achieved by lowering the packet loss down.
// Additionally, to prevent toggling, margins are used, i.e., when jumping to
// a loss rate from below, a higher threshold is used than jumping to the same
// level from above.
const double kPacketLossRate20 = 0.20;
const double kPacketLossRate10 = 0.10;
const double kPacketLossRate5 = 0.05;
const double kPacketLossRate1 = 0.01;
const double kLossRate20Margin = 0.02;
const double kLossRate10Margin = 0.01;
const double kLossRate5Margin = 0.01;
double opt_loss_rate;
if (fraction >=
kPacketLossRate20 +
kLossRate20Margin *
(kPacketLossRate20 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate20;
} else if (fraction >=
kPacketLossRate10 +
kLossRate10Margin *
(kPacketLossRate10 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate10;
} else if (fraction >=
kPacketLossRate5 +
kLossRate5Margin *
(kPacketLossRate5 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate5;
} else if (fraction >= kPacketLossRate1) {
opt_loss_rate = kPacketLossRate1;
} else {
opt_loss_rate = 0;
}
if (packet_loss_rate_ != opt_loss_rate) {
// Ask the encoder to change the target packet loss rate.
CHECK_EQ(WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(opt_loss_rate * 100 + .5)),
0);
packet_loss_rate_ = opt_loss_rate;
}
}
bool AudioEncoderOpus::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
EncodedInfo* info) {
if (input_buffer_.empty())
first_timestamp_in_buffer_ = timestamp;
first_timestamp_in_buffer_ = rtp_timestamp;
input_buffer_.insert(input_buffer_.end(), audio,
audio + samples_per_10ms_frame_);
if (input_buffer_.size() < (static_cast<size_t>(num_10ms_frames_per_packet_) *

View File

@ -32,6 +32,7 @@ class AudioEncoderOpus : public AudioEncoder {
int num_channels;
int payload_type;
ApplicationMode application;
int bitrate_bps;
};
explicit AudioEncoderOpus(const Config& config);
@ -41,9 +42,11 @@ class AudioEncoderOpus : public AudioEncoder {
virtual int num_channels() const OVERRIDE;
virtual int Num10MsFramesInNextPacket() const OVERRIDE;
virtual int Max10MsFramesInAPacket() const OVERRIDE;
void SetTargetBitrate(int bits_per_second) override;
void SetProjectedPacketLossRate(double fraction) override;
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
@ -58,6 +61,7 @@ class AudioEncoderOpus : public AudioEncoder {
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;
uint32_t first_timestamp_in_buffer_;
double packet_loss_rate_;
};
} // namespace webrtc

View File

@ -28,6 +28,10 @@ int AudioEncoderCopyRed::sample_rate_hz() const {
return speech_encoder_->sample_rate_hz();
}
// Reports the RTP timestamp rate of the underlying speech encoder.
int AudioEncoderCopyRed::rtp_timestamp_rate_hz() const {
return speech_encoder_->rtp_timestamp_rate_hz();
}
int AudioEncoderCopyRed::num_channels() const {
return speech_encoder_->num_channels();
}
@ -40,12 +44,22 @@ int AudioEncoderCopyRed::Max10MsFramesInAPacket() const {
return speech_encoder_->Max10MsFramesInAPacket();
}
bool AudioEncoderCopyRed::EncodeInternal(uint32_t timestamp,
// Forwards the requested target bitrate, unmodified, to the underlying speech
// encoder.
void AudioEncoderCopyRed::SetTargetBitrate(int bits_per_second) {
speech_encoder_->SetTargetBitrate(bits_per_second);
}
// Forwards the projected packet loss rate to the underlying speech encoder.
// |fraction| must lie in [0.0, 1.0]; this is enforced with DCHECKs only.
void AudioEncoderCopyRed::SetProjectedPacketLossRate(double fraction) {
DCHECK_GE(fraction, 0.0);
DCHECK_LE(fraction, 1.0);
speech_encoder_->SetProjectedPacketLossRate(fraction);
}
bool AudioEncoderCopyRed::EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,
EncodedInfo* info) {
if (!speech_encoder_->Encode(timestamp, audio,
if (!speech_encoder_->Encode(rtp_timestamp, audio,
static_cast<size_t>(sample_rate_hz() / 100),
max_encoded_bytes, encoded, info))
return false;

View File

@ -36,12 +36,15 @@ class AudioEncoderCopyRed : public AudioEncoder {
virtual ~AudioEncoderCopyRed();
virtual int sample_rate_hz() const OVERRIDE;
int rtp_timestamp_rate_hz() const override;
virtual int num_channels() const OVERRIDE;
virtual int Num10MsFramesInNextPacket() const OVERRIDE;
virtual int Max10MsFramesInAPacket() const OVERRIDE;
void SetTargetBitrate(int bits_per_second) override;
void SetProjectedPacketLossRate(double fraction) override;
protected:
virtual bool EncodeInternal(uint32_t timestamp,
virtual bool EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded,

View File

@ -121,6 +121,16 @@ TEST_F(AudioEncoderCopyRedTest, CheckMaxFrameSizePropagation) {
EXPECT_EQ(17, red_->Max10MsFramesInAPacket());
}
// Verifies that a SetTargetBitrate() call on the RED encoder reaches the mock
// speech encoder with the same bitrate value.
TEST_F(AudioEncoderCopyRedTest, CheckSetBitratePropagation) {
EXPECT_CALL(mock_encoder_, SetTargetBitrate(4711));
red_->SetTargetBitrate(4711);
}
// Verifies that a SetProjectedPacketLossRate() call on the RED encoder reaches
// the mock speech encoder with the same loss fraction.
TEST_F(AudioEncoderCopyRedTest, CheckProjectedPacketLossRatePropagation) {
EXPECT_CALL(mock_encoder_, SetProjectedPacketLossRate(0.5));
red_->SetProjectedPacketLossRate(0.5);
}
// Checks that an Encode() call is immediately propagated to the speech
// encoder.
TEST_F(AudioEncoderCopyRedTest, CheckImmediateEncode) {