Prevent crash in NetEQ on decoder output overflow.

NetEQ can crash when a decoder produces more output samples than NetEQ can handle. A practical case where this happens is when multiple Opus packets are combined.

The best solution is to pass the maximum output size to the ACM decode function and let it return a failure if that size is too small.

BUG=4361
R=henrik.lundin@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/45619004

Cr-Commit-Position: refs/heads/master@{#8730}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8730 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
minyue@webrtc.org 2015-03-16 12:30:37 +00:00
parent 4b89aa03bb
commit 7f7d7e3427
19 changed files with 453 additions and 188 deletions

View File

@ -16,12 +16,40 @@
namespace webrtc {
int AudioDecoder::DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type);
int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, size_t max_decoded_bytes,
int16_t* decoded, SpeechType* speech_type) {
int duration = PacketDuration(encoded, encoded_len);
if (duration >= 0 && duration * sizeof(int16_t) > max_decoded_bytes) {
return -1;
}
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
int AudioDecoder::DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, size_t max_decoded_bytes,
int16_t* decoded, SpeechType* speech_type) {
int duration = PacketDurationRedundant(encoded, encoded_len);
if (duration >= 0 && duration * sizeof(int16_t) > max_decoded_bytes) {
return -1;
}
return DecodeRedundantInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
// Base-class decoding hook. It ignores all of its arguments and reports
// kNotImplemented; concrete decoders override it with the real codec call.
int AudioDecoder::DecodeInternal(const uint8_t* /*encoded*/,
                                 size_t /*encoded_len*/,
                                 int /*sample_rate_hz*/,
                                 int16_t* /*decoded*/,
                                 SpeechType* /*speech_type*/) {
  return kNotImplemented;
}
// Base-class hook for redundant decoding. Without a codec-specific override,
// redundant payloads are handled exactly like regular ones, so this simply
// forwards every argument to DecodeInternal() and passes its result back.
int AudioDecoder::DecodeRedundantInternal(const uint8_t* encoded,
                                          size_t encoded_len,
                                          int sample_rate_hz,
                                          int16_t* decoded,
                                          SpeechType* speech_type) {
  const int result = DecodeInternal(encoded, encoded_len, sample_rate_hz,
                                    decoded, speech_type);
  return result;
}
// The base implementation always reports false; decoders that provide
// DecodePlc() override this.
bool AudioDecoder::HasDecodePlc() const {
  return false;
}

View File

@ -35,22 +35,25 @@ class AudioDecoder {
virtual ~AudioDecoder() {}
// Decodes |encode_len| bytes from |encoded| and writes the result in
// |decoded|. The number of samples from all channels produced is in
// the return value. If the decoder produced comfort noise, |speech_type|
// |decoded|. The maximum bytes allowed to be written into |decoded| is
// |max_decoded_bytes|. The number of samples from all channels produced is
// in the return value. If the decoder produced comfort noise, |speech_type|
// is set to kComfortNoise, otherwise it is kSpeech. The desired output
// sample rate is provided in |sample_rate_hz|, which must be valid for the
// codec at hand.
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type) = 0;
SpeechType* speech_type);
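// Same as Decode(), but interfaces to the decoder's redundant decode function.
// The default implementation checks the redundant packet duration against
// |max_decoded_bytes| and then calls DecodeRedundantInternal(), which by
// default falls back to the regular DecodeInternal().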
virtual int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type);
@ -99,6 +102,18 @@ class AudioDecoder {
protected:
static SpeechType ConvertSpeechType(int16_t type);
// Codec-specific decoding hook, invoked by Decode() once the output-size
// check against |max_decoded_bytes| has passed. Takes the same arguments as
// Decode() minus |max_decoded_bytes|. The base-class implementation returns
// kNotImplemented.
virtual int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
// Codec-specific hook for redundant payloads, invoked by DecodeRedundant()
// after its output-size check. The base-class implementation forwards to
// DecodeInternal().
virtual int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
size_t channels_;
private:

View File

@ -72,11 +72,6 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
int Max10MsFramesInAPacket() const override;
// AudioDecoder methods.
int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
bool HasDecodePlc() const override;
int DecodePlc(int num_frames, int16_t* decoded) override;
int Init() override;
@ -95,6 +90,13 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
uint8_t* encoded,
EncodedInfo* info) override;
// AudioDecoder protected method.
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
// This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and
// STREAM_MAXW16_60MS for iSAC fix (60 ms).

View File

@ -218,11 +218,11 @@ void AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
}
template <typename T>
int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioEncoderDecoderIsacT<T>::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
CriticalSectionScoped cs(state_lock_.get());
CHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000)
<< "Unsupported sample rate " << sample_rate_hz;
@ -232,8 +232,8 @@ int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded,
}
int16_t temp_type = 1; // Default is speech.
int16_t ret =
T::Decode(isac_state_, encoded, static_cast<int16_t>(encoded_len),
decoded, &temp_type);
T::DecodeInternal(isac_state_, encoded, static_cast<int16_t>(encoded_len),
decoded, &temp_type);
*speech_type = ConvertSpeechType(temp_type);
return ret;
}

View File

@ -36,11 +36,11 @@ struct IsacFix {
static inline int16_t Create(instance_type** inst) {
return WebRtcIsacfix_Create(inst);
}
static inline int16_t Decode(instance_type* inst,
const uint8_t* encoded,
int16_t len,
int16_t* decoded,
int16_t* speech_type) {
static inline int16_t DecodeInternal(instance_type* inst,
const uint8_t* encoded,
int16_t len,
int16_t* decoded,
int16_t* speech_type) {
return WebRtcIsacfix_Decode(inst, encoded, len, decoded, speech_type);
}
static inline int16_t DecodePlc(instance_type* inst,

View File

@ -35,11 +35,11 @@ struct IsacFloat {
static inline int16_t Create(instance_type** inst) {
return WebRtcIsac_Create(inst);
}
static inline int16_t Decode(instance_type* inst,
const uint8_t* encoded,
int16_t len,
int16_t* decoded,
int16_t* speech_type) {
static inline int16_t DecodeInternal(instance_type* inst,
const uint8_t* encoded,
int16_t len,
int16_t* decoded,
int16_t* speech_type) {
return WebRtcIsac_Decode(inst, encoded, len, decoded, speech_type);
}
static inline int16_t DecodePlc(instance_type* inst,

View File

@ -554,6 +554,54 @@ TEST_P(OpusTest, OpusDurationEstimation) {
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
// Verifies that one Opus packet assembled from |kPackets| separately encoded
// frames (via the Opus repacketizer) is both estimated and decoded at its full
// length, i.e. |kPackets| times kOpus20msFrameSamples.
TEST_P(OpusTest, OpusDecodeRepacketized) {
const int kPackets = 6;
PrepareSpeechData(channels_, 20, 20 * kPackets);
// Create encoder memory.
ASSERT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_,
channels_,
application_));
ASSERT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_,
channels_));
// Set bitrate.
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_,
channels_ == 1 ? 32000 : 64000));
// Check number of channels for decoder.
EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
// Encode & decode.
int16_t audio_type;
// Output buffer sized to hold all |kPackets| frames for every channel.
rtc::scoped_ptr<int16_t[]> output_data_decode(
new int16_t[kPackets * kOpus20msFrameSamples * channels_]);
OpusRepacketizer* rp = opus_repacketizer_create();
// Encode one frame at a time and append each encoded frame to the
// repacketizer; |bitstream_| is reused for every frame.
for (int idx = 0; idx < kPackets; idx++) {
encoded_bytes_ = WebRtcOpus_Encode(opus_encoder_,
speech_data_.GetNextBlock(),
kOpus20msFrameSamples, kMaxBytes,
bitstream_);
EXPECT_EQ(OPUS_OK, opus_repacketizer_cat(rp, bitstream_, encoded_bytes_));
}
// Write the combined packet back into |bitstream_|.
encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes);
// The duration estimate and the decoded length must both cover all frames.
EXPECT_EQ(kOpus20msFrameSamples * kPackets,
WebRtcOpus_DurationEst(opus_decoder_, bitstream_, encoded_bytes_));
EXPECT_EQ(kOpus20msFrameSamples * kPackets,
WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
output_data_decode.get(), &audio_type));
// Free memory.
opus_repacketizer_destroy(rp);
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
INSTANTIATE_TEST_CASE_P(VariousMode,
OpusTest,
Combine(Values(1, 2), Values(0, 1)));

View File

@ -140,21 +140,23 @@ bool AudioDecoderProxy::IsSet() const {
int AudioDecoderProxy::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type) {
CriticalSectionScoped decoder_lock(decoder_lock_.get());
return decoder_->Decode(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
return decoder_->Decode(encoded, encoded_len, sample_rate_hz,
max_decoded_bytes, decoded, speech_type);
}
int AudioDecoderProxy::DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type) {
CriticalSectionScoped decoder_lock(decoder_lock_.get());
return decoder_->DecodeRedundant(encoded, encoded_len, sample_rate_hz,
decoded, speech_type);
max_decoded_bytes, decoded, speech_type);
}
bool AudioDecoderProxy::HasDecodePlc() const {

View File

@ -49,11 +49,13 @@ class AudioDecoderProxy final : public AudioDecoder {
int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type) override;
int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type) override;
bool HasDecodePlc() const override;

View File

@ -38,11 +38,11 @@
namespace webrtc {
// PCMu
int AudioDecoderPcmU::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderPcmU::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 8000);
int16_t temp_type = 1; // Default is speech.
int16_t ret = WebRtcG711_DecodeU(encoded, static_cast<int16_t>(encoded_len),
@ -58,11 +58,11 @@ int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded,
}
// PCMa
int AudioDecoderPcmA::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderPcmA::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 8000);
int16_t temp_type = 1; // Default is speech.
int16_t ret = WebRtcG711_DecodeA(encoded, static_cast<int16_t>(encoded_len),
@ -81,11 +81,11 @@ int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded,
#ifdef WEBRTC_CODEC_PCM16
AudioDecoderPcm16B::AudioDecoderPcm16B() {}
int AudioDecoderPcm16B::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
sample_rate_hz == 32000 || sample_rate_hz == 48000)
<< "Unsupported sample rate " << sample_rate_hz;
@ -117,11 +117,11 @@ AudioDecoderIlbc::~AudioDecoderIlbc() {
WebRtcIlbcfix_DecoderFree(dec_state_);
}
int AudioDecoderIlbc::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderIlbc::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 8000);
int16_t temp_type = 1; // Default is speech.
int16_t ret = WebRtcIlbcfix_Decode(dec_state_, encoded,
@ -150,11 +150,11 @@ AudioDecoderG722::~AudioDecoderG722() {
WebRtcG722_FreeDecoder(dec_state_);
}
int AudioDecoderG722::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderG722::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 16000);
int16_t temp_type = 1; // Default is speech.
int16_t ret =
@ -185,11 +185,11 @@ AudioDecoderG722Stereo::~AudioDecoderG722Stereo() {
WebRtcG722_FreeDecoder(dec_state_right_);
}
int AudioDecoderG722Stereo::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderG722Stereo::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 16000);
int16_t temp_type = 1; // Default is speech.
// De-interleave the bit-stream into two separate payloads.
@ -270,11 +270,11 @@ AudioDecoderOpus::~AudioDecoderOpus() {
WebRtcOpus_DecoderFree(dec_state_);
}
int AudioDecoderOpus::Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderOpus::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
DCHECK_EQ(sample_rate_hz, 48000);
int16_t temp_type = 1; // Default is speech.
int16_t ret = WebRtcOpus_Decode(dec_state_, encoded,
@ -286,16 +286,18 @@ int AudioDecoderOpus::Decode(const uint8_t* encoded,
return ret;
}
int AudioDecoderOpus::DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
int AudioDecoderOpus::DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
if (!PacketHasFec(encoded, encoded_len)) {
// This packet is a RED packet.
return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type);
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
DCHECK_EQ(sample_rate_hz, 48000);
int16_t temp_type = 1; // Default is speech.
int16_t ret = WebRtcOpus_DecodeFec(dec_state_, encoded,
static_cast<int16_t>(encoded_len), decoded,

View File

@ -37,13 +37,15 @@ namespace webrtc {
class AudioDecoderPcmU : public AudioDecoder {
public:
AudioDecoderPcmU() {}
virtual int Decode(const uint8_t* encoded,
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
SpeechType* speech_type) override;
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmU);
@ -52,13 +54,15 @@ class AudioDecoderPcmU : public AudioDecoder {
class AudioDecoderPcmA : public AudioDecoder {
public:
AudioDecoderPcmA() {}
virtual int Decode(const uint8_t* encoded,
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
SpeechType* speech_type) override;
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmA);
@ -92,13 +96,15 @@ class AudioDecoderPcmAMultiCh : public AudioDecoderPcmA {
class AudioDecoderPcm16B : public AudioDecoder {
public:
AudioDecoderPcm16B();
virtual int Decode(const uint8_t* encoded,
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
SpeechType* speech_type) override;
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16B);
@ -121,15 +127,17 @@ class AudioDecoderIlbc : public AudioDecoder {
public:
AudioDecoderIlbc();
virtual ~AudioDecoderIlbc();
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual bool HasDecodePlc() const { return true; }
virtual int DecodePlc(int num_frames, int16_t* decoded);
virtual int Init();
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
IlbcDecoderInstance* dec_state_;
DISALLOW_COPY_AND_ASSIGN(AudioDecoderIlbc);
@ -141,15 +149,17 @@ class AudioDecoderG722 : public AudioDecoder {
public:
AudioDecoderG722();
virtual ~AudioDecoderG722();
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual bool HasDecodePlc() const { return false; }
virtual int Init();
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
G722DecInst* dec_state_;
DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722);
@ -159,12 +169,14 @@ class AudioDecoderG722Stereo : public AudioDecoder {
public:
AudioDecoderG722Stereo();
virtual ~AudioDecoderG722Stereo();
virtual int Decode(const uint8_t* encoded,
virtual int Init();
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int Init();
SpeechType* speech_type) override;
private:
// Splits the stereo-interleaved payload in |encoded| into separate payloads
@ -187,22 +199,25 @@ class AudioDecoderOpus : public AudioDecoder {
public:
explicit AudioDecoderOpus(int num_channels);
virtual ~AudioDecoderOpus();
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int Init();
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
virtual int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const;
virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
OpusDecInst* dec_state_;
DISALLOW_COPY_AND_ASSIGN(AudioDecoderOpus);
@ -219,13 +234,6 @@ class AudioDecoderCng : public AudioDecoder {
public:
explicit AudioDecoderCng();
virtual ~AudioDecoderCng();
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int /*sample_rate_hz*/,
int16_t* decoded,
SpeechType* speech_type) {
return -1;
}
virtual int Init();
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
@ -235,6 +243,15 @@ class AudioDecoderCng : public AudioDecoder {
CNG_dec_inst* CngDecoderInstance() override { return dec_state_; }
protected:
// Unconditionally fails with -1 without touching any of its arguments.
// NOTE(review): presumably comfort-noise payloads are consumed through
// another path than the regular decode call -- confirm.
int DecodeInternal(const uint8_t* /*encoded*/,
                   size_t /*encoded_len*/,
                   int /*sample_rate_hz*/,
                   int16_t* /*decoded*/,
                   SpeechType* /*speech_type*/) override {
  return -1;
}
private:
CNG_dec_inst* dec_state_;
DISALLOW_COPY_AND_ASSIGN(AudioDecoderCng);

View File

@ -188,6 +188,7 @@ class AudioDecoderTest : public ::testing::Test {
AudioDecoder::SpeechType speech_type;
size_t dec_len = decoder_->Decode(
&encoded_[encoded_bytes_], enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
&decoded[processed_samples * channels_], &speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
encoded_bytes_ += enc_len;
@ -222,6 +223,7 @@ class AudioDecoderTest : public ::testing::Test {
EXPECT_EQ(0, decoder_->Init());
rtc::scoped_ptr<int16_t[]> output1(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output1.get(), &speech_type1);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
@ -229,6 +231,7 @@ class AudioDecoderTest : public ::testing::Test {
EXPECT_EQ(0, decoder_->Init());
rtc::scoped_ptr<int16_t[]> output2(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output2.get(), &speech_type2);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
@ -249,6 +252,7 @@ class AudioDecoderTest : public ::testing::Test {
EXPECT_EQ(0, decoder_->Init());
rtc::scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output.get(), &speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// Call DecodePlc and verify that we get one frame of data.
@ -340,6 +344,7 @@ class AudioDecoderIlbcTest : public AudioDecoderTest {
EXPECT_EQ(0, decoder_->Init());
rtc::scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output.get(), &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Simply call DecodePlc and verify that we get 0 as return value.

View File

@ -22,9 +22,9 @@ class MockAudioDecoder : public AudioDecoder {
MockAudioDecoder() {}
virtual ~MockAudioDecoder() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD5(
MOCK_METHOD6(
Decode,
int(const uint8_t*, size_t, int, int16_t*, AudioDecoder::SpeechType*));
int(const uint8_t*, size_t, int, size_t, int16_t*, SpeechType*));
MOCK_CONST_METHOD0(HasDecodePlc, bool());
MOCK_METHOD2(DecodePlc, int(int, int16_t*));
MOCK_METHOD0(Init, int());

View File

@ -28,20 +28,20 @@ using ::testing::Invoke;
class ExternalPcm16B : public AudioDecoder {
public:
ExternalPcm16B() {}
virtual int Init() { return 0; }
virtual int Decode(const uint8_t* encoded,
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
SpeechType* speech_type) override {
int16_t ret = WebRtcPcm16b_Decode(
encoded, static_cast<int16_t>(encoded_len), decoded);
*speech_type = ConvertSpeechType(1);
return ret;
}
virtual int Init() { return 0; }
private:
DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B);
};
@ -52,7 +52,7 @@ class MockExternalPcm16B : public ExternalPcm16B {
public:
MockExternalPcm16B() {
// By default, all calls are delegated to the real object.
ON_CALL(*this, Decode(_, _, _, _, _))
ON_CALL(*this, Decode(_, _, _, _, _, _))
.WillByDefault(Invoke(&real_, &ExternalPcm16B::Decode));
ON_CALL(*this, HasDecodePlc())
.WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc));
@ -68,10 +68,11 @@ class MockExternalPcm16B : public ExternalPcm16B {
virtual ~MockExternalPcm16B() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD5(Decode,
MOCK_METHOD6(Decode,
int(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type));
MOCK_CONST_METHOD0(HasDecodePlc,

View File

@ -101,7 +101,7 @@ class NetEqExternalDecoderUnitTest : public test::NetEqExternalDecoderTest {
} while (Lost()); // If lost, immediately read the next packet.
EXPECT_CALL(*external_decoder_,
Decode(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _))
Decode(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _, _))
.Times(NumExpectedDecodeCalls(num_loops));
uint32_t time_now = 0;

View File

@ -1267,6 +1267,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
", len=" << packet->payload_length;
decode_length = decoder->DecodeRedundant(
packet->payload, packet->payload_length, fs_hz_,
(decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
&decoded_buffer_[*decoded_length], speech_type);
} else {
LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp <<
@ -1275,8 +1276,10 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
", ssrc=" << packet->header.ssrc <<
", len=" << packet->payload_length;
decode_length =
decoder->Decode(packet->payload, packet->payload_length, fs_hz_,
&decoded_buffer_[*decoded_length], speech_type);
decoder->Decode(
packet->payload, packet->payload_length, fs_hz_,
(decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
&decoded_buffer_[*decoded_length], speech_type);
}
delete[] packet->payload;
@ -1606,8 +1609,9 @@ void NetEqImpl::DoCodecInternalCng() {
if (decoder) {
const uint8_t* dummy_payload = NULL;
AudioDecoder::SpeechType speech_type;
length =
decoder->Decode(dummy_payload, 0, fs_hz_, decoded_buffer, &speech_type);
length = decoder->Decode(
dummy_payload, 0, fs_hz_, kMaxFrameSize * sizeof(int16_t),
decoded_buffer, &speech_type);
}
assert(mute_factor_array_.get());
normal_->Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(),

View File

@ -28,6 +28,7 @@
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h"
using ::testing::AtLeast;
using ::testing::Return;
using ::testing::ReturnNull;
using ::testing::_;
@ -428,11 +429,12 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
CountingSamplesDecoder() : next_value_(1) {}
// Produce as many samples as input bytes (|encoded_len|).
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int /*sample_rate_hz*/,
int16_t* decoded,
SpeechType* speech_type) {
int Decode(const uint8_t* encoded,
size_t encoded_len,
int /* sample_rate_hz */,
size_t /* max_decoded_bytes */,
int16_t* decoded,
SpeechType* speech_type) override {
for (size_t i = 0; i < encoded_len; ++i) {
decoded[i] = next_value_++;
}
@ -523,10 +525,10 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
// The below expectation will make the mock decoder write
// |kPayloadLengthSamples| zeros to the output array, and mark it as speech.
EXPECT_CALL(mock_decoder,
Decode(Pointee(0), kPayloadLengthBytes, kSampleRateHz, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
Decode(Pointee(0), kPayloadLengthBytes, kSampleRateHz, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kSpeech),
SetArgPointee<5>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples)));
EXPECT_EQ(NetEq::kOK,
neteq_->RegisterExternalDecoder(
@ -569,10 +571,10 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
// Expect only the second packet to be decoded (the one with "2" as the first
// payload byte).
EXPECT_CALL(mock_decoder,
Decode(Pointee(2), kPayloadLengthBytes, kSampleRateHz, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
Decode(Pointee(2), kPayloadLengthBytes, kSampleRateHz, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kSpeech),
SetArgPointee<5>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples)));
// Pull audio once.
@ -686,30 +688,30 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
// Pointee(x) verifies that first byte of the payload equals x, this makes it
// possible to verify that the correct payload is fed to Decode().
EXPECT_CALL(mock_decoder, Decode(Pointee(0), kPayloadLengthBytes,
kSampleRateKhz * 1000, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
kSampleRateKhz * 1000, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kSpeech),
SetArgPointee<5>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples)));
EXPECT_CALL(mock_decoder, Decode(Pointee(1), kPayloadLengthBytes,
kSampleRateKhz * 1000, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
kSampleRateKhz * 1000, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kComfortNoise),
SetArgPointee<5>(AudioDecoder::kComfortNoise),
Return(kPayloadLengthSamples)));
EXPECT_CALL(mock_decoder, Decode(IsNull(), 0, kSampleRateKhz * 1000, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
EXPECT_CALL(mock_decoder, Decode(IsNull(), 0, kSampleRateKhz * 1000, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kComfortNoise),
SetArgPointee<5>(AudioDecoder::kComfortNoise),
Return(kPayloadLengthSamples)));
EXPECT_CALL(mock_decoder, Decode(Pointee(2), kPayloadLengthBytes,
kSampleRateKhz * 1000, _, _))
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
kSampleRateKhz * 1000, _, _, _))
.WillOnce(DoAll(SetArrayArgument<4>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kSpeech),
SetArgPointee<5>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples)));
EXPECT_EQ(NetEq::kOK,
@ -793,4 +795,101 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
EXPECT_CALL(mock_decoder, Die());
}
// Verifies that NetEq survives a packet whose reported duration is too large:
// the first packet reports one sample more than kNetEqMaxFrameSize and must
// never reach DecodeInternal(), while the second packet reports exactly
// kNetEqMaxFrameSize and must be decoded once.
TEST_F(NetEqImplTest, UnsupportedDecoder) {
UseNoMocks();
CreateInstance();
static const size_t kNetEqMaxFrameSize = 2880; // 60 ms @ 48 kHz.
const uint8_t kPayloadType = 17; // Just an arbitrary number.
const uint32_t kReceiveTime = 17; // Value doesn't matter for this test.
const int kSampleRateHz = 8000;
const int kChannles = 1;
const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000; // 10 ms.
const size_t kPayloadLengthBytes = 1;
uint8_t payload[kPayloadLengthBytes]= {0};
int16_t dummy_output[kPayloadLengthSamples] = {0};
WebRtcRTPHeader rtp_header;
rtp_header.header.payloadType = kPayloadType;
rtp_header.header.sequenceNumber = 0x1234;
rtp_header.header.timestamp = 0x12345678;
rtp_header.header.ssrc = 0x87654321;
// Local mock that only mocks PacketDuration() and DecodeInternal(), so the
// real AudioDecoder::Decode() size check is exercised.
class MockAudioDecoder : public AudioDecoder {
public:
int Init() override {
return 0;
}
MOCK_CONST_METHOD2(PacketDuration, int(const uint8_t*, size_t));
MOCK_METHOD5(DecodeInternal, int(const uint8_t*, size_t, int, int16_t*,
SpeechType*));
} decoder_;
// The first payload byte identifies which packet a call refers to.
const uint8_t kFirstPayloadValue = 1;
const uint8_t kSecondPayloadValue = 2;
// First packet: reported duration is one sample above the maximum, so the
// decode hook must never be invoked for it.
EXPECT_CALL(decoder_, PacketDuration(Pointee(kFirstPayloadValue),
kPayloadLengthBytes))
.Times(AtLeast(1))
.WillRepeatedly(Return(kNetEqMaxFrameSize * kChannles + 1));
EXPECT_CALL(decoder_,
DecodeInternal(Pointee(kFirstPayloadValue), _, _, _, _))
.Times(0);
// Second packet: reported duration is exactly the maximum, so it is decoded
// once and yields |kPayloadLengthSamples| zero samples marked as speech.
EXPECT_CALL(decoder_, DecodeInternal(Pointee(kSecondPayloadValue),
kPayloadLengthBytes,
kSampleRateHz, _, _))
.Times(1)
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output,
dummy_output + kPayloadLengthSamples),
SetArgPointee<4>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples)));
EXPECT_CALL(decoder_, PacketDuration(Pointee(kSecondPayloadValue),
kPayloadLengthBytes))
.Times(AtLeast(1))
.WillRepeatedly(Return(kNetEqMaxFrameSize * kChannles));
EXPECT_EQ(NetEq::kOK,
neteq_->RegisterExternalDecoder(
&decoder_, kDecoderPCM16B, kPayloadType));
// Insert one packet.
payload[0] = kFirstPayloadValue; // This will make Decode() fail.
EXPECT_EQ(NetEq::kOK,
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
// Insert another packet.
payload[0] = kSecondPayloadValue; // This will make Decode() successful.
rtp_header.header.sequenceNumber++;
// The second timestamp needs to be at least 30 ms after the first to make
// the second packet get decoded.
rtp_header.header.timestamp += 3 * kPayloadLengthSamples;
EXPECT_EQ(NetEq::kOK,
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
const int kMaxOutputSize = 10 * kSampleRateHz / 1000;
int16_t output[kMaxOutputSize];
int samples_per_channel;
int num_channels;
NetEqOutputType type;
// First pull: the failed decode is reported as kOtherDecoderError, but a
// full block of output is still expected.
EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(kMaxOutputSize, output,
&samples_per_channel, &num_channels,
&type));
EXPECT_EQ(NetEq::kOtherDecoderError, neteq_->LastError());
EXPECT_EQ(kMaxOutputSize, samples_per_channel);
EXPECT_EQ(kChannles, num_channels);
// Second pull: expected to succeed.
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(kMaxOutputSize, output,
&samples_per_channel, &num_channels,
&type));
EXPECT_EQ(kMaxOutputSize, samples_per_channel);
EXPECT_EQ(kChannles, num_channels);
}
} // namespace webrtc

View File

@ -35,25 +35,6 @@ class MockAudioDecoderOpus : public AudioDecoderOpus {
MOCK_METHOD0(Init, int());
// Override the following methods such that no actual payload is needed.
// Fake decode: the payload and sample rate are ignored. Fills |decoded| with
// kPacketDuration * channels_ zero-valued samples, marks the output as speech
// and returns that sample count.
int Decode(const uint8_t* encoded, size_t encoded_len,
           int /*sample_rate_hz*/, int16_t* decoded,
           SpeechType* speech_type) override {
  memset(decoded, 0, sizeof(int16_t) * kPacketDuration * channels_);
  *speech_type = kSpeech;
  return kPacketDuration * channels_;
}
// Redundant payloads are handled exactly like primary ones in this mock.
int DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
                    int sample_rate_hz, int16_t* decoded,
                    SpeechType* speech_type) override {
  const int decoded_samples =
      Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type);
  return decoded_samples;
}
int PacketDuration(const uint8_t* encoded,
size_t encoded_len) const override {
return kPacketDuration;
@ -72,6 +53,27 @@ class MockAudioDecoderOpus : public AudioDecoderOpus {
// Read-only accessor for the fec_enabled_ member.
bool fec_enabled() const { return fec_enabled_; }
protected:
// Override the following methods such that no actual payload is needed.
// Fake decode: the payload and sample rate are ignored. Fills |decoded| with
// kPacketDuration * channels_ zero-valued samples, marks the output as speech
// and returns that sample count.
int DecodeInternal(const uint8_t* encoded, size_t encoded_len,
                   int /*sample_rate_hz*/, int16_t* decoded,
                   SpeechType* speech_type) override {
  memset(decoded, 0, sizeof(int16_t) * kPacketDuration * channels_);
  *speech_type = kSpeech;
  return kPacketDuration * channels_;
}
// Redundant payloads are handled exactly like primary ones in this mock.
int DecodeRedundantInternal(const uint8_t* encoded, size_t encoded_len,
                            int sample_rate_hz, int16_t* decoded,
                            SpeechType* speech_type) override {
  const int decoded_samples = DecodeInternal(encoded, encoded_len,
                                             sample_rate_hz, decoded,
                                             speech_type);
  return decoded_samples;
}
private:
bool fec_enabled_;
};

View File

@ -9,6 +9,7 @@
*/
#include <vector>

#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
#include "webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h"
#include "webrtc/test/testsupport/fileutils.h"
@ -52,7 +53,7 @@ static bool ValidateOutFilename(const char* flagname, const string& value) {
return false;
}
DEFINE_string(out_filename, OutputPath() + "neteq4_opus_fec_quality_test.pcm",
DEFINE_string(out_filename, OutputPath() + "neteq_opus_quality_test.pcm",
"Name of output audio file.");
static const bool out_filename_dummy =
@ -113,43 +114,66 @@ DEFINE_bool(fec, true, "Whether to enable FEC for encoding.");
DEFINE_bool(dtx, true, "Whether to enable DTX for encoding.");
class NetEqOpusFecQualityTest : public NetEqQualityTest {
// Define switch for number of sub packets to repacketize.
// Flag validator: accepts only values in the closed range [1, 3] and prints a
// diagnostic for anything else. |flagname| is not used.
static bool ValidateSubPackets(const char* flagname, int32_t value) {
  const bool in_range = !(value < 1 || value > 3);
  if (!in_range) {
    printf("Invalid number of sub packets, should be between 1 and 3.");
  }
  return in_range;
}
// Flag sub_packets, default 1.
DEFINE_int32(sub_packets, 1, "Number of sub packets to repacketize.");
// Initializing this constant attaches ValidateSubPackets to the flag; the
// stored result is not used elsewhere in the visible code.
static const bool sub_packets_dummy =
RegisterFlagValidator(&FLAGS_sub_packets, &ValidateSubPackets);
// Quality-test fixture that produces its payloads with an Opus encoder and an
// Opus repacketizer, both created in SetUp() and released in TearDown().
class NetEqOpusQualityTest : public NetEqQualityTest {
protected:
NetEqOpusFecQualityTest();
NetEqOpusQualityTest();
void SetUp() override;
void TearDown() override;
// Encodes one input block into |payload|. Returns the packet size reported by
// opus_repacketizer_out(), or 0 when the block is discarded.
virtual int EncodeBlock(int16_t* in_data, int block_size_samples,
uint8_t* payload, int max_bytes);
private:
// Encoder instance; created in SetUp(), freed in TearDown().
WebRtcOpusEncInst* opus_encoder_;
// Repacketizer used to merge sub packets; created in SetUp(), destroyed in
// TearDown().
OpusRepacketizer* repacketizer_;
// Samples per channel in one sub packet
// (kOpusBlockDurationMs * kOpusSamplingKhz).
int sub_block_size_samples_;
// The members below are copies of the corresponding command-line flags.
int channels_;
int bit_rate_kbps_;
bool fec_;
bool dtx_;
int target_loss_rate_;
int sub_packets_;
};
NetEqOpusFecQualityTest::NetEqOpusFecQualityTest()
: NetEqQualityTest(kOpusBlockDurationMs, kOpusSamplingKhz,
// Passes kOpusBlockDurationMs * FLAGS_sub_packets as the first base-class
// argument (presumably the block duration in ms -- confirm against
// NetEqQualityTest), selects the mono or stereo Opus decoder from
// FLAGS_channels, and copies the remaining settings from the command-line
// flags. The encoder and repacketizer pointers start out NULL.
NetEqOpusQualityTest::NetEqOpusQualityTest()
: NetEqQualityTest(kOpusBlockDurationMs * FLAGS_sub_packets,
kOpusSamplingKhz,
kOpusSamplingKhz,
(FLAGS_channels == 1) ? kDecoderOpus : kDecoderOpus_2ch,
FLAGS_channels,
FLAGS_in_filename,
FLAGS_out_filename),
opus_encoder_(NULL),
repacketizer_(NULL),
sub_block_size_samples_(kOpusBlockDurationMs * kOpusSamplingKhz),
channels_(FLAGS_channels),
bit_rate_kbps_(FLAGS_bit_rate_kbps),
fec_(FLAGS_fec),
dtx_(FLAGS_dtx),
target_loss_rate_(FLAGS_reported_loss_rate) {
target_loss_rate_(FLAGS_reported_loss_rate),
sub_packets_(FLAGS_sub_packets) {
}
void NetEqOpusFecQualityTest::SetUp() {
void NetEqOpusQualityTest::SetUp() {
// If channels_ == 1, use Opus VOIP mode, otherwise, audio mode.
int app = channels_ == 1 ? 0 : 1;
// Create encoder memory.
WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app);
ASSERT_TRUE(opus_encoder_ != NULL);
ASSERT_TRUE(opus_encoder_);
// Create repacketizer.
repacketizer_ = opus_repacketizer_create();
ASSERT_TRUE(repacketizer_);
// Set bitrate.
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_kbps_ * 1000));
if (fec_) {
@ -163,22 +187,36 @@ void NetEqOpusFecQualityTest::SetUp() {
NetEqQualityTest::SetUp();
}
void NetEqOpusFecQualityTest::TearDown() {
// Releases the Opus encoder and the repacketizer held by this fixture, then
// runs the base-class teardown.
void NetEqOpusQualityTest::TearDown() {
// Free the encoder; a non-zero result is reported as a test failure.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
opus_repacketizer_destroy(repacketizer_);
NetEqQualityTest::TearDown();
}
int NetEqOpusFecQualityTest::EncodeBlock(int16_t* in_data,
int block_size_samples,
uint8_t* payload, int max_bytes) {
int value = WebRtcOpus_Encode(opus_encoder_, in_data,
block_size_samples, max_bytes,
payload);
// Encodes |block_size_samples| samples per channel from |in_data| as
// |sub_packets_| consecutive Opus frames and merges them into one packet that
// is written to |payload| (at most |max_bytes| bytes).
//
// Returns the size of the merged packet in bytes, 0 when the block is
// discarded (no output room, or a frame could not be added to the
// repacketizer), or the negative error code from opus_repacketizer_out().
int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data,
                                      int block_size_samples,
                                      uint8_t* payload, int max_bytes) {
  EXPECT_EQ(block_size_samples, sub_block_size_samples_ * sub_packets_);
  if (max_bytes <= 0 || sub_packets_ <= 0) {
    // Nothing can be encoded; treat it like any other discarded block.
    return 0;
  }

  // opus_repacketizer_cat() does not copy the packet, it only keeps pointers
  // into it, so every sub packet needs storage that stays untouched until
  // opus_repacketizer_out() has run. Encoding all of them into |payload| would
  // overwrite the earlier frames and alias the output buffer.
  std::vector<uint8_t> frames(static_cast<size_t>(max_bytes) * sub_packets_);

  int16_t* pointer = in_data;
  opus_repacketizer_init(repacketizer_);
  for (int idx = 0; idx < sub_packets_; idx++) {
    uint8_t* frame = &frames[static_cast<size_t>(idx) * max_bytes];
    const int frame_bytes = WebRtcOpus_Encode(
        opus_encoder_, pointer, sub_block_size_samples_, max_bytes, frame);
    if (OPUS_OK != opus_repacketizer_cat(repacketizer_, frame, frame_bytes)) {
      opus_repacketizer_init(repacketizer_);
      // If the repacketization fails, we discard this frame.
      return 0;
    }
    pointer += sub_block_size_samples_ * channels_;
  }

  const int value = opus_repacketizer_out(repacketizer_, payload, max_bytes);
  // Reset the repacketizer so it holds no pointers into |frames| once the
  // scratch storage goes out of scope.
  opus_repacketizer_init(repacketizer_);
  EXPECT_GE(value, 0);
  return value;
}
TEST_F(NetEqOpusFecQualityTest, Test) {
// Calls Simulate() with the value of the runtime_ms flag.
TEST_F(NetEqOpusQualityTest, Test) {
Simulate(FLAGS_runtime_ms);
}