From 4321f175f1d2e6cfe1e56ece176c258f17101e83 Mon Sep 17 00:00:00 2001 From: "minyue@webrtc.org" Date: Tue, 9 Dec 2014 13:27:39 +0000 Subject: [PATCH] Adding DTX to WebRTC Opus wrapper This is a step toward adding Opus DTX support in WebRTC. Note that opus_encode() returns 1 byte when in DTX, in which case the packet does not need to be transmitted. See https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__encoder.html We still transmit the first 1-byte packet to keep the decoder in sync. BUG=webrtc:1014 R=henrik.lundin@webrtc.org Review URL: https://webrtc-codereview.appspot.com/13219004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7846 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/opus/interface/opus_interface.h | 28 +- .../audio_coding/codecs/opus/opus_inst.h | 2 + .../audio_coding/codecs/opus/opus_interface.c | 108 ++++-- .../audio_coding/codecs/opus/opus_unittest.cc | 352 ++++++++++++++---- .../neteq/test/neteq_opus_fec_quality_test.cc | 8 +- 5 files changed, 390 insertions(+), 108 deletions(-) diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h index 38ff00dbd..d788af7ed 100644 --- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h +++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h @@ -39,7 +39,7 @@ int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst); * Output: * - encoded : Output compressed data buffer * - * Return value : >0 - Length (in bytes) of coded data + * Return value : >=0 - Length (in bytes) of coded data * -1 - Error */ int16_t WebRtcOpus_Encode(OpusEncInst* inst, @@ -130,6 +130,32 @@ int16_t WebRtcOpus_EnableFec(OpusEncInst* inst); */ int16_t WebRtcOpus_DisableFec(OpusEncInst* inst); +/**************************************************************************** + * WebRtcOpus_EnableDtx() + * + * This function enables Opus internal DTX for encoding. 
+ * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_DisableDtx() + * + * This function disables Opus internal DTX for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst); + /* * WebRtcOpus_SetComplexity(...) * diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h index 4597ab8eb..373db392a 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h +++ b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h @@ -15,12 +15,14 @@ struct WebRtcOpusEncInst { OpusEncoder* encoder; + int in_dtx_mode; }; struct WebRtcOpusDecInst { OpusDecoder* decoder; int prev_decoded_samples; int channels; + int in_dtx_mode; }; diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c index ba7fe13ac..1b9986449 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c +++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c @@ -43,6 +43,7 @@ int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) { state->encoder = opus_encoder_create(48000, channels, application, &error); + state->in_dtx_mode = 0; if (error == OPUS_OK && state->encoder != NULL) { *inst = state; return 0; @@ -80,9 +81,21 @@ int16_t WebRtcOpus_Encode(OpusEncInst* inst, encoded, length_encoded_buffer); - if (res > 0) { + if (res == 1) { + // Indicates DTX since the packet has nothing but a header. In principle, + // there is no need to send this packet. However, we do transmit the first + // occurrence to let the decoder know that the encoder enters DTX mode. 
+ if (inst->in_dtx_mode) { + return 0; + } else { + inst->in_dtx_mode = 1; + return 1; + } + } else if (res > 1) { + inst->in_dtx_mode = 0; return res; } + return -1; } @@ -140,6 +153,22 @@ int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { } } +int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { + if (inst) { + return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { + if (inst) { + return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0)); + } else { + return -1; + } +} + int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { if (inst) { return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity)); @@ -165,6 +194,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) { /* Creation of memory all ok. */ state->channels = channels; state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize; + state->in_dtx_mode = 0; *inst = state; return 0; } @@ -195,53 +225,61 @@ int WebRtcOpus_DecoderChannels(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE); if (error == OPUS_OK) { + inst->in_dtx_mode = 0; return 0; } return -1; } +/* For decoder to determine if it is to output speech or comfort noise. */ +static int16_t DetermineAudioType(OpusDecInst* inst, int16_t encoded_bytes) { + // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps + // to be so if the following |encoded_byte| are 0 or 1. + if (encoded_bytes == 0 && inst->in_dtx_mode) { + return 2; // Comfort noise. + } else if (encoded_bytes == 1) { + inst->in_dtx_mode = 1; + return 2; // Comfort noise. + } else { + inst->in_dtx_mode = 0; + return 0; // Speech. + } +} + /* |frame_size| is set to maximum Opus frame size in the normal case, and * is set to the number of samples needed for PLC in case of losses. * It is up to the caller to make sure the value is correct. 
*/ -static int DecodeNative(OpusDecoder* inst, const uint8_t* encoded, +static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int frame_size, - int16_t* decoded, int16_t* audio_type) { - int res = opus_decode( - inst, encoded, encoded_bytes, (opus_int16*)decoded, frame_size, 0); + int16_t* decoded, int16_t* audio_type, int decode_fec) { + int res = opus_decode(inst->decoder, encoded, encoded_bytes, + (opus_int16*)decoded, frame_size, decode_fec); - /* TODO(tlegrand): set to DTX for zero-length packets? */ - *audio_type = 0; + if (res <= 0) + return -1; - if (res > 0) { - return res; - } - return -1; -} + *audio_type = DetermineAudioType(inst, encoded_bytes); -static int DecodeFec(OpusDecoder* inst, const uint8_t* encoded, - int16_t encoded_bytes, int frame_size, - int16_t* decoded, int16_t* audio_type) { - int res = opus_decode( - inst, encoded, encoded_bytes, (opus_int16*)decoded, frame_size, 1); - - /* TODO(tlegrand): set to DTX for zero-length packets? */ - *audio_type = 0; - - if (res > 0) { - return res; - } - return -1; + return res; } int16_t WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - int decoded_samples = DecodeNative(inst->decoder, - encoded, - encoded_bytes, - kWebRtcOpusMaxFrameSizePerChannel, - decoded, - audio_type); + int decoded_samples; + + if (encoded_bytes == 0) { + *audio_type = DetermineAudioType(inst, encoded_bytes); + decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1); + } else { + decoded_samples = DecodeNative(inst, + encoded, + encoded_bytes, + kWebRtcOpusMaxFrameSizePerChannel, + decoded, + audio_type, + 0); + } if (decoded_samples < 0) { return -1; } @@ -264,8 +302,8 @@ int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? 
plc_samples : kWebRtcOpusMaxFrameSizePerChannel; - decoded_samples = DecodeNative(inst->decoder, NULL, 0, plc_samples, - decoded, &audio_type); + decoded_samples = DecodeNative(inst, NULL, 0, plc_samples, + decoded, &audio_type, 0); if (decoded_samples < 0) { return -1; } @@ -285,8 +323,8 @@ int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, fec_samples = opus_packet_get_samples_per_frame(encoded, 48000); - decoded_samples = DecodeFec(inst->decoder, encoded, encoded_bytes, - fec_samples, decoded, audio_type); + decoded_samples = DecodeNative(inst, encoded, encoded_bytes, + fec_samples, decoded, audio_type, 1); if (decoded_samples < 0) { return -1; } diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc index 9c34f5169..433bbbc0d 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -12,34 +12,50 @@ #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h" +#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h" #include "webrtc/test/testsupport/fileutils.h" namespace webrtc { -// Number of samples in a 60 ms stereo frame, sampled at 48 kHz. -const int kOpusMaxFrameSamples = 48 * 60 * 2; +using test::AudioLoop; + // Maximum number of bytes in output bitstream. const size_t kMaxBytes = 1000; +// Sample rate of Opus. +const int kOpusRateKhz = 48; // Number of samples-per-channel in a 20 ms frame, sampled at 48 kHz. -const int kOpus20msFrameSamples = 48 * 20; +const int kOpus20msFrameSamples = kOpusRateKhz * 20; // Number of samples-per-channel in a 10 ms frame, sampled at 48 kHz. 
-const int kOpus10msFrameSamples = 48 * 10; +const int kOpus10msFrameSamples = kOpusRateKhz * 10; class OpusTest : public ::testing::Test { protected: OpusTest(); - virtual void SetUp(); void TestSetMaxPlaybackRate(opus_int32 expect, int32_t set); + void TestDtxEffect(bool dtx); + + // Prepare |speech_data_| for encoding, read from a hard-coded file. + // After preparation, |speech_data_.GetNextBlock()| returns a pointer to a + // block of |block_length_ms| milliseconds. The data is looped every + // |loop_length_ms| milliseconds. + void PrepareSpeechData(int channel, int block_length_ms, int loop_length_ms); + + int EncodeDecode(WebRtcOpusEncInst* encoder, + const int16_t* input_audio, + const int input_samples, + WebRtcOpusDecInst* decoder, + int16_t* output_audio, + int16_t* audio_type); WebRtcOpusEncInst* opus_mono_encoder_; WebRtcOpusEncInst* opus_stereo_encoder_; WebRtcOpusDecInst* opus_mono_decoder_; WebRtcOpusDecInst* opus_stereo_decoder_; - int16_t speech_data_[kOpusMaxFrameSamples]; - int16_t output_data_[kOpusMaxFrameSamples]; + AudioLoop speech_data_; uint8_t bitstream_[kMaxBytes]; + int encoded_bytes_; }; OpusTest::OpusTest() @@ -49,17 +65,16 @@ OpusTest::OpusTest() opus_stereo_decoder_(NULL) { } -void OpusTest::SetUp() { - FILE* input_file; +void OpusTest::PrepareSpeechData(int channel, int block_length_ms, + int loop_length_ms) { const std::string file_name = webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm"); - input_file = fopen(file_name.c_str(), "rb"); - ASSERT_TRUE(input_file != NULL); - ASSERT_EQ(kOpusMaxFrameSamples, - static_cast(fread(speech_data_, sizeof(int16_t), - kOpusMaxFrameSamples, input_file))); - fclose(input_file); - input_file = NULL; + if (loop_length_ms < block_length_ms) { + loop_length_ms = block_length_ms; + } + EXPECT_TRUE(speech_data_.Init(file_name, + loop_length_ms * kOpusRateKhz * channel, + block_length_ms * kOpusRateKhz * channel)); } void OpusTest::TestSetMaxPlaybackRate(opus_int32 expect, int32_t 
set) { @@ -76,6 +91,155 @@ void OpusTest::TestSetMaxPlaybackRate(opus_int32 expect, int32_t set) { EXPECT_EQ(expect, bandwidth); } +int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder, + const int16_t* input_audio, + const int input_samples, + WebRtcOpusDecInst* decoder, + int16_t* output_audio, + int16_t* audio_type) { + encoded_bytes_ = WebRtcOpus_Encode(encoder, + input_audio, + input_samples, kMaxBytes, + bitstream_); + return WebRtcOpus_Decode(decoder, bitstream_, + encoded_bytes_, output_audio, + audio_type); +} + +// Test if encoder/decoder can enter DTX mode properly and do not enter DTX when +// they should not. This test is signal dependent. +void OpusTest::TestDtxEffect(bool dtx) { + PrepareSpeechData(1, 20, 2000); + + // Create encoder memory. + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1)); + + // Set bitrate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_mono_encoder_, 32000)); + + // Set input audio as silence. + int16_t silence[kOpus20msFrameSamples] = {0}; + + // Setting DTX. + EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_mono_encoder_) : + WebRtcOpus_DisableDtx(opus_mono_encoder_)); + + int16_t audio_type; + int16_t output_data_decode[kOpus20msFrameSamples]; + + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + // If not DTX, it should never enter DTX mode. If DTX, we do not care since + // whether it enters DTX depends on the signal type. + if (!dtx) { + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + // We input some silent segments. In DTX mode, the encoder will stop sending. + // However, DTX may happen after a while. 
+ for (int i = 0; i < 21; ++i) { + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + if (!dtx) { + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + // For this input signal, DTX happens now. + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + if (dtx) { + EXPECT_EQ(1, encoded_bytes_); // Send 1 byte. + EXPECT_EQ(1, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + } else { + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + + // DTX mode is maintained 400 ms. + for (int i = 0; i < 20; ++i) { + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + if (dtx) { + EXPECT_EQ(0, encoded_bytes_); // Send 0 byte. + EXPECT_EQ(1, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + } else { + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + // Quit DTX after 400 ms + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. 
+ + // Enters DTX again immediately. + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + if (dtx) { + EXPECT_EQ(1, encoded_bytes_); // Send 1 byte. + EXPECT_EQ(1, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + } else { + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + + silence[0] = 10000; + if (dtx) { + // Verify that encoder/decoder can jump out from DTX mode. + EXPECT_EQ(kOpus20msFrameSamples, + EncodeDecode(opus_mono_encoder_, silence, + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); + EXPECT_GT(encoded_bytes_, 1); + EXPECT_EQ(0, opus_mono_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_mono_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_mono_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_mono_decoder_)); +} + // Test failing Create. TEST_F(OpusTest, OpusCreateFail) { // Test to see that an invalid pointer is caught. @@ -110,6 +274,8 @@ TEST_F(OpusTest, OpusCreateFree) { } TEST_F(OpusTest, OpusEncodeDecodeMono) { + PrepareSpeechData(1, 20, 20); + // Create encoder memory. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1)); @@ -121,16 +287,12 @@ TEST_F(OpusTest, OpusEncodeDecodeMono) { EXPECT_EQ(1, WebRtcOpus_DecoderChannels(opus_mono_decoder_)); // Encode & decode. 
- int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode[kOpusMaxFrameSamples]; - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + int16_t output_data_decode[kOpus20msFrameSamples]; EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_mono_decoder_, bitstream_, - encoded_bytes, output_data_decode, - &audio_type)); + EncodeDecode(opus_mono_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); // Free memory. EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_mono_encoder_)); @@ -138,6 +300,8 @@ TEST_F(OpusTest, OpusEncodeDecodeMono) { } TEST_F(OpusTest, OpusEncodeDecodeStereo) { + PrepareSpeechData(2, 20, 20); + // Create encoder memory. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); @@ -149,16 +313,12 @@ TEST_F(OpusTest, OpusEncodeDecodeStereo) { EXPECT_EQ(2, WebRtcOpus_DecoderChannels(opus_stereo_decoder_)); // Encode & decode. - int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode[kOpusMaxFrameSamples]; - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + int16_t output_data_decode[kOpus20msFrameSamples * 2]; EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, bitstream_, - encoded_bytes, output_data_decode, - &audio_type)); + EncodeDecode(opus_stereo_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_stereo_decoder_, + output_data_decode, &audio_type)); // Free memory. EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); @@ -207,28 +367,25 @@ TEST_F(OpusTest, OpusSetComplexity) { // Encode and decode one frame (stereo), initialize the decoder and // decode once more. TEST_F(OpusTest, OpusDecodeInit) { + PrepareSpeechData(2, 20, 20); + // Create encoder memory. 
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); // Encode & decode. - int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode[kOpusMaxFrameSamples]; - - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + int16_t output_data_decode[kOpus20msFrameSamples * 2]; EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, bitstream_, - encoded_bytes, output_data_decode, - &audio_type)); + EncodeDecode(opus_stereo_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_stereo_decoder_, + output_data_decode, &audio_type)); EXPECT_EQ(0, WebRtcOpus_DecoderInit(opus_stereo_decoder_)); EXPECT_EQ(kOpus20msFrameSamples, WebRtcOpus_Decode(opus_stereo_decoder_, bitstream_, - encoded_bytes, output_data_decode, + encoded_bytes_, output_data_decode, &audio_type)); // Free memory. @@ -255,6 +412,61 @@ TEST_F(OpusTest, OpusEnableDisableFec) { EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); } +TEST_F(OpusTest, OpusEnableDisableDtx) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_EnableDtx(opus_mono_encoder_)); + EXPECT_EQ(-1, WebRtcOpus_DisableDtx(opus_stereo_encoder_)); + + // Create encoder memory, try with different bitrates. + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); + + opus_int32 dtx; + + // DTX is off by default. + opus_encoder_ctl(opus_mono_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + opus_encoder_ctl(opus_stereo_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Test to enable DTX. 
+ EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_mono_encoder_)); + opus_encoder_ctl(opus_mono_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(1, dtx); + + EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_stereo_encoder_)); + opus_encoder_ctl(opus_stereo_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(1, dtx); + + // Test to disable DTX. + EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_mono_encoder_)); + opus_encoder_ctl(opus_mono_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_stereo_encoder_)); + opus_encoder_ctl(opus_stereo_encoder_->encoder, + OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_mono_encoder_)); + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); +} + +TEST_F(OpusTest, OpusDtxOff) { + TestDtxEffect(false); +} + +TEST_F(OpusTest, OpusDtxOn) { + TestDtxEffect(true); +} + TEST_F(OpusTest, OpusSetPacketLossRate) { // Test without creating encoder memory. EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_mono_encoder_, 50)); @@ -303,6 +515,8 @@ TEST_F(OpusTest, OpusSetMaxPlaybackRate) { // PLC in mono mode. TEST_F(OpusTest, OpusDecodePlcMono) { + PrepareSpeechData(1, 20, 20); + // Create encoder memory. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1)); @@ -314,19 +528,15 @@ TEST_F(OpusTest, OpusDecodePlcMono) { EXPECT_EQ(1, WebRtcOpus_DecoderChannels(opus_mono_decoder_)); // Encode & decode. 
- int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode[kOpusMaxFrameSamples]; - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + int16_t output_data_decode[kOpus20msFrameSamples]; EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_mono_decoder_, bitstream_, - encoded_bytes, output_data_decode, - &audio_type)); + EncodeDecode(opus_mono_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_mono_decoder_, + output_data_decode, &audio_type)); // Call decoder PLC. - int16_t plc_buffer[kOpusMaxFrameSamples]; + int16_t plc_buffer[kOpus20msFrameSamples]; EXPECT_EQ(kOpus20msFrameSamples, WebRtcOpus_DecodePlc(opus_mono_decoder_, plc_buffer, 1)); @@ -337,6 +547,8 @@ TEST_F(OpusTest, OpusDecodePlcMono) { // PLC in stereo mode. TEST_F(OpusTest, OpusDecodePlcStereo) { + PrepareSpeechData(2, 20, 20); + // Create encoder memory. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); @@ -348,19 +560,15 @@ TEST_F(OpusTest, OpusDecodePlcStereo) { EXPECT_EQ(2, WebRtcOpus_DecoderChannels(opus_stereo_decoder_)); // Encode & decode. - int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode[kOpusMaxFrameSamples]; - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + int16_t output_data_decode[kOpus20msFrameSamples * 2]; EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, bitstream_, - encoded_bytes, output_data_decode, - &audio_type)); + EncodeDecode(opus_stereo_encoder_, speech_data_.GetNextBlock(), + kOpus20msFrameSamples, opus_stereo_decoder_, + output_data_decode, &audio_type)); // Call decoder PLC. 
- int16_t plc_buffer[kOpusMaxFrameSamples]; + int16_t plc_buffer[kOpus20msFrameSamples * 2]; EXPECT_EQ(kOpus20msFrameSamples, WebRtcOpus_DecodePlc(opus_stereo_decoder_, plc_buffer, 1)); @@ -371,27 +579,29 @@ TEST_F(OpusTest, OpusDecodePlcStereo) { // Duration estimation. TEST_F(OpusTest, OpusDurationEstimation) { + PrepareSpeechData(2, 20, 20); + // Create. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); - int16_t encoded_bytes; - - // 10 ms. - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus10msFrameSamples, kMaxBytes, - bitstream_); + // 10 ms. We use only first 10 ms of a 20 ms block. + encoded_bytes_ = WebRtcOpus_Encode(opus_stereo_encoder_, + speech_data_.GetNextBlock(), + kOpus10msFrameSamples, kMaxBytes, + bitstream_); EXPECT_EQ(kOpus10msFrameSamples, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); + encoded_bytes_)); // 20 ms - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); + encoded_bytes_ = WebRtcOpus_Encode(opus_stereo_encoder_, + speech_data_.GetNextBlock(), + kOpus20msFrameSamples, kMaxBytes, + bitstream_); EXPECT_EQ(kOpus20msFrameSamples, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); + encoded_bytes_)); // Free memory. 
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); diff --git a/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc b/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc index dee99b874..eddbffd89 100644 --- a/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc +++ b/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc @@ -111,6 +111,8 @@ static const bool runtime_dummy = DEFINE_bool(fec, true, "Whether to enable FEC for encoding."); +DEFINE_bool(dtx, true, "Whether to enable DTX for encoding."); + class NetEqOpusFecQualityTest : public NetEqQualityTest { protected: NetEqOpusFecQualityTest(); @@ -123,6 +125,7 @@ class NetEqOpusFecQualityTest : public NetEqQualityTest { int channels_; int bit_rate_kbps_; bool fec_; + bool dtx_; int target_loss_rate_; }; @@ -137,6 +140,7 @@ NetEqOpusFecQualityTest::NetEqOpusFecQualityTest() channels_(FLAGS_channels), bit_rate_kbps_(FLAGS_bit_rate_kbps), fec_(FLAGS_fec), + dtx_(FLAGS_dtx), target_loss_rate_(FLAGS_reported_loss_rate) { } @@ -149,6 +153,9 @@ void NetEqOpusFecQualityTest::SetUp() { if (fec_) { EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_)); } + if (dtx_) { + EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_)); + } EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, target_loss_rate_)); NetEqQualityTest::SetUp(); @@ -166,7 +173,6 @@ int NetEqOpusFecQualityTest::EncodeBlock(int16_t* in_data, int value = WebRtcOpus_Encode(opus_encoder_, in_data, block_size_samples, max_bytes, payload); - EXPECT_GT(value, 0); return value; }