From ecbe0aa543575664fd2fa05e6d9ea5aa81c85720 Mon Sep 17 00:00:00 2001 From: "minyue@webrtc.org" Date: Mon, 12 Aug 2013 06:48:09 +0000 Subject: [PATCH] Added Opus stereo support TESTED=git try BUG=webrtc:1360 R=tina.legrand@webrtc.org, turaj@webrtc.org Review URL: https://webrtc-codereview.appspot.com/1868004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4521 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/opus/interface/opus_interface.h | 4 +- .../audio_coding/neteq4/audio_decoder_impl.cc | 14 ++- .../neteq4/audio_decoder_unittest.cc | 88 ++++++++++++++++--- .../neteq4/interface/audio_decoder.h | 6 +- 4 files changed, 89 insertions(+), 23 deletions(-) diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h index 15d42c223..1370aff06 100644 --- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h +++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h @@ -33,7 +33,7 @@ int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst); * Input: * - inst : Encoder context * - audio_in : Input speech data buffer - * - samples : Samples in audio_in + * - samples : Samples per channel in audio_in * - length_encoded_buffer : Output buffer size * * Output: @@ -101,7 +101,7 @@ int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst); * always return 1 since we're not using Opus's * built-in DTX/CNG scheme) * - * Return value : >0 - Samples in decoded vector + * Return value : >0 - Samples per channel in decoded vector * -1 - Error */ int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded, diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc index b80c829c3..6d25e42a3 100644 --- a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc @@ -393,19 +393,17 @@ AudioDecoderOpus::~AudioDecoderOpus() { int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { int16_t temp_type = 1; // Default is speech. - assert(channels_ == 1); - // TODO(hlundin): Allow 2 channels when WebRtcOpus_Decode provides both - // channels interleaved. - int16_t ret = WebRtcOpus_Decode( - static_cast(state_), - const_cast(reinterpret_cast(encoded)), - static_cast(encoded_len), decoded, &temp_type); + int16_t ret = WebRtcOpus_DecodeNew(static_cast(state_), encoded, + static_cast(encoded_len), decoded, + &temp_type); + if (ret > 0) + ret *= channels_; // Return total number of samples. *speech_type = ConvertSpeechType(temp_type); return ret; } int AudioDecoderOpus::Init() { - return WebRtcOpus_DecoderInit(static_cast(state_)); + return WebRtcOpus_DecoderInitNew(static_cast(state_)); } int AudioDecoderOpus::PacketDuration(const uint8_t* encoded, diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc index 0f0f64d70..e4885dfdd 100644 --- a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc @@ -92,10 +92,14 @@ class AudioDecoderTest : public ::testing::Test { // Encodes and decodes audio. The absolute difference between the input and // output is compared vs |tolerance|, and the mean-squared error is compared - // with |mse|. The encoded stream should contain |expected_bytes|. + // with |mse|. The encoded stream should contain |expected_bytes|. For stereo + // audio, the absolute difference between the two channels is compared vs + // |channel_diff_tolerance|. void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse, - int delay = 0) { + int delay = 0, int channel_diff_tolerance = 0) { ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0"; + ASSERT_GE(channel_diff_tolerance, 0) << + "Test must define a channel_diff_tolerance >= 0"; size_t processed_samples = 0u; encoded_bytes_ = 0u; InitEncoder(); @@ -116,22 +120,19 @@ class AudioDecoderTest : public ::testing::Test { #if !(defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS)) EXPECT_EQ(expected_bytes, encoded_bytes_); CompareInputOutput(processed_samples, tolerance, delay); + if (channels_ == 2) + CompareTwoChannels(processed_samples, channel_diff_tolerance); EXPECT_LE(MseInputOutput(processed_samples, delay), mse); #endif } // The absolute difference between the input and output (the first channel) is // compared vs |tolerance|. The parameter |delay| is used to correct for codec - // delays. If |channels_| is 2, the method verifies that the two channels are - // identical. + // delays. virtual void CompareInputOutput(size_t num_samples, int tolerance, int delay) const { assert(num_samples <= data_length_); for (unsigned int n = 0; n < num_samples - delay; ++n) { - if (channels_ == 2) { - ASSERT_EQ(decoded_[channels_ * n], decoded_[channels_ * n + 1]) << - "Stereo samples differ."; - } ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) << "Exit test on first diff; n = " << n; DataLog::InsertCell("CodecTest", "input", input_[n]); @@ -140,6 +141,15 @@ class AudioDecoderTest : public ::testing::Test { } } + // The absolute difference between the two channels in a stereo is compared vs + // |tolerance|. + virtual void CompareTwoChannels(size_t num_samples, int tolerance) const { + assert(num_samples <= data_length_); + for (unsigned int n = 0; n < num_samples; ++n) + ASSERT_NEAR(decoded_[channels_ * n], decoded_[channels_ * n + 1], + tolerance) << "Stereo samples differ."; + } + // Calculates mean-squared error between input and output (the first channel). // The parameter |delay| is used to correct for codec delays. virtual double MseInputOutput(size_t num_samples, int delay) const { @@ -162,13 +172,14 @@ class AudioDecoderTest : public ::testing::Test { int16_t* output2 = decoded_ + frame_size_; InitEncoder(); size_t enc_len = EncodeFrame(input_, frame_size_, encoded); + size_t dec_len; // Copy payload since iSAC fix destroys it during decode. // Issue: http://code.google.com/p/webrtc/issues/detail?id=845. // TODO(hlundin): Remove if the iSAC bug gets fixed. memcpy(encoded_copy, encoded, enc_len); AudioDecoder::SpeechType speech_type1, speech_type2; EXPECT_EQ(0, decoder_->Init()); - size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1); + dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1); EXPECT_EQ(frame_size_ * channels_, dec_len); // Re-init decoder and decode again. EXPECT_EQ(0, decoder_->Init()); @@ -551,6 +562,51 @@ class AudioDecoderOpusTest : public AudioDecoderTest { OpusEncInst* encoder_; }; +class AudioDecoderOpusStereoTest : public AudioDecoderTest { + protected: + AudioDecoderOpusStereoTest() : AudioDecoderTest() { + channels_ = 2; + frame_size_ = 320; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderOpus(kDecoderOpus_2ch); + assert(decoder_); + WebRtcOpus_EncoderCreate(&encoder_, 2); + } + + ~AudioDecoderOpusStereoTest() { + WebRtcOpus_EncoderFree(encoder_); + } + + virtual void InitEncoder() {} + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + // Create stereo by duplicating each sample in |input|. + const int input_stereo_samples = input_len_samples * 2; + int16_t* input_stereo = new int16_t[input_stereo_samples]; + for (size_t i = 0; i < input_len_samples; i++) + input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i]; + // Upsample from 32 to 48 kHz. + Resampler rs; + rs.Reset(32000, 48000, kResamplerSynchronousStereo); + const int max_resamp_len_samples = input_stereo_samples * 3 / 2; + int16_t* resamp_input = new int16_t[max_resamp_len_samples]; + int resamp_len_samples; + EXPECT_EQ(0, rs.Push(input_stereo, input_stereo_samples, resamp_input, + max_resamp_len_samples, resamp_len_samples)); + EXPECT_EQ(max_resamp_len_samples, resamp_len_samples); + int enc_len_bytes = + WebRtcOpus_Encode(encoder_, resamp_input, resamp_len_samples / 2, + data_length_, output); + EXPECT_GT(enc_len_bytes, 0); + delete [] resamp_input; + delete [] input_stereo; + return enc_len_bytes; + } + + OpusEncInst* encoder_; +}; + TEST_F(AudioDecoderPcmUTest, EncodeDecode) { int tolerance = 251; double mse = 1734.0; @@ -651,10 +707,11 @@ TEST_F(AudioDecoderG722StereoTest, CreateAndDestroy) { TEST_F(AudioDecoderG722StereoTest, EncodeDecode) { int tolerance = 6176; + int channel_diff_tolerance = 0; double mse = 238630.0; int delay = 22; // Delay from input to output. EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch)); - EncodeDecodeTest(data_length_, tolerance, mse, delay); + EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance); ReInitTest(); EXPECT_FALSE(decoder_->HasDecodePlc()); } @@ -669,6 +726,17 @@ TEST_F(AudioDecoderOpusTest, EncodeDecode) { EXPECT_FALSE(decoder_->HasDecodePlc()); } +TEST_F(AudioDecoderOpusStereoTest, EncodeDecode) { + int tolerance = 6176; + int channel_diff_tolerance = 0; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus_2ch)); + EncodeDecodeTest(1383, tolerance, mse, delay, channel_diff_tolerance); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + TEST(AudioDecoder, CodecSampleRateHz) { EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu)); EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa)); diff --git a/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h index 6e6b8e85e..f89f887f9 100644 --- a/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h +++ b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h @@ -72,9 +72,9 @@ class AudioDecoder { virtual ~AudioDecoder() {} // Decodes |encode_len| bytes from |encoded| and writes the result in - // |decoded|. The number of samples produced is in the return value. If the - // decoder produced comfort noise, |speech_type| is set to kComfortNoise, - // otherwise it is kSpeech. + // |decoded|. The number of samples from all channels produced is in + // the return value. If the decoder produced comfort noise, |speech_type| + // is set to kComfortNoise, otherwise it is kSpeech. virtual int Decode(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) = 0;