diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder.cc
index 5a745ff13..3a53a1905 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder.cc
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder.cc
@@ -45,6 +45,7 @@ bool AudioDecoder::CodecSupported(NetEqDecoder codec_type) {
 #endif
 #ifdef WEBRTC_CODEC_G722
     case kDecoderG722:
+    case kDecoderG722_2ch:
 #endif
 #ifdef WEBRTC_CODEC_OPUS
     case kDecoderOpus:
@@ -91,6 +92,7 @@ int AudioDecoder::CodecSampleRateHz(NetEqDecoder codec_type) {
 #endif
 #ifdef WEBRTC_CODEC_G722
     case kDecoderG722:
+    case kDecoderG722_2ch:
 #endif
     case kDecoderCNGwb: {
       return 16000;
diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
index 1d000ff06..248556aa2 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc
@@ -12,6 +12,8 @@
 
 #include <assert.h>
 
+#include <vector>
+
 #include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
 #include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
 #ifdef WEBRTC_CODEC_G722
@@ -287,6 +289,98 @@ int AudioDecoderG722::PacketDuration(const uint8_t* encoded,
   // 1/2 encoded byte per sample per channel.
   return 2 * encoded_len / channels_;
 }
+
+AudioDecoderG722Stereo::AudioDecoderG722Stereo()
+    : AudioDecoderG722(),
+      state_left_(state_),  // Base member |state_| is used for left channel.
+      state_right_(NULL) {
+  channels_ = 2;
+  // |state_left_| already created by the base class AudioDecoderG722.
+  WebRtcG722_CreateDecoder(reinterpret_cast<G722DecInst**>(&state_right_));
+}
+
+AudioDecoderG722Stereo::~AudioDecoderG722Stereo() {
+  // |state_left_| will be freed by the base class AudioDecoderG722.
+  WebRtcG722_FreeDecoder(static_cast<G722DecInst*>(state_right_));
+}
+
+// Decodes a stereo G.722 payload into |decoded| as interleaved samples
+// (left first). Returns the total number of samples written for both
+// channels, or the value returned by WebRtcG722_Decode if either channel
+// fails or the two channels produce a different number of samples.
+int AudioDecoderG722Stereo::Decode(const uint8_t* encoded, size_t encoded_len,
+                                   int16_t* decoded, SpeechType* speech_type) {
+  int16_t temp_type = 1;  // Default is speech.
+  // Each channel owns half of the payload; a trailing odd byte is ignored.
+  const size_t channel_len = encoded_len / 2;
+  // De-interleave the bit-stream into two separate payloads. The buffer is
+  // never empty, so the address of its first element is always valid.
+  std::vector<uint8_t> encoded_deinterleaved(encoded_len > 0 ? encoded_len : 1);
+  SplitStereoPacket(encoded, encoded_len, &encoded_deinterleaved[0]);
+  // Decode left and right.
+  int16_t ret = WebRtcG722_Decode(
+      static_cast<G722DecInst*>(state_left_),
+      reinterpret_cast<int16_t*>(&encoded_deinterleaved[0]),
+      static_cast<int16_t>(channel_len), decoded, &temp_type);
+  if (ret >= 0) {
+    const int samples_per_channel = ret;
+    ret = WebRtcG722_Decode(
+        static_cast<G722DecInst*>(state_right_),
+        reinterpret_cast<int16_t*>(&encoded_deinterleaved[channel_len]),
+        static_cast<int16_t>(channel_len), &decoded[samples_per_channel],
+        &temp_type);
+    if (ret == samples_per_channel) {
+      // |decoded| holds all left samples followed by all right samples.
+      // Interleave in a single pass. The left samples are copied first since
+      // their slots are overwritten before they have all been read.
+      const std::vector<int16_t> left(decoded, decoded + samples_per_channel);
+      const int16_t* right = &decoded[samples_per_channel];
+      for (int k = 0; k < samples_per_channel; ++k) {
+        // Read the right sample before slot 2k + 1 is written; for the last
+        // sample pair these are the same slot.
+        const int16_t right_sample = right[k];
+        decoded[2 * k] = left[k];
+        decoded[2 * k + 1] = right_sample;
+      }
+      // Return total number of samples.
+      ret = static_cast<int16_t>(2 * samples_per_channel);
+    }
+  }
+  *speech_type = ConvertSpeechType(temp_type);
+  return ret;
+}
+
+// Initializes the right-channel decoder state and then the base class, which
+// handles |state_| (the left channel). Returns the first non-zero result.
+int AudioDecoderG722Stereo::Init() {
+  int ret = WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_right_));
+  if (ret != 0) {
+    return ret;
+  }
+  return AudioDecoderG722::Init();
+}
+
+// Split the stereo packet and place left and right channel after each other
+// in the output array.
+void AudioDecoderG722Stereo::SplitStereoPacket(const uint8_t* encoded,
+                                               size_t encoded_len,
+                                               uint8_t* encoded_deinterleaved) {
+  assert(encoded);
+  assert(encoded_deinterleaved);
+  // The input holds 4 bits/sample, ordered |l1 r1| |l2 r2| |l3 r3| ..., where
+  // "lx" is 4 bits representing left sample number x, and "rx" right sample.
+  // Two samples fit in one byte, represented with |...|. Each pair of input
+  // bytes is regrouped into one left byte |l1 l2| and one right byte |r1 r2|,
+  // which are written straight to their final positions:
+  // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|,
+  // where N is the number of samples per channel.
+  uint8_t* left = encoded_deinterleaved;
+  uint8_t* right = encoded_deinterleaved + encoded_len / 2;
+  for (size_t i = 0; i + 1 < encoded_len; i += 2) {
+    *left++ = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4);
+    *right++ = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F);
+  }
+}
 #endif
 
 // Opus
diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h
index 7aaa69af1..b74aed897 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h
@@ -187,6 +187,33 @@ class AudioDecoderG722 : public AudioDecoder {
  private:
   DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722);
 };
+
+// Decoder for two-channel G.722. The payload interleaves the 4-bit samples
+// of the two channels; each channel is decoded with its own decoder state
+// and the decoded samples are returned interleaved, left channel first.
+class AudioDecoderG722Stereo : public AudioDecoderG722 {
+ public:
+  AudioDecoderG722Stereo();
+  virtual ~AudioDecoderG722Stereo();
+  virtual int Decode(const uint8_t* encoded, size_t encoded_len,
+                     int16_t* decoded, SpeechType* speech_type);
+  virtual int Init();
+
+ private:
+  // Splits the stereo-interleaved payload in |encoded| into separate payloads
+  // for left and right channels. The separated payloads are written to
+  // |encoded_deinterleaved|, which must hold at least |encoded_len| bytes and
+  // must not overlap |encoded|. The left channel starts at offset 0, while
+  // the right channel starts at offset encoded_len / 2 into
+  // |encoded_deinterleaved|. A trailing odd byte in |encoded| is ignored.
+  void SplitStereoPacket(const uint8_t* encoded, size_t encoded_len,
+                         uint8_t* encoded_deinterleaved);
+
+  void* const state_left_;
+  void* state_right_;
+
+  DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722Stereo);
+};
 #endif
 
 #ifdef WEBRTC_CODEC_OPUS
diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
index f91438fc6..3e1637be2 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc
@@ -39,6 +39,7 @@ class AudioDecoderTest : public ::testing::Test {
       frame_size_(0),
       data_length_(0),
       encoded_bytes_(0),
+      channels_(1),
       decoder_(NULL) {
     input_file_ = webrtc::test::ProjectRootPath() +
         "resources/audio_coding/testfile32kHz.pcm";
@@ -51,7 +52,7 @@ class AudioDecoderTest : public ::testing::Test {
     ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
     input_ = new int16_t[data_length_];
     encoded_ = new uint8_t[data_length_ * 2];
-    decoded_ = new int16_t[data_length_];
+    decoded_ = new int16_t[data_length_ * channels_];
     // Open input file.
     input_fp_ = fopen(input_file_.c_str(), "rb");
     ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_;
@@ -104,9 +105,10 @@ class AudioDecoderTest : public ::testing::Test {
                                    &encoded_[encoded_bytes_]);
       AudioDecoder::SpeechType speech_type;
       size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len,
-                                        &decoded_[processed_samples],
+                                        &decoded_[processed_samples *
+                                                  channels_],
                                         &speech_type);
-      EXPECT_EQ(frame_size_, dec_len);
+      EXPECT_EQ(frame_size_ * channels_, dec_len);
       encoded_bytes_ += enc_len;
       processed_samples += frame_size_;
     }
@@ -115,29 +117,37 @@ class AudioDecoderTest : public ::testing::Test {
     EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
   }
 
-  // The absolute difference between the input and output is compared vs
-  // |tolerance|. The parameter |delay| is used to correct for codec delays.
-  void CompareInputOutput(size_t num_samples, int tolerance, int delay) const {
+  // The absolute difference between the input and output (the first channel)
+  // is compared vs |tolerance|. The parameter |delay| is the codec delay in
+  // samples per channel. If |channels_| is 2, the method verifies that the
+  // two channels are identical.
+  virtual void CompareInputOutput(size_t num_samples, int tolerance,
+                                  int delay) const {
     assert(num_samples <= data_length_);
     for (unsigned int n = 0; n < num_samples - delay; ++n) {
-      ASSERT_NEAR(input_[n], decoded_[n + delay], tolerance) <<
+      if (channels_ == 2) {
+        ASSERT_EQ(decoded_[channels_ * n], decoded_[channels_ * n + 1]) <<
+            "Stereo samples differ.";
+      }
+      ASSERT_NEAR(input_[n], decoded_[channels_ * (n + delay)], tolerance) <<
           "Exit test on first diff; n = " << n;
       DataLog::InsertCell("CodecTest", "input", input_[n]);
-      DataLog::InsertCell("CodecTest", "output", decoded_[n]);
+      DataLog::InsertCell("CodecTest", "output", decoded_[channels_ * n]);
       DataLog::NextRow("CodecTest");
     }
   }
 
-  // Calculates mean-squared error between input and output. The parameter
-  // |delay| is used to correct for codec delays.
-  double MseInputOutput(size_t num_samples, int delay) const {
+  // Calculates mean-squared error between input and output (the first channel).
+  // The parameter |delay| is used to correct for codec delays.
+  virtual double MseInputOutput(size_t num_samples, int delay) const {
     assert(num_samples <= data_length_);
     if (num_samples == 0) return 0.0;
-
     double squared_sum = 0.0;
     for (unsigned int n = 0; n < num_samples - delay; ++n) {
-      squared_sum += (input_[n] - decoded_[n + delay]) *
-          (input_[n] - decoded_[n + delay]);
+      // Compute the difference as a double; squaring an int difference of
+      // two int16_t values could overflow.
+      const double diff = input_[n] - decoded_[channels_ * (n + delay)];
+      squared_sum += diff * diff;
     }
     return squared_sum / (num_samples - delay);
   }
@@ -158,11 +168,11 @@ class AudioDecoderTest : public ::testing::Test {
     AudioDecoder::SpeechType speech_type1, speech_type2;
     EXPECT_EQ(0, decoder_->Init());
     size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
-    EXPECT_EQ(frame_size_, dec_len);
+    EXPECT_EQ(frame_size_ * channels_, dec_len);
     // Re-init decoder and decode again.
     EXPECT_EQ(0, decoder_->Init());
     dec_len = decoder_->Decode(encoded_copy, enc_len, output2, &speech_type2);
-    EXPECT_EQ(frame_size_, dec_len);
+    EXPECT_EQ(frame_size_ * channels_, dec_len);
     for (unsigned int n = 0; n < frame_size_; ++n) {
       ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
     }
@@ -193,6 +203,7 @@ class AudioDecoderTest : public ::testing::Test {
   size_t frame_size_;
   size_t data_length_;
   size_t encoded_bytes_;
+  size_t channels_;
   AudioDecoder* decoder_;
 };
 
@@ -461,6 +472,49 @@ class AudioDecoderG722Test : public AudioDecoderTest {
   G722EncInst* encoder_;
 };
 
+// Fixture for AudioDecoderG722Stereo. The stereo payload is produced by
+// encoding mono and duplicating every 4-bit sample into both channels, so the
+// two decoded channels are expected to be identical.
+class AudioDecoderG722StereoTest : public AudioDecoderG722Test {
+ protected:
+  AudioDecoderG722StereoTest() : AudioDecoderG722Test() {
+    channels_ = 2;
+    // Delete the |decoder_| that was created by AudioDecoderG722Test and
+    // create an AudioDecoderG722Stereo object instead.
+    delete decoder_;
+    decoder_ = new AudioDecoderG722Stereo;
+    assert(decoder_);
+  }
+
+  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
+                          uint8_t* output) {
+    uint8_t* temp_output = new uint8_t[data_length_ * 2];
+    // Encode a mono payload using the base test class.
+    int mono_enc_len_bytes =
+        AudioDecoderG722Test::EncodeFrame(input, input_len_samples,
+                                          temp_output);
+    // The bit-stream consists of 4-bit samples:
+    //    +--------+--------+--------+
+    //    | s0  s1 | s2  s3 | s4  s5 |
+    //    +--------+--------+--------+
+    //
+    // Duplicate them to the |output| such that the stereo stream becomes:
+    //    +--------+--------+--------+
+    //    | s0  s0 | s1  s1 | s2  s2 |
+    //    +--------+--------+--------+
+    EXPECT_LE(mono_enc_len_bytes * 2, static_cast<int>(data_length_ * 2));
+    uint8_t* output_ptr = output;
+    for (int i = 0; i < mono_enc_len_bytes; ++i) {
+      *output_ptr = (temp_output[i] & 0xF0) + (temp_output[i] >> 4);
+      ++output_ptr;
+      *output_ptr = (temp_output[i] << 4) + (temp_output[i] & 0x0F);
+      ++output_ptr;
+    }
+    delete [] temp_output;
+    return mono_enc_len_bytes * 2;
+  }
+};
+
 class AudioDecoderOpusTest : public AudioDecoderTest {
  protected:
   AudioDecoderOpusTest() : AudioDecoderTest() {
@@ -593,6 +647,20 @@ TEST_F(AudioDecoderG722Test, EncodeDecode) {
   EXPECT_FALSE(decoder_->HasDecodePlc());
 }
 
+TEST_F(AudioDecoderG722StereoTest, CreateAndDestroy) {
+  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
+}
+
+TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
+  int tolerance = 6176;
+  double mse = 238630.0;
+  int delay = 22;  // Delay from input to output.
+  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
+  EncodeDecodeTest(data_length_, tolerance, mse, delay);
+  ReInitTest();
+  EXPECT_FALSE(decoder_->HasDecodePlc());
+}
+
 TEST_F(AudioDecoderOpusTest, EncodeDecode) {
   int tolerance = 6176;
   double mse = 238630.0;
@@ -622,7 +690,7 @@ TEST(AudioDecoder, CodecSampleRateHz) {
   EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz_2ch));
   EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_5ch));
   EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722));
-  EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch));
+  EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch));
   EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderRED));
   EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderAVT));
   EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderCNGnb));
@@ -656,7 +724,7 @@ TEST(AudioDecoder, CodecSupported) {
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz_2ch));
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_5ch));
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722));
-  EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
+  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderRED));
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderAVT));
   EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGnb));