Implementing stereo support for G.722
This CL implements stereo support for G.722 through a new class AudioDecoderG722Stereo derived from AudioDecoderG722. Also implementing tests for G.722 stereo. Review URL: https://webrtc-codereview.appspot.com/1073006 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3452 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -45,6 +45,7 @@ bool AudioDecoder::CodecSupported(NetEqDecoder codec_type) {
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_G722
|
||||
case kDecoderG722:
|
||||
case kDecoderG722_2ch:
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_OPUS
|
||||
case kDecoderOpus:
|
||||
@@ -91,6 +92,7 @@ int AudioDecoder::CodecSampleRateHz(NetEqDecoder codec_type) {
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_G722
|
||||
case kDecoderG722:
|
||||
case kDecoderG722_2ch:
|
||||
#endif
|
||||
case kDecoderCNGwb: {
|
||||
return 16000;
|
||||
|
@@ -12,6 +12,8 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <cstring> // memmove
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
|
||||
#ifdef WEBRTC_CODEC_G722
|
||||
@@ -287,6 +289,90 @@ int AudioDecoderG722::PacketDuration(const uint8_t* encoded,
|
||||
// 1/2 encoded byte per sample per channel.
|
||||
return 2 * encoded_len / channels_;
|
||||
}
|
||||
|
||||
// Sets up a two-channel G.722 decoder. The decoder instance created by the
// base class AudioDecoderG722 (|state_|) is reused for the left channel, so
// only the right-channel instance has to be created here.
AudioDecoderG722Stereo::AudioDecoderG722Stereo()
    : AudioDecoderG722(),
      state_left_(state_),  // Alias of the base member |state_|.
      state_right_(NULL) {
  channels_ = 2;
  // Hand the address of |state_right_| to the codec so that it can store the
  // newly created right-channel instance there.
  G722DecInst** right_handle = reinterpret_cast<G722DecInst**>(&state_right_);
  WebRtcG722_CreateDecoder(right_handle);
}
|
||||
|
||||
// Frees the right-channel decoder instance. The left-channel instance
// (|state_left_|) is freed by the base class AudioDecoderG722.
AudioDecoderG722Stereo::~AudioDecoderG722Stereo() {
  G722DecInst* right_decoder = static_cast<G722DecInst*>(state_right_);
  WebRtcG722_FreeDecoder(right_decoder);
}
|
||||
|
||||
int AudioDecoderG722Stereo::Decode(const uint8_t* encoded, size_t encoded_len,
|
||||
int16_t* decoded, SpeechType* speech_type) {
|
||||
int16_t temp_type = 1; // Default is speech.
|
||||
// De-interleave the bit-stream into two separate payloads.
|
||||
uint8_t* encoded_deinterleaved = new uint8_t[encoded_len];
|
||||
SplitStereoPacket(encoded, encoded_len, encoded_deinterleaved);
|
||||
// Decode left and right.
|
||||
int16_t ret = WebRtcG722_Decode(
|
||||
static_cast<G722DecInst*>(state_left_),
|
||||
reinterpret_cast<int16_t*>(encoded_deinterleaved),
|
||||
static_cast<int16_t>(encoded_len / 2), decoded, &temp_type);
|
||||
if (ret >= 0) {
|
||||
int decoded_len = ret;
|
||||
ret = WebRtcG722_Decode(
|
||||
static_cast<G722DecInst*>(state_right_),
|
||||
reinterpret_cast<int16_t*>(&encoded_deinterleaved[encoded_len / 2]),
|
||||
static_cast<int16_t>(encoded_len / 2), &decoded[decoded_len], &temp_type);
|
||||
if (ret == decoded_len) {
|
||||
decoded_len += ret;
|
||||
// Interleave output.
|
||||
for (int k = decoded_len / 2; k < decoded_len; k++) {
|
||||
int16_t temp = decoded[k];
|
||||
memmove(&decoded[2 * k - decoded_len + 2],
|
||||
&decoded[2 * k - decoded_len + 1],
|
||||
(decoded_len - k - 1) * sizeof(int16_t));
|
||||
decoded[2 * k - decoded_len + 1] = temp;
|
||||
}
|
||||
ret = decoded_len; // Return total number of samples.
|
||||
}
|
||||
}
|
||||
*speech_type = ConvertSpeechType(temp_type);
|
||||
delete [] encoded_deinterleaved;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int AudioDecoderG722Stereo::Init() {
|
||||
int ret = WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_right_));
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
return AudioDecoderG722::Init();
|
||||
}
|
||||
|
||||
// Split the stereo packet and place left and right channel after each other
|
||||
// in the output array.
|
||||
void AudioDecoderG722Stereo::SplitStereoPacket(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
uint8_t* encoded_deinterleaved) {
|
||||
assert(encoded);
|
||||
// Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ...,
|
||||
// where "lx" is 4 bits representing left sample number x, and "rx" right
|
||||
// sample. Two samples fit in one byte, represented with |...|.
|
||||
for (size_t i = 0; i + 1 < encoded_len; i += 2) {
|
||||
uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F);
|
||||
encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4);
|
||||
encoded_deinterleaved[i + 1] = right_byte;
|
||||
}
|
||||
|
||||
// Move one byte representing right channel each loop, and place it at the
|
||||
// end of the bytestream vector. After looping the data is reordered to:
|
||||
// |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|,
|
||||
// where N is the total number of samples.
|
||||
for (size_t i = 0; i < encoded_len / 2; i++) {
|
||||
uint8_t right_byte = encoded_deinterleaved[i + 1];
|
||||
memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2],
|
||||
encoded_len - i - 2);
|
||||
encoded_deinterleaved[encoded_len - 1] = right_byte;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Opus
|
||||
|
@@ -187,6 +187,29 @@ class AudioDecoderG722 : public AudioDecoder {
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722);
|
||||
};
|
||||
|
||||
class AudioDecoderG722Stereo : public AudioDecoderG722 {
|
||||
public:
|
||||
AudioDecoderG722Stereo();
|
||||
virtual ~AudioDecoderG722Stereo();
|
||||
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
|
||||
int16_t* decoded, SpeechType* speech_type);
|
||||
virtual int Init();
|
||||
|
||||
private:
|
||||
// Splits the stereo-interleaved payload in |encoded| into separate payloads
|
||||
// for left and right channels. The separated payloads are written to
|
||||
// |encoded_deinterleaved|, which must hold at least |encoded_len| samples.
|
||||
// The left channel starts at offset 0, while the right channel starts at
|
||||
// offset encoded_len / 2 into |encoded_deinterleaved|.
|
||||
void SplitStereoPacket(const uint8_t* encoded, size_t encoded_len,
|
||||
uint8_t* encoded_deinterleaved);
|
||||
|
||||
void* const state_left_;
|
||||
void* state_right_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722Stereo);
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef WEBRTC_CODEC_OPUS
|
||||
|
@@ -39,6 +39,7 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
frame_size_(0),
|
||||
data_length_(0),
|
||||
encoded_bytes_(0),
|
||||
channels_(1),
|
||||
decoder_(NULL) {
|
||||
input_file_ = webrtc::test::ProjectRootPath() +
|
||||
"resources/audio_coding/testfile32kHz.pcm";
|
||||
@@ -51,7 +52,7 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
|
||||
input_ = new int16_t[data_length_];
|
||||
encoded_ = new uint8_t[data_length_ * 2];
|
||||
decoded_ = new int16_t[data_length_];
|
||||
decoded_ = new int16_t[data_length_ * channels_];
|
||||
// Open input file.
|
||||
input_fp_ = fopen(input_file_.c_str(), "rb");
|
||||
ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_;
|
||||
@@ -104,9 +105,10 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
&encoded_[encoded_bytes_]);
|
||||
AudioDecoder::SpeechType speech_type;
|
||||
size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len,
|
||||
&decoded_[processed_samples],
|
||||
&decoded_[processed_samples *
|
||||
channels_],
|
||||
&speech_type);
|
||||
EXPECT_EQ(frame_size_, dec_len);
|
||||
EXPECT_EQ(frame_size_ * channels_, dec_len);
|
||||
encoded_bytes_ += enc_len;
|
||||
processed_samples += frame_size_;
|
||||
}
|
||||
@@ -115,29 +117,35 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
|
||||
}
|
||||
|
||||
// The absolute difference between the input and output is compared vs
|
||||
// |tolerance|. The parameter |delay| is used to correct for codec delays.
|
||||
void CompareInputOutput(size_t num_samples, int tolerance, int delay) const {
|
||||
// The absolute difference between the input and output (the first channel) is
|
||||
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
|
||||
// delays. If |channels_| is 2, the method verifies that the two channels are
|
||||
// identical.
|
||||
virtual void CompareInputOutput(size_t num_samples, int tolerance,
|
||||
int delay) const {
|
||||
assert(num_samples <= data_length_);
|
||||
for (unsigned int n = 0; n < num_samples - delay; ++n) {
|
||||
ASSERT_NEAR(input_[n], decoded_[n + delay], tolerance) <<
|
||||
if (channels_ == 2) {
|
||||
ASSERT_EQ(decoded_[channels_ * n], decoded_[channels_ * n + 1]) <<
|
||||
"Stereo samples differ.";
|
||||
}
|
||||
ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) <<
|
||||
"Exit test on first diff; n = " << n;
|
||||
DataLog::InsertCell("CodecTest", "input", input_[n]);
|
||||
DataLog::InsertCell("CodecTest", "output", decoded_[n]);
|
||||
DataLog::InsertCell("CodecTest", "output", decoded_[channels_ * n]);
|
||||
DataLog::NextRow("CodecTest");
|
||||
}
|
||||
}
|
||||
|
||||
// Calculates mean-squared error between input and output. The parameter
|
||||
// |delay| is used to correct for codec delays.
|
||||
double MseInputOutput(size_t num_samples, int delay) const {
|
||||
// Calculates mean-squared error between input and output (the first channel).
|
||||
// The parameter |delay| is used to correct for codec delays.
|
||||
virtual double MseInputOutput(size_t num_samples, int delay) const {
|
||||
assert(num_samples <= data_length_);
|
||||
if (num_samples == 0) return 0.0;
|
||||
|
||||
double squared_sum = 0.0;
|
||||
for (unsigned int n = 0; n < num_samples - delay; ++n) {
|
||||
squared_sum += (input_[n] - decoded_[n + delay]) *
|
||||
(input_[n] - decoded_[n + delay]);
|
||||
squared_sum += (input_[n] - decoded_[channels_ * n + delay]) *
|
||||
(input_[n] - decoded_[channels_ * n + delay]);
|
||||
}
|
||||
return squared_sum / (num_samples - delay);
|
||||
}
|
||||
@@ -158,11 +166,11 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
AudioDecoder::SpeechType speech_type1, speech_type2;
|
||||
EXPECT_EQ(0, decoder_->Init());
|
||||
size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
|
||||
EXPECT_EQ(frame_size_, dec_len);
|
||||
EXPECT_EQ(frame_size_ * channels_, dec_len);
|
||||
// Re-init decoder and decode again.
|
||||
EXPECT_EQ(0, decoder_->Init());
|
||||
dec_len = decoder_->Decode(encoded_copy, enc_len, output2, &speech_type2);
|
||||
EXPECT_EQ(frame_size_, dec_len);
|
||||
EXPECT_EQ(frame_size_ * channels_, dec_len);
|
||||
for (unsigned int n = 0; n < frame_size_; ++n) {
|
||||
ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
|
||||
}
|
||||
@@ -193,6 +201,7 @@ class AudioDecoderTest : public ::testing::Test {
|
||||
size_t frame_size_;
|
||||
size_t data_length_;
|
||||
size_t encoded_bytes_;
|
||||
size_t channels_;
|
||||
AudioDecoder* decoder_;
|
||||
};
|
||||
|
||||
@@ -461,6 +470,46 @@ class AudioDecoderG722Test : public AudioDecoderTest {
|
||||
G722EncInst* encoder_;
|
||||
};
|
||||
|
||||
class AudioDecoderG722StereoTest : public AudioDecoderG722Test {
|
||||
protected:
|
||||
AudioDecoderG722StereoTest() : AudioDecoderG722Test() {
|
||||
channels_ = 2;
|
||||
// Delete the |decoder_| that was created by AudioDecoderG722Test and
|
||||
// create an AudioDecoderG722Stereo object instead.
|
||||
delete decoder_;
|
||||
decoder_ = new AudioDecoderG722Stereo;
|
||||
assert(decoder_);
|
||||
}
|
||||
|
||||
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
|
||||
uint8_t* output) {
|
||||
uint8_t* temp_output = new uint8_t[data_length_ * 2];
|
||||
// Encode a mono payload using the base test class.
|
||||
int mono_enc_len_bytes =
|
||||
AudioDecoderG722Test::EncodeFrame(input, input_len_samples,
|
||||
temp_output);
|
||||
// The bit-stream consists of 4-bit samples:
|
||||
// +--------+--------+--------+
|
||||
// | s0 s1 | s2 s3 | s4 s5 |
|
||||
// +--------+--------+--------+
|
||||
//
|
||||
// Duplicate them to the |output| such that the stereo stream becomes:
|
||||
// +--------+--------+--------+
|
||||
// | s0 s0 | s1 s1 | s2 s2 |
|
||||
// +--------+--------+--------+
|
||||
EXPECT_LE(mono_enc_len_bytes * 2, static_cast<int>(data_length_ * 2));
|
||||
uint8_t* output_ptr = output;
|
||||
for (int i = 0; i < mono_enc_len_bytes; ++i) {
|
||||
*output_ptr = (temp_output[i] & 0xF0) + (temp_output[i] >> 4);
|
||||
++output_ptr;
|
||||
*output_ptr = (temp_output[i] << 4) + (temp_output[i] & 0x0F);
|
||||
++output_ptr;
|
||||
}
|
||||
delete [] temp_output;
|
||||
return mono_enc_len_bytes * 2;
|
||||
}
|
||||
};
|
||||
|
||||
class AudioDecoderOpusTest : public AudioDecoderTest {
|
||||
protected:
|
||||
AudioDecoderOpusTest() : AudioDecoderTest() {
|
||||
@@ -593,6 +642,20 @@ TEST_F(AudioDecoderG722Test, EncodeDecode) {
|
||||
EXPECT_FALSE(decoder_->HasDecodePlc());
|
||||
}
|
||||
|
||||
// The fixture itself creates and destroys the stereo decoder; the body only
// checks that the stereo G.722 decoder type is reported as supported.
TEST_F(AudioDecoderG722StereoTest, CreateAndDestroy) {
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
}
|
||||
|
||||
// Runs the encode/decode round trip and the re-initialization test with the
// stereo decoder, and checks that the decoder reports no decoder-side PLC.
TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
  const int kTolerance = 6176;   // Largest accepted absolute sample error.
  const double kMse = 238630.0;  // Largest accepted mean-squared error.
  const int kDelay = 22;         // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
  EncodeDecodeTest(data_length_, kTolerance, kMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
|
||||
|
||||
TEST_F(AudioDecoderOpusTest, EncodeDecode) {
|
||||
int tolerance = 6176;
|
||||
double mse = 238630.0;
|
||||
@@ -622,7 +685,7 @@ TEST(AudioDecoder, CodecSampleRateHz) {
|
||||
EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz_2ch));
|
||||
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_5ch));
|
||||
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722));
|
||||
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch));
|
||||
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch));
|
||||
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderRED));
|
||||
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderAVT));
|
||||
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderCNGnb));
|
||||
@@ -656,7 +719,7 @@ TEST(AudioDecoder, CodecSupported) {
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz_2ch));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_5ch));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722));
|
||||
EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderRED));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderAVT));
|
||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGnb));
|
||||
|
Reference in New Issue
Block a user