Added Opus stereo support
TESTED=git try BUG=webrtc:1360 R=tina.legrand@webrtc.org, turaj@webrtc.org Review URL: https://webrtc-codereview.appspot.com/1868004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4521 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -33,7 +33,7 @@ int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst);
|
|||||||
* Input:
|
* Input:
|
||||||
* - inst : Encoder context
|
* - inst : Encoder context
|
||||||
* - audio_in : Input speech data buffer
|
* - audio_in : Input speech data buffer
|
||||||
* - samples : Samples in audio_in
|
* - samples : Samples per channel in audio_in
|
||||||
* - length_encoded_buffer : Output buffer size
|
* - length_encoded_buffer : Output buffer size
|
||||||
*
|
*
|
||||||
* Output:
|
* Output:
|
||||||
@@ -101,7 +101,7 @@ int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst);
|
|||||||
* always return 1 since we're not using Opus's
|
* always return 1 since we're not using Opus's
|
||||||
* built-in DTX/CNG scheme)
|
* built-in DTX/CNG scheme)
|
||||||
*
|
*
|
||||||
* Return value : >0 - Samples in decoded vector
|
* Return value : >0 - Samples per channel in decoded vector
|
||||||
* -1 - Error
|
* -1 - Error
|
||||||
*/
|
*/
|
||||||
int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
|
int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
|
||||||
|
@@ -393,19 +393,17 @@ AudioDecoderOpus::~AudioDecoderOpus() {
|
|||||||
int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len,
|
int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len,
|
||||||
int16_t* decoded, SpeechType* speech_type) {
|
int16_t* decoded, SpeechType* speech_type) {
|
||||||
int16_t temp_type = 1; // Default is speech.
|
int16_t temp_type = 1; // Default is speech.
|
||||||
assert(channels_ == 1);
|
int16_t ret = WebRtcOpus_DecodeNew(static_cast<OpusDecInst*>(state_), encoded,
|
||||||
// TODO(hlundin): Allow 2 channels when WebRtcOpus_Decode provides both
|
static_cast<int16_t>(encoded_len), decoded,
|
||||||
// channels interleaved.
|
&temp_type);
|
||||||
int16_t ret = WebRtcOpus_Decode(
|
if (ret > 0)
|
||||||
static_cast<OpusDecInst*>(state_),
|
ret *= channels_; // Return total number of samples.
|
||||||
const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)),
|
|
||||||
static_cast<int16_t>(encoded_len), decoded, &temp_type);
|
|
||||||
*speech_type = ConvertSpeechType(temp_type);
|
*speech_type = ConvertSpeechType(temp_type);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int AudioDecoderOpus::Init() {
|
int AudioDecoderOpus::Init() {
|
||||||
return WebRtcOpus_DecoderInit(static_cast<OpusDecInst*>(state_));
|
return WebRtcOpus_DecoderInitNew(static_cast<OpusDecInst*>(state_));
|
||||||
}
|
}
|
||||||
|
|
||||||
int AudioDecoderOpus::PacketDuration(const uint8_t* encoded,
|
int AudioDecoderOpus::PacketDuration(const uint8_t* encoded,
|
||||||
|
@@ -92,10 +92,14 @@ class AudioDecoderTest : public ::testing::Test {
|
|||||||
|
|
||||||
// Encodes and decodes audio. The absolute difference between the input and
|
// Encodes and decodes audio. The absolute difference between the input and
|
||||||
// output is compared vs |tolerance|, and the mean-squared error is compared
|
// output is compared vs |tolerance|, and the mean-squared error is compared
|
||||||
// with |mse|. The encoded stream should contain |expected_bytes|.
|
// with |mse|. The encoded stream should contain |expected_bytes|. For stereo
|
||||||
|
// audio, the absolute difference between the two channels is compared vs
|
||||||
|
// |channel_diff_tolerance|.
|
||||||
void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
|
void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
|
||||||
int delay = 0) {
|
int delay = 0, int channel_diff_tolerance = 0) {
|
||||||
ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
|
ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
|
||||||
|
ASSERT_GE(channel_diff_tolerance, 0) <<
|
||||||
|
"Test must define a channel_diff_tolerance >= 0";
|
||||||
size_t processed_samples = 0u;
|
size_t processed_samples = 0u;
|
||||||
encoded_bytes_ = 0u;
|
encoded_bytes_ = 0u;
|
||||||
InitEncoder();
|
InitEncoder();
|
||||||
@@ -116,22 +120,19 @@ class AudioDecoderTest : public ::testing::Test {
|
|||||||
#if !(defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS))
|
#if !(defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS))
|
||||||
EXPECT_EQ(expected_bytes, encoded_bytes_);
|
EXPECT_EQ(expected_bytes, encoded_bytes_);
|
||||||
CompareInputOutput(processed_samples, tolerance, delay);
|
CompareInputOutput(processed_samples, tolerance, delay);
|
||||||
|
if (channels_ == 2)
|
||||||
|
CompareTwoChannels(processed_samples, channel_diff_tolerance);
|
||||||
EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
|
EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// The absolute difference between the input and output (the first channel) is
|
// The absolute difference between the input and output (the first channel) is
|
||||||
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
|
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
|
||||||
// delays. If |channels_| is 2, the method verifies that the two channels are
|
// delays.
|
||||||
// identical.
|
|
||||||
virtual void CompareInputOutput(size_t num_samples, int tolerance,
|
virtual void CompareInputOutput(size_t num_samples, int tolerance,
|
||||||
int delay) const {
|
int delay) const {
|
||||||
assert(num_samples <= data_length_);
|
assert(num_samples <= data_length_);
|
||||||
for (unsigned int n = 0; n < num_samples - delay; ++n) {
|
for (unsigned int n = 0; n < num_samples - delay; ++n) {
|
||||||
if (channels_ == 2) {
|
|
||||||
ASSERT_EQ(decoded_[channels_ * n], decoded_[channels_ * n + 1]) <<
|
|
||||||
"Stereo samples differ.";
|
|
||||||
}
|
|
||||||
ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) <<
|
ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) <<
|
||||||
"Exit test on first diff; n = " << n;
|
"Exit test on first diff; n = " << n;
|
||||||
DataLog::InsertCell("CodecTest", "input", input_[n]);
|
DataLog::InsertCell("CodecTest", "input", input_[n]);
|
||||||
@@ -140,6 +141,15 @@ class AudioDecoderTest : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The absolute difference between the two channels of a stereo signal is compared vs
|
||||||
|
// |tolerance|.
|
||||||
|
virtual void CompareTwoChannels(size_t num_samples, int tolerance) const {
|
||||||
|
assert(num_samples <= data_length_);
|
||||||
|
for (unsigned int n = 0; n < num_samples; ++n)
|
||||||
|
ASSERT_NEAR(decoded_[channels_ * n], decoded_[channels_ * n + 1],
|
||||||
|
tolerance) << "Stereo samples differ.";
|
||||||
|
}
|
||||||
|
|
||||||
// Calculates mean-squared error between input and output (the first channel).
|
// Calculates mean-squared error between input and output (the first channel).
|
||||||
// The parameter |delay| is used to correct for codec delays.
|
// The parameter |delay| is used to correct for codec delays.
|
||||||
virtual double MseInputOutput(size_t num_samples, int delay) const {
|
virtual double MseInputOutput(size_t num_samples, int delay) const {
|
||||||
@@ -162,13 +172,14 @@ class AudioDecoderTest : public ::testing::Test {
|
|||||||
int16_t* output2 = decoded_ + frame_size_;
|
int16_t* output2 = decoded_ + frame_size_;
|
||||||
InitEncoder();
|
InitEncoder();
|
||||||
size_t enc_len = EncodeFrame(input_, frame_size_, encoded);
|
size_t enc_len = EncodeFrame(input_, frame_size_, encoded);
|
||||||
|
size_t dec_len;
|
||||||
// Copy payload since iSAC fix destroys it during decode.
|
// Copy payload since iSAC fix destroys it during decode.
|
||||||
// Issue: http://code.google.com/p/webrtc/issues/detail?id=845.
|
// Issue: http://code.google.com/p/webrtc/issues/detail?id=845.
|
||||||
// TODO(hlundin): Remove if the iSAC bug gets fixed.
|
// TODO(hlundin): Remove if the iSAC bug gets fixed.
|
||||||
memcpy(encoded_copy, encoded, enc_len);
|
memcpy(encoded_copy, encoded, enc_len);
|
||||||
AudioDecoder::SpeechType speech_type1, speech_type2;
|
AudioDecoder::SpeechType speech_type1, speech_type2;
|
||||||
EXPECT_EQ(0, decoder_->Init());
|
EXPECT_EQ(0, decoder_->Init());
|
||||||
size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
|
dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
|
||||||
EXPECT_EQ(frame_size_ * channels_, dec_len);
|
EXPECT_EQ(frame_size_ * channels_, dec_len);
|
||||||
// Re-init decoder and decode again.
|
// Re-init decoder and decode again.
|
||||||
EXPECT_EQ(0, decoder_->Init());
|
EXPECT_EQ(0, decoder_->Init());
|
||||||
@@ -551,6 +562,51 @@ class AudioDecoderOpusTest : public AudioDecoderTest {
|
|||||||
OpusEncInst* encoder_;
|
OpusEncInst* encoder_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class AudioDecoderOpusStereoTest : public AudioDecoderTest {
|
||||||
|
protected:
|
||||||
|
AudioDecoderOpusStereoTest() : AudioDecoderTest() {
|
||||||
|
channels_ = 2;
|
||||||
|
frame_size_ = 320;
|
||||||
|
data_length_ = 10 * frame_size_;
|
||||||
|
decoder_ = new AudioDecoderOpus(kDecoderOpus_2ch);
|
||||||
|
assert(decoder_);
|
||||||
|
WebRtcOpus_EncoderCreate(&encoder_, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
~AudioDecoderOpusStereoTest() {
|
||||||
|
WebRtcOpus_EncoderFree(encoder_);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void InitEncoder() {}
|
||||||
|
|
||||||
|
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
|
||||||
|
uint8_t* output) {
|
||||||
|
// Create stereo by duplicating each sample in |input|.
|
||||||
|
const int input_stereo_samples = input_len_samples * 2;
|
||||||
|
int16_t* input_stereo = new int16_t[input_stereo_samples];
|
||||||
|
for (size_t i = 0; i < input_len_samples; i++)
|
||||||
|
input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i];
|
||||||
|
// Upsample from 32 to 48 kHz.
|
||||||
|
Resampler rs;
|
||||||
|
rs.Reset(32000, 48000, kResamplerSynchronousStereo);
|
||||||
|
const int max_resamp_len_samples = input_stereo_samples * 3 / 2;
|
||||||
|
int16_t* resamp_input = new int16_t[max_resamp_len_samples];
|
||||||
|
int resamp_len_samples;
|
||||||
|
EXPECT_EQ(0, rs.Push(input_stereo, input_stereo_samples, resamp_input,
|
||||||
|
max_resamp_len_samples, resamp_len_samples));
|
||||||
|
EXPECT_EQ(max_resamp_len_samples, resamp_len_samples);
|
||||||
|
int enc_len_bytes =
|
||||||
|
WebRtcOpus_Encode(encoder_, resamp_input, resamp_len_samples / 2,
|
||||||
|
data_length_, output);
|
||||||
|
EXPECT_GT(enc_len_bytes, 0);
|
||||||
|
delete [] resamp_input;
|
||||||
|
delete [] input_stereo;
|
||||||
|
return enc_len_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
OpusEncInst* encoder_;
|
||||||
|
};
|
||||||
|
|
||||||
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
|
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
|
||||||
int tolerance = 251;
|
int tolerance = 251;
|
||||||
double mse = 1734.0;
|
double mse = 1734.0;
|
||||||
@@ -651,10 +707,11 @@ TEST_F(AudioDecoderG722StereoTest, CreateAndDestroy) {
|
|||||||
|
|
||||||
TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
|
TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
|
||||||
int tolerance = 6176;
|
int tolerance = 6176;
|
||||||
|
int channel_diff_tolerance = 0;
|
||||||
double mse = 238630.0;
|
double mse = 238630.0;
|
||||||
int delay = 22; // Delay from input to output.
|
int delay = 22; // Delay from input to output.
|
||||||
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
|
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
|
||||||
EncodeDecodeTest(data_length_, tolerance, mse, delay);
|
EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance);
|
||||||
ReInitTest();
|
ReInitTest();
|
||||||
EXPECT_FALSE(decoder_->HasDecodePlc());
|
EXPECT_FALSE(decoder_->HasDecodePlc());
|
||||||
}
|
}
|
||||||
@@ -669,6 +726,17 @@ TEST_F(AudioDecoderOpusTest, EncodeDecode) {
|
|||||||
EXPECT_FALSE(decoder_->HasDecodePlc());
|
EXPECT_FALSE(decoder_->HasDecodePlc());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(AudioDecoderOpusStereoTest, EncodeDecode) {
|
||||||
|
int tolerance = 6176;
|
||||||
|
int channel_diff_tolerance = 0;
|
||||||
|
double mse = 238630.0;
|
||||||
|
int delay = 22; // Delay from input to output.
|
||||||
|
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus_2ch));
|
||||||
|
EncodeDecodeTest(1383, tolerance, mse, delay, channel_diff_tolerance);
|
||||||
|
ReInitTest();
|
||||||
|
EXPECT_FALSE(decoder_->HasDecodePlc());
|
||||||
|
}
|
||||||
|
|
||||||
TEST(AudioDecoder, CodecSampleRateHz) {
|
TEST(AudioDecoder, CodecSampleRateHz) {
|
||||||
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu));
|
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu));
|
||||||
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa));
|
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa));
|
||||||
|
@@ -72,9 +72,9 @@ class AudioDecoder {
|
|||||||
virtual ~AudioDecoder() {}
|
virtual ~AudioDecoder() {}
|
||||||
|
|
||||||
// Decodes |encoded_len| bytes from |encoded| and writes the result in
|
// Decodes |encoded_len| bytes from |encoded| and writes the result in
|
||||||
// |decoded|. The number of samples produced is in the return value. If the
|
// |decoded|. The total number of samples produced (all channels) is in
|
||||||
// decoder produced comfort noise, |speech_type| is set to kComfortNoise,
|
// the return value. If the decoder produced comfort noise, |speech_type|
|
||||||
// otherwise it is kSpeech.
|
// is set to kComfortNoise, otherwise it is kSpeech.
|
||||||
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
|
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
|
||||||
int16_t* decoded, SpeechType* speech_type) = 0;
|
int16_t* decoded, SpeechType* speech_type) = 0;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user