Cleaning up audio_decoder_test.cc and adding ResampleInputAudioFile

This CL contains some cleaning up and refactoring of
audio_decoder_test.cc. A new class ResampleInputAudioFile is created
and used in the tests.

BUG=3926
R=kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/31779004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7528 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
henrik.lundin@webrtc.org
2014-10-27 12:58:18 +00:00
parent 0552356fda
commit a37f1dd6b8
4 changed files with 221 additions and 114 deletions

View File

@@ -14,9 +14,9 @@
#include <stdlib.h>
#include <string>
#include <vector>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/common_audio/resampler/include/resampler.h"
#ifdef WEBRTC_CODEC_CELT
#include "webrtc/modules/audio_coding/codecs/celt/include/celt_interface.h"
#endif
@@ -28,45 +28,92 @@
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "webrtc/system_wrappers/interface/data_log.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
namespace {
// The absolute difference between the input and output (the first channel) is
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
// delays.
void CompareInputOutput(const std::vector<int16_t>& input,
const std::vector<int16_t>& output,
size_t num_samples,
size_t channels,
int tolerance,
int delay) {
ASSERT_LE(num_samples, input.size());
ASSERT_LE(num_samples * channels, output.size());
for (unsigned int n = 0; n < num_samples - delay; ++n) {
ASSERT_NEAR(input[n], output[channels * n + delay], tolerance)
<< "Exit test on first diff; n = " << n;
DataLog::InsertCell("CodecTest", "input", input[n]);
DataLog::InsertCell("CodecTest", "output", output[channels * n]);
DataLog::NextRow("CodecTest");
}
}
// Walks the first |samples_per_channel| frames of the interleaved |output|
// buffer and requires that, in every frame, the sample of channel 0 and the
// sample of channel 1 lie within |tolerance| of each other. The buffer must
// hold at least two channels; the current test is failed otherwise.
void CompareTwoChannels(const std::vector<int16_t>& output,
                        size_t samples_per_channel,
                        size_t channels,
                        int tolerance) {
  ASSERT_GE(channels, 2u);
  ASSERT_LE(samples_per_channel * channels, output.size());
  unsigned int frame = 0;
  while (frame < samples_per_channel) {
    // Index of the first-channel sample in this frame; the second channel
    // follows immediately after it.
    const size_t first_channel_index = channels * frame;
    ASSERT_NEAR(output[first_channel_index],
                output[first_channel_index + 1],
                tolerance)
        << "Stereo samples differ.";
    ++frame;
  }
}
// Calculates the mean-squared error between |input| and the first channel of
// |output|.
//
// |num_samples| is the number of samples per channel to consider and
// |channels| is the stride between consecutive first-channel samples in
// |output|. |delay| corrects for codec delay: input[n] is paired with
// output[channels * n + delay], and the mean is taken over the
// |num_samples| - |delay| pairs that remain.
//
// Returns 0.0 when |num_samples| is zero. Otherwise |delay| must be
// non-negative and smaller than |num_samples|, and both vectors must be large
// enough for the requested range (checked with assert).
double MseInputOutput(const std::vector<int16_t>& input,
                      const std::vector<int16_t>& output,
                      size_t num_samples,
                      size_t channels,
                      int delay) {
  // Handle the empty case before the precondition checks: with zero samples
  // no |delay| can satisfy "delay < num_samples", so checking first would make
  // this branch unreachable in builds with assertions enabled.
  if (num_samples == 0)
    return 0.0;
  assert(delay >= 0);
  assert(static_cast<size_t>(delay) < num_samples);
  assert(num_samples <= input.size());
  assert(num_samples * channels <= output.size());
  const size_t compared_samples = num_samples - static_cast<size_t>(delay);
  double squared_sum = 0.0;
  for (size_t n = 0; n < compared_samples; ++n) {
    // Square in double precision: the difference of two int16_t values can
    // reach +/-65535, and its square does not fit in a 32-bit int.
    const double diff = static_cast<double>(input[n]) -
                        static_cast<double>(output[channels * n + delay]);
    squared_sum += diff * diff;
  }
  return squared_sum / compared_samples;
}
} // namespace
class AudioDecoderTest : public ::testing::Test {
protected:
AudioDecoderTest()
: input_fp_(NULL),
input_(NULL),
: input_audio_(webrtc::test::ProjectRootPath() +
"resources/audio_coding/testfile32kHz.pcm",
32000),
codec_input_rate_hz_(32000), // Legacy default value.
encoded_(NULL),
decoded_(NULL),
frame_size_(0),
data_length_(0),
encoded_bytes_(0),
channels_(1),
output_timestamp_(0),
decoder_(NULL) {
input_file_ = webrtc::test::ProjectRootPath() +
"resources/audio_coding/testfile32kHz.pcm";
}
decoder_(NULL) {}
virtual ~AudioDecoderTest() {}
virtual void SetUp() {
if (audio_encoder_)
codec_input_rate_hz_ = audio_encoder_->sample_rate_hz();
// Create arrays.
ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
input_ = new int16_t[data_length_];
// Longest encoded data is produced by PCM16b with 2 bytes per sample.
encoded_ = new uint8_t[data_length_ * 2];
decoded_ = new int16_t[data_length_ * channels_];
// Open input file.
input_fp_ = fopen(input_file_.c_str(), "rb");
ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_;
// Read data to |input_|.
ASSERT_EQ(data_length_,
fread(input_, sizeof(int16_t), data_length_, input_fp_)) <<
"Could not read enough data from file";
// Logging to view input and output in Matlab.
// Use 'gyp -Denable_data_logging=1' to enable logging.
DataLog::CreateLog();
@@ -78,15 +125,9 @@ class AudioDecoderTest : public ::testing::Test {
virtual void TearDown() {
delete decoder_;
decoder_ = NULL;
// Close input file.
fclose(input_fp_);
// Delete arrays.
delete [] input_;
input_ = NULL;
delete [] encoded_;
encoded_ = NULL;
delete [] decoded_;
decoded_ = NULL;
// Close log.
DataLog::ReturnLog();
}
@@ -127,13 +168,23 @@ class AudioDecoderTest : public ::testing::Test {
encoded_bytes_ = 0u;
InitEncoder();
EXPECT_EQ(0, decoder_->Init());
std::vector<int16_t> input;
std::vector<int16_t> decoded;
while (processed_samples + frame_size_ <= data_length_) {
size_t enc_len = EncodeFrame(&input_[processed_samples], frame_size_,
&encoded_[encoded_bytes_]);
// Extend input vector with |frame_size_|.
input.resize(input.size() + frame_size_, 0);
// Read from input file.
ASSERT_GE(input.size() - processed_samples, frame_size_);
ASSERT_TRUE(input_audio_.Read(
frame_size_, codec_input_rate_hz_, &input[processed_samples]));
size_t enc_len = EncodeFrame(
&input[processed_samples], frame_size_, &encoded_[encoded_bytes_]);
// Make sure that frame_size_ * channels_ samples are allocated and free.
decoded.resize((processed_samples + frame_size_) * channels_, 0);
AudioDecoder::SpeechType speech_type;
size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len,
&decoded_[processed_samples *
channels_],
size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_],
enc_len,
&decoded[processed_samples * channels_],
&speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
encoded_bytes_ += enc_len;
@@ -145,65 +196,36 @@ class AudioDecoderTest : public ::testing::Test {
if (expected_bytes) {
EXPECT_EQ(expected_bytes, encoded_bytes_);
}
CompareInputOutput(processed_samples, tolerance, delay);
CompareInputOutput(
input, decoded, processed_samples, channels_, tolerance, delay);
if (channels_ == 2)
CompareTwoChannels(processed_samples, channel_diff_tolerance);
EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
}
// The absolute difference between the input and output (the first channel) is
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
// delays.
virtual void CompareInputOutput(size_t num_samples, int tolerance,
int delay) const {
assert(num_samples <= data_length_);
for (unsigned int n = 0; n < num_samples - delay; ++n) {
ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) <<
"Exit test on first diff; n = " << n;
DataLog::InsertCell("CodecTest", "input", input_[n]);
DataLog::InsertCell("CodecTest", "output", decoded_[channels_ * n]);
DataLog::NextRow("CodecTest");
}
}
// The absolute difference between the two channels in a stereo is compared vs
// |tolerance|.
virtual void CompareTwoChannels(size_t samples_per_channel,
int tolerance) const {
assert(samples_per_channel <= data_length_);
for (unsigned int n = 0; n < samples_per_channel; ++n)
ASSERT_NEAR(decoded_[channels_ * n], decoded_[channels_ * n + 1],
tolerance) << "Stereo samples differ.";
}
// Calculates mean-squared error between input and output (the first channel).
// The parameter |delay| is used to correct for codec delays.
virtual double MseInputOutput(size_t num_samples, int delay) const {
assert(num_samples <= data_length_);
if (num_samples == 0) return 0.0;
double squared_sum = 0.0;
for (unsigned int n = 0; n < num_samples - delay; ++n) {
squared_sum += (input_[n] - decoded_[channels_ * n + delay]) *
(input_[n] - decoded_[channels_ * n + delay]);
}
return squared_sum / (num_samples - delay);
CompareTwoChannels(
decoded, processed_samples, channels_, channel_diff_tolerance);
EXPECT_LE(
MseInputOutput(input, decoded, processed_samples, channels_, delay),
mse);
}
// Encodes a payload and decodes it twice with decoder re-init before each
// decode. Verifies that the decoded result is the same.
void ReInitTest() {
int16_t* output1 = decoded_;
int16_t* output2 = decoded_ + frame_size_;
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded_);
scoped_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_);
size_t dec_len;
AudioDecoder::SpeechType speech_type1, speech_type2;
EXPECT_EQ(0, decoder_->Init());
dec_len = decoder_->Decode(encoded_, enc_len, output1, &speech_type1);
scoped_ptr<int16_t[]> output1(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded_, enc_len, output1.get(), &speech_type1);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// Re-init decoder and decode again.
EXPECT_EQ(0, decoder_->Init());
dec_len = decoder_->Decode(encoded_, enc_len, output2, &speech_type2);
scoped_ptr<int16_t[]> output2(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded_, enc_len, output2.get(), &speech_type2);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
for (unsigned int n = 0; n < frame_size_; ++n) {
ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
@@ -214,24 +236,26 @@ class AudioDecoderTest : public ::testing::Test {
// Call DecodePlc and verify that the correct number of samples is produced.
void DecodePlcTest() {
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded_);
scoped_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_);
AudioDecoder::SpeechType speech_type;
EXPECT_EQ(0, decoder_->Init());
scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len =
decoder_->Decode(encoded_, enc_len, decoded_, &speech_type);
decoder_->Decode(encoded_, enc_len, output.get(), &speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// Call DecodePlc and verify that we get one frame of data.
// (Overwrite the output from the above Decode call, but that does not
// matter.)
dec_len = decoder_->DecodePlc(1, decoded_);
dec_len = decoder_->DecodePlc(1, output.get());
EXPECT_EQ(frame_size_ * channels_, dec_len);
}
std::string input_file_;
FILE* input_fp_;
int16_t* input_;
test::ResampleInputAudioFile input_audio_;
int codec_input_rate_hz_;
uint8_t* encoded_;
int16_t* decoded_;
size_t frame_size_;
size_t data_length_;
size_t encoded_bytes_;
@@ -268,6 +292,7 @@ class AudioDecoderPcmATest : public AudioDecoderTest {
class AudioDecoderPcm16BTest : public AudioDecoderTest {
protected:
AudioDecoderPcm16BTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 8000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcm16B(kDecoderPCM16B);
@@ -287,6 +312,7 @@ class AudioDecoderPcm16BTest : public AudioDecoderTest {
class AudioDecoderIlbcTest : public AudioDecoderTest {
protected:
AudioDecoderIlbcTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 8000;
frame_size_ = 240;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIlbc;
@@ -316,14 +342,18 @@ class AudioDecoderIlbcTest : public AudioDecoderTest {
// not return any data. It simply resets a few states and returns 0.
void DecodePlcTest() {
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded_);
scoped_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_);
AudioDecoder::SpeechType speech_type;
EXPECT_EQ(0, decoder_->Init());
scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len =
decoder_->Decode(encoded_, enc_len, decoded_, &speech_type);
decoder_->Decode(encoded_, enc_len, output.get(), &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Simply call DecodePlc and verify that we get 0 as return value.
EXPECT_EQ(0, decoder_->DecodePlc(1, decoded_));
EXPECT_EQ(0, decoder_->DecodePlc(1, output.get()));
}
iLBC_encinst_t* encoder_;
@@ -332,6 +362,7 @@ class AudioDecoderIlbcTest : public AudioDecoderTest {
class AudioDecoderIsacFloatTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFloatTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
input_size_ = 160;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
@@ -369,6 +400,7 @@ class AudioDecoderIsacFloatTest : public AudioDecoderTest {
class AudioDecoderIsacSwbTest : public AudioDecoderTest {
protected:
AudioDecoderIsacSwbTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 32000;
input_size_ = 320;
frame_size_ = 960;
data_length_ = 10 * frame_size_;
@@ -419,6 +451,7 @@ class AudioDecoderIsacFbTest : public AudioDecoderIsacSwbTest {
class AudioDecoderIsacFixTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFixTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
input_size_ = 160;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
@@ -456,6 +489,7 @@ class AudioDecoderIsacFixTest : public AudioDecoderTest {
class AudioDecoderG722Test : public AudioDecoderTest {
protected:
AudioDecoderG722Test() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722;
@@ -598,6 +632,7 @@ class AudioDecoderCeltStereoTest : public AudioDecoderTest {
class AudioDecoderOpusTest : public AudioDecoderTest {
protected:
AudioDecoderOpusTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 48000;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderOpus(kDecoderOpus);
@@ -609,29 +644,6 @@ class AudioDecoderOpusTest : public AudioDecoderTest {
WebRtcOpus_EncoderFree(encoder_);
}
virtual void SetUp() OVERRIDE {
AudioDecoderTest::SetUp();
// Upsample from 32 to 48 kHz.
// Because Opus is 48 kHz codec but the input file is 32 kHz, so the data
// read in |AudioDecoderTest::SetUp| has to be upsampled.
// |AudioDecoderTest::SetUp| has read |data_length_| samples, which is more
// than necessary after upsampling, so the end of audio that has been read
// is unused and the end of the buffer is overwritten by the resampled data.
Resampler rs;
rs.Reset(32000, 48000, kResamplerSynchronous);
const int before_resamp_len_samples = static_cast<int>(data_length_) * 2
/ 3;
int16_t* before_resamp_input = new int16_t[before_resamp_len_samples];
memcpy(before_resamp_input, input_,
sizeof(int16_t) * before_resamp_len_samples);
int resamp_len_samples;
EXPECT_EQ(0, rs.Push(before_resamp_input, before_resamp_len_samples,
input_, static_cast<int>(data_length_),
resamp_len_samples));
EXPECT_EQ(static_cast<int>(data_length_), resamp_len_samples);
delete[] before_resamp_input;
}
virtual void InitEncoder() {}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
@@ -661,15 +673,18 @@ class AudioDecoderOpusStereoTest : public AudioDecoderOpusTest {
uint8_t* output) OVERRIDE {
// Create stereo by duplicating each sample in |input|.
const int input_stereo_samples = static_cast<int>(input_len_samples) * 2;
int16_t* input_stereo = new int16_t[input_stereo_samples];
for (size_t i = 0; i < input_len_samples; i++)
input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i];
scoped_ptr<int16_t[]> input_stereo(new int16_t[input_stereo_samples]);
test::InputAudioFile::DuplicateInterleaved(
input, input_len_samples, 2, input_stereo.get());
int enc_len_bytes = WebRtcOpus_Encode(
encoder_, input_stereo, static_cast<int16_t>(input_len_samples),
static_cast<int16_t>(data_length_), output);
// Note that the input length is given as samples per channel.
int enc_len_bytes =
WebRtcOpus_Encode(encoder_,
input_stereo.get(),
static_cast<int16_t>(input_len_samples),
static_cast<int16_t>(data_length_),
output);
EXPECT_GT(enc_len_bytes, 0);
delete[] input_stereo;
return enc_len_bytes;
}
};

View File

@@ -155,6 +155,14 @@
'audio_decoder_unittest.cc',
'audio_decoder.cc',
'interface/audio_decoder.h',
# The files below are from the neteq_unittest_tools target, but that
# target depends (through long dependency chains) on the neteq
# target, which creates a conflict with the audio_decoder_impl.*
# files.
'tools/input_audio_file.cc',
'tools/input_audio_file.h',
'tools/resample_input_audio_file.cc',
'tools/resample_input_audio_file.h',
],
'conditions': [
['OS=="android"', {
@@ -193,6 +201,8 @@
'tools/packet.cc',
'tools/packet.h',
'tools/packet_source.h',
'tools/resample_input_audio_file.cc',
'tools/resample_input_audio_file.h',
'tools/rtp_file_source.cc',
'tools/rtp_file_source.h',
'tools/rtp_generator.cc',

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "webrtc/base/checks.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
namespace test {
bool ResampleInputAudioFile::Read(size_t samples,
int output_rate_hz,
int16_t* destination) {
const size_t samples_to_read = samples * file_rate_hz_ / output_rate_hz;
CHECK_EQ(samples_to_read * output_rate_hz, samples * file_rate_hz_)
<< "Frame size and sample rates don't add up to an integer.";
scoped_ptr<int16_t[]> temp_destination(new int16_t[samples_to_read]);
if (!InputAudioFile::Read(samples_to_read, temp_destination.get()))
return false;
resampler_.ResetIfNeeded(
file_rate_hz_, output_rate_hz, kResamplerSynchronous);
int output_length = 0;
CHECK_EQ(resampler_.Push(temp_destination.get(),
static_cast<int>(samples_to_read),
destination,
static_cast<int>(samples),
output_length),
0);
CHECK_EQ(static_cast<int>(samples), output_length);
return true;
}
} // namespace test
} // namespace webrtc

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_
#include <string>
#include "webrtc/base/constructormagic.h"
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h"
#include "webrtc/typedefs.h"
namespace webrtc {
namespace test {
// Class for handling a looping input audio file with resampling. Extends
// InputAudioFile with a Read overload that delivers the audio at a
// caller-selected sample rate instead of the file's own rate.
class ResampleInputAudioFile : public InputAudioFile {
 public:
  // |file_name| is passed on to InputAudioFile. |file_rate_hz| is the sample
  // rate of the audio stored in that file; it is fixed for the lifetime of
  // the object. The name is taken by const reference to avoid copying the
  // string on every construction.
  ResampleInputAudioFile(const std::string& file_name, int file_rate_hz)
      : InputAudioFile(file_name), file_rate_hz_(file_rate_hz) {}

  // Writes |samples| samples, resampled from the file rate to
  // |output_rate_hz|, to |destination|. See the definition for the exact
  // requirements on the arguments.
  bool Read(size_t samples, int output_rate_hz, int16_t* destination);

 private:
  // Sample rate of the audio in the input file, in Hz.
  const int file_rate_hz_;
  // Converts from |file_rate_hz_| to the rate requested in Read.
  Resampler resampler_;
  DISALLOW_COPY_AND_ASSIGN(ResampleInputAudioFile);
};
} // namespace test
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_