From a37f1dd6b8b4d4f4c5fb07af84359fdab4e00e3c Mon Sep 17 00:00:00 2001 From: "henrik.lundin@webrtc.org" Date: Mon, 27 Oct 2014 12:58:18 +0000 Subject: [PATCH] Cleaning up audio_decoder_test.cc and adding ResampleInputAudioFile This CL contains some cleaning up and refactoring of audio_decoder_test.cc. A new class ResampleInputAudioFile is created and used in the tests. BUG=3926 R=kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/31779004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7528 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../neteq/audio_decoder_unittest.cc | 243 ++++++++++-------- webrtc/modules/audio_coding/neteq/neteq.gypi | 10 + .../neteq/tools/resample_input_audio_file.cc | 42 +++ .../neteq/tools/resample_input_audio_file.h | 40 +++ 4 files changed, 221 insertions(+), 114 deletions(-) create mode 100644 webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc create mode 100644 webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc index fdb7ac315..3a5a13ff7 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -14,9 +14,9 @@ #include #include +#include #include "testing/gtest/include/gtest/gtest.h" -#include "webrtc/common_audio/resampler/include/resampler.h" #ifdef WEBRTC_CODEC_CELT #include "webrtc/modules/audio_coding/codecs/celt/include/celt_interface.h" #endif @@ -28,45 +28,92 @@ #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" #include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" +#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h" #include "webrtc/system_wrappers/interface/data_log.h" #include "webrtc/system_wrappers/interface/scoped_ptr.h" #include "webrtc/test/testsupport/fileutils.h" namespace webrtc { +namespace { +// The absolute difference between the input and output (the first channel) is +// compared vs |tolerance|. The parameter |delay| is used to correct for codec +// delays. +void CompareInputOutput(const std::vector& input, + const std::vector& output, + size_t num_samples, + size_t channels, + int tolerance, + int delay) { + ASSERT_LE(num_samples, input.size()); + ASSERT_LE(num_samples * channels, output.size()); + for (unsigned int n = 0; n < num_samples - delay; ++n) { + ASSERT_NEAR(input[n], output[channels * n + delay], tolerance) + << "Exit test on first diff; n = " << n; + DataLog::InsertCell("CodecTest", "input", input[n]); + DataLog::InsertCell("CodecTest", "output", output[channels * n]); + DataLog::NextRow("CodecTest"); + } +} + +// The absolute difference between the first two channels in |output| is +// compared vs |tolerance|. +void CompareTwoChannels(const std::vector& output, + size_t samples_per_channel, + size_t channels, + int tolerance) { + ASSERT_GE(channels, 2u); + ASSERT_LE(samples_per_channel * channels, output.size()); + for (unsigned int n = 0; n < samples_per_channel; ++n) + ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance) + << "Stereo samples differ."; +} + +// Calculates mean-squared error between input and output (the first channel). +// The parameter |delay| is used to correct for codec delays. +double MseInputOutput(const std::vector& input, + const std::vector& output, + size_t num_samples, + size_t channels, + int delay) { + assert(delay < static_cast(num_samples)); + assert(num_samples <= input.size()); + assert(num_samples * channels <= output.size()); + if (num_samples == 0) + return 0.0; + double squared_sum = 0.0; + for (unsigned int n = 0; n < num_samples - delay; ++n) { + squared_sum += (input[n] - output[channels * n + delay]) * + (input[n] - output[channels * n + delay]); + } + return squared_sum / (num_samples - delay); +} +} // namespace + class AudioDecoderTest : public ::testing::Test { protected: AudioDecoderTest() - : input_fp_(NULL), - input_(NULL), + : input_audio_(webrtc::test::ProjectRootPath() + + "resources/audio_coding/testfile32kHz.pcm", + 32000), + codec_input_rate_hz_(32000), // Legacy default value. encoded_(NULL), - decoded_(NULL), frame_size_(0), data_length_(0), encoded_bytes_(0), channels_(1), output_timestamp_(0), - decoder_(NULL) { - input_file_ = webrtc::test::ProjectRootPath() + - "resources/audio_coding/testfile32kHz.pcm"; - } + decoder_(NULL) {} virtual ~AudioDecoderTest() {} virtual void SetUp() { + if (audio_encoder_) + codec_input_rate_hz_ = audio_encoder_->sample_rate_hz(); // Create arrays. ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0"; - input_ = new int16_t[data_length_]; // Longest encoded data is produced by PCM16b with 2 bytes per sample. encoded_ = new uint8_t[data_length_ * 2]; - decoded_ = new int16_t[data_length_ * channels_]; - // Open input file. - input_fp_ = fopen(input_file_.c_str(), "rb"); - ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_; - // Read data to |input_|. - ASSERT_EQ(data_length_, - fread(input_, sizeof(int16_t), data_length_, input_fp_)) << - "Could not read enough data from file"; // Logging to view input and output in Matlab. // Use 'gyp -Denable_data_logging=1' to enable logging. DataLog::CreateLog(); @@ -78,15 +125,9 @@ class AudioDecoderTest : public ::testing::Test { virtual void TearDown() { delete decoder_; decoder_ = NULL; - // Close input file. - fclose(input_fp_); // Delete arrays. - delete [] input_; - input_ = NULL; delete [] encoded_; encoded_ = NULL; - delete [] decoded_; - decoded_ = NULL; // Close log. DataLog::ReturnLog(); } @@ -127,13 +168,23 @@ class AudioDecoderTest : public ::testing::Test { encoded_bytes_ = 0u; InitEncoder(); EXPECT_EQ(0, decoder_->Init()); + std::vector input; + std::vector decoded; while (processed_samples + frame_size_ <= data_length_) { - size_t enc_len = EncodeFrame(&input_[processed_samples], frame_size_, - &encoded_[encoded_bytes_]); + // Extend input vector with |frame_size_|. + input.resize(input.size() + frame_size_, 0); + // Read from input file. + ASSERT_GE(input.size() - processed_samples, frame_size_); + ASSERT_TRUE(input_audio_.Read( + frame_size_, codec_input_rate_hz_, &input[processed_samples])); + size_t enc_len = EncodeFrame( + &input[processed_samples], frame_size_, &encoded_[encoded_bytes_]); + // Make sure that frame_size_ * channels_ samples are allocated and free. + decoded.resize((processed_samples + frame_size_) * channels_, 0); AudioDecoder::SpeechType speech_type; - size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len, - &decoded_[processed_samples * - channels_], + size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], + enc_len, + &decoded[processed_samples * channels_], &speech_type); EXPECT_EQ(frame_size_ * channels_, dec_len); encoded_bytes_ += enc_len; @@ -145,65 +196,36 @@ class AudioDecoderTest : public ::testing::Test { if (expected_bytes) { EXPECT_EQ(expected_bytes, encoded_bytes_); } - CompareInputOutput(processed_samples, tolerance, delay); + CompareInputOutput( + input, decoded, processed_samples, channels_, tolerance, delay); if (channels_ == 2) - CompareTwoChannels(processed_samples, channel_diff_tolerance); - EXPECT_LE(MseInputOutput(processed_samples, delay), mse); - } - - // The absolute difference between the input and output (the first channel) is - // compared vs |tolerance|. The parameter |delay| is used to correct for codec - // delays. - virtual void CompareInputOutput(size_t num_samples, int tolerance, - int delay) const { - assert(num_samples <= data_length_); - for (unsigned int n = 0; n < num_samples - delay; ++n) { - ASSERT_NEAR(input_[n], decoded_[channels_ * n + delay], tolerance) << - "Exit test on first diff; n = " << n; - DataLog::InsertCell("CodecTest", "input", input_[n]); - DataLog::InsertCell("CodecTest", "output", decoded_[channels_ * n]); - DataLog::NextRow("CodecTest"); - } - } - - // The absolute difference between the two channels in a stereo is compared vs - // |tolerance|. - virtual void CompareTwoChannels(size_t samples_per_channel, - int tolerance) const { - assert(samples_per_channel <= data_length_); - for (unsigned int n = 0; n < samples_per_channel; ++n) - ASSERT_NEAR(decoded_[channels_ * n], decoded_[channels_ * n + 1], - tolerance) << "Stereo samples differ."; - } - - // Calculates mean-squared error between input and output (the first channel). - // The parameter |delay| is used to correct for codec delays. - virtual double MseInputOutput(size_t num_samples, int delay) const { - assert(num_samples <= data_length_); - if (num_samples == 0) return 0.0; - double squared_sum = 0.0; - for (unsigned int n = 0; n < num_samples - delay; ++n) { - squared_sum += (input_[n] - decoded_[channels_ * n + delay]) * - (input_[n] - decoded_[channels_ * n + delay]); - } - return squared_sum / (num_samples - delay); + CompareTwoChannels( + decoded, processed_samples, channels_, channel_diff_tolerance); + EXPECT_LE( + MseInputOutput(input, decoded, processed_samples, channels_, delay), + mse); } // Encodes a payload and decodes it twice with decoder re-init before each // decode. Verifies that the decoded result is the same. void ReInitTest() { - int16_t* output1 = decoded_; - int16_t* output2 = decoded_ + frame_size_; InitEncoder(); - size_t enc_len = EncodeFrame(input_, frame_size_, encoded_); + scoped_ptr input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_); size_t dec_len; AudioDecoder::SpeechType speech_type1, speech_type2; EXPECT_EQ(0, decoder_->Init()); - dec_len = decoder_->Decode(encoded_, enc_len, output1, &speech_type1); + scoped_ptr output1(new int16_t[frame_size_ * channels_]); + dec_len = decoder_->Decode(encoded_, enc_len, output1.get(), &speech_type1); + ASSERT_LE(dec_len, frame_size_ * channels_); EXPECT_EQ(frame_size_ * channels_, dec_len); // Re-init decoder and decode again. EXPECT_EQ(0, decoder_->Init()); - dec_len = decoder_->Decode(encoded_, enc_len, output2, &speech_type2); + scoped_ptr output2(new int16_t[frame_size_ * channels_]); + dec_len = decoder_->Decode(encoded_, enc_len, output2.get(), &speech_type2); + ASSERT_LE(dec_len, frame_size_ * channels_); EXPECT_EQ(frame_size_ * channels_, dec_len); for (unsigned int n = 0; n < frame_size_; ++n) { ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n; @@ -214,24 +236,26 @@ class AudioDecoderTest : public ::testing::Test { // Call DecodePlc and verify that the correct number of samples is produced. void DecodePlcTest() { InitEncoder(); - size_t enc_len = EncodeFrame(input_, frame_size_, encoded_); + scoped_ptr input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_); AudioDecoder::SpeechType speech_type; EXPECT_EQ(0, decoder_->Init()); + scoped_ptr output(new int16_t[frame_size_ * channels_]); size_t dec_len = - decoder_->Decode(encoded_, enc_len, decoded_, &speech_type); + decoder_->Decode(encoded_, enc_len, output.get(), &speech_type); EXPECT_EQ(frame_size_ * channels_, dec_len); // Call DecodePlc and verify that we get one frame of data. // (Overwrite the output from the above Decode call, but that does not // matter.) - dec_len = decoder_->DecodePlc(1, decoded_); + dec_len = decoder_->DecodePlc(1, output.get()); EXPECT_EQ(frame_size_ * channels_, dec_len); } - std::string input_file_; - FILE* input_fp_; - int16_t* input_; + test::ResampleInputAudioFile input_audio_; + int codec_input_rate_hz_; uint8_t* encoded_; - int16_t* decoded_; size_t frame_size_; size_t data_length_; size_t encoded_bytes_; @@ -268,6 +292,7 @@ class AudioDecoderPcmATest : public AudioDecoderTest { class AudioDecoderPcm16BTest : public AudioDecoderTest { protected: AudioDecoderPcm16BTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 8000; frame_size_ = 160; data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderPcm16B(kDecoderPCM16B); @@ -287,6 +312,7 @@ class AudioDecoderPcm16BTest : public AudioDecoderTest { class AudioDecoderIlbcTest : public AudioDecoderTest { protected: AudioDecoderIlbcTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 8000; frame_size_ = 240; data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderIlbc; @@ -316,14 +342,18 @@ class AudioDecoderIlbcTest : public AudioDecoderTest { // not return any data. It simply resets a few states and returns 0. void DecodePlcTest() { InitEncoder(); - size_t enc_len = EncodeFrame(input_, frame_size_, encoded_); + scoped_ptr input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + size_t enc_len = EncodeFrame(input.get(), frame_size_, encoded_); AudioDecoder::SpeechType speech_type; EXPECT_EQ(0, decoder_->Init()); + scoped_ptr output(new int16_t[frame_size_ * channels_]); size_t dec_len = - decoder_->Decode(encoded_, enc_len, decoded_, &speech_type); + decoder_->Decode(encoded_, enc_len, output.get(), &speech_type); EXPECT_EQ(frame_size_, dec_len); // Simply call DecodePlc and verify that we get 0 as return value. - EXPECT_EQ(0, decoder_->DecodePlc(1, decoded_)); + EXPECT_EQ(0, decoder_->DecodePlc(1, output.get())); } iLBC_encinst_t* encoder_; @@ -332,6 +362,7 @@ class AudioDecoderIlbcTest : public AudioDecoderTest { class AudioDecoderIsacFloatTest : public AudioDecoderTest { protected: AudioDecoderIsacFloatTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; input_size_ = 160; frame_size_ = 480; data_length_ = 10 * frame_size_; @@ -369,6 +400,7 @@ class AudioDecoderIsacFloatTest : public AudioDecoderTest { class AudioDecoderIsacSwbTest : public AudioDecoderTest { protected: AudioDecoderIsacSwbTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 32000; input_size_ = 320; frame_size_ = 960; data_length_ = 10 * frame_size_; @@ -419,6 +451,7 @@ class AudioDecoderIsacFbTest : public AudioDecoderIsacSwbTest { class AudioDecoderIsacFixTest : public AudioDecoderTest { protected: AudioDecoderIsacFixTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; input_size_ = 160; frame_size_ = 480; data_length_ = 10 * frame_size_; @@ -456,6 +489,7 @@ class AudioDecoderIsacFixTest : public AudioDecoderTest { class AudioDecoderG722Test : public AudioDecoderTest { protected: AudioDecoderG722Test() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; frame_size_ = 160; data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderG722; @@ -598,6 +632,7 @@ class AudioDecoderCeltStereoTest : public AudioDecoderTest { class AudioDecoderOpusTest : public AudioDecoderTest { protected: AudioDecoderOpusTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 48000; frame_size_ = 480; data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderOpus(kDecoderOpus); @@ -609,29 +644,6 @@ class AudioDecoderOpusTest : public AudioDecoderTest { WebRtcOpus_EncoderFree(encoder_); } - virtual void SetUp() OVERRIDE { - AudioDecoderTest::SetUp(); - // Upsample from 32 to 48 kHz. - // Because Opus is 48 kHz codec but the input file is 32 kHz, so the data - // read in |AudioDecoderTest::SetUp| has to be upsampled. - // |AudioDecoderTest::SetUp| has read |data_length_| samples, which is more - // than necessary after upsampling, so the end of audio that has been read - // is unused and the end of the buffer is overwritten by the resampled data. - Resampler rs; - rs.Reset(32000, 48000, kResamplerSynchronous); - const int before_resamp_len_samples = static_cast(data_length_) * 2 - / 3; - int16_t* before_resamp_input = new int16_t[before_resamp_len_samples]; - memcpy(before_resamp_input, input_, - sizeof(int16_t) * before_resamp_len_samples); - int resamp_len_samples; - EXPECT_EQ(0, rs.Push(before_resamp_input, before_resamp_len_samples, - input_, static_cast(data_length_), - resamp_len_samples)); - EXPECT_EQ(static_cast(data_length_), resamp_len_samples); - delete[] before_resamp_input; - } - virtual void InitEncoder() {} virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, @@ -661,15 +673,18 @@ class AudioDecoderOpusStereoTest : public AudioDecoderOpusTest { uint8_t* output) OVERRIDE { // Create stereo by duplicating each sample in |input|. const int input_stereo_samples = static_cast(input_len_samples) * 2; - int16_t* input_stereo = new int16_t[input_stereo_samples]; - for (size_t i = 0; i < input_len_samples; i++) - input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i]; + scoped_ptr input_stereo(new int16_t[input_stereo_samples]); + test::InputAudioFile::DuplicateInterleaved( + input, input_len_samples, 2, input_stereo.get()); - int enc_len_bytes = WebRtcOpus_Encode( - encoder_, input_stereo, static_cast(input_len_samples), - static_cast(data_length_), output); + // Note that the input length is given as samples per channel. + int enc_len_bytes = + WebRtcOpus_Encode(encoder_, + input_stereo.get(), + static_cast(input_len_samples), + static_cast(data_length_), + output); EXPECT_GT(enc_len_bytes, 0); - delete[] input_stereo; return enc_len_bytes; } }; diff --git a/webrtc/modules/audio_coding/neteq/neteq.gypi b/webrtc/modules/audio_coding/neteq/neteq.gypi index 0901615af..0d1d9ba9c 100644 --- a/webrtc/modules/audio_coding/neteq/neteq.gypi +++ b/webrtc/modules/audio_coding/neteq/neteq.gypi @@ -155,6 +155,14 @@ 'audio_decoder_unittest.cc', 'audio_decoder.cc', 'interface/audio_decoder.h', + # The files below are from the neteq_unittest_tools target, but that + # target depends (through long dependency chains) on the neteq + # target, which creates a conflict with the audio_decoder_impl.* + # files. + 'tools/input_audio_file.cc', + 'tools/input_audio_file.h', + 'tools/resample_input_audio_file.cc', + 'tools/resample_input_audio_file.h', ], 'conditions': [ ['OS=="android"', { @@ -193,6 +201,8 @@ 'tools/packet.cc', 'tools/packet.h', 'tools/packet_source.h', + 'tools/resample_input_audio_file.cc', + 'tools/resample_input_audio_file.h', 'tools/rtp_file_source.cc', 'tools/rtp_file_source.h', 'tools/rtp_generator.cc', diff --git a/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc b/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc new file mode 100644 index 000000000..f391466cc --- /dev/null +++ b/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h" + +#include "webrtc/base/checks.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { +namespace test { + +bool ResampleInputAudioFile::Read(size_t samples, + int output_rate_hz, + int16_t* destination) { + const size_t samples_to_read = samples * file_rate_hz_ / output_rate_hz; + CHECK_EQ(samples_to_read * output_rate_hz, samples * file_rate_hz_) + << "Frame size and sample rates don't add up to an integer."; + scoped_ptr temp_destination(new int16_t[samples_to_read]); + if (!InputAudioFile::Read(samples_to_read, temp_destination.get())) + return false; + resampler_.ResetIfNeeded( + file_rate_hz_, output_rate_hz, kResamplerSynchronous); + int output_length = 0; + CHECK_EQ(resampler_.Push(temp_destination.get(), + static_cast(samples_to_read), + destination, + static_cast(samples), + output_length), + 0); + CHECK_EQ(static_cast(samples), output_length); + return true; +} + +} // namespace test +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h b/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h new file mode 100644 index 000000000..8c028005c --- /dev/null +++ b/webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_ + +#include + +#include "webrtc/base/constructormagic.h" +#include "webrtc/common_audio/resampler/include/resampler.h" +#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h" +#include "webrtc/typedefs.h" + +namespace webrtc { +namespace test { + +// Class for handling a looping input audio file with resampling. +class ResampleInputAudioFile : public InputAudioFile { + public: + ResampleInputAudioFile(const std::string file_name, int file_rate_hz) + : InputAudioFile(file_name), file_rate_hz_(file_rate_hz) {} + + bool Read(size_t samples, int output_rate_hz, int16_t* destination); + + private: + const int file_rate_hz_; + Resampler resampler_; + DISALLOW_COPY_AND_ASSIGN(ResampleInputAudioFile); +}; + +} // namespace test +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_