Consolidate audio conversion from Channel and TransmitMixer.

Replace the two versions with a single DownConvertToCodecFormat. As
noted in the code comments, this could be consolidated further with
RemixAndResample, but doing that properly would call for a full audio
converter class.

Along the way:
- Fix the bug in Channel::Demultiplex when mono input is used with a
stereo codec (see the sketch after this list).
- Remove the 32 kHz max from the OnDataAvailable path. This avoids a
48 -> 32 -> 48 conversion when VoE is passed 48 kHz audio; instead we
get a straight pass-through to ACM. The 32 kHz conversion is still
needed in the RecordedDataIsAvailable path until APM natively supports
48 kHz.
- Merge resampler improvements from ACM1 to ACM2. This allows ACM to
handle 44.1 kHz audio passed to VoE and was originally done here:
https://webrtc-codereview.appspot.com/1590004
- Reuse the RemixAndResample unit tests for DownConvertToCodecFormat.
- Remove unused functions from utility.cc.
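
To make the Demultiplex fix concrete, here is a minimal sketch of the
new path for mono captured audio with a stereo send codec. The call
site and buffer names are hypothetical; the signatures are from this
change. DownConvertToCodecFormat never upmixes, so the frame stays mono
and the rate is clamped to min(codec rate, capture rate):

  // Sketch only: 10 ms of 48 kHz mono capture feeding a stereo codec.
  int16_t mono_buffer[kMaxMonoDataSizeSamples];  // scratch for downmixing
  PushResampler resampler;
  AudioFrame frame;
  DownConvertToCodecFormat(captured_audio,  // hypothetical capture buffer
                           480,    // samples per channel (10 ms at 48 kHz)
                           1,      // captured channels
                           48000,  // capture rate
                           2,      // codec channels
                           48000,  // codec rate
                           mono_buffer, &resampler, &frame);
  // frame ends up mono at 48 kHz; the old Demultiplex mislabeled this
  // mono-input/stereo-codec case as stereo.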

BUG=3155,3000,b/12867572
TESTED=voe_cmd_test using both the OnDataAvailable and
RecordedDataIsAvailable paths, with a captured audio format of all
combinations of {44.1,48} kHz and {1,2} channels, running through all
codecs, and finally using both ACM1 and ACM2.

R=henrika@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/11019005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5843 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date: 2014-04-03 21:56:01 +00:00
Parent: cca888a5bf
Commit: 40ee3d07ed
18 changed files with 1663 additions and 1791 deletions

webrtc/modules/audio_coding/main/acm2/acm_resampler.cc

@@ -13,20 +13,15 @@
 #include <string.h>

 #include "webrtc/common_audio/resampler/include/resampler.h"
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
-#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/system_wrappers/interface/logging.h"

 namespace webrtc {
 namespace acm2 {

-ACMResampler::ACMResampler()
-    : resampler_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
+ACMResampler::ACMResampler() {
 }

 ACMResampler::~ACMResampler() {
-  delete resampler_crit_sect_;
 }

 int ACMResampler::Resample10Msec(const int16_t* in_audio,
@@ -34,37 +29,28 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
                                  int out_freq_hz,
                                  int num_audio_channels,
                                  int16_t* out_audio) {
-  CriticalSectionScoped cs(resampler_crit_sect_);
+  int in_length = in_freq_hz * num_audio_channels / 100;
+  int out_length = out_freq_hz * num_audio_channels / 100;
   if (in_freq_hz == out_freq_hz) {
-    size_t length = static_cast<size_t>(in_freq_hz * num_audio_channels / 100);
-    memcpy(out_audio, in_audio, length * sizeof(int16_t));
-    return static_cast<int16_t>(in_freq_hz / 100);
+    memcpy(out_audio, in_audio, in_length * sizeof(int16_t));
+    return in_length / num_audio_channels;
   }

-  // |maxLen| is maximum number of samples for 10ms at 48kHz.
-  int max_len = 480 * num_audio_channels;
-  int length_in = (in_freq_hz / 100) * num_audio_channels;
-  int out_len;
-
-  ResamplerType type = (num_audio_channels == 1) ? kResamplerSynchronous :
-      kResamplerSynchronousStereo;
-
-  if (resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type) < 0) {
-    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
-                 "Error in reset of resampler");
+  if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
+                                    num_audio_channels) != 0) {
+    LOG_FERR3(LS_ERROR, InitializeIfNeeded, in_freq_hz, out_freq_hz,
+              num_audio_channels);
     return -1;
   }

-  if (resampler_.Push(in_audio, length_in, out_audio, max_len, out_len) < 0) {
-    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
-                 "Error in resampler: resampler.Push");
+  out_length = resampler_.Resample(in_audio, in_length, out_audio, out_length);
+  if (out_length == -1) {
+    LOG_FERR4(LS_ERROR, Resample, in_audio, in_length, out_audio, out_length);
    return -1;
  }

-  return out_len / num_audio_channels;
+  return out_length / num_audio_channels;
 }

 }  // namespace acm2
 }  // namespace webrtc
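
A note on the length conventions in the new Resample10Msec: buffer
lengths passed to PushResampler are total interleaved samples, while
the return value is samples per channel. A hedged usage sketch (buffers
hypothetical):

  // 10 ms of stereo audio, 44100 Hz -> 32000 Hz.
  int16_t in[441 * 2];   // interleaved: 441 samples per channel
  int16_t out[320 * 2];
  webrtc::acm2::ACMResampler resampler;
  int samples_per_channel = resampler.Resample10Msec(in, 44100, 32000, 2, out);
  // samples_per_channel == 320 on success, -1 on failure.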

webrtc/modules/audio_coding/main/acm2/acm_resampler.h

@@ -11,13 +11,10 @@
 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_

-#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/common_audio/resampler/include/push_resampler.h"
 #include "webrtc/typedefs.h"

 namespace webrtc {
-
-class CriticalSectionWrapper;
-
 namespace acm2 {

 class ACMResampler {
@@ -32,13 +29,10 @@ class ACMResampler {
                      int16_t* out_audio);

  private:
-  // Use the Resampler class.
-  Resampler resampler_;
-  CriticalSectionWrapper* resampler_crit_sect_;
+  PushResampler resampler_;
 };

 }  // namespace acm2
 }  // namespace webrtc

 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_

webrtc/modules/audio_coding/main/acm2/audio_coding_module_impl.cc

@@ -1205,11 +1205,7 @@ int AudioCodingModuleImpl::Add10MsData(
     return -1;
   }

-  // Allow for 8, 16, 32 and 48kHz input audio.
-  if ((audio_frame.sample_rate_hz_ != 8000)
-      && (audio_frame.sample_rate_hz_ != 16000)
-      && (audio_frame.sample_rate_hz_ != 32000)
-      && (audio_frame.sample_rate_hz_ != 48000)) {
+  if (audio_frame.sample_rate_hz_ > 48000) {
     assert(false);
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                  "Cannot Add 10 ms audio, input frequency not valid");
@@ -1371,7 +1367,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   if (preprocess_frame_.samples_per_channel_ < 0) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                 "Cannot add 10 ms audio, resmapling failed");
+                 "Cannot add 10 ms audio, resampling failed");
     return -1;
   }
   preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;
webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc

@@ -1273,11 +1273,7 @@ int32_t AudioCodingModuleImpl::Add10MsData(
     return -1;
   }

-  // Allow for 8, 16, 32 and 48kHz input audio.
-  if ((audio_frame.sample_rate_hz_ != 8000)
-      && (audio_frame.sample_rate_hz_ != 16000)
-      && (audio_frame.sample_rate_hz_ != 32000)
-      && (audio_frame.sample_rate_hz_ != 48000)) {
+  if (audio_frame.sample_rate_hz_ > 48000) {
     assert(false);
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                  "Cannot Add 10 ms audio, input frequency not valid");
@@ -1444,7 +1440,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   if (preprocess_frame_.samples_per_channel_ < 0) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                 "Cannot add 10 ms audio, resmapling failed");
+                 "Cannot add 10 ms audio, resampling failed");
     return -1;
   }
   preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;

webrtc/modules/audio_processing/include/audio_processing.h

@@ -61,6 +61,8 @@ struct ExperimentalAgc {
   bool enabled;
 };

+static const int kAudioProcMaxNativeSampleRateHz = 32000;
+
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //

webrtc/voice_engine/channel.cc

@@ -4150,61 +4150,26 @@ Channel::Demultiplex(const AudioFrame& audioFrame)
     return 0;
 }

-// TODO(xians): This method borrows quite some code from
-// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce
-// code duplication.
 void Channel::Demultiplex(const int16_t* audio_data,
                           int sample_rate,
                           int number_of_frames,
                           int number_of_channels) {
-  // The highest sample rate that WebRTC supports for mono audio is 96kHz.
-  static const int kMaxNumberOfFrames = 960;
-  assert(number_of_frames <= kMaxNumberOfFrames);
-
-  // Get the send codec information for doing resampling or downmixing later on.
   CodecInst codec;
   GetSendCodec(codec);
-  assert(codec.channels == 1 || codec.channels == 2);
-  int support_sample_rate = std::min(32000,
-                                     std::min(sample_rate, codec.plfreq));
-
-  // Downmix the data to mono if needed.
-  const int16_t* audio_ptr = audio_data;
-  if (number_of_channels == 2 && codec.channels == 1) {
-    if (!mono_recording_audio_.get())
-      mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]);
-
-    AudioFrameOperations::StereoToMono(audio_data, number_of_frames,
-                                       mono_recording_audio_.get());
-    audio_ptr = mono_recording_audio_.get();
-  }
-
-  // Resample the data to the sample rate that the codec is using.
-  if (input_resampler_.InitializeIfNeeded(sample_rate,
-                                          support_sample_rate,
-                                          codec.channels)) {
-    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
-                 "Channel::Demultiplex() unable to resample");
-    return;
-  }
-
-  int out_length = input_resampler_.Resample(audio_ptr,
-                                             number_of_frames * codec.channels,
-                                             _audioFrame.data_,
-                                             AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
-                 "Channel::Demultiplex() resampling failed");
-    return;
-  }
-
-  _audioFrame.samples_per_channel_ = out_length / codec.channels;
-  _audioFrame.timestamp_ = -1;
-  _audioFrame.sample_rate_hz_ = support_sample_rate;
-  _audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
-  _audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
-  _audioFrame.num_channels_ = codec.channels;
-  _audioFrame.id_ = _channelId;
+  if (!mono_recording_audio_.get()) {
+    // Temporary space for DownConvertToCodecFormat.
+    mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
+  }
+  DownConvertToCodecFormat(audio_data,
+                           number_of_frames,
+                           number_of_channels,
+                           sample_rate,
+                           codec.channels,
+                           codec.plfreq,
+                           mono_recording_audio_.get(),
+                           &input_resampler_,
+                           &_audioFrame);
 }

 uint32_t
@@ -4694,11 +4659,11 @@ Channel::MixOrReplaceAudioWithFile(int mixingFrequency)
     {
        // Currently file stream is always mono.
        // TODO(xians): Change the code when FilePlayer supports real stereo.
-       Utility::MixWithSat(_audioFrame.data_,
-                           _audioFrame.num_channels_,
-                           fileBuffer.get(),
-                           1,
-                           fileSamples);
+       MixWithSat(_audioFrame.data_,
+                  _audioFrame.num_channels_,
+                  fileBuffer.get(),
+                  1,
+                  fileSamples);
     }
     else
     {
@@ -4754,11 +4719,11 @@ Channel::MixAudioWithFile(AudioFrame& audioFrame,
     {
        // Currently file stream is always mono.
        // TODO(xians): Change the code when FilePlayer supports real stereo.
-       Utility::MixWithSat(audioFrame.data_,
-                           audioFrame.num_channels_,
-                           fileBuffer.get(),
-                           1,
-                           fileSamples);
+       MixWithSat(audioFrame.data_,
+                  audioFrame.num_channels_,
+                  fileBuffer.get(),
+                  1,
+                  fileSamples);
     }
     else
     {

webrtc/voice_engine/channel.h

@@ -545,7 +545,7 @@ private:
     AudioLevel _outputAudioLevel;
     bool _externalTransport;
     AudioFrame _audioFrame;
-    scoped_array<int16_t> mono_recording_audio_;
+    scoped_ptr<int16_t[]> mono_recording_audio_;
     // Resampler is used when input data is stereo while codec is mono.
     PushResampler input_resampler_;
     uint8_t _audioLevel_dBov;

webrtc/voice_engine/output_mixer.cc

@@ -16,11 +16,10 @@
#include "webrtc/system_wrappers/interface/file_wrapper.h" #include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h" #include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/include/voe_external_media.h" #include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/voice_engine/statistics.h" #include "webrtc/voice_engine/statistics.h"
#include "webrtc/voice_engine/utility.h"
namespace webrtc { namespace webrtc {
namespace voe { namespace voe {
void void
@@ -528,7 +527,8 @@ int OutputMixer::GetMixedAudio(int sample_rate_hz,
frame->sample_rate_hz_ = sample_rate_hz; frame->sample_rate_hz_ = sample_rate_hz;
// TODO(andrew): Ideally the downmixing would occur much earlier, in // TODO(andrew): Ideally the downmixing would occur much earlier, in
// AudioCodingModule. // AudioCodingModule.
return RemixAndResample(_audioFrame, &resampler_, frame); RemixAndResample(_audioFrame, &resampler_, frame);
return 0;
} }
int32_t int32_t
@@ -565,7 +565,9 @@ OutputMixer::DoOperationsOnCombinedSignal()
} }
// --- Far-end Voice Quality Enhancement (AudioProcessing Module) // --- Far-end Voice Quality Enhancement (AudioProcessing Module)
// TODO(ajm): Check with VoEBase if |need_audio_processing| is false.
// If so, we don't need to call this method and can avoid the subsequent
// resampling. See: https://code.google.com/p/webrtc/issues/detail?id=3147
APMAnalyzeReverseStream(); APMAnalyzeReverseStream();
// --- External media processing // --- External media processing
@@ -603,8 +605,7 @@ void OutputMixer::APMAnalyzeReverseStream() {
AudioFrame frame; AudioFrame frame;
frame.num_channels_ = 1; frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz(); frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
if (RemixAndResample(_audioFrame, &audioproc_resampler_, &frame) == -1) RemixAndResample(_audioFrame, &audioproc_resampler_, &frame);
return;
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) { if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1), WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
@@ -657,5 +658,4 @@ OutputMixer::InsertInbandDtmfTone()
} }
} // namespace voe } // namespace voe
} // namespace webrtc } // namespace webrtc

webrtc/voice_engine/output_mixer_internal.cc (deleted)

@@ -1,70 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/voice_engine/output_mixer_internal.h"
-
-#include "webrtc/common_audio/resampler/include/push_resampler.h"
-#include "webrtc/modules/interface/module_common_types.h"
-#include "webrtc/modules/utility/interface/audio_frame_operations.h"
-#include "webrtc/system_wrappers/interface/logging.h"
-#include "webrtc/system_wrappers/interface/trace.h"
-
-namespace webrtc {
-namespace voe {
-
-int RemixAndResample(const AudioFrame& src_frame,
-                     PushResampler* resampler,
-                     AudioFrame* dst_frame) {
-  const int16_t* audio_ptr = src_frame.data_;
-  int audio_ptr_num_channels = src_frame.num_channels_;
-  int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
-
-  // Downmix before resampling.
-  if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
-    AudioFrameOperations::StereoToMono(src_frame.data_,
-                                       src_frame.samples_per_channel_,
-                                       mono_audio);
-    audio_ptr = mono_audio;
-    audio_ptr_num_channels = 1;
-  }
-
-  if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
-                                    dst_frame->sample_rate_hz_,
-                                    audio_ptr_num_channels) == -1) {
-    dst_frame->CopyFrom(src_frame);
-    LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
-              dst_frame->sample_rate_hz_, audio_ptr_num_channels);
-    return -1;
-  }
-
-  const int src_length = src_frame.samples_per_channel_ *
-                         audio_ptr_num_channels;
-  int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
-                                       AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    dst_frame->CopyFrom(src_frame);
-    LOG_FERR3(LS_ERROR, Resample, src_length, dst_frame->data_,
-              AudioFrame::kMaxDataSizeSamples);
-    return -1;
-  }
-  dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
-
-  // Upmix after resampling.
-  if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
-    // The audio in dst_frame really is mono at this point; MonoToStereo will
-    // set this back to stereo.
-    dst_frame->num_channels_ = 1;
-    AudioFrameOperations::MonoToStereo(dst_frame);
-  }
-
-  return 0;
-}
-
-}  // namespace voe
-}  // namespace webrtc

webrtc/voice_engine/output_mixer_internal.h (deleted)

@@ -1,33 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
-#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
-
-namespace webrtc {
-
-class AudioFrame;
-class PushResampler;
-
-namespace voe {
-
-// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
-// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
-// the desired values. Updates |samples_per_channel_| accordingly.
-//
-// On failure, returns -1 and copies |src_frame| to |dst_frame|.
-int RemixAndResample(const AudioFrame& src_frame,
-                     PushResampler* resampler,
-                     AudioFrame* dst_frame);
-
-}  // namespace voe
-}  // namespace webrtc
-
-#endif  // VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_

webrtc/voice_engine/output_mixer_unittest.cc

@@ -11,13 +11,20 @@
 #include <math.h>

 #include "testing/gtest/include/gtest/gtest.h"
-#include "webrtc/voice_engine/output_mixer.h"
-#include "webrtc/voice_engine/output_mixer_internal.h"
+#include "webrtc/common_audio/resampler/include/push_resampler.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/voice_engine/utility.h"
+#include "webrtc/voice_engine/voice_engine_defines.h"

 namespace webrtc {
 namespace voe {
 namespace {

+enum FunctionToTest {
+  TestRemixAndResample,
+  TestDownConvertToCodecFormat
+};
+
 class OutputMixerTest : public ::testing::Test {
  protected:
   OutputMixerTest() {
@@ -29,7 +36,8 @@ class OutputMixerTest : public ::testing::Test {
   }

   void RunResampleTest(int src_channels, int src_sample_rate_hz,
-                       int dst_channels, int dst_sample_rate_hz);
+                       int dst_channels, int dst_sample_rate_hz,
+                       FunctionToTest function);

   PushResampler resampler_;
   AudioFrame src_frame_;
@@ -121,7 +129,8 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
 void OutputMixerTest::RunResampleTest(int src_channels,
                                       int src_sample_rate_hz,
                                       int dst_channels,
-                                      int dst_sample_rate_hz) {
+                                      int dst_sample_rate_hz,
+                                      FunctionToTest function) {
   PushResampler resampler;  // Create a new one with every test.
   const int16_t kSrcLeft = 30;  // Shouldn't overflow for any used sample rate.
   const int16_t kSrcRight = 15;
@@ -157,7 +166,21 @@ void OutputMixerTest::RunResampleTest(int src_channels,
       / src_sample_rate_hz * kInputKernelDelaySamples * dst_channels * 2;
   printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
       src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
+  if (function == TestRemixAndResample) {
+    RemixAndResample(src_frame_, &resampler, &dst_frame_);
+  } else {
+    int16_t mono_buffer[kMaxMonoDataSizeSamples];
+    DownConvertToCodecFormat(src_frame_.data_,
+                             src_frame_.samples_per_channel_,
+                             src_frame_.num_channels_,
+                             src_frame_.sample_rate_hz_,
+                             dst_frame_.num_channels_,
+                             dst_frame_.sample_rate_hz_,
+                             mono_buffer,
+                             &resampler,
+                             &dst_frame_);
+  }

   if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
     // The sinc resampler gives poor SNR at this extreme conversion, but we
     // expect to see this rarely in practice.
@@ -171,13 +194,13 @@ TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
   // Stereo -> stereo.
   SetStereoFrame(&src_frame_, 10, 10);
   SetStereoFrame(&dst_frame_, 0, 0);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);

   // Mono -> mono.
   SetMonoFrame(&src_frame_, 20);
   SetMonoFrame(&dst_frame_, 0);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(src_frame_, dst_frame_);
 }

@@ -186,20 +209,18 @@ TEST_F(OutputMixerTest, RemixAndResampleMixingOnlySucceeds) {
   SetStereoFrame(&dst_frame_, 0, 0);
   SetMonoFrame(&src_frame_, 10);
   SetStereoFrame(&golden_frame_, 10, 10);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(dst_frame_, golden_frame_);

   // Mono -> stereo.
   SetMonoFrame(&dst_frame_, 0);
   SetStereoFrame(&src_frame_, 10, 20);
   SetMonoFrame(&golden_frame_, 15);
-  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
+  RemixAndResample(src_frame_, &resampler_, &dst_frame_);
   VerifyFramesAreEqual(golden_frame_, dst_frame_);
 }

 TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
-  // TODO(ajm): convert this to the parameterized TEST_P style used in
-  // sinc_resampler_unittest.cc. We can then easily add tighter SNR thresholds.
   const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
   const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
   const int kChannels[] = {1, 2};
@@ -209,7 +230,28 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
     for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
       for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
         RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                        kChannels[dst_channel], kSampleRates[dst_rate]);
+                        kChannels[dst_channel], kSampleRates[dst_rate],
+                        TestRemixAndResample);
+      }
+    }
+  }
+}
+
+TEST_F(OutputMixerTest, ConvertToCodecFormatSucceeds) {
+  const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
+  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
+  const int kChannels[] = {1, 2};
+  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
+  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
+    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
+      for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
+        for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
+          if (dst_rate <= src_rate && dst_channel <= src_channel) {
+            RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
+                            kChannels[src_channel], kSampleRates[dst_rate],
+                            TestDownConvertToCodecFormat);
+          }
+        }
       }
     }
   }
File diff suppressed because it is too large.

webrtc/voice_engine/transmit_mixer.h

@@ -17,6 +17,7 @@
#include "webrtc/modules/interface/module_common_types.h" #include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/file_player.h" #include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h" #include "webrtc/modules/utility/interface/file_recorder.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/voice_engine/include/voe_base.h" #include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/level_indicator.h" #include "webrtc/voice_engine/level_indicator.h"
#include "webrtc/voice_engine/monitor_module.h" #include "webrtc/voice_engine/monitor_module.h"
@@ -36,9 +37,7 @@ class MixedAudio;
class Statistics; class Statistics;
class TransmitMixer : public MonitorObserver, class TransmitMixer : public MonitorObserver,
public FileCallback public FileCallback {
{
public: public:
static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId); static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId);
@@ -175,10 +174,10 @@ private:
// sending codecs. // sending codecs.
void GetSendCodecInfo(int* max_sample_rate, int* max_channels); void GetSendCodecInfo(int* max_sample_rate, int* max_channels);
int GenerateAudioFrame(const int16_t audioSamples[], void GenerateAudioFrame(const int16_t audioSamples[],
int nSamples, int nSamples,
int nChannels, int nChannels,
int samplesPerSec); int samplesPerSec);
int32_t RecordAudioToFile(uint32_t mixingFrequency); int32_t RecordAudioToFile(uint32_t mixingFrequency);
int32_t MixOrReplaceAudioWithFile( int32_t MixOrReplaceAudioWithFile(
@@ -232,6 +231,7 @@ private:
int32_t _remainingMuteMicTimeMs; int32_t _remainingMuteMicTimeMs;
bool stereo_codec_; bool stereo_codec_;
bool swap_stereo_channels_; bool swap_stereo_channels_;
scoped_ptr<int16_t[]> mono_buffer_;
}; };
} // namespace voe } // namespace voe

webrtc/voice_engine/utility.cc

@@ -10,116 +10,150 @@
#include "webrtc/voice_engine/utility.h" #include "webrtc/voice_engine/utility.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/interface/module.h" #include "webrtc/common_types.h"
#include "webrtc/system_wrappers/interface/trace.h" #include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc namespace webrtc {
{ namespace voe {
namespace voe // TODO(ajm): There is significant overlap between RemixAndResample and
{ // ConvertToCodecFormat, but if we're to consolidate we should probably make a
enum{kMaxTargetLen = 2*32*10}; // stereo 32KHz 10ms // real converter class.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
void Utility::MixWithSat(int16_t target[], // Downmix before resampling.
int target_channel, if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
const int16_t source[], AudioFrameOperations::StereoToMono(src_frame.data_,
int source_channel, src_frame.samples_per_channel_,
int source_len) mono_audio);
{ audio_ptr = mono_audio;
assert((target_channel == 1) || (target_channel == 2)); audio_ptr_num_channels = 1;
assert((source_channel == 1) || (source_channel == 2)); }
assert(source_len <= kMaxTargetLen);
if ((target_channel == 2) && (source_channel == 1)) if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
{ dst_frame->sample_rate_hz_,
// Convert source from mono to stereo. audio_ptr_num_channels) == -1) {
int32_t left = 0; dst_frame->CopyFrom(src_frame);
int32_t right = 0; LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
for (int i = 0; i < source_len; ++i) { dst_frame->sample_rate_hz_, audio_ptr_num_channels);
left = source[i] + target[i*2]; assert(false);
right = source[i] + target[i*2 + 1]; }
target[i*2] = WebRtcSpl_SatW32ToW16(left);
target[i*2 + 1] = WebRtcSpl_SatW32ToW16(right); const int src_length = src_frame.samples_per_channel_ *
} audio_ptr_num_channels;
} int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
else if ((target_channel == 1) && (source_channel == 2)) AudioFrame::kMaxDataSizeSamples);
{ if (out_length == -1) {
// Convert source from stereo to mono. dst_frame->CopyFrom(src_frame);
int32_t temp = 0; LOG_FERR3(LS_ERROR, Resample, audio_ptr, src_length, dst_frame->data_);
for (int i = 0; i < source_len/2; ++i) { assert(false);
temp = ((source[i*2] + source[i*2 + 1])>>1) + target[i]; }
target[i] = WebRtcSpl_SatW32ToW16(temp); dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
}
} // Upmix after resampling.
else if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
{ // The audio in dst_frame really is mono at this point; MonoToStereo will
int32_t temp = 0; // set this back to stereo.
for (int i = 0; i < source_len; ++i) { dst_frame->num_channels_ = 1;
temp = source[i] + target[i]; AudioFrameOperations::MonoToStereo(dst_frame);
target[i] = WebRtcSpl_SatW32ToW16(temp); }
}
}
} }
void Utility::MixSubtractWithSat(int16_t target[], void DownConvertToCodecFormat(const int16_t* src_data,
const int16_t source[], int samples_per_channel,
uint16_t len) int num_channels,
{ int sample_rate_hz,
int32_t temp(0); int codec_num_channels,
for (int i = 0; i < len; i++) int codec_rate_hz,
{ int16_t* mono_buffer,
temp = target[i] - source[i]; PushResampler* resampler,
if (temp > 32767) AudioFrame* dst_af) {
target[i] = 32767; assert(samples_per_channel <= kMaxMonoDataSizeSamples);
else if (temp < -32768) assert(num_channels == 1 || num_channels == 2);
target[i] = -32768; assert(codec_num_channels == 1 || codec_num_channels == 2);
else
target[i] = (int16_t) temp; // Never upsample the capture signal here. This should be done at the
} // end of the send chain.
int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && codec_num_channels == 1) {
AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
mono_buffer);
src_data = mono_buffer;
num_channels = 1;
}
if (resampler->InitializeIfNeeded(
sample_rate_hz, destination_rate, num_channels) != 0) {
LOG_FERR3(LS_ERROR,
InitializeIfNeeded,
sample_rate_hz,
destination_rate,
num_channels);
assert(false);
}
const int in_length = samples_per_channel * num_channels;
int out_length = resampler->Resample(
src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
assert(false);
}
dst_af->samples_per_channel_ = out_length / num_channels;
dst_af->sample_rate_hz_ = destination_rate;
dst_af->num_channels_ = num_channels;
dst_af->timestamp_ = -1;
dst_af->speech_type_ = AudioFrame::kNormalSpeech;
dst_af->vad_activity_ = AudioFrame::kVadUnknown;
} }
void Utility::MixAndScaleWithSat(int16_t target[], void MixWithSat(int16_t target[],
const int16_t source[], float scale, int target_channel,
uint16_t len) const int16_t source[],
{ int source_channel,
int32_t temp(0); int source_len) {
for (int i = 0; i < len; i++) assert(target_channel == 1 || target_channel == 2);
{ assert(source_channel == 1 || source_channel == 2);
temp = (int32_t) (target[i] + scale * source[i]);
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
}
void Utility::Scale(int16_t vector[], float scale, uint16_t len) if (target_channel == 2 && source_channel == 1) {
{ // Convert source from mono to stereo.
for (int i = 0; i < len; i++) int32_t left = 0;
{ int32_t right = 0;
vector[i] = (int16_t) (scale * vector[i]); for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i * 2];
right = source[i] + target[i * 2 + 1];
target[i * 2] = WebRtcSpl_SatW32ToW16(left);
target[i * 2 + 1] = WebRtcSpl_SatW32ToW16(right);
} }
} } else if (target_channel == 1 && source_channel == 2) {
// Convert source from stereo to mono.
void Utility::ScaleWithSat(int16_t vector[], float scale, int32_t temp = 0;
uint16_t len) for (int i = 0; i < source_len / 2; ++i) {
{ temp = ((source[i * 2] + source[i * 2 + 1]) >> 1) + target[i];
int32_t temp(0); target[i] = WebRtcSpl_SatW32ToW16(temp);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (scale * vector[i]);
if (temp > 32767)
vector[i] = 32767;
else if (temp < -32768)
vector[i] = -32768;
else
vector[i] = (int16_t) temp;
} }
} else {
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
} }
} // namespace voe } // namespace voe
} // namespace webrtc } // namespace webrtc
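
For reference, the now-free MixWithSat mixes |source| into |target| in
place with saturation, converting channel layout on the fly. A small
worked example (values hypothetical):

  // Mix 2 mono samples into 2 stereo sample pairs.
  int16_t target[] = {32000, -32000, 100, -100};
  int16_t source[] = {1000, 1000};
  webrtc::voe::MixWithSat(target, 2, source, 1, 2);
  // target[0] saturates to 32767 instead of wrapping; target[1] == -31000.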

webrtc/voice_engine/utility.h

@@ -12,47 +12,48 @@
  * Contains functions often used by different parts of VoiceEngine.
  */

-#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H
-#define WEBRTC_VOICE_ENGINE_UTILITY_H
+#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H_
+#define WEBRTC_VOICE_ENGINE_UTILITY_H_

 #include "webrtc/typedefs.h"
-#include "webrtc/voice_engine/voice_engine_defines.h"

-namespace webrtc
-{
+namespace webrtc {

-class Module;
+class AudioFrame;
+class PushResampler;

-namespace voe
-{
+namespace voe {

-class Utility
-{
-public:
-    static void MixWithSat(int16_t target[],
-                           int target_channel,
-                           const int16_t source[],
-                           int source_channel,
-                           int source_len);
-
-    static void MixSubtractWithSat(int16_t target[],
-                                   const int16_t source[],
-                                   uint16_t len);
-
-    static void MixAndScaleWithSat(int16_t target[],
-                                   const int16_t source[],
-                                   float scale,
-                                   uint16_t len);
-
-    static void Scale(int16_t vector[], float scale, uint16_t len);
-
-    static void ScaleWithSat(int16_t vector[],
-                             float scale,
-                             uint16_t len);
-};
+// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
+// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
+// the desired values. Updates |samples_per_channel_| accordingly.
+//
+// On failure, returns -1 and copies |src_frame| to |dst_frame|.
+void RemixAndResample(const AudioFrame& src_frame,
+                      PushResampler* resampler,
+                      AudioFrame* dst_frame);
+
+// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
+// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
+// temporary space and must be of sufficient size to hold the downmixed source
+// audio (recommend using a size of kMaxMonoDataSizeSamples).
+void DownConvertToCodecFormat(const int16_t* src_data,
+                              int samples_per_channel,
+                              int num_channels,
+                              int sample_rate_hz,
+                              int codec_num_channels,
+                              int codec_rate_hz,
+                              int16_t* mono_buffer,
+                              PushResampler* resampler,
+                              AudioFrame* dst_af);
+
+void MixWithSat(int16_t target[],
+                int target_channel,
+                const int16_t source[],
+                int source_channel,
+                int source_len);

 }  // namespace voe
 }  // namespace webrtc

-#endif  // WEBRTC_VOICE_ENGINE_UTILITY_H
+#endif  // WEBRTC_VOICE_ENGINE_UTILITY_H_

webrtc/voice_engine/voe_base_impl.cc

@@ -25,13 +25,6 @@
#include "webrtc/voice_engine/utility.h" #include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_impl.h" #include "webrtc/voice_engine/voice_engine_impl.h"
#if (defined(_WIN32) && defined(_DLL) && (_MSC_VER == 1400))
// Fix for VS 2005 MD/MDd link problem
#include <stdio.h>
extern "C"
{ FILE _iob[3] = { __iob_func()[0], __iob_func()[1], __iob_func()[2]}; }
#endif
namespace webrtc namespace webrtc
{ {
@@ -223,6 +216,9 @@ int VoEBaseImpl::OnDataAvailable(const int voe_channels[],
// No need to go through the APM, demultiplex the data to each VoE channel, // No need to go through the APM, demultiplex the data to each VoE channel,
// encode and send to the network. // encode and send to the network.
for (int i = 0; i < number_of_voe_channels; ++i) { for (int i = 0; i < number_of_voe_channels; ++i) {
// TODO(ajm): In the case where multiple channels are using the same codec
// rate, this path needlessly does extra conversions. We should convert once
// and share between channels.
OnData(voe_channels[i], audio_data, 16, sample_rate, OnData(voe_channels[i], audio_data, 16, sample_rate,
number_of_channels, number_of_frames); number_of_channels, number_of_frames);
} }

webrtc/voice_engine/voice_engine_core.gypi

@@ -57,8 +57,6 @@
             'monitor_module.h',
             'output_mixer.cc',
             'output_mixer.h',
-            'output_mixer_internal.cc',
-            'output_mixer_internal.h',
             'shared_data.cc',
             'shared_data.h',
             'statistics.cc',

webrtc/voice_engine/voice_engine_defines.h

@@ -27,6 +27,10 @@
 namespace webrtc {

+// Internal buffer size required for mono audio, based on the highest sample
+// rate voice engine supports (10 ms of audio at 192 kHz).
+static const int kMaxMonoDataSizeSamples = 1920;
+
 // VolumeControl
 enum { kMinVolumeLevel = 0 };
 enum { kMaxVolumeLevel = 255 };
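
The value follows from 10 ms at the highest supported mono rate:
192000 samples/s * 0.010 s = 1920 samples. A compile-time sanity check
could read (illustration only, not part of this change):

  static_assert(192000 / 100 == 1920, "10 ms of mono audio at 192 kHz");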