Consolidate audio conversion from Channel and TransmitMixer.

Replace the two versions with a single DownConvertToCodecFormat. As
mentioned in the code comments, this could be consolidated further with
RemixAndResample, but in that case we should write a full audio
converter class.
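
For reference, a rough usage sketch of the new entry point (signature
from the utility.h diff below; the capture format and the |capture_data|
pointer are illustrative only, and webrtc/voice_engine/utility.h is
assumed included):

  // Down-convert 10 ms of stereo 48 kHz capture to a mono 16 kHz codec.
  int16_t mono_buffer[kMaxMonoDataSizeSamples];  // Scratch for the downmix.
  PushResampler resampler;
  AudioFrame frame;
  DownConvertToCodecFormat(capture_data,  // const int16_t*, interleaved.
                           480,           // Samples per channel (10 ms @ 48 kHz).
                           2,             // Capture channels.
                           48000,         // Capture rate.
                           1,             // Codec channels.
                           16000,         // Codec rate.
                           mono_buffer,
                           &resampler,
                           &frame);
  // |frame| now holds 160 samples per channel of mono 16 kHz audio.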

Along the way:
- Fix the bug present in Channel::Demultiplex with mono input and a
stereo codec.
- Remove the 32 kHz max from the OnDataAvailable path. This avoids a
48 -> 32 -> 48 conversion when VoE is passed 48 kHz audio; instead we
get a straight pass-through to ACM (see the sketch after this list).
The 32 kHz conversion is still needed in the RecordedDataIsAvailable
path until APM natively supports 48 kHz.
- Merge resampler improvements from ACM1 to ACM2. This allows ACM to
handle 44.1 kHz audio passed to VoE and was originally done here:
https://webrtc-codereview.appspot.com/1590004
- Reuse the RemixAndResample unit tests for DownConvertToCodecFormat.
- Remove unused functions from utility.cc.
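
As a sketch of the relaxed input check (applied to both the ACM1 and
ACM2 Add10MsData hunks below), any rate up to 48 kHz is now accepted
rather than a fixed list:

  // Before: only 8, 16, 32 and 48 kHz input was accepted.
  if (audio_frame.sample_rate_hz_ > 48000) {
    assert(false);
    return -1;  // Input frequency not valid.
  }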

BUG=3155,3000,b/12867572
TESTED=voe_cmd_test using both the OnDataAvailable and
RecordedDataIsAvailable paths, with a captured audio format of all
combinations of {44.1,48} kHz and {1,2} channels, running through all
codecs, and finally using both ACM1 and ACM2.

R=henrika@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/11019005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5843 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date:   2014-04-03 21:56:01 +00:00
parent cca888a5bf
commit 40ee3d07ed
18 changed files with 1663 additions and 1791 deletions


@@ -13,20 +13,15 @@
#include <string.h>
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace acm2 {
ACMResampler::ACMResampler()
: resampler_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) {
ACMResampler::ACMResampler() {
}
ACMResampler::~ACMResampler() {
delete resampler_crit_sect_;
}
int ACMResampler::Resample10Msec(const int16_t* in_audio,
@@ -34,37 +29,28 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
int out_freq_hz,
int num_audio_channels,
int16_t* out_audio) {
CriticalSectionScoped cs(resampler_crit_sect_);
int in_length = in_freq_hz * num_audio_channels / 100;
int out_length = out_freq_hz * num_audio_channels / 100;
if (in_freq_hz == out_freq_hz) {
size_t length = static_cast<size_t>(in_freq_hz * num_audio_channels / 100);
memcpy(out_audio, in_audio, length * sizeof(int16_t));
return static_cast<int16_t>(in_freq_hz / 100);
memcpy(out_audio, in_audio, in_length * sizeof(int16_t));
return in_length / num_audio_channels;
}
// |maxLen| is maximum number of samples for 10ms at 48kHz.
int max_len = 480 * num_audio_channels;
int length_in = (in_freq_hz / 100) * num_audio_channels;
int out_len;
ResamplerType type = (num_audio_channels == 1) ? kResamplerSynchronous :
kResamplerSynchronousStereo;
if (resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type) < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
"Error in reset of resampler");
if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz,
num_audio_channels) != 0) {
LOG_FERR3(LS_ERROR, InitializeIfNeeded, in_freq_hz, out_freq_hz,
num_audio_channels);
return -1;
}
if (resampler_.Push(in_audio, length_in, out_audio, max_len, out_len) < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, 0,
"Error in resampler: resampler.Push");
out_length = resampler_.Resample(in_audio, in_length, out_audio, out_length);
if (out_length == -1) {
LOG_FERR4(LS_ERROR, Resample, in_audio, in_length, out_audio, out_length);
return -1;
}
return out_len / num_audio_channels;
return out_length / num_audio_channels;
}
} // namespace acm2
} // namespace webrtc
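
For reference, a minimal sketch of the PushResampler pattern adopted
above, replacing the legacy Resampler and its per-call critical section
(buffer sizes and rates illustrative):

  PushResampler resampler;
  int16_t in[441] = {0};   // 10 ms of mono audio at 44.1 kHz.
  int16_t out[480] = {0};  // Room for 10 ms at 48 kHz.
  if (resampler.InitializeIfNeeded(44100, 48000, 1) != 0) {
    // Initialization failed; Resample10Msec returns -1 here.
  }
  int out_length = resampler.Resample(in, 441, out, 480);
  // |out_length| is the total sample count across channels, or -1 on error.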


@@ -11,13 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class CriticalSectionWrapper;
namespace acm2 {
class ACMResampler {
@@ -32,13 +29,10 @@ class ACMResampler {
int16_t* out_audio);
private:
// Use the Resampler class.
Resampler resampler_;
CriticalSectionWrapper* resampler_crit_sect_;
PushResampler resampler_;
};
} // namespace acm2
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RESAMPLER_H_


@@ -1205,11 +1205,7 @@ int AudioCodingModuleImpl::Add10MsData(
return -1;
}
// Allow for 8, 16, 32 and 48kHz input audio.
if ((audio_frame.sample_rate_hz_ != 8000)
&& (audio_frame.sample_rate_hz_ != 16000)
&& (audio_frame.sample_rate_hz_ != 32000)
&& (audio_frame.sample_rate_hz_ != 48000)) {
if (audio_frame.sample_rate_hz_ > 48000) {
assert(false);
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot Add 10 ms audio, input frequency not valid");
@@ -1371,7 +1367,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
if (preprocess_frame_.samples_per_channel_ < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot add 10 ms audio, resmapling failed");
"Cannot add 10 ms audio, resampling failed");
return -1;
}
preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;


@@ -1273,11 +1273,7 @@ int32_t AudioCodingModuleImpl::Add10MsData(
return -1;
}
// Allow for 8, 16, 32 and 48kHz input audio.
if ((audio_frame.sample_rate_hz_ != 8000)
&& (audio_frame.sample_rate_hz_ != 16000)
&& (audio_frame.sample_rate_hz_ != 32000)
&& (audio_frame.sample_rate_hz_ != 48000)) {
if (audio_frame.sample_rate_hz_ > 48000) {
assert(false);
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot Add 10 ms audio, input frequency not valid");
@@ -1444,7 +1440,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
if (preprocess_frame_.samples_per_channel_ < 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
"Cannot add 10 ms audio, resmapling failed");
"Cannot add 10 ms audio, resampling failed");
return -1;
}
preprocess_frame_.sample_rate_hz_ = send_codec_inst_.plfreq;


@@ -61,6 +61,8 @@ struct ExperimentalAgc {
bool enabled;
};
static const int kAudioProcMaxNativeSampleRateHz = 32000;
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//


@@ -4150,61 +4150,26 @@ Channel::Demultiplex(const AudioFrame& audioFrame)
return 0;
}
// TODO(xians): This method borrows quite some code from
// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce
// code duplication.
void Channel::Demultiplex(const int16_t* audio_data,
int sample_rate,
int number_of_frames,
int number_of_channels) {
// The highest sample rate that WebRTC supports for mono audio is 96kHz.
static const int kMaxNumberOfFrames = 960;
assert(number_of_frames <= kMaxNumberOfFrames);
// Get the send codec information for doing resampling or downmixing later on.
CodecInst codec;
GetSendCodec(codec);
assert(codec.channels == 1 || codec.channels == 2);
int support_sample_rate = std::min(32000,
std::min(sample_rate, codec.plfreq));
// Downmix the data to mono if needed.
const int16_t* audio_ptr = audio_data;
if (number_of_channels == 2 && codec.channels == 1) {
if (!mono_recording_audio_.get())
mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]);
AudioFrameOperations::StereoToMono(audio_data, number_of_frames,
mono_recording_audio_.get());
audio_ptr = mono_recording_audio_.get();
if (!mono_recording_audio_.get()) {
// Temporary space for DownConvertToCodecFormat.
mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
}
// Resample the data to the sample rate that the codec is using.
if (input_resampler_.InitializeIfNeeded(sample_rate,
support_sample_rate,
codec.channels)) {
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"Channel::Demultiplex() unable to resample");
return;
}
int out_length = input_resampler_.Resample(audio_ptr,
number_of_frames * codec.channels,
_audioFrame.data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"Channel::Demultiplex() resampling failed");
return;
}
_audioFrame.samples_per_channel_ = out_length / codec.channels;
_audioFrame.timestamp_ = -1;
_audioFrame.sample_rate_hz_ = support_sample_rate;
_audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
_audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
_audioFrame.num_channels_ = codec.channels;
_audioFrame.id_ = _channelId;
DownConvertToCodecFormat(audio_data,
number_of_frames,
number_of_channels,
sample_rate,
codec.channels,
codec.plfreq,
mono_recording_audio_.get(),
&input_resampler_,
&_audioFrame);
}
uint32_t
@@ -4694,11 +4659,11 @@ Channel::MixOrReplaceAudioWithFile(int mixingFrequency)
{
// Currently file stream is always mono.
// TODO(xians): Change the code when FilePlayer supports real stereo.
Utility::MixWithSat(_audioFrame.data_,
_audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
MixWithSat(_audioFrame.data_,
_audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
}
else
{
@@ -4754,11 +4719,11 @@ Channel::MixAudioWithFile(AudioFrame& audioFrame,
{
// Currently file stream is always mono.
// TODO(xians): Change the code when FilePlayer supports real stereo.
Utility::MixWithSat(audioFrame.data_,
audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
MixWithSat(audioFrame.data_,
audioFrame.num_channels_,
fileBuffer.get(),
1,
fileSamples);
}
else
{


@@ -545,7 +545,7 @@ private:
AudioLevel _outputAudioLevel;
bool _externalTransport;
AudioFrame _audioFrame;
scoped_array<int16_t> mono_recording_audio_;
scoped_ptr<int16_t[]> mono_recording_audio_;
// Resampler is used when input data is stereo while codec is mono.
PushResampler input_resampler_;
uint8_t _audioLevel_dBov;


@@ -16,11 +16,10 @@
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/voice_engine/statistics.h"
#include "webrtc/voice_engine/utility.h"
namespace webrtc {
namespace voe {
void
@@ -528,7 +527,8 @@ int OutputMixer::GetMixedAudio(int sample_rate_hz,
frame->sample_rate_hz_ = sample_rate_hz;
// TODO(andrew): Ideally the downmixing would occur much earlier, in
// AudioCodingModule.
return RemixAndResample(_audioFrame, &resampler_, frame);
RemixAndResample(_audioFrame, &resampler_, frame);
return 0;
}
int32_t
@@ -565,7 +565,9 @@ OutputMixer::DoOperationsOnCombinedSignal()
}
// --- Far-end Voice Quality Enhancement (AudioProcessing Module)
// TODO(ajm): Check with VoEBase if |need_audio_processing| is false.
// If so, we don't need to call this method and can avoid the subsequent
// resampling. See: https://code.google.com/p/webrtc/issues/detail?id=3147
APMAnalyzeReverseStream();
// --- External media processing
@@ -603,8 +605,7 @@ void OutputMixer::APMAnalyzeReverseStream() {
AudioFrame frame;
frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
if (RemixAndResample(_audioFrame, &audioproc_resampler_, &frame) == -1)
return;
RemixAndResample(_audioFrame, &audioproc_resampler_, &frame);
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
@@ -657,5 +658,4 @@ OutputMixer::InsertInbandDtmfTone()
}
} // namespace voe
} // namespace webrtc


@@ -1,70 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/trace.h"
namespace webrtc {
namespace voe {
int RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
// Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_,
src_frame.samples_per_channel_,
mono_audio);
audio_ptr = mono_audio;
audio_ptr_num_channels = 1;
}
if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
audio_ptr_num_channels) == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_, audio_ptr_num_channels);
return -1;
}
const int src_length = src_frame.samples_per_channel_ *
audio_ptr_num_channels;
int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, Resample, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
return -1;
}
dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
// Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo.
dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame);
}
return 0;
}
} // namespace voe
} // namespace webrtc


@@ -1,33 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
namespace webrtc {
class AudioFrame;
class PushResampler;
namespace voe {
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
// the desired values. Updates |samples_per_channel_| accordingly.
//
// On failure, returns -1 and copies |src_frame| to |dst_frame|.
int RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame);
} // namespace voe
} // namespace webrtc
#endif // VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_


@@ -11,13 +11,20 @@
#include <math.h>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/voice_engine/output_mixer.h"
#include "webrtc/voice_engine/output_mixer_internal.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc {
namespace voe {
namespace {
enum FunctionToTest {
TestRemixAndResample,
TestDownConvertToCodecFormat
};
class OutputMixerTest : public ::testing::Test {
protected:
OutputMixerTest() {
@@ -29,7 +36,8 @@ class OutputMixerTest : public ::testing::Test {
}
void RunResampleTest(int src_channels, int src_sample_rate_hz,
int dst_channels, int dst_sample_rate_hz);
int dst_channels, int dst_sample_rate_hz,
FunctionToTest function);
PushResampler resampler_;
AudioFrame src_frame_;
@@ -121,7 +129,8 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
void OutputMixerTest::RunResampleTest(int src_channels,
int src_sample_rate_hz,
int dst_channels,
int dst_sample_rate_hz) {
int dst_sample_rate_hz,
FunctionToTest function) {
PushResampler resampler; // Create a new one with every test.
const int16_t kSrcLeft = 30; // Shouldn't overflow for any used sample rate.
const int16_t kSrcRight = 15;
@@ -157,7 +166,21 @@ void OutputMixerTest::RunResampleTest(int src_channels,
/ src_sample_rate_hz * kInputKernelDelaySamples * dst_channels * 2;
printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later.
src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
if (function == TestRemixAndResample) {
RemixAndResample(src_frame_, &resampler, &dst_frame_);
} else {
int16_t mono_buffer[kMaxMonoDataSizeSamples];
DownConvertToCodecFormat(src_frame_.data_,
src_frame_.samples_per_channel_,
src_frame_.num_channels_,
src_frame_.sample_rate_hz_,
dst_frame_.num_channels_,
dst_frame_.sample_rate_hz_,
mono_buffer,
&resampler,
&dst_frame_);
}
if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
// The sinc resampler gives poor SNR at this extreme conversion, but we
// expect to see this rarely in practice.
@@ -171,13 +194,13 @@ TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
// Stereo -> stereo.
SetStereoFrame(&src_frame_, 10, 10);
SetStereoFrame(&dst_frame_, 0, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(src_frame_, dst_frame_);
// Mono -> mono.
SetMonoFrame(&src_frame_, 20);
SetMonoFrame(&dst_frame_, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(src_frame_, dst_frame_);
}
@@ -186,20 +209,18 @@ TEST_F(OutputMixerTest, RemixAndResampleMixingOnlySucceeds) {
SetStereoFrame(&dst_frame_, 0, 0);
SetMonoFrame(&src_frame_, 10);
SetStereoFrame(&golden_frame_, 10, 10);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(dst_frame_, golden_frame_);
// Mono -> stereo.
SetMonoFrame(&dst_frame_, 0);
SetStereoFrame(&src_frame_, 10, 20);
SetMonoFrame(&golden_frame_, 15);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
RemixAndResample(src_frame_, &resampler_, &dst_frame_);
VerifyFramesAreEqual(golden_frame_, dst_frame_);
}
TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
// TODO(ajm): convert this to the parameterized TEST_P style used in
// sinc_resampler_unittest.cc. We can then easily add tighter SNR thresholds.
const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
@@ -209,7 +230,28 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate]);
kChannels[dst_channel], kSampleRates[dst_rate],
TestRemixAndResample);
}
}
}
}
}
TEST_F(OutputMixerTest, ConvertToCodecFormatSucceeds) {
const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
if (dst_rate <= src_rate && dst_channel <= src_channel) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate],
TestDownConvertToCodecFormat);
}
}
}
}

File diff suppressed because it is too large.


@@ -17,6 +17,7 @@
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/level_indicator.h"
#include "webrtc/voice_engine/monitor_module.h"
@@ -36,9 +37,7 @@ class MixedAudio;
class Statistics;
class TransmitMixer : public MonitorObserver,
public FileCallback
{
public FileCallback {
public:
static int32_t Create(TransmitMixer*& mixer, uint32_t instanceId);
@@ -175,10 +174,10 @@ private:
// sending codecs.
void GetSendCodecInfo(int* max_sample_rate, int* max_channels);
int GenerateAudioFrame(const int16_t audioSamples[],
int nSamples,
int nChannels,
int samplesPerSec);
void GenerateAudioFrame(const int16_t audioSamples[],
int nSamples,
int nChannels,
int samplesPerSec);
int32_t RecordAudioToFile(uint32_t mixingFrequency);
int32_t MixOrReplaceAudioWithFile(
@@ -232,6 +231,7 @@ private:
int32_t _remainingMuteMicTimeMs;
bool stereo_codec_;
bool swap_stereo_channels_;
scoped_ptr<int16_t[]> mono_buffer_;
};
} // namespace voe


@@ -10,116 +10,150 @@
#include "webrtc/voice_engine/utility.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/interface/module.h"
#include "webrtc/system_wrappers/interface/trace.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc
{
namespace webrtc {
namespace voe {
namespace voe
{
enum{kMaxTargetLen = 2*32*10}; // stereo 32KHz 10ms
// TODO(ajm): There is significant overlap between RemixAndResample and
// ConvertToCodecFormat, but if we're to consolidate we should probably make a
// real converter class.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
void Utility::MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len)
{
assert((target_channel == 1) || (target_channel == 2));
assert((source_channel == 1) || (source_channel == 2));
assert(source_len <= kMaxTargetLen);
// Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_,
src_frame.samples_per_channel_,
mono_audio);
audio_ptr = mono_audio;
audio_ptr_num_channels = 1;
}
if ((target_channel == 2) && (source_channel == 1))
{
// Convert source from mono to stereo.
int32_t left = 0;
int32_t right = 0;
for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i*2];
right = source[i] + target[i*2 + 1];
target[i*2] = WebRtcSpl_SatW32ToW16(left);
target[i*2 + 1] = WebRtcSpl_SatW32ToW16(right);
}
}
else if ((target_channel == 1) && (source_channel == 2))
{
// Convert source from stereo to mono.
int32_t temp = 0;
for (int i = 0; i < source_len/2; ++i) {
temp = ((source[i*2] + source[i*2 + 1])>>1) + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
else
{
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
audio_ptr_num_channels) == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_, audio_ptr_num_channels);
assert(false);
}
const int src_length = src_frame.samples_per_channel_ *
audio_ptr_num_channels;
int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
dst_frame->CopyFrom(src_frame);
LOG_FERR3(LS_ERROR, Resample, audio_ptr, src_length, dst_frame->data_);
assert(false);
}
dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
// Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo.
dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame);
}
}
void Utility::MixSubtractWithSat(int16_t target[],
const int16_t source[],
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = target[i] - source[i];
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
void DownConvertToCodecFormat(const int16_t* src_data,
int samples_per_channel,
int num_channels,
int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler* resampler,
AudioFrame* dst_af) {
assert(samples_per_channel <= kMaxMonoDataSizeSamples);
assert(num_channels == 1 || num_channels == 2);
assert(codec_num_channels == 1 || codec_num_channels == 2);
// Never upsample the capture signal here. This should be done at the
// end of the send chain.
int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && codec_num_channels == 1) {
AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
mono_buffer);
src_data = mono_buffer;
num_channels = 1;
}
if (resampler->InitializeIfNeeded(
sample_rate_hz, destination_rate, num_channels) != 0) {
LOG_FERR3(LS_ERROR,
InitializeIfNeeded,
sample_rate_hz,
destination_rate,
num_channels);
assert(false);
}
const int in_length = samples_per_channel * num_channels;
int out_length = resampler->Resample(
src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
assert(false);
}
dst_af->samples_per_channel_ = out_length / num_channels;
dst_af->sample_rate_hz_ = destination_rate;
dst_af->num_channels_ = num_channels;
dst_af->timestamp_ = -1;
dst_af->speech_type_ = AudioFrame::kNormalSpeech;
dst_af->vad_activity_ = AudioFrame::kVadUnknown;
}
void Utility::MixAndScaleWithSat(int16_t target[],
const int16_t source[], float scale,
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (target[i] + scale * source[i]);
if (temp > 32767)
target[i] = 32767;
else if (temp < -32768)
target[i] = -32768;
else
target[i] = (int16_t) temp;
}
}
void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len) {
assert(target_channel == 1 || target_channel == 2);
assert(source_channel == 1 || source_channel == 2);
void Utility::Scale(int16_t vector[], float scale, uint16_t len)
{
for (int i = 0; i < len; i++)
{
vector[i] = (int16_t) (scale * vector[i]);
if (target_channel == 2 && source_channel == 1) {
// Convert source from mono to stereo.
int32_t left = 0;
int32_t right = 0;
for (int i = 0; i < source_len; ++i) {
left = source[i] + target[i * 2];
right = source[i] + target[i * 2 + 1];
target[i * 2] = WebRtcSpl_SatW32ToW16(left);
target[i * 2 + 1] = WebRtcSpl_SatW32ToW16(right);
}
}
void Utility::ScaleWithSat(int16_t vector[], float scale,
uint16_t len)
{
int32_t temp(0);
for (int i = 0; i < len; i++)
{
temp = (int32_t) (scale * vector[i]);
if (temp > 32767)
vector[i] = 32767;
else if (temp < -32768)
vector[i] = -32768;
else
vector[i] = (int16_t) temp;
} else if (target_channel == 1 && source_channel == 2) {
// Convert source from stereo to mono.
int32_t temp = 0;
for (int i = 0; i < source_len / 2; ++i) {
temp = ((source[i * 2] + source[i * 2 + 1]) >> 1) + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
} else {
int32_t temp = 0;
for (int i = 0; i < source_len; ++i) {
temp = source[i] + target[i];
target[i] = WebRtcSpl_SatW32ToW16(temp);
}
}
}
} // namespace voe
} // namespace webrtc
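
A small usage sketch of the now free-standing MixWithSat, for the
mono-into-stereo case (buffer contents and sizes illustrative):

  int16_t target[2 * 160] = {0};  // 10 ms of stereo at 16 kHz.
  int16_t source[160] = {0};      // 10 ms of mono at 16 kHz.
  // Each mono source sample is added into both the left and right target
  // samples, saturated to the int16_t range.
  MixWithSat(target, 2, source, 1, 160);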


@@ -12,47 +12,48 @@
* Contains functions often used by different parts of VoiceEngine.
*/
#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H
#define WEBRTC_VOICE_ENGINE_UTILITY_H
#ifndef WEBRTC_VOICE_ENGINE_UTILITY_H_
#define WEBRTC_VOICE_ENGINE_UTILITY_H_
#include "webrtc/typedefs.h"
#include "webrtc/voice_engine/voice_engine_defines.h"
namespace webrtc
{
namespace webrtc {
class Module;
class AudioFrame;
class PushResampler;
namespace voe
{
namespace voe {
class Utility
{
public:
static void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len);
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
// the desired values. Updates |samples_per_channel_| accordingly.
//
// On failure, copies |src_frame| to |dst_frame|.
void RemixAndResample(const AudioFrame& src_frame,
PushResampler* resampler,
AudioFrame* dst_frame);
static void MixSubtractWithSat(int16_t target[],
const int16_t source[],
uint16_t len);
// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
// temporary space and must be of sufficient size to hold the downmixed source
// audio (recommend using a size of kMaxMonoDataSizeSamples).
void DownConvertToCodecFormat(const int16_t* src_data,
int samples_per_channel,
int num_channels,
int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler* resampler,
AudioFrame* dst_af);
static void MixAndScaleWithSat(int16_t target[],
const int16_t source[],
float scale,
uint16_t len);
static void Scale(int16_t vector[], float scale, uint16_t len);
static void ScaleWithSat(int16_t vector[],
float scale,
uint16_t len);
};
void MixWithSat(int16_t target[],
int target_channel,
const int16_t source[],
int source_channel,
int source_len);
} // namespace voe
} // namespace webrtc
#endif // WEBRTC_VOICE_ENGINE_UTILITY_H
#endif // WEBRTC_VOICE_ENGINE_UTILITY_H_
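
A short sketch of the RemixAndResample contract described above: the
destination frame carries the desired format before the call (values
illustrative):

  PushResampler resampler;
  AudioFrame src;  // Assume filled with 10 ms of stereo 48 kHz audio.
  AudioFrame dst;
  dst.num_channels_ = 1;        // Desired channel count.
  dst.sample_rate_hz_ = 16000;  // Desired rate.
  RemixAndResample(src, &resampler, &dst);
  // dst.samples_per_channel_ is now 160 (10 ms at 16 kHz); on failure,
  // |src| is copied to |dst| and the function asserts.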


@@ -25,13 +25,6 @@
#include "webrtc/voice_engine/utility.h"
#include "webrtc/voice_engine/voice_engine_impl.h"
#if (defined(_WIN32) && defined(_DLL) && (_MSC_VER == 1400))
// Fix for VS 2005 MD/MDd link problem
#include <stdio.h>
extern "C"
{ FILE _iob[3] = { __iob_func()[0], __iob_func()[1], __iob_func()[2]}; }
#endif
namespace webrtc
{
@@ -223,6 +216,9 @@ int VoEBaseImpl::OnDataAvailable(const int voe_channels[],
// No need to go through the APM, demultiplex the data to each VoE channel,
// encode and send to the network.
for (int i = 0; i < number_of_voe_channels; ++i) {
// TODO(ajm): In the case where multiple channels are using the same codec
// rate, this path needlessly does extra conversions. We should convert once
// and share between channels.
OnData(voe_channels[i], audio_data, 16, sample_rate,
number_of_channels, number_of_frames);
}


@@ -57,8 +57,6 @@
'monitor_module.h',
'output_mixer.cc',
'output_mixer.h',
'output_mixer_internal.cc',
'output_mixer_internal.h',
'shared_data.cc',
'shared_data.h',
'statistics.cc',


@@ -27,6 +27,10 @@
namespace webrtc {
// Internal buffer size required for mono audio, based on the highest sample
// rate voice engine supports (10 ms of audio at 192 kHz).
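// That is, 192000 samples/s * 0.01 s = 1920 samples per channel.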
static const int kMaxMonoDataSizeSamples = 1920;
// VolumeControl
enum { kMinVolumeLevel = 0 };
enum { kMaxVolumeLevel = 255 };