Downmix before resampling in capture and render paths.

Previously, using a mono capture device with a stereo codec produced an
error. This is now prevented by avoiding any remixing in
AudioProcessing: capture-side downmixing is instead done before
resampling. Upmixing can now be handled properly by AudioCoding, since
the AudioProcessing error condition has been removed.

On the render side, downmixing likewise now occurs before resampling;
ideally it would happen even earlier in the chain. Downmixing for the
AudioProcessing reference (reverse) stream is handled the same way.
This logic has been refactored into RemixAndResample, with a
comprehensive unittest added in output_mixer_unittest.cc.
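
A rough sketch of the new capture-path ordering (hypothetical,
simplified names; the real implementation is
TransmitMixer::GenerateAudioFrame in the diff below):

#include <cstdint>

// Sketch only: downmix first, so the resampler and AudioProcessing see
// the minimum number of channels.
void PrepareCapture(const int16_t* device_audio, int samples_per_channel,
                    int num_channels, bool stereo_codec_in_use) {
  static const int kMaxMonoSamples = 480;  // 10 ms at 48 kHz, mono.
  int16_t mono[kMaxMonoSamples];
  const int16_t* audio = device_audio;
  if (num_channels == 2 && !stereo_codec_in_use) {
    // Average each stereo sample pair into one mono sample.
    for (int i = 0; i < samples_per_channel; ++i)
      mono[i] = static_cast<int16_t>(
          (device_audio[2 * i] + device_audio[2 * i + 1]) >> 1);
    audio = mono;
    num_channels = 1;
  }
  // ... resample |audio| here, then hand the frame to AudioProcessing
  // and AudioCoding; any mono-to-stereo upmix the codec needs is done in
  // AudioCoding, after resampling.
  (void)audio;
  (void)num_channels;
}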

BUG=issue624
TEST=manually through voe_cmd_test, by using mono and stereo capture
and render devices with mono and stereo codecs. voice_engine_unittest,
voe_auto_test.

Review URL: https://webrtc-codereview.appspot.com/676004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2448 4adac7df-926f-26a2-2b94-8c16560cd09d
andrew@webrtc.org 2012-06-27 03:25:31 +00:00
parent 7a281a5634
commit 4ecea3e105
18 changed files with 533 additions and 306 deletions


@ -985,6 +985,8 @@ WebRtc_Word32 AudioCodingModuleImpl::Add10MsData(
// either mono-to-stereo or stereo-to-mono conversion.
WebRtc_Word16 audio[WEBRTC_10MS_PCM_AUDIO];
int audio_channels = _sendCodecInst.channels;
// TODO(andrew): reuse RemixAndResample here? The upmixing should be done
// after resampling. (Would require moving it somewhere common).
if (audio_frame.num_channels_ != audio_channels) {
if (audio_channels == 2) {
// Do mono-to-stereo conversion by copying each sample.


@ -34,7 +34,7 @@ void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame) {
// We only support mono-to-stereo.
assert(mixed_frame->num_channels_ == 2 &&
frame->num_channels_ == 1);
AudioFrameOperations::MonoToStereo(*frame);
AudioFrameOperations::MonoToStereo(frame);
}
*mixed_frame += *frame;


@ -22,14 +22,30 @@ class AudioFrame;
// than a class.
class AudioFrameOperations {
public:
static int MonoToStereo(AudioFrame& frame);
// Upmixes mono |src_audio| to stereo |dst_audio|. This is an out-of-place
// operation, meaning src_audio and dst_audio must point to different
// buffers. It is the caller's responsibility to ensure that |dst_audio| is
// sufficiently large.
static void MonoToStereo(const int16_t* src_audio, int samples_per_channel,
int16_t* dst_audio);
// |frame.num_channels_| will be updated. This version checks for sufficient
// buffer size and that |num_channels_| is mono.
static int MonoToStereo(AudioFrame* frame);
static int StereoToMono(AudioFrame& frame);
// Downmixes stereo |src_audio| to mono |dst_audio|. This is an in-place
// operation, meaning |src_audio| and |dst_audio| may point to the same
// buffer.
static void StereoToMono(const int16_t* src_audio, int samples_per_channel,
int16_t* dst_audio);
// |frame.num_channels_| will be updated. This version checks that
// |num_channels_| is stereo.
static int StereoToMono(AudioFrame* frame);
// Swap the left and right channels of |frame|. Fails silently if |frame| is
// not stereo.
static void SwapStereoChannels(AudioFrame* frame);
// Zeros out the audio and sets |frame.energy| to zero.
static void Mute(AudioFrame& frame);
static int Scale(float left, float right, AudioFrame& frame);
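
A brief usage sketch of the new raw-buffer overloads (hypothetical
caller code, not part of this change); StereoToMono may run in place,
while MonoToStereo must not:

#include <cstdint>
#include "audio_frame_operations.h"  // Include path assumed.

void Example() {
  static const int kSamples = 160;  // 10 ms at 16 kHz.
  int16_t stereo[2 * kSamples] = {0};
  int16_t mono[kSamples] = {0};
  // In-place is allowed: |src_audio| and |dst_audio| may be the same buffer.
  webrtc::AudioFrameOperations::StereoToMono(stereo, kSamples, stereo);
  // Out-of-place is required: |dst_audio| must be a distinct buffer with
  // room for 2 * kSamples values.
  webrtc::AudioFrameOperations::MonoToStereo(mono, kSamples, stereo);
}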


@ -13,41 +13,48 @@
namespace webrtc {
int AudioFrameOperations::MonoToStereo(AudioFrame& frame) {
if (frame.num_channels_ != 1) {
void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
int samples_per_channel,
int16_t* dst_audio) {
for (int i = 0; i < samples_per_channel; i++) {
dst_audio[2 * i] = src_audio[i];
dst_audio[2 * i + 1] = src_audio[i];
}
}
int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
if (frame->num_channels_ != 1) {
return -1;
}
if ((frame.samples_per_channel_ << 1) >=
AudioFrame::kMaxDataSizeSamples) {
// not enough memory to expand from mono to stereo
if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) {
// Not enough memory to expand from mono to stereo.
return -1;
}
int16_t payloadCopy[AudioFrame::kMaxDataSizeSamples];
memcpy(payloadCopy, frame.data_,
sizeof(int16_t) * frame.samples_per_channel_);
for (int i = 0; i < frame.samples_per_channel_; i++) {
frame.data_[2 * i] = payloadCopy[i];
frame.data_[2 * i + 1] = payloadCopy[i];
}
frame.num_channels_ = 2;
int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
memcpy(data_copy, frame->data_,
sizeof(int16_t) * frame->samples_per_channel_);
MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_);
frame->num_channels_ = 2;
return 0;
}
int AudioFrameOperations::StereoToMono(AudioFrame& frame) {
if (frame.num_channels_ != 2) {
void AudioFrameOperations::StereoToMono(const int16_t* src_audio,
int samples_per_channel,
int16_t* dst_audio) {
for (int i = 0; i < samples_per_channel; i++) {
dst_audio[i] = (src_audio[2 * i] + src_audio[2 * i + 1]) >> 1;
}
}
int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
if (frame->num_channels_ != 2) {
return -1;
}
for (int i = 0; i < frame.samples_per_channel_; i++) {
frame.data_[i] = (frame.data_[2 * i] >> 1) +
(frame.data_[2 * i + 1] >> 1);
}
frame.num_channels_ = 1;
StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_);
frame->num_channels_ = 1;
return 0;
}
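
Note that the new averaging is safe against wraparound: the int16_t
operands in (src_audio[2 * i] + src_audio[2 * i + 1]) >> 1 are promoted
to int before the addition, so even the worst case of
-32768 + -32768 = -65536 stays in range and shifts back to -32768. The
StereoToMonoDoesNotWrapAround test below exercises exactly this case.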


@ -14,7 +14,6 @@
#include "module_common_types.h"
namespace webrtc {
namespace voe {
namespace {
class AudioFrameOperationsTest : public ::testing::Test {
@ -53,44 +52,60 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
}
TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(frame_));
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
frame_.samples_per_channel_ = AudioFrame::kMaxDataSizeSamples;
frame_.num_channels_ = 1;
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(frame_));
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
}
TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
frame_.num_channels_ = 1;
SetFrameData(&frame_, 1);
EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(frame_));
AudioFrame temp_frame = frame_;
EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
AudioFrame stereo_frame;
stereo_frame.samples_per_channel_ = 320;
stereo_frame.num_channels_ = 2;
SetFrameData(&stereo_frame, 1, 1);
VerifyFramesAreEqual(stereo_frame, frame_);
SetFrameData(&frame_, 0);
AudioFrameOperations::MonoToStereo(temp_frame.data_,
frame_.samples_per_channel_,
frame_.data_);
frame_.num_channels_ = 2; // Need to set manually.
VerifyFramesAreEqual(stereo_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, StereoToMonoFailsWithBadParameters) {
frame_.num_channels_ = 1;
EXPECT_EQ(-1, AudioFrameOperations::StereoToMono(frame_));
EXPECT_EQ(-1, AudioFrameOperations::StereoToMono(&frame_));
}
TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
SetFrameData(&frame_, 4, 2);
EXPECT_EQ(0, AudioFrameOperations::StereoToMono(frame_));
AudioFrame temp_frame = frame_;
EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
AudioFrame mono_frame;
mono_frame.samples_per_channel_ = 320;
mono_frame.num_channels_ = 1;
SetFrameData(&mono_frame, 3);
VerifyFramesAreEqual(mono_frame, frame_);
SetFrameData(&frame_, 0);
AudioFrameOperations::StereoToMono(temp_frame.data_,
frame_.samples_per_channel_,
frame_.data_);
frame_.num_channels_ = 1; // Need to set manually.
VerifyFramesAreEqual(mono_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) {
SetFrameData(&frame_, -32768, -32768);
EXPECT_EQ(0, AudioFrameOperations::StereoToMono(frame_));
EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
AudioFrame mono_frame;
mono_frame.samples_per_channel_ = 320;
@ -208,5 +223,4 @@ TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) {
}
} // namespace
} // namespace voe
} // namespace webrtc


@ -873,7 +873,7 @@ WebRtc_Word32 Channel::GetAudioFrame(const WebRtc_Word32 id,
{
// Emulate stereo mode since panning is active.
// The mono signal is copied to both left and right channels here.
AudioFrameOperations::MonoToStereo(audioFrame);
AudioFrameOperations::MonoToStereo(&audioFrame);
}
// For true stereo mode (when we are receiving a stereo signal), no
// action is needed.


@ -14,8 +14,9 @@
#include "audio_frame_operations.h"
#include "critical_section_wrapper.h"
#include "file_wrapper.h"
#include "trace.h"
#include "output_mixer_internal.h"
#include "statistics.h"
#include "trace.h"
#include "voe_external_media.h"
namespace webrtc {
@ -472,13 +473,13 @@ int OutputMixer::StartRecordingPlayout(OutStream* stream,
notificationTime) != 0)
{
_engineStatisticsPtr->SetLastError(VE_BAD_FILE, kTraceError,
"StartRecordingAudioFile() failed to start file recording");
"StartRecordingAudioFile() failed to start file recording");
_outputFileRecorderPtr->StopRecording();
FileRecorder::DestroyFileRecorder(_outputFileRecorderPtr);
_outputFileRecorderPtr = NULL;
return -1;
}
_outputFileRecorderPtr->RegisterModuleFileCallback(this);
_outputFileRecording = true;
@ -514,86 +515,28 @@ int OutputMixer::StopRecordingPlayout()
return 0;
}
WebRtc_Word32
OutputMixer::GetMixedAudio(const WebRtc_Word32 desiredFreqHz,
const WebRtc_UWord8 channels,
AudioFrame& audioFrame)
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,-1),
"OutputMixer::GetMixedAudio(desiredFreqHz=%d, channels=&d)",
desiredFreqHz, channels);
int OutputMixer::GetMixedAudio(int sample_rate_hz,
int num_channels,
AudioFrame* frame) {
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,-1),
"OutputMixer::GetMixedAudio(sample_rate_hz=%d, num_channels=%d)",
sample_rate_hz, num_channels);
audioFrame = _audioFrame;
// --- Record playout if enabled
{
CriticalSectionScoped cs(&_fileCritSect);
if (_outputFileRecording && _outputFileRecorderPtr)
_outputFileRecorderPtr->RecordAudioToFile(_audioFrame);
}
// --- Record playout if enabled
{
CriticalSectionScoped cs(&_fileCritSect);
if (_outputFileRecording)
{
if (_outputFileRecorderPtr)
{
_outputFileRecorderPtr->RecordAudioToFile(audioFrame);
}
}
}
int outLen(0);
if (audioFrame.num_channels_ == 1)
{
if (_resampler.ResetIfNeeded(audioFrame.sample_rate_hz_,
desiredFreqHz,
kResamplerSynchronous) != 0)
{
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId,-1),
"OutputMixer::GetMixedAudio() unable to resample - 1");
return -1;
}
}
else
{
if (_resampler.ResetIfNeeded(audioFrame.sample_rate_hz_,
desiredFreqHz,
kResamplerSynchronousStereo) != 0)
{
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId,-1),
"OutputMixer::GetMixedAudio() unable to resample - 2");
return -1;
}
}
if (_resampler.Push(
_audioFrame.data_,
_audioFrame.samples_per_channel_*_audioFrame.num_channels_,
audioFrame.data_,
AudioFrame::kMaxDataSizeSamples,
outLen) == 0)
{
// Ensure that output from resampler matches the audio-frame format.
// Example: 10ms stereo output at 48kHz => outLen = 960 =>
// convert samples_per_channel_ to 480
audioFrame.samples_per_channel_ =
(outLen / _audioFrame.num_channels_);
audioFrame.sample_rate_hz_ = desiredFreqHz;
}
else
{
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId,-1),
"OutputMixer::GetMixedAudio() resampling failed");
return -1;
}
if ((channels == 2) && (audioFrame.num_channels_ == 1))
{
AudioFrameOperations::MonoToStereo(audioFrame);
}
else if ((channels == 1) && (audioFrame.num_channels_ == 2))
{
AudioFrameOperations::StereoToMono(audioFrame);
}
return 0;
frame->num_channels_ = num_channels;
frame->sample_rate_hz_ = sample_rate_hz;
// TODO(andrew): Ideally the downmixing would occur much earlier, in
// AudioCodingModule.
return RemixAndResample(_audioFrame, &_resampler, frame);
}
WebRtc_Word32
OutputMixer::DoOperationsOnCombinedSignal()
{
if (_audioFrame.sample_rate_hz_ != _mixingFrequencyHz)
@ -615,7 +558,7 @@ OutputMixer::DoOperationsOnCombinedSignal()
{
if (_audioFrame.num_channels_ == 1)
{
AudioFrameOperations::MonoToStereo(_audioFrame);
AudioFrameOperations::MonoToStereo(&_audioFrame);
}
else
{
@ -640,7 +583,7 @@ OutputMixer::DoOperationsOnCombinedSignal()
{
_externalMediaCallbackPtr->Process(
-1,
kPlaybackAllChannelsMixed,
(WebRtc_Word16*)_audioFrame.data_,
_audioFrame.samples_per_channel_,
_audioFrame.sample_rate_hz_,
@ -655,56 +598,22 @@ OutputMixer::DoOperationsOnCombinedSignal()
}
// ----------------------------------------------------------------------------
// Private methods
// ----------------------------------------------------------------------------
int
OutputMixer::APMAnalyzeReverseStream()
{
int outLen(0);
AudioFrame audioFrame = _audioFrame;
void OutputMixer::APMAnalyzeReverseStream() {
// Convert from mixing to AudioProcessing sample rate, determined by the send
// side. Downmix to mono.
AudioFrame frame;
frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
if (RemixAndResample(_audioFrame, &_apmResampler, &frame) == -1)
return;
// Convert from mixing frequency to APM frequency.
// Sending side determines APM frequency.
if (audioFrame.num_channels_ == 1)
{
_apmResampler.ResetIfNeeded(_audioFrame.sample_rate_hz_,
_audioProcessingModulePtr->sample_rate_hz(),
kResamplerSynchronous);
}
else
{
_apmResampler.ResetIfNeeded(_audioFrame.sample_rate_hz_,
_audioProcessingModulePtr->sample_rate_hz(),
kResamplerSynchronousStereo);
}
if (_apmResampler.Push(
_audioFrame.data_,
_audioFrame.samples_per_channel_*_audioFrame.num_channels_,
audioFrame.data_,
AudioFrame::kMaxDataSizeSamples,
outLen) == 0)
{
audioFrame.samples_per_channel_ =
(outLen / _audioFrame.num_channels_);
audioFrame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
}
if (audioFrame.num_channels_ == 2)
{
AudioFrameOperations::StereoToMono(audioFrame);
}
// Perform far-end APM analyze
if (_audioProcessingModulePtr->AnalyzeReverseStream(&audioFrame) == -1)
{
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
"AudioProcessingModule::AnalyzeReverseStream() => error");
}
return 0;
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
"AudioProcessingModule::AnalyzeReverseStream() => error");
}
}
int


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_H
#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_H
#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_H_
#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_H_
#include "audio_conference_mixer.h"
#include "audio_conference_mixer_defines.h"
@ -72,9 +72,8 @@ public:
WebRtc_Word32 SetAnonymousMixabilityStatus(MixerParticipant& participant,
const bool mixable);
WebRtc_Word32 GetMixedAudio(const WebRtc_Word32 desiredFreqHz,
const WebRtc_UWord8 channels,
AudioFrame& audioFrame);
int GetMixedAudio(int sample_rate_hz, int num_channels,
AudioFrame* audioFrame);
// VoEVolumeControl
int GetSpeechOutputLevel(WebRtc_UWord32& level);
@ -95,14 +94,14 @@ public:
virtual ~OutputMixer();
public: // from AudioMixerOutputReceiver
// from AudioMixerOutputReceiver
virtual void NewMixedAudio(
const WebRtc_Word32 id,
const AudioFrame& generalAudioFrame,
const AudioFrame** uniqueAudioFrames,
const WebRtc_UWord32 size);
public: // from AudioMixerStatusReceiver
// from AudioMixerStatusReceiver
virtual void MixedParticipants(
const WebRtc_Word32 id,
const ParticipantStatistics* participantStatistics,
@ -116,7 +115,7 @@ public: // from AudioMixerStatusReceiver
virtual void MixedAudioLevel(const WebRtc_Word32 id,
const WebRtc_UWord32 level);
public: // For file recording
// For file recording
void PlayNotification(const WebRtc_Word32 id,
const WebRtc_UWord32 durationMs);
@ -128,14 +127,14 @@ public: // For file recording
private:
OutputMixer(const WebRtc_UWord32 instanceId);
int APMAnalyzeReverseStream();
void APMAnalyzeReverseStream();
int InsertInbandDtmfTone();
private: // uses
// uses
Statistics* _engineStatisticsPtr;
AudioProcessing* _audioProcessingModulePtr;
private: // owns
// owns
CriticalSectionWrapper& _callbackCritSect;
// protect the _outputFileRecorderPtr and _outputFileRecording
CriticalSectionWrapper& _fileCritSect;
@ -159,4 +158,4 @@ private: // owns
} // namespace webrtc
#endif // VOICE_ENGINE_OUTPUT_MIXER_H
#endif // VOICE_ENGINE_OUTPUT_MIXER_H_


@ -0,0 +1,73 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "output_mixer_internal.h"
#include "audio_frame_operations.h"
#include "common_audio/resampler/include/resampler.h"
#include "module_common_types.h"
#include "trace.h"
namespace webrtc {
namespace voe {
int RemixAndResample(const AudioFrame& src_frame,
Resampler* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_;
int audio_ptr_num_channels = src_frame.num_channels_;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
// Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_,
src_frame.samples_per_channel_,
mono_audio);
audio_ptr = mono_audio;
audio_ptr_num_channels = 1;
}
const ResamplerType resampler_type = audio_ptr_num_channels == 1 ?
kResamplerSynchronous : kResamplerSynchronousStereo;
if (resampler->ResetIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
resampler_type) == -1) {
*dst_frame = src_frame;
WEBRTC_TRACE(kTraceError, kTraceVoice, -1,
"%s ResetIfNeeded failed", __FUNCTION__);
return -1;
}
int out_length = 0;
if (resampler->Push(audio_ptr,
src_frame.samples_per_channel_ * audio_ptr_num_channels,
dst_frame->data_,
AudioFrame::kMaxDataSizeSamples,
out_length) == 0) {
dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
} else {
*dst_frame = src_frame;
WEBRTC_TRACE(kTraceError, kTraceVoice, -1,
"%s resampling failed", __FUNCTION__);
return -1;
}
// Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo.
dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame);
}
return 0;
}
} // namespace voe
} // namespace webrtc
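
Note the ordering above: any stereo-to-mono downmix happens before the
resampler and any mono-to-stereo upmix after it, so the resampler always
runs on the minimum number of channels.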


@ -0,0 +1,33 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
#define WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
namespace webrtc {
class AudioFrame;
class Resampler;
namespace voe {
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
// Expects |dst_frame| to have its |num_channels_| and |sample_rate_hz_| set to
// the desired values. Updates |samples_per_channel_| accordingly.
//
// On failure, returns -1 and copies |src_frame| to |dst_frame|.
int RemixAndResample(const AudioFrame& src_frame,
Resampler* resampler,
AudioFrame* dst_frame);
} // namespace voe
} // namespace webrtc
#endif  // WEBRTC_VOICE_ENGINE_OUTPUT_MIXER_INTERNAL_H_
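
A usage sketch of this contract (hypothetical caller; it mirrors what
OutputMixer::GetMixedAudio does above):

#include "common_audio/resampler/include/resampler.h"
#include "module_common_types.h"
#include "output_mixer_internal.h"

void ExampleRemix(const webrtc::AudioFrame& src,
                  webrtc::Resampler* resampler) {
  webrtc::AudioFrame dst;
  dst.num_channels_ = 1;        // Desired output channel count.
  dst.sample_rate_hz_ = 16000;  // Desired output sample rate.
  if (webrtc::voe::RemixAndResample(src, resampler, &dst) == -1) {
    return;  // On failure, |dst| is a copy of |src|.
  }
  // On success, dst.samples_per_channel_ is updated to match 16 kHz
  // (160 samples for a 10 ms frame).
}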


@ -0,0 +1,214 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "gtest/gtest.h"
#include "output_mixer.h"
#include "output_mixer_internal.h"
namespace webrtc {
namespace voe {
namespace {
class OutputMixerTest : public ::testing::Test {
protected:
OutputMixerTest() {
src_frame_.sample_rate_hz_ = 16000;
src_frame_.samples_per_channel_ = src_frame_.sample_rate_hz_ / 100;
src_frame_.num_channels_ = 1;
dst_frame_ = src_frame_;
golden_frame_ = src_frame_;
}
void RunResampleTest(int src_channels, int src_sample_rate_hz,
int dst_channels, int dst_sample_rate_hz);
Resampler resampler_;
AudioFrame src_frame_;
AudioFrame dst_frame_;
AudioFrame golden_frame_;
};
// Sets the signal value to increase by |data| with every sample. Floats are
// used so non-integer values result in rounding error, but not an accumulating
// error.
void SetMonoFrame(AudioFrame* frame, float data, int sample_rate_hz) {
frame->num_channels_ = 1;
frame->sample_rate_hz_ = sample_rate_hz;
frame->samples_per_channel_ = sample_rate_hz / 100;
for (int i = 0; i < frame->samples_per_channel_; i++) {
frame->data_[i] = data * i;
}
}
// Keep the existing sample rate.
void SetMonoFrame(AudioFrame* frame, float data) {
SetMonoFrame(frame, data, frame->sample_rate_hz_);
}
// Sets the signal value to increase by |left| and |right| with every sample in
// each channel respectively.
void SetStereoFrame(AudioFrame* frame, float left, float right,
int sample_rate_hz) {
frame->num_channels_ = 2;
frame->sample_rate_hz_ = sample_rate_hz;
frame->samples_per_channel_ = sample_rate_hz / 100;
for (int i = 0; i < frame->samples_per_channel_; i++) {
frame->data_[i * 2] = left * i;
frame->data_[i * 2 + 1] = right * i;
}
}
// Keep the existing sample rate.
void SetStereoFrame(AudioFrame* frame, float left, float right) {
SetStereoFrame(frame, left, right, frame->sample_rate_hz_);
}
void VerifyParams(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
EXPECT_EQ(ref_frame.num_channels_, test_frame.num_channels_);
EXPECT_EQ(ref_frame.samples_per_channel_, test_frame.samples_per_channel_);
EXPECT_EQ(ref_frame.sample_rate_hz_, test_frame.sample_rate_hz_);
}
// Computes the best SNR based on the error between |ref_frame| and
// |test_frame|. It allows for up to a 30 sample delay between the signals to
// compensate for the resampling delay.
float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
VerifyParams(ref_frame, test_frame);
float best_snr = 0;
int best_delay = 0;
for (int delay = 0; delay < 30; delay++) {
float mse = 0;
float variance = 0;
for (int i = 0; i < ref_frame.samples_per_channel_ *
ref_frame.num_channels_ - delay; i++) {
int error = ref_frame.data_[i] - test_frame.data_[i + delay];
mse += error * error;
variance += ref_frame.data_[i] * ref_frame.data_[i];
}
float snr = 100; // We assign 100 dB to the zero-error case.
if (mse > 0)
snr = 10 * log10(variance / mse);
if (snr > best_snr) {
best_snr = snr;
best_delay = delay;
}
}
printf("SNR=%.1f dB at delay=%d\n", best_snr, best_delay);
return best_snr;
}
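
For reference, the SNR computed here is 10 * log10(variance / mse);
e.g. a signal variance of 1e6 against an MSE of 100 gives
10 * log10(1e4) = 40 dB, which is the pass threshold RunResampleTest
uses below.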
void VerifyFramesAreEqual(const AudioFrame& ref_frame,
const AudioFrame& test_frame) {
VerifyParams(ref_frame, test_frame);
for (int i = 0; i < ref_frame.samples_per_channel_ * ref_frame.num_channels_;
i++) {
EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]);
}
}
void OutputMixerTest::RunResampleTest(int src_channels,
int src_sample_rate_hz,
int dst_channels,
int dst_sample_rate_hz) {
Resampler resampler; // Create a new one with every test.
const int16_t kSrcLeft = 60; // Shouldn't overflow for any used sample rate.
const int16_t kSrcRight = 30;
const float kResamplingFactor = (1.0 * src_sample_rate_hz) /
dst_sample_rate_hz;
const float kDstLeft = kResamplingFactor * kSrcLeft;
const float kDstRight = kResamplingFactor * kSrcRight;
const float kDstMono = (kDstLeft + kDstRight) / 2;
if (src_channels == 1)
SetMonoFrame(&src_frame_, kSrcLeft, src_sample_rate_hz);
else
SetStereoFrame(&src_frame_, kSrcLeft, kSrcRight, src_sample_rate_hz);
if (dst_channels == 1) {
SetMonoFrame(&dst_frame_, 0, dst_sample_rate_hz);
if (src_channels == 1)
SetMonoFrame(&golden_frame_, kDstLeft, dst_sample_rate_hz);
else
SetMonoFrame(&golden_frame_, kDstMono, dst_sample_rate_hz);
} else {
SetStereoFrame(&dst_frame_, 0, 0, dst_sample_rate_hz);
if (src_channels == 1)
SetStereoFrame(&golden_frame_, kDstLeft, kDstLeft, dst_sample_rate_hz);
else
SetStereoFrame(&golden_frame_, kDstLeft, kDstRight, dst_sample_rate_hz);
}
printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later.
src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_), 40.0f);
}
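
The golden amplitudes are scaled by kResamplingFactor because the test
signals ramp at a fixed slope in time: after resampling, the per-sample
increment is the source increment times
src_sample_rate_hz / dst_sample_rate_hz.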
TEST_F(OutputMixerTest, RemixAndResampleFailsWithBadSampleRate) {
SetMonoFrame(&dst_frame_, 10, 44100);
EXPECT_EQ(-1, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(src_frame_, dst_frame_);
}
TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
// Stereo -> stereo.
SetStereoFrame(&src_frame_, 10, 10);
SetStereoFrame(&dst_frame_, 0, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(src_frame_, dst_frame_);
// Mono -> mono.
SetMonoFrame(&src_frame_, 20);
SetMonoFrame(&dst_frame_, 0);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(src_frame_, dst_frame_);
}
TEST_F(OutputMixerTest, RemixAndResampleMixingOnlySucceeds) {
// Stereo -> mono.
SetStereoFrame(&dst_frame_, 0, 0);
SetMonoFrame(&src_frame_, 10);
SetStereoFrame(&golden_frame_, 10, 10);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(dst_frame_, golden_frame_);
// Mono -> stereo.
SetMonoFrame(&dst_frame_, 0);
SetStereoFrame(&src_frame_, 10, 20);
SetMonoFrame(&golden_frame_, 15);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(golden_frame_, dst_frame_);
}
TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
// We don't attempt to be exhaustive here, but just get good coverage. Some
// combinations of rates will not be resampled, and some give an odd
// resampling factor which makes it more difficult to evaluate.
const int kSampleRates[] = {16000, 32000, 48000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate]);
}
}
}
}
}
} // namespace
} // namespace voe
} // namespace webrtc


@ -85,7 +85,7 @@ WebRtc_UWord16 SharedData::NumOfSendingChannels()
{
return 0;
}
WebRtc_UWord16 nChannelsSending(0);
WebRtc_Word32* channelsArray = new WebRtc_Word32[numOfChannels];


@ -21,12 +21,15 @@
#include "voe_base_impl.h"
#include "voe_external_media.h"
#define WEBRTC_ABS(a) (((a) < 0) ? -(a) : (a))
namespace webrtc {
namespace voe {
// Used for downmixing before resampling.
static const int kMaxMonoDeviceDataSizeSamples = 480; // 10 ms, 48 kHz, mono.
void
TransmitMixer::OnPeriodicProcess()
{
@ -203,6 +206,7 @@ TransmitMixer::TransmitMixer(const WebRtc_UWord32 instanceId) :
_remainingMuteMicTimeMs(0),
_mixingFrequency(0),
_includeAudioLevelIndication(false),
stereo_codec_(false),
swap_stereo_channels_(false)
{
WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId, -1),
@ -274,8 +278,8 @@ TransmitMixer::SetEngineInformation(ProcessThread& processThread,
return 0;
}
WebRtc_Word32
TransmitMixer::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
{
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
@ -293,7 +297,7 @@ TransmitMixer::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
return 0;
}
WebRtc_Word32
TransmitMixer::SetAudioProcessingModule(AudioProcessing* audioProcessingModule)
{
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
@ -304,7 +308,27 @@ TransmitMixer::SetAudioProcessingModule(AudioProcessing* audioProcessingModule)
return 0;
}
WebRtc_Word32
void TransmitMixer::CheckForSendCodecChanges() {
ScopedChannel sc(*_channelManagerPtr);
void* iterator = NULL;
Channel* channel = sc.GetFirstChannel(iterator);
_mixingFrequency = 8000;
stereo_codec_ = false;
while (channel != NULL) {
if (channel->Sending()) {
CodecInst codec;
channel->GetSendCodec(codec);
if (codec.channels == 2)
stereo_codec_ = true;
if (codec.plfreq > _mixingFrequency)
_mixingFrequency = codec.plfreq;
}
channel = sc.GetNextChannel(iterator);
}
}
WebRtc_Word32
TransmitMixer::PrepareDemux(const void* audioSamples,
const WebRtc_UWord32 nSamples,
const WebRtc_UWord8 nChannels,
@ -319,32 +343,14 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
"currentMicLevel=%u)", nSamples, nChannels, samplesPerSec,
totalDelayMS, clockDrift, currentMicLevel);
const int mixingFrequency = _mixingFrequency;
ScopedChannel sc(*_channelManagerPtr);
void* iterator(NULL);
Channel* channelPtr = sc.GetFirstChannel(iterator);
_mixingFrequency = 8000;
bool stereo_codec = false; // Used for stereo swapping.
while (channelPtr != NULL) {
if (channelPtr->Sending()) {
CodecInst temp_codec;
channelPtr->GetSendCodec(temp_codec);
stereo_codec = temp_codec.channels == 2;
if (temp_codec.plfreq > _mixingFrequency)
_mixingFrequency = temp_codec.plfreq;
}
channelPtr = sc.GetNextChannel(iterator);
}
CheckForSendCodecChanges();
// --- Resample input audio and create/store the initial audio frame
if (GenerateAudioFrame((const WebRtc_Word16*) audioSamples,
if (GenerateAudioFrame(static_cast<const WebRtc_Word16*>(audioSamples),
nSamples,
nChannels,
samplesPerSec,
_mixingFrequency) == -1)
samplesPerSec) == -1)
{
return -1;
}
@ -353,7 +359,7 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
APMProcessStream(totalDelayMS, clockDrift, currentMicLevel);
if (swap_stereo_channels_ && stereo_codec)
if (swap_stereo_channels_ && stereo_codec_)
// Only bother swapping if we're using a stereo codec.
AudioFrameOperations::SwapStereoChannels(&_audioFrame);
@ -418,20 +424,10 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
}
}
if (_mixingFrequency != mixingFrequency)
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::TransmitMixer::PrepareDemux() => "
"mixing frequency = %d",
_mixingFrequency);
}
return 0;
}
WebRtc_Word32
TransmitMixer::DemuxAndMix()
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId, -1),
@ -455,11 +451,10 @@ TransmitMixer::DemuxAndMix()
}
channelPtr = sc.GetNextChannel(iterator);
}
return 0;
}
WebRtc_Word32
TransmitMixer::EncodeAndSend()
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId, -1),
@ -1155,49 +1150,52 @@ bool TransmitMixer::IsRecordingMic()
return _fileRecording;
}
WebRtc_Word32
TransmitMixer::GenerateAudioFrame(const WebRtc_Word16 audioSamples[],
const WebRtc_UWord32 nSamples,
const WebRtc_UWord8 nChannels,
const WebRtc_UWord32 samplesPerSec,
const int mixingFrequency)
int TransmitMixer::GenerateAudioFrame(const int16_t audio[],
int samples_per_channel,
int num_channels,
int sample_rate_hz)
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::GenerateAudioFrame(nSamples=%u,"
"samplesPerSec=%u, mixingFrequency=%u)",
nSamples, samplesPerSec, mixingFrequency);
const int16_t* audio_ptr = audio;
int16_t mono_audio[kMaxMonoDeviceDataSizeSamples];
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && !stereo_codec_) {
AudioFrameOperations::StereoToMono(audio, samples_per_channel,
mono_audio);
audio_ptr = mono_audio;
num_channels = 1;
}
ResamplerType resampType = (nChannels == 1) ?
ResamplerType resampler_type = (num_channels == 1) ?
kResamplerSynchronous : kResamplerSynchronousStereo;
if (_audioResampler.ResetIfNeeded(samplesPerSec,
mixingFrequency,
resampType) != 0)
if (_audioResampler.ResetIfNeeded(sample_rate_hz,
_mixingFrequency,
resampler_type) != 0)
{
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::GenerateAudioFrame() unable to resample");
return -1;
}
if (_audioResampler.Push(
(WebRtc_Word16*) audioSamples,
nSamples * nChannels,
_audioFrame.data_,
AudioFrame::kMaxDataSizeSamples,
(int&) _audioFrame.samples_per_channel_) == -1)
if (_audioResampler.Push(audio_ptr,
samples_per_channel * num_channels,
_audioFrame.data_,
AudioFrame::kMaxDataSizeSamples,
_audioFrame.samples_per_channel_) == -1)
{
WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::GenerateAudioFrame() resampling failed");
return -1;
}
_audioFrame.samples_per_channel_ /= nChannels;
_audioFrame.samples_per_channel_ /= num_channels;
_audioFrame.id_ = _instanceId;
_audioFrame.timestamp_ = -1;
_audioFrame.sample_rate_hz_ = mixingFrequency;
_audioFrame.sample_rate_hz_ = _mixingFrequency;
_audioFrame.speech_type_ = AudioFrame::kNormalSpeech;
_audioFrame.vad_activity_ = AudioFrame::kVadUnknown;
_audioFrame.num_channels_ = nChannels;
_audioFrame.num_channels_ = num_channels;
return 0;
}
@ -1288,14 +1286,14 @@ WebRtc_Word32 TransmitMixer::APMProcessStream(
{
WebRtc_UWord16 captureLevel(currentMicLevel);
// Check if the number of input channels has changed. Retain the number
// of output channels.
// Check if the number of incoming channels has changed. This has taken
// both the capture device and send codecs into account.
if (_audioFrame.num_channels_ !=
_audioProcessingModulePtr->num_input_channels())
{
if (_audioProcessingModulePtr->set_num_channels(
_audioFrame.num_channels_,
_audioProcessingModulePtr->num_output_channels()))
_audioFrame.num_channels_))
{
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, -1),
"AudioProcessing::set_num_channels(%d, %d) => error",


@ -164,11 +164,12 @@ public:
private:
TransmitMixer(const WebRtc_UWord32 instanceId);
WebRtc_Word32 GenerateAudioFrame(const WebRtc_Word16 audioSamples[],
const WebRtc_UWord32 nSamples,
const WebRtc_UWord8 nChannels,
const WebRtc_UWord32 samplesPerSec,
const int mixingFrequency);
void CheckForSendCodecChanges();
int GenerateAudioFrame(const int16_t audioSamples[],
int nSamples,
int nChannels,
int samplesPerSec);
WebRtc_Word32 RecordAudioToFile(const WebRtc_UWord32 mixingFrequency);
WebRtc_Word32 MixOrReplaceAudioWithFile(
@ -233,6 +234,7 @@ private:
WebRtc_Word32 _remainingMuteMicTimeMs;
int _mixingFrequency;
bool _includeAudioLevelIndication;
bool stereo_codec_;
bool swap_stereo_channels_;
};


@ -194,9 +194,8 @@ WebRtc_Word32 VoEBaseImpl::RecordedDataIsAvailable(
// Perform channel-independent operations
// (APM, mix with file, record to file, mute, etc.)
_shared->transmit_mixer()->PrepareDemux(audioSamples, nSamples, nChannels,
samplesPerSec,
(WebRtc_UWord16) totalDelayMS, clockDrift,
currentVoEMicLevel);
samplesPerSec, static_cast<WebRtc_UWord16>(totalDelayMS), clockDrift,
currentVoEMicLevel);
// Copy the audio frame to each sending channel and perform
// channel-dependent operations (file mixing, mute, etc.) to prepare
@ -246,6 +245,8 @@ WebRtc_Word32 VoEBaseImpl::NeedMorePlayData(
assert(_shared->output_mixer() != NULL);
// TODO(andrew): if the device is running in mono, we should tell the mixer
// here so that it will only request mono from AudioCodingModule.
// Perform mixing of all active participants (channel-based mixing)
_shared->output_mixer()->MixActiveChannels();
@ -254,7 +255,7 @@ WebRtc_Word32 VoEBaseImpl::NeedMorePlayData(
// Retrieve the final output mix (resampled to match the ADM)
_shared->output_mixer()->GetMixedAudio(samplesPerSec, nChannels,
_audioFrame);
&_audioFrame);
assert(static_cast<int>(nSamples) == _audioFrame.samples_per_channel_);
assert(samplesPerSec ==
@ -521,11 +522,9 @@ int VoEBaseImpl::Init(AudioDeviceModule* external_adm)
return -1;
}
// Assume mono output until a send codec is set, and stereo input until
// we receive the first captured frame. We set stereo input here to
// avoid triggering a possible error in SetSendCodec when a stereo
// codec is selected.
if (_shared->audio_processing()->set_num_channels(2, 1) != 0)
// Assume mono until the audio frames are received from the capture
// device, at which point this can be updated.
if (_shared->audio_processing()->set_num_channels(1, 1) != 0)
{
_shared->SetLastError(VE_SOUNDCARD_ERROR, kTraceError,
"Init() failed to set channels for the primary audio stream");


@ -144,48 +144,6 @@ int VoECodecImpl::SetSendCodec(int channel, const CodecInst& codec)
return -1;
}
// Need to check if we should change APM settings for mono/stereo.
// We'll check all channels (sending or not), so we don't have to
// check this again when starting/stopping sending.
voe::ScopedChannel sc2(_shared->channel_manager());
void* iterator(NULL);
channelPtr = sc2.GetFirstChannel(iterator);
int maxNumChannels = 1;
while (channelPtr != NULL)
{
CodecInst tmpCdc;
channelPtr->GetSendCodec(tmpCdc);
if (tmpCdc.channels > maxNumChannels)
maxNumChannels = tmpCdc.channels;
channelPtr = sc2.GetNextChannel(iterator);
}
// Reuse the currently set number of capture channels. We need to wait
// until receiving a frame to determine the true number.
//
// TODO(andrew): AudioProcessing will return an error if there are more
// output than input channels (it doesn't want to produce fake channels).
// This will happen with a stereo codec and a device which doesn't support
// stereo. AudioCoding should probably do the faking; look into how to
// handle this case properly.
//
// Check if the number of channels has changed to avoid an unnecessary
// reset.
// TODO(andrew): look at handling this logic in AudioProcessing.
if (_shared->audio_processing()->num_output_channels() != maxNumChannels)
{
if (_shared->audio_processing()->set_num_channels(
_shared->audio_processing()->num_input_channels(),
maxNumChannels) != 0)
{
_shared->SetLastError(VE_APM_ERROR, kTraceError,
"Init() failed to set APM channels for the send audio stream");
return -1;
}
}
return 0;
}


@ -325,7 +325,7 @@ int VoEExternalMediaImpl::ExternalPlayoutGetData(
// Retrieve mixed output at the specified rate
shared_->output_mixer()->MixActiveChannels();
shared_->output_mixer()->DoOperationsOnCombinedSignal();
shared_->output_mixer()->GetMixedAudio(samplingFreqHz, 1, audioFrame);
shared_->output_mixer()->GetMixedAudio(samplingFreqHz, 1, &audioFrame);
// Deliver audio (PCM) samples to the external sink
memcpy(speechData10ms,


@ -1,4 +1,4 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
@ -70,6 +70,8 @@
'monitor_module.h',
'output_mixer.cc',
'output_mixer.h',
'output_mixer_internal.cc',
'output_mixer_internal.h',
'shared_data.cc',
'shared_data.h',
'statistics.cc',
@ -122,8 +124,8 @@
'voice_engine_core',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/test/test.gyp:test_support_main',
# The rest are to satisfy the channel_unittest include chain.
# This would be unnecessary if we had qualified includes.
# The rest are to satisfy the unittests' include chain.
# This would be unnecessary if we used qualified includes.
'<(webrtc_root)/common_audio/common_audio.gyp:resampler',
'<(webrtc_root)/modules/modules.gyp:audio_device',
'<(webrtc_root)/modules/modules.gyp:audio_processing',
@ -140,6 +142,7 @@
],
'sources': [
'channel_unittest.cc',
'output_mixer_unittest.cc',
],
},
], # targets