Improve the mixing saturation protection scheme.

A single participant is not processed at all. With multiple
participants, each frame is divided by two before mixing, as before.
Afterwards, the mixed signal is limited by the AGC to -7 dBFS and then
doubled to restore the original level.

This preserves the level while guaranteeing good saturation protection.
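
A rough standalone sketch (not part of the change) of the per-sample
arithmetic; the hard clamp and the kLimit constant below are assumed
stand-ins for the APM's smooth fixed-digital limiter. -7 dBFS is about
32768 * 10^(-7/20) ~= 14637, so after doubling the worst case lands
near -1 dBFS, close to the 29204 level the new test expects.

    // Sketch only: clamps where the real code runs the AGC limiter.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const int16_t a[] = {30000, -30000, 12000, -12000};  // participant 1
      const int16_t b[] = {30000, -30000, -3000, 3000};    // participant 2
      const int32_t kLimit = 14637;  // ~ -7 dBFS re 16-bit full scale.
      for (int i = 0; i < 4; ++i) {
        int32_t s = (a[i] >> 1) + (b[i] >> 1);       // divide-by-2, then mix
        s = std::min(std::max(s, -kLimit), kLimit);  // limit to ~ -7 dBFS
        s += s;  // double via addition; left-shifting negatives is undefined
        std::printf("%d\n", static_cast<int>(s));    // stays near or below -1 dBFS
      }
      return 0;
    }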

Add a test to voe_auto_test. Hijack and improve the existing mixing test
for this.

TEST=voe_auto_test, voe_cmd_test

Review URL: http://webrtc-codereview.appspot.com/241013

git-svn-id: http://webrtc.googlecode.com/svn/trunk@920 4adac7df-926f-26a2-2b94-8c16560cd09d
andrew@webrtc.org 2011-11-10 03:41:22 +00:00
parent 41f38555ed
commit c4f129f97c
12 changed files with 366 additions and 167 deletions

View File

@@ -35,8 +35,7 @@ public:
     };
     // Factory method. Constructor disabled.
-    static AudioConferenceMixer* CreateAudioConferenceMixer(
-        const WebRtc_Word32 id);
+    static AudioConferenceMixer* Create(int id);
     virtual ~AudioConferenceMixer() {}
     // Returns version of the module and its components

View File

@@ -12,6 +12,7 @@
       'target_name': 'audio_conference_mixer',
       'type': '<(library)',
       'dependencies': [
+        'audio_processing',
         '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
       ],
       'include_dirs': [
@@ -32,25 +33,13 @@
         'level_indicator.cc',
         'level_indicator.h',
         'memory_pool.h',
-        'memory_pool_generic.h',
-        'memory_pool_windows.h',
+        'memory_pool_posix.h',
+        'memory_pool_win.h',
         'audio_conference_mixer_impl.cc',
         'audio_conference_mixer_impl.h',
         'time_scheduler.cc',
         'time_scheduler.h',
       ],
-      'conditions': [
-        ['OS=="win"', {
-          'sources!': [
-            'memory_pool_generic.h',
-          ],
-        }],
-        ['OS!="win"', {
-          'sources!': [
-            'memory_pool_windows.h',
-          ],
-        }],
-      ],
     },
   ],
 }

View File

@@ -11,11 +11,21 @@
 #include "audio_conference_mixer_defines.h"
 #include "audio_conference_mixer_impl.h"
 #include "audio_frame_manipulator.h"
+#include "audio_processing.h"
 #include "critical_section_wrapper.h"
 #include "map_wrapper.h"
 #include "trace.h"

 namespace webrtc {
+namespace {
+void SetParticipantStatistics(ParticipantStatistics* stats,
+                              const AudioFrame& frame)
+{
+    stats->participant = frame._id;
+    stats->level = frame._volume;
+}
+} // namespace
+
 MixerParticipant::MixerParticipant()
     : _mixHistory(new MixHistory())
 {
@@ -64,21 +74,26 @@ void MixHistory::ResetMixedStatus()
     _isMixed = 0;
 }

-AudioConferenceMixer* AudioConferenceMixer::CreateAudioConferenceMixer(
-    const WebRtc_Word32 id)
+AudioConferenceMixer* AudioConferenceMixer::Create(int id)
 {
     WEBRTC_TRACE(kTraceModuleCall, kTraceAudioMixerServer, id,
-                 "CreateAudioConferenceMixer");
-    return new AudioConferenceMixerImpl(id);
+                 "Create");
+    AudioConferenceMixerImpl* mixer = new AudioConferenceMixerImpl(id);
+    if(!mixer->Init())
+    {
+        delete mixer;
+        return NULL;
+    }
+    return mixer;
 }

-AudioConferenceMixerImpl::AudioConferenceMixerImpl(const WebRtc_Word32 id)
+AudioConferenceMixerImpl::AudioConferenceMixerImpl(int id)
     : _scratchParticipantsToMixAmount(0),
       _scratchMixedParticipants(),
       _scratchVadPositiveParticipantsAmount(0),
       _scratchVadPositiveParticipants(),
-      _crit(CriticalSectionWrapper::CreateCriticalSection()),
-      _cbCrit(CriticalSectionWrapper::CreateCriticalSection()),
+      _crit(NULL),
+      _cbCrit(NULL),
       _id(id),
       _minimumMixingFreq(kLowestPossible),
       _mixReceiver(NULL),
@@ -87,30 +102,71 @@ AudioConferenceMixerImpl::AudioConferenceMixerImpl(const WebRtc_Word32 id)
       _amountOf10MsUntilNextCallback(0),
       _mixerStatusCb(false),
       _outputFrequency(kDefaultFrequency),
-      _sampleSize((_outputFrequency*kProcessPeriodicityInMs)/1000),
+      _sampleSize(0),
+      _audioFramePool(NULL),
       _participantList(),
       _additionalParticipantList(),
       _amountOfMixableParticipants(0),
       _timeStamp(0),
       _timeScheduler(kProcessPeriodicityInMs),
       _mixedAudioLevel(),
-      _processCalls(0)
+      _processCalls(0),
+      _limiter(NULL)
+{}
+
+bool AudioConferenceMixerImpl::Init()
 {
+    _crit.reset(CriticalSectionWrapper::CreateCriticalSection());
+    if (_crit.get() == NULL)
+        return false;
+
+    _cbCrit.reset(CriticalSectionWrapper::CreateCriticalSection());
+    if(_cbCrit.get() == NULL)
+        return false;
+
+    _limiter.reset(AudioProcessing::Create(_id));
+    if(_limiter.get() == NULL)
+        return false;
+
     MemoryPool<AudioFrame>::CreateMemoryPool(_audioFramePool,
                                              DEFAULT_AUDIO_FRAME_POOLSIZE);
-    WEBRTC_TRACE(kTraceMemory, kTraceAudioMixerServer, _id, "%s created",
-                 __FUNCTION__);
+    if(_audioFramePool == NULL)
+        return false;
+
+    if(SetOutputFrequency(kDefaultFrequency) == -1)
+        return false;
+
+    // Assume mono.
+    if (!SetNumLimiterChannels(1))
+        return false;
+
+    if(_limiter->gain_control()->set_mode(GainControl::kFixedDigital) !=
+       _limiter->kNoError)
+        return false;
+
+    // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
+    // divide-by-2 but -7 is used instead to give a bit of headroom since the
+    // AGC is not a hard limiter.
+    if(_limiter->gain_control()->set_target_level_dbfs(7) != _limiter->kNoError)
+        return false;
+
+    if(_limiter->gain_control()->set_compression_gain_db(0)
+       != _limiter->kNoError)
+        return false;
+
+    if(_limiter->gain_control()->enable_limiter(true) != _limiter->kNoError)
+        return false;
+
+    if(_limiter->gain_control()->Enable(true) != _limiter->kNoError)
+        return false;
+
+    return true;
 }

 AudioConferenceMixerImpl::~AudioConferenceMixerImpl()
 {
-    delete _crit;
-    delete _cbCrit;
     MemoryPool<AudioFrame>::DeleteMemoryPool(_audioFramePool);
-    assert(_audioFramePool==NULL);
-    WEBRTC_TRACE(kTraceMemory, kTraceAudioMixerServer, _id, "%s deleted",
-                 __FUNCTION__);
+    assert(_audioFramePool == NULL);
 }

 WebRtc_Word32 AudioConferenceMixerImpl::Version(
@@ -208,9 +264,10 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
     WebRtc_Word32 lowFreq = GetLowestMixingFrequency();
     // SILK can run in 12 kHz and 24 kHz. These frequencies are not
-    // supported so use closet higher frequency to not lose any information.
-    // TODO (hellner): this is probably more appropriate to do in
-    // GetLowestMixingFrequency().
+    // supported so use the closest higher frequency to not lose any
+    // information.
+    // TODO(henrike): this is probably more appropriate to do in
+    // GetLowestMixingFrequency().
     if (lowFreq == 12000)
     {
         lowFreq = 16000;
@@ -276,6 +333,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
     }

     bool timeForMixerCallback = false;
+    int retval = 0;
     WebRtc_Word32 audioLevel = 0;
     {
         const ListItem* firstItem = mixList.First();
@@ -287,11 +345,12 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
             numberOfChannels = static_cast<const AudioFrame*>(
                 firstItem->GetItem())->_audioChannel;
         }
-        // TODO (hellner): it might be better to decide the number of channels
+        // TODO(henrike): it might be better to decide the number of channels
         // with an API instead of dynamically.

         CriticalSectionScoped cs(*_crit);
+        if (!SetNumLimiterChannels(numberOfChannels))
+            retval = -1;

         mixedAudio->UpdateFrame(-1, _timeStamp, NULL, 0, _outputFrequency,
                                 AudioFrame::kNormalSpeech,
@@ -299,15 +358,21 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
         _timeStamp += _sampleSize;

-        MixFromList(*mixedAudio,mixList);
+        MixFromList(*mixedAudio, mixList);
         MixAnonomouslyFromList(*mixedAudio, additionalFramesList);
         MixAnonomouslyFromList(*mixedAudio, rampOutList);

         if(mixedAudio->_payloadDataLengthInSamples == 0)
         {
-            // Nothing was mixed set the audio samples to silence.
+            // Nothing was mixed, set the audio samples to silence.
             memset(mixedAudio->_payloadData, 0, _sampleSize);
             mixedAudio->_payloadDataLengthInSamples = _sampleSize;
         }
+        else
+        {
+            // Only call the limiter if we have something to mix.
+            if(!LimitMixedAudio(*mixedAudio))
+                retval = -1;
+        }

         _mixedAudioLevel.ComputeLevel(mixedAudio->_payloadData,_sampleSize);
@@ -362,7 +427,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
         CriticalSectionScoped cs(*_crit);
         _processCalls--;
     }
-    return 0;
+    return retval;
 }

 WebRtc_Word32 AudioConferenceMixerImpl::RegisterMixedStreamCallback(
@@ -396,8 +461,17 @@ WebRtc_Word32 AudioConferenceMixerImpl::SetOutputFrequency(
     const Frequency frequency)
 {
     CriticalSectionScoped cs(*_crit);
+
+    const int error = _limiter->set_sample_rate_hz(frequency);
+    if(error != _limiter->kNoError)
+    {
+        WEBRTC_TRACE(kTraceError, kTraceAudioMixerServer, _id,
+                     "Error from AudioProcessing: %d", error);
+        return -1;
+    }
+
     _outputFrequency = frequency;
     _sampleSize = (_outputFrequency*kProcessPeriodicityInMs) / 1000;
+
     return 0;
 }
@@ -408,6 +482,24 @@ AudioConferenceMixerImpl::OutputFrequency() const
     return _outputFrequency;
 }

+bool AudioConferenceMixerImpl::SetNumLimiterChannels(int numChannels)
+{
+    if(_limiter->num_input_channels() != numChannels)
+    {
+        const int error = _limiter->set_num_channels(numChannels,
+                                                     numChannels);
+        if(error != _limiter->kNoError)
+        {
+            WEBRTC_TRACE(kTraceError, kTraceAudioMixerServer, _id,
+                         "Error from AudioProcessing: %d", error);
+            assert(false);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 WebRtc_Word32 AudioConferenceMixerImpl::RegisterMixerStatusCallback(
     AudioMixerStatusReceiver& mixerStatusCallback,
     const WebRtc_UWord32 amountOf10MsBetweenCallbacks)
@@ -527,7 +619,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::MixabilityStatus(
     WEBRTC_TRACE(kTraceModuleCall, kTraceAudioMixerServer, _id,
                  "MixabilityStatus(participant,mixable)");
     CriticalSectionScoped cs(*_cbCrit);
-    mixable = IsParticipantInList(participant,_participantList);
+    mixable = IsParticipantInList(participant, _participantList);
     return 0;
 }
@@ -1046,13 +1138,29 @@ bool AudioConferenceMixerImpl::RemoveParticipantFromList(
     return false;
 }

-WebRtc_Word32 AudioConferenceMixerImpl::MixFromList(AudioFrame& mixedAudioFrame,
-                                                    ListWrapper& audioFrameList)
+WebRtc_Word32 AudioConferenceMixerImpl::MixFromList(
+    AudioFrame& mixedAudio,
+    const ListWrapper& audioFrameList)
 {
     WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, _id,
-                 "MixFromList(mixedAudioFrame, audioFrameList)");
+                 "MixFromList(mixedAudio, audioFrameList)");
     WebRtc_UWord32 position = 0;
     ListItem* item = audioFrameList.First();
+    if(item == NULL)
+    {
+        return 0;
+    }
+
+    if(_amountOfMixableParticipants == 1)
+    {
+        // No mixing required here; skip the saturation protection.
+        AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
+        mixedAudio = *audioFrame;
+        SetParticipantStatistics(&_scratchMixedParticipants[position],
+                                 *audioFrame);
+        return 0;
+    }
+
     while(item != NULL)
     {
         if(position >= kMaximumAmountOfMixedParticipants)
@@ -1068,30 +1176,80 @@ WebRtc_Word32 AudioConferenceMixerImpl::MixFromList(AudioFrame& mixedAudioFrame,
             position = 0;
         }
         AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
-        mixedAudioFrame += *audioFrame;
-        _scratchMixedParticipants[position].participant = audioFrame->_id;
-        _scratchMixedParticipants[position].level = audioFrame->_volume;
+
+        // Divide by two to avoid saturation in the mixing.
+        *audioFrame >>= 1;
+        mixedAudio += *audioFrame;
+
+        SetParticipantStatistics(&_scratchMixedParticipants[position],
+                                 *audioFrame);
+
         position++;
         item = audioFrameList.Next(item);
     }

     return 0;
 }

+// TODO(andrew): consolidate this function with MixFromList.
 WebRtc_Word32 AudioConferenceMixerImpl::MixAnonomouslyFromList(
-    AudioFrame& mixedAudioFrame,
-    ListWrapper& audioFrameList)
+    AudioFrame& mixedAudio,
+    const ListWrapper& audioFrameList)
 {
     WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, _id,
-                 "MixAnonomouslyFromList(mixedAudioFrame, audioFrameList)");
+                 "MixAnonomouslyFromList(mixedAudio, audioFrameList)");
     ListItem* item = audioFrameList.First();
+    if(item == NULL)
+        return 0;
+
+    if(_amountOfMixableParticipants == 1)
+    {
+        // No mixing required here; skip the saturation protection.
+        AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
+        mixedAudio = *audioFrame;
+        return 0;
+    }
+
     while(item != NULL)
     {
         AudioFrame* audioFrame = static_cast<AudioFrame*>(item->GetItem());
-        mixedAudioFrame += *audioFrame;
+        // Divide by two to avoid saturation in the mixing.
+        *audioFrame >>= 1;
+        mixedAudio += *audioFrame;
         item = audioFrameList.Next(item);
     }
     return 0;
 }

+bool AudioConferenceMixerImpl::LimitMixedAudio(AudioFrame& mixedAudio)
+{
+    if(_amountOfMixableParticipants == 1)
+    {
+        return true;
+    }
+
+    // Smoothly limit the mixed frame.
+    const int error = _limiter->ProcessStream(&mixedAudio);
+
+    // And now we can safely restore the level. This procedure results in
+    // some loss of resolution, deemed acceptable.
+    //
+    // It's possible to apply the gain in the AGC (with a target level of 0 dbFS
+    // and compression gain of 6 dB). However, in the transition frame when this
+    // is enabled (moving from one to two participants) it has the potential to
+    // create discontinuities in the mixed frame.
+    //
+    // Instead we double the frame (with addition since left-shifting a
+    // negative value is undefined).
+    mixedAudio += mixedAudio;
+
+    if(error != _limiter->kNoError)
+    {
+        WEBRTC_TRACE(kTraceError, kTraceAudioMixerServer, _id,
+                     "Error from AudioProcessing: %d", error);
+        assert(false);
+        return false;
+    }
+    return true;
+}
+
 } // namespace webrtc

View File

@@ -18,11 +18,13 @@
 #include "list_wrapper.h"
 #include "memory_pool.h"
 #include "module_common_types.h"
+#include "scoped_ptr.h"
 #include "time_scheduler.h"

 #define VERSION_STRING "Audio Conference Mixer Module 1.1.0"

 namespace webrtc {
+class AudioProcessing;
 class CriticalSectionWrapper;

 // Cheshire cat implementation of MixerParticipant's non virtual functions.
@@ -50,11 +52,15 @@
 class AudioConferenceMixerImpl : public AudioConferenceMixer
 {
 public:
+    // AudioProcessing only accepts 10 ms frames.
     enum {kProcessPeriodicityInMs = 10};

-    AudioConferenceMixerImpl(const WebRtc_Word32 id);
+    AudioConferenceMixerImpl(int id);
     ~AudioConferenceMixerImpl();

+    // Must be called after ctor.
+    bool Init();
+
     // Module functions
     virtual WebRtc_Word32 Version(WebRtc_Word8* version,
                                   WebRtc_UWord32& remainingBufferInBytes,
@@ -89,6 +95,10 @@
     WebRtc_Word32 SetOutputFrequency(const Frequency frequency);
     Frequency OutputFrequency() const;

+    // Must be called whenever an audio frame indicates the number of channels
+    // has changed.
+    bool SetNumLimiterChannels(int numChannels);
+
     // Fills mixList with the AudioFrames pointers that should be used when
     // mixing. Fills mixParticipantList with ParticipantStatistics for the
     // participants who's AudioFrames are inside mixList.
@@ -136,15 +146,19 @@
         MixerParticipant& removeParticipant,
         ListWrapper& participantList);

-    // Mix the AudioFrames stored in audioFrameList into mixedAudioFrame.
+    // Mix the AudioFrames stored in audioFrameList into mixedAudio.
     WebRtc_Word32 MixFromList(
-        AudioFrame& mixedAudioFrame,
-        ListWrapper& audioFrameList);
+        AudioFrame& mixedAudio,
+        const ListWrapper& audioFrameList);

-    // Mix the AudioFrames stored in audioFrameList into mixedAudioFrame. No
+    // Mix the AudioFrames stored in audioFrameList into mixedAudio. No
     // record will be kept of this mix (e.g. the corresponding MixerParticipants
     // will not be marked as IsMixed()
-    WebRtc_Word32 MixAnonomouslyFromList(AudioFrame& mixedAudioFrame,
-                                         ListWrapper& audioFrameList);
+    WebRtc_Word32 MixAnonomouslyFromList(AudioFrame& mixedAudio,
+                                         const ListWrapper& audioFrameList);
+
+    bool LimitMixedAudio(AudioFrame& mixedAudio);

+    bool _initialized;
     // Scratch memory
     // Note that the scratch memory may only be touched in the scope of
@@ -156,8 +170,8 @@
     ParticipantStatistics _scratchVadPositiveParticipants[
         kMaximumAmountOfMixedParticipants];

-    CriticalSectionWrapper* _crit;
-    CriticalSectionWrapper* _cbCrit;
+    scoped_ptr<CriticalSectionWrapper> _crit;
+    scoped_ptr<CriticalSectionWrapper> _cbCrit;

     WebRtc_Word32 _id;
@@ -195,6 +209,9 @@
     // Counter keeping track of concurrent calls to process.
     // Note: should never be higher than 1 or lower than 0.
     WebRtc_Word16 _processCalls;
+
+    // Used for inhibiting saturation in mixing.
+    scoped_ptr<AudioProcessing> _limiter;
 };
 } // namespace webrtc

View File

@@ -16,9 +16,9 @@
 #include "typedefs.h"

 #if _WIN32
-#include "memory_pool_windows.h"
+#include "memory_pool_win.h"
 #else
-#include "memory_pool_generic.h"
+#include "memory_pool_posix.h"
 #endif

 namespace webrtc {

View File

@@ -113,7 +113,10 @@ class AudioProcessing : public Module {
   // for each far-end stream which requires processing. On the server-side,
   // this would typically be one instance for every incoming stream.
   static AudioProcessing* Create(int id);
+  virtual ~AudioProcessing() {};

+  // TODO(andrew): remove this method. We now allow users to delete instances
+  // directly, useful for scoped_ptr.
   // Destroys a |apm| instance.
   static void Destroy(AudioProcessing* apm);
@@ -240,9 +243,6 @@ class AudioProcessing : public Module {
   // Inherited from Module.
   virtual WebRtc_Word32 TimeUntilNextProcess() { return -1; };
   virtual WebRtc_Word32 Process() { return -1; };
-
- protected:
-  virtual ~AudioProcessing() {};
 };

 // The acoustic echo cancellation (AEC) component provides better performance

View File

@@ -120,8 +120,7 @@ OutputMixer::Create(OutputMixer*& mixer, const WebRtc_UWord32 instanceId)
 OutputMixer::OutputMixer(const WebRtc_UWord32 instanceId) :
     _callbackCritSect(*CriticalSectionWrapper::CreateCriticalSection()),
     _fileCritSect(*CriticalSectionWrapper::CreateCriticalSection()),
-    _mixerModule(*AudioConferenceMixer::
-                     CreateAudioConferenceMixer(instanceId)),
+    _mixerModule(*AudioConferenceMixer::Create(instanceId)),
     _audioLevel(),
     _dtmfGenerator(instanceId),
     _instanceId(instanceId),

View File

@@ -1459,8 +1459,7 @@ WebRtc_Word32 VoEBaseImpl::AddACMVersion(char* str) const
 WebRtc_Word32 VoEBaseImpl::AddConferenceMixerVersion(char* str) const
 {
-    AudioConferenceMixer* mixerPtr =
-        AudioConferenceMixer::CreateAudioConferenceMixer(-1);
+    AudioConferenceMixer* mixerPtr = AudioConferenceMixer::Create(-1);
     int len = AddModuleVersion(mixerPtr, str);
     delete mixerPtr;
     return len;

View File

@@ -11,6 +11,8 @@
 #include <stdio.h>
 #include <string.h>

+#include <vector>
+
 #include "critical_section_wrapper.h"
 #include "event_wrapper.h"
 #include "thread_wrapper.h"
@@ -4945,106 +4947,138 @@ int VoEExtendedTest::TestFile()
 // VoEExtendedTest::TestMixing
 // ----------------------------------------------------------------------------

-int VoEExtendedTest::TestMixing()
-{
-    VoEBase* base = _mgr.BasePtr();
-    VoEFile* file = _mgr.FilePtr();
-    VoECodec* codec = _mgr.CodecPtr();
-    VoEAudioProcessing* apm = _mgr.APMPtr();
-
-    // Use L16 at 16kHz to minimize distortion (file recording is 16kHz
-    // and resampling will cause large distortions).
-    CodecInst codec_inst;
-    strcpy(codec_inst.plname, "L16");
-    codec_inst.channels = 1;
-    codec_inst.rate = 256000;
-    codec_inst.plfreq = 16000;
-    codec_inst.pltype = 105;
-    codec_inst.pacsize = 160;
-
-    apm->SetNsStatus(false);
-    apm->SetAgcStatus(false);
-    apm->SetEcStatus(false);
-
-    const char file_to_generate_name[] = "dc_file.pcm";
-    const char* input_filename = file_to_generate_name;
-    FILE* file_to_generate = fopen(file_to_generate_name, "wb");
-    const WebRtc_Word16 per_channel_value = 1000;
-    for (int i = 0; i < 160 * 100 * 5; i++)
-    {
-        fwrite(&per_channel_value, sizeof(per_channel_value), 1,
-               file_to_generate);
-    }
-    fclose(file_to_generate);
-
-    // Create 4 channels and make sure that only three are mixed.
-    TEST_MUSTPASS(base->Init());
-
-    int channels[4];
-    const int number_of_channels = sizeof(channels) / sizeof(channels[0]);
-    for (int channel_index = 0; channel_index < number_of_channels;
-         ++channel_index)
-    {
-        const int channel = base->CreateChannel();
-        channels[channel_index] = channel;
-        TEST_MUSTPASS((channel != -1) ? 0 : 1);
-        TEST_MUSTPASS(codec->SetRecPayloadType(channel, codec_inst));
-        TEST_MUSTPASS(base->SetLocalReceiver(channel,
-                                             1234 + 2 * channel_index));
-        TEST_MUSTPASS(base->SetSendDestination(channel,
-                                               1234 + 2 * channel_index,
-                                               "127.0.0.1"));
-        TEST_MUSTPASS(base->StartReceive(channel));
-        TEST_MUSTPASS(base->StartPlayout(channel));
-        TEST_MUSTPASS(codec->SetSendCodec(channel, codec_inst));
-        TEST_MUSTPASS(base->StartSend(channel));
-    }
-    for (int channel_index = 0; channel_index < number_of_channels;
-         ++channel_index)
-    {
-        const int channel = channels[channel_index];
-        TEST_MUSTPASS(file->StartPlayingFileAsMicrophone(channel,
-                                                         input_filename,
-                                                         true));
-    }
-    const char mix_result[] = "mix_result.pcm";
-    TEST_MUSTPASS(file->StartRecordingPlayout(-1/*record meeting*/,
-                                              mix_result));
-    printf("Playing %d channels\n", number_of_channels);
-    SLEEP(5000);
-    TEST_MUSTPASS(file->StopRecordingPlayout(-1));
-    printf("Stopping\n");
-
-    for (int channel_index = 0; channel_index < number_of_channels;
-         ++channel_index)
-    {
-        const int channel = channels[channel_index];
-        channels[channel_index] = channel;
-        TEST_MUSTPASS(base->DeleteChannel(channel));
-    }
-
-    FILE* verification_file = fopen(mix_result, "rb");
-    WebRtc_Word16 mix_value = 0;
-    bool all_mix_values_too_low = true;
-    while (fread(&mix_value, sizeof(WebRtc_Word16), 1, verification_file))
-    {
-        // The mixed value should be:
-        // The input value (from mic) * the number of participants to mix /
-        // saturation factor (divide by two to avoid saturation).
-        // The 1.2 comes from the fact that the audio has to be looped back
-        // which will distort the original signal. I.e. allow 20% distortion.
-        if (mix_value > 1.1 * per_channel_value * 3 / 2)
-        {
-            TEST_MUSTPASS(-1);
-        }
-        // At least once the value should be close to the expected mixed value.
-        if (mix_value > 0.9 * per_channel_value * 3 / 2)
-        {
-            all_mix_values_too_low = false;
-        }
-    }
-    TEST_MUSTPASS(all_mix_values_too_low ? -1 : 0);
-    return 0;
-}
+// Creates and mixes |num_channels| with a constant amplitude of |input_value|.
+// The mixed output is verified to always fall between |max_output_value| and
+// |min_output_value|, after a startup phase.
+int VoEExtendedTest::RunMixingTest(int num_channels,
+                                   int16_t input_value,
+                                   int16_t max_output_value,
+                                   int16_t min_output_value) {
+  VoEBase* base = _mgr.BasePtr();
+  VoEFile* file = _mgr.FilePtr();
+  VoECodec* codec = _mgr.CodecPtr();
+  VoEAudioProcessing* apm = _mgr.APMPtr();
+
+  // Use L16 at 16kHz to minimize distortion (file recording is 16kHz
+  // and resampling will cause large distortions).
+  CodecInst codec_inst;
+  strcpy(codec_inst.plname, "L16");
+  codec_inst.channels = 1;
+  codec_inst.rate = 256000;
+  codec_inst.plfreq = 16000;
+  codec_inst.pltype = 105;
+  codec_inst.pacsize = 160;
+
+  apm->SetNsStatus(false);
+  apm->SetAgcStatus(false);
+  apm->SetEcStatus(false);
+
+  const char file_to_generate_name[] = "dc_file.pcm";
+  const char* input_filename = file_to_generate_name;
+  FILE* file_to_generate = fopen(file_to_generate_name, "wb");
+  ASSERT_TRUE(file_to_generate != NULL);
+  for (int i = 0; i < 160 * 100 * 5; i++) {
+    fwrite(&input_value, sizeof(input_value), 1, file_to_generate);
+  }
+  fclose(file_to_generate);
+
+  TEST_MUSTPASS(base->Init());
+
+  std::vector<int> channels(num_channels);
+  for (int channel_index = 0; channel_index < num_channels; ++channel_index) {
+    const int channel = base->CreateChannel();
+    channels[channel_index] = channel;
+    ASSERT_TRUE(channel != -1);
+    TEST_MUSTPASS(codec->SetRecPayloadType(channel, codec_inst));
+    TEST_MUSTPASS(base->SetLocalReceiver(channel,
+                                         1234 + 2 * channel_index));
+    TEST_MUSTPASS(base->SetSendDestination(channel,
+                                           1234 + 2 * channel_index,
+                                           "127.0.0.1"));
+    TEST_MUSTPASS(base->StartReceive(channel));
+    TEST_MUSTPASS(base->StartPlayout(channel));
+    TEST_MUSTPASS(codec->SetSendCodec(channel, codec_inst));
+    TEST_MUSTPASS(base->StartSend(channel));
+  }
+  for (int channel_index = 0; channel_index < num_channels; ++channel_index) {
+    const int channel = channels[channel_index];
+    TEST_MUSTPASS(file->StartPlayingFileAsMicrophone(channel,
+                                                     input_filename,
+                                                     true));
+  }
+  const char mix_result[] = "mix_result.pcm";
+  TEST_MUSTPASS(file->StartRecordingPlayout(-1/*record meeting*/,
+                                            mix_result));
+  TEST_LOG("Playing %d channels\n", num_channels);
+  SLEEP(5000);
+  TEST_MUSTPASS(file->StopRecordingPlayout(-1));
+  TEST_LOG("Stopping\n");
+
+  for (int channel_index = 0; channel_index < num_channels; ++channel_index) {
+    const int channel = channels[channel_index];
+    channels[channel_index] = channel;
+    TEST_MUSTPASS(base->StopSend(channel));
+    TEST_MUSTPASS(base->StopPlayout(channel));
+    TEST_MUSTPASS(base->StopReceive(channel));
+    TEST_MUSTPASS(base->DeleteChannel(channel));
+  }
+
+  FILE* verification_file = fopen(mix_result, "rb");
+  ASSERT_TRUE(verification_file != NULL);
+  int16_t mix_value = 0;
+  // Skip the first 100 ms to avoid initialization and ramping-in effects.
+  ASSERT_TRUE(fseek(verification_file, sizeof(int16_t) * 1600, SEEK_SET) == 0);
+  while (fread(&mix_value, sizeof(mix_value), 1, verification_file)) {
+    ASSERT_TRUE(mix_value <= max_output_value);
+    ASSERT_TRUE(mix_value >= min_output_value);
+  }
+  fclose(verification_file);
+
+  return 0;
+}
+
+// TODO(andrew): move or copy these to the mixer module test when possible.
+int VoEExtendedTest::TestMixing() {
+  // These tests assume a maximum of three mixed participants. We allow a
+  // +/- 10% range around the expected output level to account for distortion
+  // from coding and processing in the loopback chain.
+
+  // Create four channels and make sure that only three are mixed.
+  TEST_LOG("Test max-three-participant mixing.\n");
+  int16_t input_value = 1000;
+  int16_t expected_output = input_value * 3;
+  if (RunMixingTest(4, input_value, 1.1 * expected_output,
+                    0.9 * expected_output) != 0) {
+    return -1;
+  }
+
+  // Ensure the mixing saturation protection is working. We can do this because
+  // the mixing limiter is given some headroom, so the expected output is less
+  // than full scale.
+  TEST_LOG("Test mixing saturation protection.\n");
+  input_value = 20000;
+  expected_output = 29204;  // = -1 dBFS, the limiter headroom.
+  // If this isn't satisfied, we're not testing anything.
+  assert(input_value * 3 > 32767);
+  assert(1.1 * expected_output < 32767);
+  if (RunMixingTest(3, input_value, 1.1 * expected_output,
+                    0.9 * expected_output) != 0) {
+    return -1;
+  }
+
+  // Ensure the mixing saturation protection is not applied when only using a
+  // single channel.
+  TEST_LOG("Test saturation protection has no effect on one channel.\n");
+  input_value = 32767;
+  expected_output = 32767;
+  // If this isn't satisfied, we're not testing anything.
+  assert(0.95 * expected_output > 29204);  // = -1 dBFS, the limiter headroom.
+  if (RunMixingTest(1, input_value, expected_output,
+                    0.95 * expected_output) != 0) {
+    return -1;
+  }
+
+  return 0;
+}

 // ----------------------------------------------------------------------------

View File

@@ -314,6 +314,10 @@
                    bool playout,
                    bool send);
     void StopMedia(int channel);
+    int RunMixingTest(int num_channels,
+                      int16_t input_value,
+                      int16_t max_output_value,
+                      int16_t min_output_value);
 private:
     VoETestManager& _mgr;
 private: