git-svn-id: http://webrtc.googlecode.com/svn/trunk@370 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
hellner@google.com
2011-08-15 22:47:40 +00:00
parent 3296d76c50
commit 56294c61c9
9 changed files with 415 additions and 30 deletions

View File

@@ -24,7 +24,7 @@ class Trace;
class AudioConferenceMixer : public Module
{
public:
enum {kMaximumAmountOfMixedParticipants = 16};
enum {kMaximumAmountOfMixedParticipants = 3};
enum Frequency
{
kNbInHz = 8000,

View File

@@ -46,6 +46,13 @@ WebRtc_Word32 MixHistory::IsMixed(bool& mixed) const
return 0;
}
WebRtc_Word32 MixHistory::WasMixed(bool& wasMixed) const
{
    // From AudioConferenceMixerImpl's point of view the "previous
    // iteration" mix decision and the current one are the same value,
    // so simply delegate to IsMixed().
    return IsMixed(wasMixed);
}
WebRtc_Word32 MixHistory::SetIsMixed(const bool mixed)
{
_isMixed = mixed ? 1 : 0;
@@ -180,6 +187,8 @@ WebRtc_Word32 AudioConferenceMixerImpl::TimeUntilNextProcess()
WebRtc_Word32 AudioConferenceMixerImpl::Process()
{
WebRtc_UWord32 remainingParticipantsAllowedToMix =
kMaximumAmountOfMixedParticipants;
{
CriticalSectionScoped cs(*_crit);
assert(_processCalls == 0);
@@ -190,6 +199,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
}
ListWrapper mixList;
ListWrapper rampOutList;
MapWrapper mixedParticipantsMap;
{
CriticalSectionScoped cs(*_cbCrit);
@@ -240,7 +250,9 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
}
}
UpdateToMix(mixList,mixedParticipantsMap);
UpdateToMix(mixList, rampOutList, mixedParticipantsMap,
remainingParticipantsAllowedToMix);
UpdateMixedStatus(mixedParticipantsMap);
_scratchParticipantsToMixAmount = mixedParticipantsMap.Size();
}
@@ -286,6 +298,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
MixFromList(*mixedAudio,mixList);
MixAnonomouslyFromList(*mixedAudio, rampOutList);
if(mixedAudio->_payloadDataLengthInSamples == 0)
{
// Nothing was mixed set the audio samples to silence.
@@ -339,6 +352,7 @@ WebRtc_Word32 AudioConferenceMixerImpl::Process()
// Reclaim all outstanding memory.
_audioFramePool->PushMemory(mixedAudio);
ClearAudioFrameList(mixList);
ClearAudioFrameList(rampOutList);
{
CriticalSectionScoped cs(*_crit);
_processCalls--;
@@ -471,11 +485,6 @@ WebRtc_Word32 AudioConferenceMixerImpl::SetMixabilityStatus(
bool success = false;
if(mixable)
{
if(_amountOfMixableParticipants >=
kMaximumAmountOfMixedParticipants)
{
return -1;
}
success = AddParticipantToList(participant,_participantList);
}
else
@@ -579,16 +588,38 @@ WebRtc_Word32 AudioConferenceMixerImpl::GetLowestMixingFrequency()
void AudioConferenceMixerImpl::UpdateToMix(
ListWrapper& mixList,
MapWrapper& mixParticipantList)
ListWrapper& rampOutList,
MapWrapper& mixParticipantList,
WebRtc_UWord32& maxAudioFrameCounter)
{
WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, _id,
"UpdateToMix(mixList,mixParticipantList)");
"GetVIPAudio(mixList,rampOutList,mixParticipantList,%d)",
maxAudioFrameCounter);
const WebRtc_UWord32 mixListStartSize = mixList.GetSize();
ListWrapper activeList; // Elements are AudioFrames
// Struct needed by the passive lists to keep track of which AudioFrame
// belongs to which MixerParticipant.
struct ParticipantFramePair
{
MixerParticipant* participant;
AudioFrame* audioFrame;
};
ListWrapper passiveWasNotMixedList; // Elements are MixerParticipant
ListWrapper passiveWasMixedList; // Elements are MixerParticipant
ListItem* item = _participantList.First();
while(item)
{
// Stop keeping track of passive participants if there are already
// enough participants available (they wont be mixed anyway).
bool mustAddToPassiveList = (maxAudioFrameCounter >
(activeList.GetSize() +
passiveWasMixedList.GetSize() +
passiveWasNotMixedList.GetSize()));
MixerParticipant* participant = static_cast<MixerParticipant*>(
item->GetItem());
bool wasMixed = false;
participant->_mixHistory->WasMixed(wasMixed);
AudioFrame* audioFrame = NULL;
if(_audioFramePool->PopMemory(audioFrame) == -1)
{
@@ -608,12 +639,157 @@ void AudioConferenceMixerImpl::UpdateToMix(
continue;
}
assert(audioFrame->_vadActivity != AudioFrame::kVadUnknown);
mixList.PushBack(static_cast<void*>(audioFrame));
mixParticipantList.Insert(audioFrame->_id,static_cast<void*>(
participant));
assert(mixParticipantList.Size() <= kMaximumAmountOfMixedParticipants);
if(audioFrame->_vadActivity == AudioFrame::kVadActive)
{
if(!wasMixed)
{
RampIn(*audioFrame);
}
if(activeList.GetSize() >= maxAudioFrameCounter)
{
// There are already more active participants than should be
// mixed. Only keep the ones with the highest energy.
ListItem* replaceItem = NULL;
CalculateEnergy(*audioFrame);
WebRtc_UWord32 lowestEnergy = audioFrame->_energy;
ListItem* activeItem = activeList.First();
while(activeItem)
{
AudioFrame* replaceFrame = static_cast<AudioFrame*>(
activeItem->GetItem());
CalculateEnergy(*replaceFrame);
if(replaceFrame->_energy < lowestEnergy)
{
replaceItem = activeItem;
lowestEnergy = replaceFrame->_energy;
}
activeItem = activeList.Next(activeItem);
}
if(replaceItem != NULL)
{
AudioFrame* replaceFrame = static_cast<AudioFrame*>(
replaceItem->GetItem());
bool replaceWasMixed = false;
MapItem* replaceParticipant = mixParticipantList.Find(
replaceFrame->_id);
static_cast<MixerParticipant*>(
replaceParticipant->GetItem())->_mixHistory->WasMixed(
replaceWasMixed);
mixParticipantList.Erase(replaceFrame->_id);
activeList.Erase(replaceItem);
activeList.PushFront(static_cast<void*>(audioFrame));
mixParticipantList.Insert(audioFrame->_id,
static_cast<void*>(participant));
assert(mixParticipantList.Size() <=
kMaximumAmountOfMixedParticipants);
if(replaceWasMixed)
{
RampOut(*replaceFrame);
rampOutList.PushBack(static_cast<void*>(replaceFrame));
assert(rampOutList.GetSize() <=
kMaximumAmountOfMixedParticipants);
} else {
_audioFramePool->PushMemory(replaceFrame);
}
} else {
if(wasMixed)
{
RampOut(*audioFrame);
rampOutList.PushBack(static_cast<void*>(audioFrame));
assert(rampOutList.GetSize() <=
kMaximumAmountOfMixedParticipants);
} else {
_audioFramePool->PushMemory(audioFrame);
}
}
} else {
activeList.PushFront(static_cast<void*>(audioFrame));
mixParticipantList.Insert(audioFrame->_id,
static_cast<void*>(participant));
assert(mixParticipantList.Size() <=
kMaximumAmountOfMixedParticipants);
}
} else {
if(wasMixed)
{
ParticipantFramePair* pair = new ParticipantFramePair;
pair->audioFrame = audioFrame;
pair->participant = participant;
passiveWasMixedList.PushBack(static_cast<void*>(pair));
} else if(mustAddToPassiveList) {
RampIn(*audioFrame);
ParticipantFramePair* pair = new ParticipantFramePair;
pair->audioFrame = audioFrame;
pair->participant = participant;
passiveWasNotMixedList.PushBack(static_cast<void*>(pair));
} else {
_audioFramePool->PushMemory(audioFrame);
}
}
item = _participantList.Next(item);
}
assert(activeList.GetSize() <= maxAudioFrameCounter);
// At this point it is known which participants should be mixed. Transfer
// this information to this functions output parameters.
while(!activeList.Empty())
{
ListItem* mixItem = activeList.First();
mixList.PushBack(mixItem->GetItem());
activeList.Erase(mixItem);
}
// Always mix a constant number of AudioFrames. If there aren't enough
// active participants mix passive ones. Starting with those that was mixed
// last iteration.
while(!passiveWasMixedList.Empty())
{
ListItem* mixItem = passiveWasMixedList.First();
ParticipantFramePair* pair = static_cast<ParticipantFramePair*>(
mixItem->GetItem());
if(mixList.GetSize() < maxAudioFrameCounter + mixListStartSize)
{
mixList.PushBack(pair->audioFrame);
mixParticipantList.Insert(pair->audioFrame->_id,
static_cast<void*>(pair->participant));
assert(mixParticipantList.Size() <=
kMaximumAmountOfMixedParticipants);
}
else
{
_audioFramePool->PushMemory(pair->audioFrame);
}
delete pair;
passiveWasMixedList.Erase(mixItem);
}
// And finally the ones that have not been mixed for a while.
while(!passiveWasNotMixedList.Empty())
{
ListItem* mixItem = passiveWasNotMixedList.First();
ParticipantFramePair* pair = static_cast<ParticipantFramePair*>(
mixItem->GetItem());
if(mixList.GetSize() < maxAudioFrameCounter + mixListStartSize)
{
mixList.PushBack(pair->audioFrame);
mixParticipantList.Insert(pair->audioFrame->_id,
static_cast<void*>(pair->participant));
assert(mixParticipantList.Size() <=
kMaximumAmountOfMixedParticipants);
}
else
{
_audioFramePool->PushMemory(pair->audioFrame);
}
delete pair;
passiveWasNotMixedList.Erase(mixItem);
}
assert(maxAudioFrameCounter + mixListStartSize >= mixList.GetSize());
maxAudioFrameCounter += mixListStartSize - mixList.GetSize();
}
void AudioConferenceMixerImpl::UpdateMixedStatus(
@@ -777,4 +953,22 @@ WebRtc_Word32 AudioConferenceMixerImpl::MixFromList(AudioFrame& mixedAudioFrame,
}
return 0;
}
WebRtc_Word32 AudioConferenceMixerImpl::MixAnonomouslyFromList(
    AudioFrame& mixedAudioFrame,
    ListWrapper& audioFrameList)
{
    WEBRTC_TRACE(kTraceStream, kTraceAudioMixerServer, _id,
                 "MixAnonomouslyFromList(mixedAudioFrame, audioFrameList)");
    // Add every frame in the list into the mix without updating any
    // per-participant mixing state.
    for(ListItem* frameItem = audioFrameList.First(); frameItem != NULL;
        frameItem = audioFrameList.Next(frameItem))
    {
        AudioFrame* frame = static_cast<AudioFrame*>(frameItem->GetItem());
        // Halve the samples first to avoid saturating the mixed frame.
        *frame >>= 1;
        mixedAudioFrame += *frame;
    }
    return 0;
}
} // namespace webrtc

View File

@@ -35,6 +35,10 @@ public:
// MixerParticipant function
WebRtc_Word32 IsMixed(bool& mixed) const;
// Sets wasMixed to true if the participant was mixed previous mix
// iteration.
WebRtc_Word32 WasMixed(bool& wasMixed) const;
// Updates the mixed status.
WebRtc_Word32 SetIsMixed(const bool mixed);
@@ -81,10 +85,17 @@ private:
WebRtc_Word32 SetOutputFrequency(const Frequency frequency);
Frequency OutputFrequency() const;
// Fill mixList with the AudioFrames pointers that should be used when
// mixing. Fill mixParticipantList with ParticipantStatistics for the
// Fills mixList with the AudioFrames pointers that should be used when
// mixing. Fills mixParticipantList with ParticipantStatistics for the
// participants who's AudioFrames are inside mixList.
void UpdateToMix(ListWrapper& mixList, MapWrapper& mixParticipantList);
// maxAudioFrameCounter both input and output specifies how many more
// AudioFrames that are allowed to be mixed.
// rampOutList contain AudioFrames corresponding to an audio stream that
// used to be mixed but shouldn't be mixed any longer. These AudioFrames
// should be ramped out over this AudioFrame to avoid audio discontinuities.
void UpdateToMix(ListWrapper& mixList, ListWrapper& rampOutList,
MapWrapper& mixParticipantList,
WebRtc_UWord32& maxAudioFrameCounter);
// Return the lowest mixing frequency that can be used without having to
// downsample any audio.
@@ -121,6 +132,11 @@ private:
WebRtc_Word32 MixFromList(
AudioFrame& mixedAudioFrame,
ListWrapper& audioFrameList);
// Mix the AudioFrames stored in audioFrameList into mixedAudioFrame. No
// record will be kept of this mix (e.g. the corresponding MixerParticipants
// will not be marked as IsMixed()
WebRtc_Word32 MixAnonomouslyFromList(AudioFrame& mixedAudioFrame,
ListWrapper& audioFrameList);
// Scratch memory
// Note that the scratch memory may only be touched in the scope of

View File

@@ -12,6 +12,32 @@
#include "module_common_types.h"
#include "typedefs.h"
namespace {
// Linear ramping over 80 samples.
// Gain table shared by RampIn()/RampOut() below: entry i is i/79, so the
// gain rises linearly from 0.0 to 1.0 across the first 80 samples of a
// frame (RampOut indexes it in reverse to fade from 1.0 down to 0.0).
// TODO(hellner): ramp using fix point?
const float rampArray[] = {0.0000f, 0.0127f, 0.0253f, 0.0380f,
0.0506f, 0.0633f, 0.0759f, 0.0886f,
0.1013f, 0.1139f, 0.1266f, 0.1392f,
0.1519f, 0.1646f, 0.1772f, 0.1899f,
0.2025f, 0.2152f, 0.2278f, 0.2405f,
0.2532f, 0.2658f, 0.2785f, 0.2911f,
0.3038f, 0.3165f, 0.3291f, 0.3418f,
0.3544f, 0.3671f, 0.3797f, 0.3924f,
0.4051f, 0.4177f, 0.4304f, 0.4430f,
0.4557f, 0.4684f, 0.4810f, 0.4937f,
0.5063f, 0.5190f, 0.5316f, 0.5443f,
0.5570f, 0.5696f, 0.5823f, 0.5949f,
0.6076f, 0.6203f, 0.6329f, 0.6456f,
0.6582f, 0.6709f, 0.6835f, 0.6962f,
0.7089f, 0.7215f, 0.7342f, 0.7468f,
0.7595f, 0.7722f, 0.7848f, 0.7975f,
0.8101f, 0.8228f, 0.8354f, 0.8481f,
0.8608f, 0.8734f, 0.8861f, 0.8987f,
0.9114f, 0.9241f, 0.9367f, 0.9494f,
0.9620f, 0.9747f, 0.9873f, 1.0000f};
// Number of samples the ramp covers (80).
const int rampSize = sizeof(rampArray)/sizeof(rampArray[0]);
} // namespace
namespace webrtc {
void CalculateEnergy(AudioFrame& audioFrame)
{
@@ -27,4 +53,28 @@ void CalculateEnergy(AudioFrame& audioFrame)
audioFrame._payloadData[position];
}
}
// Fades in audioFrame by scaling its first rampSize samples with a
// linearly increasing gain (0.0 -> 1.0); samples past the ramp are left
// untouched.
void RampIn(AudioFrame& audioFrame)
{
    assert(rampSize <= audioFrame._payloadDataLengthInSamples);
    for(int sampleIdx = 0; sampleIdx < rampSize; sampleIdx++)
    {
        const float gain = rampArray[sampleIdx];
        audioFrame._payloadData[sampleIdx] = static_cast<WebRtc_Word16>
            (gain * audioFrame._payloadData[sampleIdx]);
    }
}
// Fades out audioFrame: the first rampSize samples are scaled with a
// linearly decreasing gain (1.0 -> 0.0) and everything after the ramp is
// zeroed out.
void RampOut(AudioFrame& audioFrame)
{
    assert(rampSize <= audioFrame._payloadDataLengthInSamples);
    int gainIdx = rampSize - 1;
    for(int sampleIdx = 0; sampleIdx < rampSize; sampleIdx++, gainIdx--)
    {
        audioFrame._payloadData[sampleIdx] = static_cast<WebRtc_Word16>
            (rampArray[gainIdx] * audioFrame._payloadData[sampleIdx]);
    }
    // Silence the remainder of the frame once the ramp has completed.
    memset(&audioFrame._payloadData[rampSize], 0,
           (audioFrame._payloadDataLengthInSamples - rampSize) *
           sizeof(audioFrame._payloadData[0]));
}
} // namespace webrtc

View File

@@ -13,8 +13,14 @@
namespace webrtc {
class AudioFrame;
// Updates the audioFrame's energy (based on its samples).
void CalculateEnergy(AudioFrame& audioFrame);
// Apply linear step function that ramps in/out the audio samples in audioFrame
void RampIn(AudioFrame& audioFrame);
void RampOut(AudioFrame& audioFrame);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CONFERENCE_MIXER_SOURCE_AUDIO_FRAME_MANIPULATOR_H_

View File

@@ -4891,6 +4891,112 @@ int VoEExtendedTest::TestFile()
return 0;
}
// ----------------------------------------------------------------------------
// VoEExtendedTest::TestMixing
// ----------------------------------------------------------------------------
// Feeds a known DC signal through four looped-back channels and verifies,
// by inspecting a recording of the mixed playout, that the conference
// mixer mixes at most three participants at once (each stream attenuated
// by 2 to avoid saturation). Returns 0 on success, -1 on failure.
int VoEExtendedTest::TestMixing()
{
    VoEBase* base = _mgr.BasePtr();
    VoEFile* file = _mgr.FilePtr();
    VoECodec* codec = _mgr.CodecPtr();
    VoEAudioProcessing* apm = _mgr.APMPtr();
    // Use L16 at 16kHz to minimize distortion (file recording is 16kHz
    // and resampling will cause large distortions).
    CodecInst codec_inst;
    strcpy(codec_inst.plname, "L16");
    codec_inst.channels = 1;
    codec_inst.rate = 256000;
    codec_inst.plfreq = 16000;
    codec_inst.pltype = 105;
    codec_inst.pacsize = 160;
    // Disable audio processing so the recorded output can be compared
    // directly against the known DC input.
    apm->SetNsStatus(false);
    apm->SetAgcStatus(false);
    apm->SetEcStatus(false);
    // Generate 5 seconds of DC input (100 frames/s * 160 samples/frame).
    const char file_to_generate_name[] = "dc_file.pcm";
    const char* input_filename = file_to_generate_name;
    FILE* file_to_generate = fopen(file_to_generate_name, "wb");
    // Fail early instead of crashing in fwrite() if the file can't be
    // created.
    TEST_MUSTPASS(file_to_generate == NULL ? -1 : 0);
    const WebRtc_Word16 per_channel_value = 1000;
    for (int i = 0; i < 160 * 100 * 5; i++)
    {
        fwrite(&per_channel_value, sizeof(per_channel_value), 1,
               file_to_generate);
    }
    fclose(file_to_generate);
    // Create 4 channels and make sure that only three are mixed.
    TEST_MUSTPASS(base->Init());
    int channels[4];
    const int number_of_channels = sizeof(channels) / sizeof(channels[0]);
    for (int channel_index = 0; channel_index < number_of_channels;
         ++channel_index)
    {
        const int channel = base->CreateChannel();
        channels[channel_index] = channel;
        TEST_MUSTPASS((channel != -1) ? 0 : 1);
        TEST_MUSTPASS(codec->SetRecPayloadType(channel, codec_inst));
        TEST_MUSTPASS(base->SetLocalReceiver(channel,
                                             1234 + 2 * channel_index));
        TEST_MUSTPASS(base->SetSendDestination(channel,
                                               1234 + 2 * channel_index,
                                               "127.0.0.1"));
        TEST_MUSTPASS(base->StartReceive(channel));
        TEST_MUSTPASS(base->StartPlayout(channel));
        TEST_MUSTPASS(codec->SetSendCodec(channel, codec_inst));
        TEST_MUSTPASS(base->StartSend(channel));
    }
    // Loop the DC file back as microphone input on every channel.
    for (int channel_index = 0; channel_index < number_of_channels;
         ++channel_index)
    {
        const int channel = channels[channel_index];
        TEST_MUSTPASS(file->StartPlayingFileAsMicrophone(channel,
                                                         input_filename,
                                                         true));
    }
    const char mix_result[] = "mix_result.pcm";
    TEST_MUSTPASS(file->StartRecordingPlayout(-1/*record meeting*/,
                                              mix_result));
    printf("Playing %d channels\n", number_of_channels);
    SLEEP(5000);
    TEST_MUSTPASS(file->StopRecordingPlayout(-1));
    printf("Stopping\n");
    for (int channel_index = 0; channel_index < number_of_channels;
         ++channel_index)
    {
        TEST_MUSTPASS(base->DeleteChannel(channels[channel_index]));
    }
    FILE* verification_file = fopen(mix_result, "rb");
    // Fail early instead of crashing in fread() if the recording is
    // missing.
    TEST_MUSTPASS(verification_file == NULL ? -1 : 0);
    WebRtc_Word16 mix_value = 0;
    bool all_mix_values_too_low = true;
    bool mix_value_too_high = false;
    while (fread(&mix_value, sizeof(WebRtc_Word16), 1, verification_file))
    {
        // The mixed value should be:
        // The input value (from mic) * the number of participants to mix /
        // saturation factor (divide by two to avoid saturation).
        // The 1.1 comes from the fact that the audio has to be looped back
        // which will distort the original signal. I.e. allow 10% distortion.
        if (mix_value > 1.1 * per_channel_value * 3 / 2)
        {
            mix_value_too_high = true;
            break;
        }
        // At least once the value should be close to the expected mixed
        // value.
        if (mix_value > 0.9 * per_channel_value * 3 / 2)
        {
            all_mix_values_too_low = false;
        }
    }
    // Close before the checks below so the file isn't leaked if a check
    // fails.
    fclose(verification_file);
    TEST_MUSTPASS(mix_value_too_high ? -1 : 0);
    TEST_MUSTPASS(all_mix_values_too_low ? -1 : 0);
    return 0;
}
// ----------------------------------------------------------------------------
// VoEExtendedTest::TestHardware
// ----------------------------------------------------------------------------

View File

@@ -87,6 +87,7 @@ public:
int TestEncryption();
int TestExternalMedia();
int TestFile();
int TestMixing();
int TestHardware();
int TestNetEqStats();
int TestNetwork();

View File

@@ -510,19 +510,21 @@ bool SubAPIManager::GetExtendedMenuSelection(ExtendedSelection& sel)
if (_externalMedia) printf("\n"); else printf(" (NA)\n");
printf(" (7) File");
if (_file) printf("\n"); else printf(" (NA)\n");
printf(" (8) Hardware");
printf(" (8) Mixing");
if (_file) printf("\n"); else printf(" (NA)\n");
printf(" (9) Hardware");
if (_hardware) printf("\n"); else printf(" (NA)\n");
printf(" (9) NetEqStats");
printf(" (10) NetEqStats");
if (_netEqStats) printf("\n"); else printf(" (NA)\n");
printf(" (10) Network");
printf(" (11) Network");
if (_network) printf("\n"); else printf(" (NA)\n");
printf(" (11) RTP_RTCP");
printf(" (12) RTP_RTCP");
if (_rtp_rtcp) printf("\n"); else printf(" (NA)\n");
printf(" (12) VideoSync");
printf(" (13) VideoSync");
if (_videoSync) printf("\n"); else printf(" (NA)\n");
printf(" (13) VolumeControl");
printf(" (14) VolumeControl");
if (_volumeControl) printf("\n"); else printf(" (NA)\n");
printf(" (14) AudioProcessing");
printf(" (15) AudioProcessing");
if (_apm) printf("\n"); else printf(" (NA)\n");
printf("\n: ");
@@ -557,24 +559,27 @@ bool SubAPIManager::GetExtendedMenuSelection(ExtendedSelection& sel)
if (_file) xsel = XSEL_File;
break;
case 8:
if (_hardware) xsel = XSEL_Hardware;
if (_file) xsel = XSEL_Mixing;
break;
case 9:
if (_netEqStats) xsel = XSEL_NetEqStats;
if (_hardware) xsel = XSEL_Hardware;
break;
case 10:
if (_network) xsel = XSEL_Network;
if (_netEqStats) xsel = XSEL_NetEqStats;
break;
case 11:
if (_rtp_rtcp) xsel = XSEL_RTP_RTCP;
if (_network) xsel = XSEL_Network;
break;
case 12:
if (_videoSync) xsel = XSEL_VideoSync;
if (_rtp_rtcp) xsel = XSEL_RTP_RTCP;
break;
case 13:
if (_volumeControl) xsel = XSEL_VolumeControl;
if (_videoSync) xsel = XSEL_VideoSync;
break;
case 14:
if (_volumeControl) xsel = XSEL_VolumeControl;
break;
case 15:
if (_apm) xsel = XSEL_AudioProcessing;
break;
default:
@@ -3703,6 +3708,12 @@ int runAutoTest(TestType testType, ExtendedSelection extendedSel)
break;
xtend.TestPassed("File");
}
if (extendedSel == XSEL_Mixing || extendedSel == XSEL_All)
{
if ((mainRet = xtend.TestMixing()) == -1)
break;
xtend.TestPassed("Mixing");
}
if (extendedSel == XSEL_Hardware || extendedSel == XSEL_All)
{
if ((mainRet = xtend.TestHardware()) == -1)

View File

@@ -53,6 +53,7 @@ enum ExtendedSelection
XSEL_Encryption,
XSEL_ExternalMedia,
XSEL_File,
XSEL_Mixing,
XSEL_Hardware,
XSEL_NetEqStats,
XSEL_Network,