/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "rtp_receiver_audio.h"

#include <cassert>  // assert
#include <cstring>  // memcpy()
#include <math.h>   // pow()

#include "critical_section_wrapper.h"

namespace webrtc {
RTPReceiverAudio::RTPReceiverAudio(const WebRtc_Word32 id):
    _id(id),
    _lastReceivedFrequency(8000),
    _telephoneEvent(false),
    _telephoneEventForwardToDecoder(false),
    _telephoneEventDetectEndOfTone(false),
    _telephoneEventPayloadType(-1),
    _telephoneEventReported(),
    _cngNBPayloadType(-1),
    _cngWBPayloadType(-1),
    _cngSWBPayloadType(-1),
    _cngPayloadType(-1),
    _G722PayloadType(-1),
    _lastReceivedG722(false),
    _criticalSectionFeedback(*CriticalSectionWrapper::CreateCriticalSection()),
    _cbAudioFeedback(NULL)
{
}

RTPReceiverAudio::~RTPReceiverAudio()
{
    delete &_criticalSectionFeedback;
}

WebRtc_Word32
RTPReceiverAudio::Init()
{
    _lastReceivedFrequency = 8000;
    _telephoneEvent = false;
    _telephoneEventForwardToDecoder = false;
    _telephoneEventDetectEndOfTone = false;
    _telephoneEventPayloadType = -1;

    while(_telephoneEventReported.Size() > 0)
    {
        _telephoneEventReported.Erase(_telephoneEventReported.First());
    }
    _cngNBPayloadType = -1;
    _cngWBPayloadType = -1;
    _cngSWBPayloadType = -1;
    _cngPayloadType = -1;
    _G722PayloadType = -1;
    _lastReceivedG722 = false;
    return 0;
}

void
RTPReceiverAudio::ChangeUniqueId(const WebRtc_Word32 id)
{
    _id = id;
}

WebRtc_Word32
RTPReceiverAudio::RegisterIncomingAudioCallback(RtpAudioFeedback* incomingMessagesCallback)
{
    CriticalSectionScoped lock(_criticalSectionFeedback);
    _cbAudioFeedback = incomingMessagesCallback;
    return 0;
}
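
// RFC 3551 assigns G.722 an RTP clock rate of 8,000 Hz even though the codec
// samples audio at 16 kHz, so we report 8,000 Hz whenever the last received
// codec was G.722.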
WebRtc_UWord32
RTPReceiverAudio::AudioFrequency() const
{
    if(_lastReceivedG722)
    {
        return 8000;
    }
    return _lastReceivedFrequency;
}

// Out-of-band TelephoneEvent (DTMF) detection
WebRtc_Word32
RTPReceiverAudio::SetTelephoneEventStatus(const bool enable,
                                          const bool forwardToDecoder,
                                          const bool detectEndOfTone)
{
    _telephoneEvent = enable;
    _telephoneEventDetectEndOfTone = detectEndOfTone;
    _telephoneEventForwardToDecoder = forwardToDecoder;
    return 0;
}

// Is out-of-band TelephoneEvent (DTMF) detection turned on/off?
bool
RTPReceiverAudio::TelephoneEvent() const
{
    return _telephoneEvent;
}

// Is forwarding of out-of-band telephone events turned on/off?
bool
RTPReceiverAudio::TelephoneEventForwardToDecoder() const
{
    return _telephoneEventForwardToDecoder;
}

bool
RTPReceiverAudio::TelephoneEventPayloadType(const WebRtc_Word8 payloadType) const
{
    return (_telephoneEventPayloadType == payloadType);
}
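
// Comfort noise (CN, RFC 3389) can be registered at 8000, 16000 or 32000 Hz.
// Narrowband CN normally uses the static payload type 13, while the wideband
// and super-wideband variants are registered with dynamic payload types.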
bool
RTPReceiverAudio::CNGPayloadType(const WebRtc_Word8 payloadType,
                                 WebRtc_UWord32& frequency)
{
    // we can have three CNG on 8000Hz, 16000Hz and 32000Hz
    if(_cngNBPayloadType == payloadType)
    {
        frequency = 8000;
        if((_cngPayloadType != -1) && (_cngPayloadType != _cngNBPayloadType))
        {
            ResetStatistics();
        }
        _cngPayloadType = _cngNBPayloadType;
        return true;
    } else if(_cngWBPayloadType == payloadType)
    {
        // if last received codec is G.722 we must use frequency 8000
        if(_lastReceivedG722)
        {
            frequency = 8000;
        } else
        {
            frequency = 16000;
        }
        if((_cngPayloadType != -1) && (_cngPayloadType != _cngWBPayloadType))
        {
            ResetStatistics();
        }
        _cngPayloadType = _cngWBPayloadType;
        return true;
    } else if(_cngSWBPayloadType == payloadType)
    {
        frequency = 32000;
        if((_cngPayloadType != -1) && (_cngPayloadType != _cngSWBPayloadType))
        {
            ResetStatistics();
        }
        _cngPayloadType = _cngSWBPayloadType;
        return true;
    } else
    {
        // not CNG
        if(_G722PayloadType == payloadType)
        {
            _lastReceivedG722 = true;
        } else
        {
            _lastReceivedG722 = false;
        }
    }
    return false;
}

/*
    Sample based or frame based codecs based on RFC 3551

    NOTE! There is one error in the RFC, stating G.722 uses 8 bits/sample.
    The correct rate is 4 bits/sample.

    name of                              sampling              default
    encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet

    Sample based audio codecs
    DVI4      sample        4                var.                   20
    G722      sample        4              16,000                   20
    G726-40   sample        5               8,000                   20
    G726-32   sample        4               8,000                   20
    G726-24   sample        3               8,000                   20
    G726-16   sample        2               8,000                   20
    L8        sample        8                var.                   20
    L16       sample        16               var.                   20
    PCMA      sample        8                var.                   20
    PCMU      sample        8                var.                   20

    Frame based audio codecs
    G723      frame         N/A             8,000        30         30
    G728      frame         N/A             8,000       2.5         20
    G729      frame         N/A             8,000        10         20
    G729D     frame         N/A             8,000        10         20
    G729E     frame         N/A             8,000        10         20
    GSM       frame         N/A             8,000        20         20
    GSM-EFR   frame         N/A             8,000        20         20
    LPC       frame         N/A             8,000        20         20
    MPA       frame         N/A              var.      var.

    G7221     frame         N/A
*/

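// The bitsPerSample value assigned below is what ParseAudioCodecSpecific()
// later uses to split multi-channel payloads. Illustrative example (not from
// the source): for G.722 (4 bits/sample) carried in a 160-byte payload with
// 2 channels, channelLength = 160 / 2 = 80 bytes and
// samplesPerChannel = 160 * 8 / 4 / 2 = 160 samples per channel.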
ModuleRTPUtility::Payload*
RTPReceiverAudio::RegisterReceiveAudioPayload(const WebRtc_Word8 payloadName[RTP_PAYLOAD_NAME_SIZE],
                                              const WebRtc_Word8 payloadType,
                                              const WebRtc_UWord32 frequency,
                                              const WebRtc_UWord8 channels,
                                              const WebRtc_UWord32 rate)
{
    WebRtc_Word32 length = (WebRtc_Word32)strlen(payloadName);
    if(length >= RTP_PAYLOAD_NAME_SIZE)
    {
        // the name and its terminating null must fit in RTP_PAYLOAD_NAME_SIZE bytes
        assert(false);
        return NULL;
    }

    if (ModuleRTPUtility::StringCompare(payloadName,"telephone-event",15))
    {
        _telephoneEventPayloadType = payloadType;
    }
    if (ModuleRTPUtility::StringCompare(payloadName,"cn",2))
    {
        // we can have three CNG on 8000Hz, 16000Hz and 32000Hz
        if(frequency == 8000)
        {
            _cngNBPayloadType = payloadType;

        } else if(frequency == 16000)
        {
            _cngWBPayloadType = payloadType;

        } else if(frequency == 32000)
        {
            _cngSWBPayloadType = payloadType;
        } else
        {
            assert(false);
            return NULL;
        }
    }
    WebRtc_UWord8 bitsPerSample = 0; // zero implies frame based
    if (ModuleRTPUtility::StringCompare(payloadName,"DVI4",4))
    {
        bitsPerSample = 4;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"G722",4))
    {
        if(ModuleRTPUtility::StringCompare(payloadName,"G7221",5))
        {
            // frame based
        } else
        {
            _G722PayloadType = payloadType;
            bitsPerSample = 4;
        }
    } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-40",7))
    {
        bitsPerSample = 5;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-32",7))
    {
        bitsPerSample = 4;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-24",7))
    {
        bitsPerSample = 3;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-16",7))
    {
        bitsPerSample = 2;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"L8",2))
    {
        bitsPerSample = 8;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"L16",3))
    {
        bitsPerSample = 16;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"PCMU",4))
    {
        bitsPerSample = 8;
    } else if(ModuleRTPUtility::StringCompare(payloadName,"PCMA",4))
    {
        bitsPerSample = 8;
    }

    ModuleRTPUtility::Payload* payload = new ModuleRTPUtility::Payload;
    memcpy(payload->name, payloadName, length+1);
    payload->typeSpecific.Audio.frequency = frequency;
    payload->typeSpecific.Audio.channels = channels;
    payload->typeSpecific.Audio.bitsPerSample = bitsPerSample;
    payload->typeSpecific.Audio.rate = rate;
    payload->audio = true;
    return payload;
}

// we are not allowed to have any critsects when calling CallbackOfReceivedPayloadData
WebRtc_Word32
RTPReceiverAudio::ParseAudioCodecSpecific(WebRtcRTPHeader* rtpHeader,
                                          const WebRtc_UWord8* payloadData,
                                          const WebRtc_UWord16 payloadLength,
                                          const ModuleRTPUtility::AudioPayload& audioSpecific,
                                          const bool isRED)
{
    WebRtc_UWord8 newEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS];
    WebRtc_UWord8 removedEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS];
    WebRtc_UWord8 numberOfNewEvents = 0;
    WebRtc_UWord8 numberOfRemovedEvents = 0;
    bool telephoneEventPacket = TelephoneEventPayloadType(rtpHeader->header.payloadType);

    if(payloadLength == 0)
    {
        return 0;
    }

    {
        CriticalSectionScoped lock(_criticalSectionFeedback);

        if(telephoneEventPacket)
        {
            // RFC 4733 2.3
            /*
             0                   1                   2                   3
             0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |     event     |E|R| volume    |          duration             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            */
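            // Each event occupies 4 bytes. Illustrative example (not from the
            // source): the entry {0x05, 0x8A, 0x03, 0x20} decodes to event 5
            // (DTMF digit "5"), E=1 (end of tone), volume 10 (-10 dBm0) and a
            // duration of 0x0320 = 800 timestamp units.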
            if(payloadLength % 4 != 0)
            {
                return -1;
            }
            WebRtc_UWord8 numberOfEvents = payloadLength / 4;

            // sanity
            if(numberOfEvents >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS)
            {
                numberOfEvents = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
            }
            for (int n = 0; n < numberOfEvents; n++)
            {
                bool end = (payloadData[(4*n)+1] & 0x80) ? true : false;

                if(_telephoneEventReported.Find(payloadData[4*n]) != NULL)
                {
                    // we have already seen this event
                    if(end)
                    {
                        removedEvents[numberOfRemovedEvents] = payloadData[4*n];
                        numberOfRemovedEvents++;
                        _telephoneEventReported.Erase(payloadData[4*n]);
                    }
                } else
                {
                    if(end)
                    {
                        // don't add if it's the end of a tone
                    } else
                    {
                        newEvents[numberOfNewEvents] = payloadData[4*n];
                        numberOfNewEvents++;
                        _telephoneEventReported.Insert(payloadData[4*n], NULL);
                    }
                }
            }

            // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
            // should not be a problem since we don't care about the duration

            // RFC 4733 2.5.1.5 & 2.5.2.4 Multiple Events in a Packet
        }

        if(_telephoneEvent && _cbAudioFeedback)
        {
            for (int n = 0; n < numberOfNewEvents; n++)
            {
                _cbAudioFeedback->OnReceivedTelephoneEvent(_id, newEvents[n], false);
            }
            if(_telephoneEventDetectEndOfTone)
            {
                for (int n = 0; n < numberOfRemovedEvents; n++)
                {
                    _cbAudioFeedback->OnReceivedTelephoneEvent(_id, removedEvents[n], true);
                }
            }
        }
    }
    if(!telephoneEventPacket)
    {
        _lastReceivedFrequency = audioSpecific.frequency;
    }

    // Check if this is a CNG packet, the receiver might want to know
    WebRtc_UWord32 dummy;
    if(CNGPayloadType(rtpHeader->header.payloadType, dummy))
    {
        rtpHeader->type.Audio.isCNG = true;
        rtpHeader->frameType = kAudioFrameCN;
    } else
    {
        rtpHeader->frameType = kAudioFrameSpeech;
        rtpHeader->type.Audio.isCNG = false;
    }

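    // RFC 4733 reserves event codes 0-15 for the DTMF digits 0-9, *, # and
    // A-D; higher codes denote other tones and events, which are never
    // forwarded to the decoder below.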
    // check if it's a DTMF event, hence something we can play out
    if(telephoneEventPacket)
    {
        if(!_telephoneEventForwardToDecoder)
        {
            // don't forward the event to the decoder
            return 0;
        }
        MapItem* first = _telephoneEventReported.First();
        if(first && first->GetId() > 15)
        {
            // don't forward non-DTMF events
            return 0;
        }
    }
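    // RED (RFC 2198): the first payload byte is a block header whose high bit
    // (F) is set when more block headers follow. F == 0 therefore means the
    // packet carries a single block, and the one-byte header holds just the
    // primary payload type.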
    if(isRED && !(payloadData[0] & 0x80))
    {
        // we received only one frame packed in a RED packet; remove the RED wrapper
        rtpHeader->header.payloadType = payloadData[0];

        // only one frame in the RED packet; strip the one-byte header to help NetEQ
        return CallbackOfReceivedPayloadData(payloadData+1,
                                             payloadLength-1,
                                             rtpHeader);
    }
    if(audioSpecific.channels > 1)
    {
        WebRtc_Word32 retVal = 0;
        WebRtc_UWord16 channelLength = payloadLength/audioSpecific.channels;

        if(audioSpecific.bitsPerSample > 0)
        {
            // sanity
            assert((payloadLength*8)%audioSpecific.bitsPerSample == 0);

            // sample based codec

            // build matrix
            WebRtc_UWord8 matrix[IP_PACKET_SIZE];
            WebRtc_UWord32 offsetBytes = 0;
            WebRtc_UWord32 offsetBytesInsert = 0;
            // initialize matrix to 0
            memset(matrix, 0, audioSpecific.channels*channelLength);

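            // The payload interleaves the channels sample by sample; "matrix"
            // de-interleaves it so that channel j occupies the contiguous byte
            // range [j*channelLength, (j+1)*channelLength). For sub-byte codecs
            // (1-7 bits/sample) each sample is read and rewritten using 16-bit
            // wide shifts; 8- and 16-bit samples are copied byte by byte.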
            switch(audioSpecific.bitsPerSample)
            {
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                {
                    WebRtc_UWord32 offsetSamples = 0;
                    WebRtc_UWord32 offsetSamplesInsert = 0;
                    WebRtc_UWord16 bitMask = (WebRtc_UWord16)ModuleRTPUtility::pow2(audioSpecific.bitsPerSample)-1;
                    WebRtc_UWord16 samplesPerChannel = payloadLength*8/audioSpecific.bitsPerSample/audioSpecific.channels;

                    for(WebRtc_UWord32 i = 0; i < samplesPerChannel; i++)
                    {
                        WebRtc_UWord8 insertShift = (WebRtc_UWord8)((offsetSamplesInsert+audioSpecific.bitsPerSample)%16);
                        insertShift = 16 - insertShift; // invert the calculation

                        for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++)
                        {
                            // get sample
                            WebRtc_UWord16 s = payloadData[offsetBytes] << 8;

                            // check that we don't read outside the memory
                            if(offsetBytes < (WebRtc_UWord32)payloadLength - 2)
                            {
                                s += payloadData[offsetBytes+1];
                            }

                            WebRtc_UWord8 readShift = (WebRtc_UWord8)((offsetSamples+audioSpecific.bitsPerSample)%16);
                            readShift = 16 - readShift; // invert the calculation
                            s >>= readShift;
                            s &= bitMask;

                            // prepare for reading the next sample
                            offsetSamples += audioSpecific.bitsPerSample;
                            if(readShift <= audioSpecific.bitsPerSample)
                            {
                                // the next sample does not fit, or fits exactly
                                offsetSamples -= 8;
                                offsetBytes++;
                            }

                            // insert the sample into the matrix
                            WebRtc_UWord32 columnOffset = j*channelLength;

                            WebRtc_UWord16 insert = s << insertShift;
#if defined(WEBRTC_LITTLE_ENDIAN)
                            matrix[columnOffset+offsetBytesInsert] |= static_cast<WebRtc_UWord8>(insert>>8);
                            matrix[columnOffset+offsetBytesInsert+1] |= static_cast<WebRtc_UWord8>(insert);
#else
                            WebRtc_UWord16* matrixU16 = (WebRtc_UWord16*)&(matrix[columnOffset+offsetBytesInsert]);
                            matrixU16[0] |= (s << insertShift);
#endif
                        }
                        // prepare for writing the next sample
                        offsetSamplesInsert += audioSpecific.bitsPerSample;
                        if(insertShift <= audioSpecific.bitsPerSample)
                        {
                            // the next sample does not fit, or fits exactly
                            offsetSamplesInsert -= 8;
                            offsetBytesInsert++;
                        }
                    }
                }
                break;
            case 8:
                {
                    WebRtc_UWord32 sample = 0;
                    for(WebRtc_UWord32 i = 0; i < channelLength; i++)
                    {
                        for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++)
                        {
                            WebRtc_UWord32 columnOffset = j*channelLength;
                            matrix[columnOffset + i] = payloadData[sample++];
                        }
                    }
                }
                break;
            case 16:
                {
                    WebRtc_UWord32 sample = 0;
                    for(WebRtc_UWord32 i = 0; i < channelLength; i += 2)
                    {
                        for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++)
                        {
                            WebRtc_UWord32 columnOffset = j*channelLength;
                            matrix[columnOffset + i] = payloadData[sample++];
                            matrix[columnOffset + i + 1] = payloadData[sample++];
                        }
                    }
                }
                break;
            default:
                assert(false);
                return -1;
            }
            // we support at most 16-bit samples
            // callback for all channels
            for(int channel = 0; channel < audioSpecific.channels && retVal == 0; channel++)
            {
                // one callback per channel
                rtpHeader->type.Audio.channel = channel+1;

                if(channel == 0)
                {
                    // include the original packet only in the first callback
                    retVal = CallbackOfReceivedPayloadData(&matrix[channel*channelLength],
                                                           channelLength,
                                                           rtpHeader);
                } else
                {
                    retVal = CallbackOfReceivedPayloadData(&matrix[channel*channelLength],
                                                           channelLength,
                                                           rtpHeader);
                }
            }
        } else
        {
            for(int channel = 1; channel <= audioSpecific.channels && retVal == 0; channel++)
            {
                // one callback per channel
                rtpHeader->type.Audio.channel = channel;

                if(channel == 1)
                {
                    // include the original packet only in the first callback
                    retVal = CallbackOfReceivedPayloadData(payloadData,
                                                           channelLength,
                                                           rtpHeader);
                } else
                {
                    retVal = CallbackOfReceivedPayloadData(payloadData,
                                                           channelLength,
                                                           rtpHeader);
                }
                payloadData += channelLength;
            }
        }
        return retVal;
    } else
    {
        rtpHeader->type.Audio.channel = 1;
        return CallbackOfReceivedPayloadData(payloadData,
                                             payloadLength,
                                             rtpHeader);
    }
}
} // namespace webrtc