/* * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "rtp_receiver_audio.h" #include //assert #include // memcpy() #include // pow() #include "critical_section_wrapper.h" namespace webrtc { RTPReceiverAudio::RTPReceiverAudio(const WebRtc_Word32 id): _id(id), _lastReceivedFrequency(8000), _telephoneEvent(false), _telephoneEventForwardToDecoder(false), _telephoneEventDetectEndOfTone(false), _telephoneEventPayloadType(-1), _telephoneEventReported(), _cngNBPayloadType(-1), _cngWBPayloadType(-1), _cngSWBPayloadType(-1), _cngPayloadType(-1), _G722PayloadType(-1), _lastReceivedG722(false), _criticalSectionFeedback(*CriticalSectionWrapper::CreateCriticalSection()), _cbAudioFeedback(NULL) { } RTPReceiverAudio::~RTPReceiverAudio() { delete &_criticalSectionFeedback; } WebRtc_Word32 RTPReceiverAudio::Init() { _lastReceivedFrequency = 8000; _telephoneEvent = false; _telephoneEventForwardToDecoder = false; _telephoneEventDetectEndOfTone = false; _telephoneEventPayloadType = -1; while(_telephoneEventReported.Size() > 0) { _telephoneEventReported.Erase(_telephoneEventReported.First()); } _cngNBPayloadType = -1; _cngWBPayloadType = -1; _cngSWBPayloadType = -1; _cngPayloadType = -1; _G722PayloadType = -1; _lastReceivedG722 = false; return 0; } void RTPReceiverAudio::ChangeUniqueId(const WebRtc_Word32 id) { _id = id; } WebRtc_Word32 RTPReceiverAudio::RegisterIncomingAudioCallback(RtpAudioFeedback* incomingMessagesCallback) { CriticalSectionScoped lock(_criticalSectionFeedback); _cbAudioFeedback = incomingMessagesCallback; return 0; } WebRtc_UWord32 RTPReceiverAudio::AudioFrequency() const { if(_lastReceivedG722) { return 8000; } return _lastReceivedFrequency; } // Outband TelephoneEvent(DTMF) detection WebRtc_Word32 RTPReceiverAudio::SetTelephoneEventStatus(const bool enable, const bool forwardToDecoder, const bool detectEndOfTone) { _telephoneEvent= enable; _telephoneEventDetectEndOfTone = detectEndOfTone; _telephoneEventForwardToDecoder = forwardToDecoder; return 0; } // Is outband TelephoneEvent(DTMF) turned on/off? bool RTPReceiverAudio::TelephoneEvent() const { return _telephoneEvent; } // Is forwarding of outband telephone events turned on/off? bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const { return _telephoneEventForwardToDecoder; } bool RTPReceiverAudio::TelephoneEventPayloadType(const WebRtc_Word8 payloadType) const { return (_telephoneEventPayloadType == payloadType)?true:false; } bool RTPReceiverAudio::CNGPayloadType(const WebRtc_Word8 payloadType, WebRtc_UWord32& frequency) { // we can have three CNG on 8000Hz, 16000Hz and 32000Hz if(_cngNBPayloadType == payloadType) { frequency = 8000; if ((_cngPayloadType != -1) &&(_cngPayloadType !=_cngNBPayloadType)) { ResetStatistics(); } _cngPayloadType = _cngNBPayloadType; return true; } else if(_cngWBPayloadType == payloadType) { // if last received codec is G.722 we must use frequency 8000 if(_lastReceivedG722) { frequency = 8000; } else { frequency = 16000; } if ((_cngPayloadType != -1) &&(_cngPayloadType !=_cngWBPayloadType)) { ResetStatistics(); } _cngPayloadType = _cngWBPayloadType; return true; }else if(_cngSWBPayloadType == payloadType) { frequency = 32000; if ((_cngPayloadType != -1) &&(_cngPayloadType !=_cngSWBPayloadType)) { ResetStatistics(); } _cngPayloadType = _cngSWBPayloadType; return true; }else { // not CNG if(_G722PayloadType == payloadType) { _lastReceivedG722 = true; }else { _lastReceivedG722 = false; } } return false; } /* Sample based or frame based codecs based on RFC 3551 NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples. The correct rate is 4 bits/sample. name of sampling default encoding sample/frame bits/sample rate ms/frame ms/packet Sample based audio codecs DVI4 sample 4 var. 20 G722 sample 4 16,000 20 G726-40 sample 5 8,000 20 G726-32 sample 4 8,000 20 G726-24 sample 3 8,000 20 G726-16 sample 2 8,000 20 L8 sample 8 var. 20 L16 sample 16 var. 20 PCMA sample 8 var. 20 PCMU sample 8 var. 20 Frame based audio codecs G723 frame N/A 8,000 30 30 G728 frame N/A 8,000 2.5 20 G729 frame N/A 8,000 10 20 G729D frame N/A 8,000 10 20 G729E frame N/A 8,000 10 20 GSM frame N/A 8,000 20 20 GSM-EFR frame N/A 8,000 20 20 LPC frame N/A 8,000 20 20 MPA frame N/A var. var. G7221 frame N/A */ ModuleRTPUtility::Payload* RTPReceiverAudio::RegisterReceiveAudioPayload(const WebRtc_Word8 payloadName[RTP_PAYLOAD_NAME_SIZE], const WebRtc_Word8 payloadType, const WebRtc_UWord32 frequency, const WebRtc_UWord8 channels, const WebRtc_UWord32 rate) { WebRtc_Word32 length = (WebRtc_Word32)strlen(payloadName); if(length > RTP_PAYLOAD_NAME_SIZE) { assert(false); return NULL; } if (ModuleRTPUtility::StringCompare(payloadName,"telephone-event",15)) { _telephoneEventPayloadType = payloadType; } if (ModuleRTPUtility::StringCompare(payloadName,"cn",2)) { // we can have three CNG on 8000Hz, 16000Hz and 32000Hz if(frequency == 8000) { _cngNBPayloadType = payloadType; } else if(frequency == 16000) { _cngWBPayloadType = payloadType; } else if(frequency == 32000) { _cngSWBPayloadType = payloadType; }else { assert(false); return NULL; } } WebRtc_UWord8 bitsPerSample = 0; // zero implies frame based if (ModuleRTPUtility::StringCompare(payloadName,"DVI4",4)) { bitsPerSample = 4; } else if(ModuleRTPUtility::StringCompare(payloadName,"G722",4)) { if(ModuleRTPUtility::StringCompare(payloadName,"G7221",5)) { // frame based } else { _G722PayloadType = payloadType; bitsPerSample = 4; } } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-40",7)) { bitsPerSample = 5; } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-32",7)) { bitsPerSample = 4; } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-24",7)) { bitsPerSample = 3; } else if(ModuleRTPUtility::StringCompare(payloadName,"G726-16",7)) { bitsPerSample = 2; } else if(ModuleRTPUtility::StringCompare(payloadName,"L8",2)) { bitsPerSample = 8; } else if(ModuleRTPUtility::StringCompare(payloadName,"L16",3)) { bitsPerSample = 16; } else if(ModuleRTPUtility::StringCompare(payloadName,"PCMU",4)) { bitsPerSample = 8; } else if(ModuleRTPUtility::StringCompare(payloadName,"PCMA",4)) { bitsPerSample = 8; } ModuleRTPUtility::Payload* payload = new ModuleRTPUtility::Payload; memcpy(payload->name, payloadName, length+1); payload->typeSpecific.Audio.frequency = frequency; payload->typeSpecific.Audio.channels = channels; payload->typeSpecific.Audio.bitsPerSample = bitsPerSample; payload->typeSpecific.Audio.rate = rate; payload->audio = true; return payload; } // we are not allowed to have any critsects when calling CallbackOfReceivedPayloadData WebRtc_Word32 RTPReceiverAudio::ParseAudioCodecSpecific(WebRtcRTPHeader* rtpHeader, const WebRtc_UWord8* payloadData, const WebRtc_UWord16 payloadLength, const ModuleRTPUtility::AudioPayload& audioSpecific, const bool isRED) { WebRtc_UWord8 newEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS]; WebRtc_UWord8 removedEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS]; WebRtc_UWord8 numberOfNewEvents = 0; WebRtc_UWord8 numberOfRemovedEvents = 0; bool telephoneEventPacket = TelephoneEventPayloadType(rtpHeader->header.payloadType); if(payloadLength == 0) { return 0; } { CriticalSectionScoped lock(_criticalSectionFeedback); if(telephoneEventPacket) { // RFC 4733 2.3 /* 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | event |E|R| volume | duration | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ if(payloadLength % 4 != 0) { return -1; } WebRtc_UWord8 numberOfEvents = payloadLength / 4; // sanity if(numberOfEvents >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) { numberOfEvents = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS; } for (int n = 0; n < numberOfEvents; n++) { bool end = (payloadData[(4*n)+1] & 0x80)? true:false; if(_telephoneEventReported.Find(payloadData[4*n]) != NULL) { // we have already seen this event if(end) { removedEvents[numberOfRemovedEvents]= payloadData[4*n]; numberOfRemovedEvents++; _telephoneEventReported.Erase(payloadData[4*n]); } }else { if(end) { // don't add if it's a end of a tone }else { newEvents[numberOfNewEvents] = payloadData[4*n]; numberOfNewEvents++; _telephoneEventReported.Insert(payloadData[4*n],NULL); } } } // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events // should not be a problem since we don't care about the duration // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet } if(_telephoneEvent && _cbAudioFeedback) { for (int n = 0; n < numberOfNewEvents; n++) { _cbAudioFeedback->OnReceivedTelephoneEvent(_id, newEvents[n], false); } if(_telephoneEventDetectEndOfTone) { for (int n = 0; n < numberOfRemovedEvents; n++) { _cbAudioFeedback->OnReceivedTelephoneEvent(_id, removedEvents[n], true); } } } } if(! telephoneEventPacket ) { _lastReceivedFrequency = audioSpecific.frequency; } // Check if this is a CNG packet, receiver might want to know WebRtc_UWord32 dummy; if(CNGPayloadType(rtpHeader->header.payloadType, dummy)) { rtpHeader->type.Audio.isCNG=true; rtpHeader->frameType = kAudioFrameCN; }else { rtpHeader->frameType = kAudioFrameSpeech; rtpHeader->type.Audio.isCNG=false; } // check if it's a DTMF event, hence something we can playout if(telephoneEventPacket) { if(!_telephoneEventForwardToDecoder) { // don't forward event to decoder return 0; } MapItem* first = _telephoneEventReported.First(); if(first && first->GetId() > 15) { // don't forward non DTMF events return 0; } } if(isRED && !(payloadData[0] & 0x80)) { // we recive only one frame packed in a RED packet remove the RED wrapper rtpHeader->header.payloadType = payloadData[0]; // only one frame in the RED strip the one byte to help NetEq return CallbackOfReceivedPayloadData(payloadData+1, payloadLength-1, rtpHeader); } if(audioSpecific.channels > 1) { WebRtc_Word32 retVal = 0; WebRtc_UWord16 channelLength = payloadLength/audioSpecific.channels; if(audioSpecific.bitsPerSample > 0) { // sanity assert((payloadLength*8)%audioSpecific.bitsPerSample == 0); // sample based codec // build matrix WebRtc_UWord8 matrix[IP_PACKET_SIZE]; WebRtc_UWord32 offsetBytes = 0; WebRtc_UWord32 offsetBytesInsert = 0; // initialize matrix to 0 memset(matrix, 0, audioSpecific.channels*channelLength); switch(audioSpecific.bitsPerSample) { case 1: case 2: case 3: case 4: case 5: case 6: case 7: { WebRtc_UWord32 offsetSamples = 0; WebRtc_UWord32 offsetSamplesInsert = 0; WebRtc_UWord16 bitMask = (WebRtc_UWord16)ModuleRTPUtility::pow2(audioSpecific.bitsPerSample)-1; WebRtc_UWord16 samplesPerChannel =payloadLength*8/audioSpecific.bitsPerSample/audioSpecific.channels; for(WebRtc_UWord32 i = 0; i < samplesPerChannel; i++) { WebRtc_UWord8 insertShift = (WebRtc_UWord8)((offsetSamplesInsert+audioSpecific.bitsPerSample)%16); insertShift = 16 - insertShift; // inverse the calculation for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++) { // get sample WebRtc_UWord16 s = payloadData[offsetBytes] << 8; // check that we don't read outside the memory if(offsetBytes < (WebRtc_UWord32)payloadLength -2) { s += payloadData[offsetBytes+1]; } WebRtc_UWord8 readShift = (WebRtc_UWord8)((offsetSamples+audioSpecific.bitsPerSample)%16); readShift = 16 - readShift; // inverse the calculation s >>= readShift; s &= bitMask; // prepare for reading next sample offsetSamples += audioSpecific.bitsPerSample; if(readShift <= audioSpecific.bitsPerSample) { // next does not fitt // or fitt exactly offsetSamples -= 8; offsetBytes++; } // insert sample into matrix WebRtc_UWord32 columOffset = j*channelLength; WebRtc_UWord16 insert = s << insertShift; #if defined(WEBRTC_LITTLE_ENDIAN) matrix[columOffset+offsetBytesInsert] |= static_cast(insert>>8); matrix[columOffset+offsetBytesInsert+1] |= static_cast(insert); #else WebRtc_UWord16* matrixU16 = (WebRtc_UWord16*)&(matrix[columOffset+offsetBytesInsert]); matrixU16[0] |= (s << insertShift); #endif } // prepare for writing next sample offsetSamplesInsert += audioSpecific.bitsPerSample; if(insertShift <= audioSpecific.bitsPerSample) { // next does not fitt // or fitt exactly offsetSamplesInsert -= 8; offsetBytesInsert++; } } } break; case 8: { WebRtc_UWord32 sample = 0; for(WebRtc_UWord32 i = 0; i < channelLength; i++) { for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++) { WebRtc_UWord32 columOffset = j*channelLength; matrix[columOffset + i] = payloadData[sample++]; } } } break; case 16: { WebRtc_UWord32 sample = 0; for(WebRtc_UWord32 i = 0; i < channelLength; i +=2) { for(WebRtc_UWord32 j = 0; j < audioSpecific.channels; j++) { WebRtc_UWord32 columOffset = j*channelLength; matrix[columOffset + i] = payloadData[sample++]; matrix[columOffset + i + 1] = payloadData[sample++]; } } } break; default: assert(false); return -1; } // we support 16 bits sample // callback for all channels for(int channel = 0; channel < audioSpecific.channels && retVal == 0; channel++) { // one callback per channel rtpHeader->type.Audio.channel = channel+1; if(channel == 0) { // include the original packet only in the first callback retVal = CallbackOfReceivedPayloadData(&matrix[channel*channelLength], channelLength, rtpHeader); } else { retVal = CallbackOfReceivedPayloadData(&matrix[channel*channelLength], channelLength, rtpHeader); } } } else { for(int channel = 1; channel <= audioSpecific.channels && retVal == 0; channel++) { // one callback per channel rtpHeader->type.Audio.channel = channel; if(channel == 1) { // include the original packet only in the first callback retVal = CallbackOfReceivedPayloadData(payloadData, channelLength, rtpHeader); } else { retVal = CallbackOfReceivedPayloadData(payloadData, channelLength, rtpHeader); } payloadData += channelLength; } } return retVal; }else { rtpHeader->type.Audio.channel = 1; return CallbackOfReceivedPayloadData(payloadData, payloadLength, rtpHeader); } } } // namespace webrtc