webrtc/modules/rtp_rtcp/source/rtp_sender_audio.cc

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "rtp_sender_audio.h"

#include <string.h> //memcpy
#include <cassert> //assert

namespace webrtc {
RTPSenderAudio::RTPSenderAudio(const WebRtc_Word32 id, RTPSenderInterface* rtpSender) :
    _id(id),
    _rtpSender(rtpSender),
    _audioFeedbackCritsect(*CriticalSectionWrapper::CreateCriticalSection()),
    _audioFeedback(NULL),
    _sendAudioCritsect(*CriticalSectionWrapper::CreateCriticalSection()),
    _frequency(8000),
    _packetSizeSamples(160),
    _dtmfEventIsOn(false),
    _dtmfEventFirstPacketSent(false),
    _dtmfPayloadType(-1),
    _dtmfTimestamp(0),
    _dtmfKey(0),
    _dtmfLengthSamples(0),
    _dtmfLevel(0),
    _dtmfTimeLastSent(0),
    _dtmfTimestampLastSent(0),
    _REDPayloadType(-1),
    _inbandVADactive(false),
    _cngNBPayloadType(-1),
    _cngWBPayloadType(-1),
    _cngSWBPayloadType(-1),
    _lastPayloadType(-1),
    _includeAudioLevelIndication(false),    // @TODO - reset at Init()?
    _audioLevelIndicationID(0)
{
};

RTPSenderAudio::~RTPSenderAudio()
{
    delete &_sendAudioCritsect;
    delete &_audioFeedbackCritsect;
}

WebRtc_Word32
RTPSenderAudio::Init()
{
    CriticalSectionScoped cs(_sendAudioCritsect);

    _dtmfPayloadType = -1;
    _inbandVADactive = false;
    _cngNBPayloadType = -1;
    _cngWBPayloadType = -1;
    _cngSWBPayloadType = -1;
    _lastPayloadType = -1;
    _REDPayloadType = -1;
    _dtmfTimeLastSent = 0;
    _dtmfTimestampLastSent = 0;
    ResetDTMF();
    return 0;
}

void
RTPSenderAudio::ChangeUniqueId(const WebRtc_Word32 id)
{
    _id = id;
}

WebRtc_Word32
RTPSenderAudio::RegisterAudioCallback(RtpAudioFeedback* messagesCallback)
{
    CriticalSectionScoped cs(_audioFeedbackCritsect);
    _audioFeedback = messagesCallback;
    return 0;
}

void
RTPSenderAudio::SetAudioFrequency(const WebRtc_UWord32 f)
{
    CriticalSectionScoped cs(_sendAudioCritsect);
    _frequency = f;
}

WebRtc_UWord32
RTPSenderAudio::AudioFrequency() const
{
    CriticalSectionScoped cs(_sendAudioCritsect);
    return _frequency;
}

    // set audio packet size, used to determine when it's time to send a DTMF packet in silence (CNG)
WebRtc_Word32
RTPSenderAudio::SetAudioPacketSize(const WebRtc_UWord16 packetSizeSamples)
{
    CriticalSectionScoped cs(_sendAudioCritsect);

    _packetSizeSamples = packetSizeSamples;
    return 0;
}

WebRtc_Word32
RTPSenderAudio::RegisterAudioPayload(const WebRtc_Word8 payloadName[RTP_PAYLOAD_NAME_SIZE],
                                     const WebRtc_Word8 payloadType,
                                     const WebRtc_UWord32 frequency,
                                     const WebRtc_UWord8 channels,
                                     const WebRtc_UWord32 rate,
                                     ModuleRTPUtility::Payload*& payload)
{
    WebRtc_Word32 length = (WebRtc_Word32)strlen(payloadName);
    if(length > RTP_PAYLOAD_NAME_SIZE)
    {
        return -1;
    }

    CriticalSectionScoped cs(_sendAudioCritsect);

    if (ModuleRTPUtility::StringCompare(payloadName,"cn",2))
    {
        //  we can have multiple CNG payload types
        if(frequency == 8000)
        {
            _cngNBPayloadType = payloadType;

        } else if(frequency == 16000)
        {
            _cngWBPayloadType = payloadType;

        } else if(frequency == 32000)
        {
            _cngSWBPayloadType = payloadType;
        }else
        {
            return -1;
        }
    }
    if (ModuleRTPUtility::StringCompare(payloadName,"telephone-event",15))
    {
        // Don't add it to the list
        // we dont want to allow send with a DTMF payloadtype
        _dtmfPayloadType = payloadType;
        return 0;
        // The default timestamp rate is 8000 Hz, but other rates may be defined.
    }
    payload = new ModuleRTPUtility::Payload;
    payload->typeSpecific.Audio.frequency = frequency;
    payload->typeSpecific.Audio.channels = channels;
    payload->typeSpecific.Audio.rate = rate;
    payload->audio = true;
    memcpy(payload->name, payloadName, length+1);
    return 0;
}

bool
RTPSenderAudio::MarkerBit(const FrameType frameType,
                          const WebRtc_Word8 payloadType)
{
    CriticalSectionScoped cs(_sendAudioCritsect);

    // for audio true for first packet in a speech burst
    bool markerBit = false;
    if(_lastPayloadType != payloadType)
    {
        if(_cngNBPayloadType != -1)
        {
            // we have configured NB CNG
            if(_cngNBPayloadType == payloadType)
            {
                // only set a marker bit when we change payload type to a non CNG
                return false;
            }
        }
        if(_cngWBPayloadType != -1)
        {
            // we have configured WB CNG
            if(_cngWBPayloadType == payloadType)
            {
                // only set a marker bit when we change payload type to a non CNG
                return false;
            }
        }
        if(_cngSWBPayloadType != -1)
        {
            // we have configured SWB CNG
            if(_cngSWBPayloadType == payloadType)
            {
                // only set a marker bit when we change payload type to a non CNG
                return false;
            }
        }
        // payloadType differ
        if(_lastPayloadType == -1)
        {
            if(frameType != kAudioFrameCN)
            {
                // first packet and NOT CNG
                return true;

            }else
            {
                // first packet and CNG
                _inbandVADactive = true;
                return false;
            }
        }
        // not first packet AND
        // not CNG AND
        // payloadType changed

        // set a marker bit when we change payload type
        markerBit = true;
    }

    // For G.723 G.729, AMR etc we can have inband VAD
    if(frameType == kAudioFrameCN)
    {
        _inbandVADactive = true;

    } else if(_inbandVADactive)
    {
        _inbandVADactive = false;
        markerBit = true;
    }
    return markerBit;
}

bool
RTPSenderAudio::SendTelephoneEventActive(WebRtc_Word8& telephoneEvent) const
{
    if(_dtmfEventIsOn)
    {
        telephoneEvent = _dtmfKey;
        return true;
    }
    WebRtc_UWord32 delaySinceLastDTMF = (ModuleRTPUtility::GetTimeInMS() - _dtmfTimeLastSent);
    if(delaySinceLastDTMF < 100)
    {
        telephoneEvent = _dtmfKey;
        return true;
    }
    telephoneEvent = -1;
    return false;
}

WebRtc_Word32
RTPSenderAudio::SendAudio(const FrameType frameType,
                          const WebRtc_Word8 payloadType,
                          const WebRtc_UWord32 captureTimeStamp,
                          const WebRtc_UWord8* payloadData,
                          const WebRtc_UWord32 dataSize,
                          const RTPFragmentationHeader* fragmentation)
{
    WebRtc_UWord16 payloadSize = (WebRtc_UWord16)dataSize;
    WebRtc_UWord16 maxPayloadLength = _rtpSender->MaxPayloadLength();
    bool dtmfToneStarted = false;
    WebRtc_UWord16 dtmfLengthMS = 0;
    WebRtc_UWord8 key = 0;

    // Check if we have pending DTMFs to send
    if ( !_dtmfEventIsOn && PendingDTMF())
    {
        CriticalSectionScoped cs(_sendAudioCritsect);

        WebRtc_UWord32 delaySinceLastDTMF = (ModuleRTPUtility::GetTimeInMS() - _dtmfTimeLastSent);

        if(delaySinceLastDTMF > 100)
        {
            // New tone to play
            _dtmfTimestamp = captureTimeStamp;
            if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0)
            {
                _dtmfEventFirstPacketSent = false;
                _dtmfKey = key;
                _dtmfLengthSamples = (_frequency/1000)*dtmfLengthMS;
                dtmfToneStarted = true;
                _dtmfEventIsOn = true;
            }
        }
    }
    if(dtmfToneStarted)
    {
        CriticalSectionScoped cs(_audioFeedbackCritsect);
        if(_audioFeedback)
        {
            _audioFeedback->OnPlayTelephoneEvent(_id, key, dtmfLengthMS, _dtmfLevel);
        }
    }

    // A source MAY send events and coded audio packets for the same time
    // but we don't support it
    {
        _sendAudioCritsect.Enter();

        if (_dtmfEventIsOn)
        {
            if(frameType == kFrameEmpty)
            {
                // kFrameEmpty is used to drive the DTMF when in CN mode
                // it can be triggered more frequently than we want to send the DTMF packets
                if(_packetSizeSamples > (captureTimeStamp - _dtmfTimestampLastSent) )
                {
                    // not time to send yet
                    _sendAudioCritsect.Leave();
                    return 0;
                }
            }
            _dtmfTimestampLastSent = captureTimeStamp;

            WebRtc_UWord32 dtmfDurationSamples = (captureTimeStamp - _dtmfTimestamp);

            bool ended = false;
            bool send = true;

            if(_dtmfLengthSamples > dtmfDurationSamples)
            {
                if (dtmfDurationSamples > 0) // Skip send packet at start, since we shouldn't use duration 0
                {
                } else
                {
                    send = false;
                }
            }else
            {
                ended = true;
                _dtmfEventIsOn = false;
                _dtmfTimeLastSent = ModuleRTPUtility::GetTimeInMS();
            }
            // don't hold the critsect while calling SendTelephoneEventPacket
            _sendAudioCritsect.Leave();
            if(send)
            {
                if(dtmfDurationSamples > 0xffff)
                {
                    // RFC 4733 2.5.2.3 Long-Duration Events
                    SendTelephoneEventPacket(ended, _dtmfTimestamp, (WebRtc_UWord16)0xffff, false);

                    // set new timestap for this segment
                    _dtmfTimestamp = captureTimeStamp;
                    dtmfDurationSamples -= 0xffff;
                    _dtmfLengthSamples -= 0xffff;

                    return SendTelephoneEventPacket(ended, _dtmfTimestamp, (WebRtc_UWord16)dtmfDurationSamples, false);
                } else
                {
                    // set markerBit on the first packet in the burst
                    WebRtc_Word32 retVal = SendTelephoneEventPacket(ended, _dtmfTimestamp, (WebRtc_UWord16)dtmfDurationSamples, !_dtmfEventFirstPacketSent);
                    _dtmfEventFirstPacketSent = true;
                    return retVal;
                }
            }
            return(0);
        }
        _sendAudioCritsect.Leave();
    }
    if(payloadSize == 0 || payloadData == NULL)
    {
        if(frameType == kFrameEmpty)
        {
            // we don't send empty audio RTP packets
            // no error since we use it to drive DTMF when we use VAD
            return 0;
        }else
        {
            return -1;
        }
    }

    WebRtc_UWord8 dataBuffer[IP_PACKET_SIZE];
    bool markerBit = MarkerBit(frameType, payloadType);

    WebRtc_Word32 rtpHeaderLength = 0;
    WebRtc_UWord16 timestampOffset = 0;

    if( _REDPayloadType >= 0 &&
        fragmentation &&
        fragmentation->fragmentationVectorSize > 1 &&
        !markerBit)
    {
        // have we configured RED? use its payload type
        // we need to get the current timestamp to calc the diff
        WebRtc_UWord32 oldTimeStamp = _rtpSender->Timestamp();
        rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, _REDPayloadType, markerBit, captureTimeStamp);
        timestampOffset = WebRtc_UWord16(_rtpSender->Timestamp() - oldTimeStamp);
    } else
    {
        rtpHeaderLength= _rtpSender->BuildRTPheader(dataBuffer, payloadType, markerBit, captureTimeStamp);
    }
    if(rtpHeaderLength == -1)
    {
        return -1;
    }
    {
        CriticalSectionScoped cs(_sendAudioCritsect);

        if (_includeAudioLevelIndication)
        {
            dataBuffer[0] |= 0x10; // set eXtension bit

            // https://datatracker.ietf.org/doc/draft-lennox-avt-rtp-audio-level-exthdr/

            /*
            0                   1                   2                   3
            0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |      0xBE     |      0xDE     |            length=1           |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |  ID   | len=0 |V|   level     |      0x00     |      0x00     |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            */

            // add the extension

            // add our ID (0xBEDE)
            ModuleRTPUtility::AssignUWord16ToBuffer(dataBuffer+rtpHeaderLength, RTP_AUDIO_LEVEL_UNIQUE_ID);
            rtpHeaderLength += 2;

            // add the length (length=1) in number of word32
            const WebRtc_UWord8 length = 1;
            ModuleRTPUtility::AssignUWord16ToBuffer(dataBuffer+rtpHeaderLength, length);
            rtpHeaderLength += 2;

            // add ID (defined by the user) and len(=0) byte
            const WebRtc_UWord8 id = _audioLevelIndicationID;
            const WebRtc_UWord8 len = 0;
            dataBuffer[rtpHeaderLength++] = (id << 4) + len;

            // add voice-activity flag (V) bit and the audio level (in dBov)
            const WebRtc_UWord8 V = (frameType == kAudioFrameSpeech);
            WebRtc_UWord8 level = _audioLevel_dBov;
            dataBuffer[rtpHeaderLength++] = (V << 7) + level;

            // add two bytes zero padding
            ModuleRTPUtility::AssignUWord16ToBuffer(dataBuffer+rtpHeaderLength, 0);
            rtpHeaderLength += 2;
        }

        if(maxPayloadLength < rtpHeaderLength + payloadSize )
        {
            // too large payload buffer
            return -1;
        }

        if( _REDPayloadType >= 0 &&                         // have we configured RED?
            fragmentation &&
            fragmentation->fragmentationVectorSize > 1 &&
            !markerBit)
        {
            if(fragmentation == NULL)
            {
                // this can't happen any more but save the code incase we want to use it later again
                // we don't send this type of packet due to old NetEq issue
                dataBuffer[rtpHeaderLength++] = (WebRtc_UWord8)payloadType;
                memcpy(dataBuffer+rtpHeaderLength, payloadData, payloadSize);
            }else
            {
                if( fragmentation->fragmentationVectorSize > 1 &&
                    !markerBit &&               // markerBit == first packet
                    timestampOffset <= 0x3fff)  // silence for too long send only new data
                {
                    if(fragmentation->fragmentationVectorSize != 2)
                    {
                        // we only support 2 codecs when using RED
                        return -1;
                    }
                    // only 0x80 if we have multiple blocks
                    dataBuffer[rtpHeaderLength++] = 0x80 + fragmentation->fragmentationPlType[1];
                    WebRtc_UWord32 blockLength = fragmentation->fragmentationLength[1];

                    // sanity blockLength
                    if(blockLength > 0x3ff) // block length 10 bits 1023 bytes
                    {
                        return -1;
                    }
                    WebRtc_UWord32 REDheader =  (timestampOffset << 10) + blockLength;
                    ModuleRTPUtility::AssignUWord24ToBuffer(dataBuffer+rtpHeaderLength, REDheader);
                    rtpHeaderLength += 3;

                    dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
                    // copy the RED data
                    memcpy(dataBuffer+rtpHeaderLength,
                        payloadData + fragmentation->fragmentationOffset[1],
                        fragmentation->fragmentationLength[1]);

                    // copy the normal data
                    memcpy( dataBuffer+rtpHeaderLength + fragmentation->fragmentationLength[1],
                            payloadData + fragmentation->fragmentationOffset[0],
                            fragmentation->fragmentationLength[0]);

                    payloadSize = WebRtc_UWord16(fragmentation->fragmentationLength[0] + fragmentation->fragmentationLength[1]);

                } else
                {
                    dataBuffer[rtpHeaderLength++] = (WebRtc_UWord8)payloadType;

                    memcpy( dataBuffer+rtpHeaderLength,
                            payloadData + fragmentation->fragmentationOffset[0],
                            fragmentation->fragmentationLength[0]);

                    payloadSize = WebRtc_UWord16(fragmentation->fragmentationLength[0]);
                }
            }
        }else
        {
            if( fragmentation &&
                fragmentation->fragmentationVectorSize > 0)
            {
                // use the fragment info if we have one
                memcpy( dataBuffer+rtpHeaderLength,
                        payloadData + fragmentation->fragmentationOffset[0],
                        fragmentation->fragmentationLength[0]);

                payloadSize = WebRtc_UWord16(fragmentation->fragmentationLength[0]);

            }else
            {
                memcpy(dataBuffer+rtpHeaderLength, payloadData, payloadSize);
            }
        }
        _lastPayloadType = payloadType;

    }   // end critical section

    return _rtpSender->SendToNetwork(dataBuffer, payloadSize, (WebRtc_UWord16)rtpHeaderLength);
}


WebRtc_Word32
RTPSenderAudio::SetAudioLevelIndicationStatus(const bool enable,
                                              const WebRtc_UWord8 ID)
{
    if(ID < 1 || ID > 14)
    {
        return -1;
    }
    CriticalSectionScoped cs(_sendAudioCritsect);

    _includeAudioLevelIndication = enable;
    _audioLevelIndicationID = ID;

    return 0;
}

WebRtc_Word32
RTPSenderAudio::AudioLevelIndicationStatus(bool& enable,
                                           WebRtc_UWord8& ID) const
{
    CriticalSectionScoped cs(_sendAudioCritsect);
    enable = _includeAudioLevelIndication;
    ID = _audioLevelIndicationID;
    return 0;
}

    // Audio level magnitude and voice activity flag are set for each RTP packet
WebRtc_Word32
RTPSenderAudio::SetAudioLevel(const WebRtc_UWord8 level_dBov)
{
    if (level_dBov > 127)
    {
        return -1;
    }
    CriticalSectionScoped cs(_sendAudioCritsect);
    _audioLevel_dBov = level_dBov;
    return 0;
}

    // Set payload type for Redundant Audio Data RFC 2198
WebRtc_Word32
RTPSenderAudio::SetRED(const WebRtc_Word8 payloadType)
{
    if(payloadType < -1 )
    {
        return -1;
    }
    _REDPayloadType = payloadType;
    return 0;
}

    // Get payload type for Redundant Audio Data RFC 2198
WebRtc_Word32
RTPSenderAudio::RED(WebRtc_Word8& payloadType) const
{
    if(_REDPayloadType == -1)
    {
        // not configured
        return -1;
    }
    payloadType = _REDPayloadType;
    return 0;
}

// Send a TelephoneEvent tone using RFC 2833 (4733)
WebRtc_Word32
RTPSenderAudio::SendTelephoneEvent(const WebRtc_UWord8 key,
                                   const WebRtc_UWord16 time_ms,
                                   const WebRtc_UWord8 level)
{
    // DTMF is protected by its own critsect
    if(_dtmfPayloadType < 0)
    {
        // TelephoneEvent payloadtype not configured
        return -1;
    }
    return AddDTMF(key, time_ms, level);
}

WebRtc_Word32
RTPSenderAudio::SendTelephoneEventPacket(const bool ended,
                                         const WebRtc_UWord32 dtmfTimeStamp,
                                         const WebRtc_UWord16 duration,
                                         const bool markerBit)
{
    WebRtc_UWord8 dtmfbuffer[IP_PACKET_SIZE];
    WebRtc_UWord8 sendCount = 1;
    WebRtc_Word32 retVal = 0;

    if(ended)
    {
        // resend last packet in an event 3 times
        sendCount = 3;
    }
    do
    {
        _sendAudioCritsect.Enter();

        //Send DTMF data
        _rtpSender->BuildRTPheader(dtmfbuffer, _dtmfPayloadType, markerBit, dtmfTimeStamp);

        // reset CSRC and X bit
        dtmfbuffer[0] &= 0xe0;

        //Create DTMF data
        /*    From RFC 2833:

         0                   1                   2                   3
         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |     event     |E|R| volume    |          duration             |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        */
        // R bit always cleared
        WebRtc_UWord8 R = 0x00;
        WebRtc_UWord8 volume = _dtmfLevel;

        // First packet un-ended
          WebRtc_UWord8 E = 0x00;

        if(ended)
        {
            E = 0x80;
        }

        // First byte is Event number, equals key number
        dtmfbuffer[12] = _dtmfKey;
        dtmfbuffer[13] = E|R|volume;
        ModuleRTPUtility::AssignUWord16ToBuffer(dtmfbuffer+14, duration);

        _sendAudioCritsect.Leave();
        retVal = _rtpSender->SendToNetwork(dtmfbuffer, 4, 12);
        sendCount--;

    }while (sendCount > 0 && retVal == 0);

    return retVal;
}
} // namespace webrtc