marpan@google.com e02b57e397 Updates to qm_select: Function to update content state, and function for FEC rate adjustment.
Added packetLoss parameter to qm_select, and some code clean-up.
Review URL: http://webrtc-codereview.appspot.com/44009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@128 4adac7df-926f-26a2-2b94-8c16560cd09d
2011-06-28 00:02:51 +00:00


/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "qm_select.h"
#include "internal_defines.h"
#include "qm_select_data.h"
#include "module_common_types.h"
#include "video_coding_defines.h"
#include "trace.h"
#include <math.h>
namespace webrtc {
VCMQmSelect::VCMQmSelect()
{
_qm = new VCMQualityMode();
_contentMetrics = new VideoContentMetrics();
Reset();
}
VCMQmSelect::~VCMQmSelect()
{
delete _qm;
delete _contentMetrics;
}
void
VCMQmSelect::ResetQM()
{
_motion.Reset();
_spatial.Reset();
_coherence.Reset();
_stationaryMotion = false;
_aspectRatio = 1;
_maxRateQM = 0;
_imageType = 1;
_userResolutionPref = 50; // Neutral
_qm->Reset();
return;
}
void
VCMQmSelect::ResetRates()
{
_sumEncodedBytes = 0;
_sumTargetRate = 0.0f;
_sumIncomingFrameRate = 0.0f;
_sumFrameRateMM = 0.0f;
_sumSeqRateMM = 0.0f;
_sumPacketLoss = 0.0f;
_frameCnt = 0;
_frameCntDelta = 0;
_lowBufferCnt = 0;
_updateRateCnt = 0;
return;
}
void
VCMQmSelect::Reset()
{
_stateDecFactorSpatial = 1;
_stateDecFactorTemp = 1;
_bufferLevel = 0.0f;
_targetBitRate = 0.0f;
_incomingFrameRate = 0.0f;
_userFrameRate = 0.0f;
_perFrameBandwidth = 0.0f;
_prevTotalRate = 0.0f;
_prevRttTime = 0;
_prevPacketLoss = 0;
ResetQM();
ResetRates();
return;
}
// Initialize after reset of encoder
WebRtc_Word32
VCMQmSelect::Initialize(float bitRate, float userFrameRate,
WebRtc_UWord32 width, WebRtc_UWord32 height)
{
if (userFrameRate == 0.0f || width == 0 || height == 0)
{
return VCM_PARAMETER_ERROR;
}
_targetBitRate = bitRate;
_userFrameRate = userFrameRate;
// Encoder width and height
_width = width;
_height = height;
// Initial buffer level
_bufferLevel = INIT_BUFFER_LEVEL * _targetBitRate;
// Per-frame bandwidth
if (_incomingFrameRate == 0.0f)
{
_perFrameBandwidth = _targetBitRate / _userFrameRate;
_incomingFrameRate = _userFrameRate;
}
else
{
// Take the average: this accounts for the delay in the update of the new
// encoder frame rate:
// userFrameRate is the new value,
// incomingFrameRate is the old value (based on the previous ~1 sec/RTCP report).
_perFrameBandwidth = 0.5f * (_targetBitRate / _userFrameRate +
_targetBitRate / _incomingFrameRate);
}
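// Illustrative example (hypothetical numbers, for clarity only):
// with _targetBitRate = 300 kbps, a new _userFrameRate of 30 fps, and an
// old _incomingFrameRate of 15 fps, the averaged per-frame bandwidth is
// 0.5 * (300 / 30 + 300 / 15) = 0.5 * (10 + 20) = 15 kbits per frame,
// which smooths the transition until _incomingFrameRate catches up.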
_init = true;
return VCM_OK;
}
WebRtc_Word32
VCMQmSelect::SetPreferences(WebRtc_Word8 resolPref)
{
// Preference setting for temporal over spatial resolution
// 100 means temporal, 0 means spatial, 50 is neutral
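// Example: a screen-sharing source might set 0 (favor spatial detail),
// while a high-motion sports feed might set 100 (favor frame rate).
// Note: this version only stores the preference; it is not yet consumed
// by the selection logic below.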
_userResolutionPref = resolPref;
return VCM_OK;
}
// Update after every encoded frame
void
VCMQmSelect::UpdateEncodedSize(WebRtc_Word64 encodedSize,
FrameType encodedFrameType)
{
// Update encoded size
_sumEncodedBytes += encodedSize;
_frameCnt++;
// Convert to Kbps
float encodedSizeKbits = (float)((encodedSize * 8.0) / 1000.0);
// Update the buffer level:
// the per-frame bandwidth is refreshed when the encoder is updated,
// i.e., on every RTCP report.
_bufferLevel += _perFrameBandwidth - encodedSizeKbits;
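// Illustrative example (hypothetical numbers): with a per-frame bandwidth
// of 15 kbits and an encoded frame of 2500 bytes (= 20 kbits), the buffer
// level drops by 5 kbits on this frame; frames smaller than the per-frame
// bandwidth raise it again. The low-buffer counter below compares the level
// against the fraction PERC_BUFFER_THR of the initial level
// (INIT_BUFFER_LEVEL * _targetBitRate).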
// The mismatch here is based on the difference between the actual encoded
// frame size and the per-frame bandwidth, for delta frames.
// This is a much stronger condition on rate mismatch than sumSeqRateMM.
// Note: not used in this version.
/*
const bool deltaFrame = (encodedFrameType != kVideoFrameKey &&
encodedFrameType != kVideoFrameGolden);
// Sum the frame mismatch:
if (deltaFrame)
{
_frameCntDelta++;
if (encodedSizeKbits > 0)
_sumFrameRateMM +=
(float) (fabs(encodedSizeKbits - _perFrameBandwidth) /
encodedSizeKbits);
}
*/
// Counter for occurrences of low buffer level
if (_bufferLevel <= PERC_BUFFER_THR * INIT_BUFFER_LEVEL * _targetBitRate)
{
_lowBufferCnt++;
}
}
// Update various quantities after SetTargetRates in MediaOpt
void
VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate,
float incomingFrameRate, WebRtc_UWord8 packetLoss)
{
// Sum the target bit rate and incoming frame rate:
// these are the encoder rates from the previous update (~1 sec),
// i.e., before the update for the next ~1 sec.
_sumTargetRate += _targetBitRate;
_sumIncomingFrameRate += _incomingFrameRate;
_updateRateCnt++;
// Sum the received (from RTCP reports) packet loss rates
_sumPacketLoss += (float) packetLoss / 255.0f;
// Convert average sent bitrate to kbps
float avgSentBitRatekbps = avgSentBitRate / 1000.0f;
// Sum the sequence rate mismatch:
// the mismatch here is based on the difference between the target rate the
// encoder used (over the previous ~1 sec) and the average actual encoding
// rate measured at the current time.
if (fabs(_targetBitRate - avgSentBitRatekbps) < THRESH_SUM_MM &&
_targetBitRate > 0.0 )
{
_sumSeqRateMM += (float)
(fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate );
}
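// Illustrative example (hypothetical numbers): if the encoder targeted
// 300 kbps over the last ~1 sec but the measured average sent rate is
// 330 kbps, and the 30 kbps difference is below THRESH_SUM_MM, the mismatch
// contribution is |300 - 330| / 300 = 0.1; these contributions are averaged
// over _updateRateCnt in SelectQuality(). The packet loss sum above
// normalizes the 0..255 RTCP loss fraction to the range 0..1.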
// Update QM with the current new target and frame rate:
// these values are ones the encoder will use for the current/next ~1sec
_targetBitRate = targetBitRate;
_incomingFrameRate = incomingFrameRate;
// Update QM with an (average) encoder per_frame_bandwidth:
// this is the per_frame_bw for the current/next ~1sec
_perFrameBandwidth = 0.0f;
if (_incomingFrameRate > 0.0f)
{
_perFrameBandwidth = _targetBitRate / _incomingFrameRate;
}
}
// Adjust the FEC rate based on the content and the network state
// (packet loss rate, total rate/bandwidth, round trip time).
// Note that packetLoss here is the filtered loss value.
WebRtc_UWord8
VCMQmSelect::AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate,
float frameRate, WebRtc_UWord16 rttTime,
WebRtc_UWord8 packetLoss)
{
// Default: no adjustment
WebRtc_UWord8 codeRateDeltaAdjust = codeRateDelta;
float adjustFec = 1.0f;
// TODO(marpan):
// Set FEC adjustment factor
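// One possible shape for this adjustment (purely illustrative sketch, not
// part of this CL; the threshold name below is a hypothetical placeholder):
// scale the delta-frame FEC rate down for low-motion content at short RTTs,
// and leave it unchanged otherwise.
// if (_motion.level == kLow && rttTime < kShortRttMsHypothetical)
// {
//     adjustFec = 0.75f;
// }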
codeRateDeltaAdjust = static_cast<WebRtc_UWord8>(codeRateDelta * adjustFec);
// Keep track of the previous values of the network state:
// the adjustment may also be based on the pattern of changes in the network state.
_prevTotalRate = totalRate;
_prevRttTime = rttTime;
_prevPacketLoss = packetLoss;
return codeRateDeltaAdjust;
}
void
VCMQmSelect::UpdateContent(const VideoContentMetrics* contentMetrics)
{
_contentMetrics = contentMetrics;
}
// Select the resolution factors: frame size and frame rate change (QM modes).
// Selection covers both going back up in resolution and going further down.
WebRtc_Word32
VCMQmSelect::SelectQuality(VCMQualityMode** qm)
{
if (!_init)
{
return VCM_UNINITIALIZED;
}
if (_contentMetrics == NULL)
{
Reset(); // Default values
*qm = _qm;
return VCM_OK;
}
// Default settings
_qm->spatialWidthFact = 1;
_qm->spatialHeightFact = 1;
_qm->temporalFact = 1;
// Update native values
_nativeWidth = _contentMetrics->nativeWidth;
_nativeHeight = _contentMetrics->nativeHeight;
_nativeFrameRate = _contentMetrics->nativeFrameRate;
// Aspect ratio: used for selection of 1x2, 2x1, or 2x2
_aspectRatio = (float)_width / (float)_height;
float avgTargetRate = 0.0f;
float avgIncomingFrameRate = 0.0f;
float ratioBufferLow = 0.0f;
float rateMisMatch = 0.0f;
float avgPacketLoss = 0.0f;
if (_frameCnt > 0)
{
ratioBufferLow = (float)_lowBufferCnt / (float)_frameCnt;
}
if (_updateRateCnt > 0)
{
// Use seq-rate mismatch for now
rateMisMatch = (float)_sumSeqRateMM / (float)_updateRateCnt;
//rateMisMatch = (float)_sumFrameRateMM / (float)_frameCntDelta;
// Average target and incoming frame rates
avgTargetRate = (float)_sumTargetRate / (float)_updateRateCnt;
avgIncomingFrameRate = (float)_sumIncomingFrameRate /
(float)_updateRateCnt;
// Average received packet loss rate
avgPacketLoss = (float)_sumPacketLoss / (float)_updateRateCnt;
}
// For QM selection below, may want to weight the average encoder rates
// with the current (for next ~1sec) rate values.
// Uniform average for now:
float w1 = 0.5f;
float w2 = 0.5f;
avgTargetRate = w1 * avgTargetRate + w2 * _targetBitRate;
avgIncomingFrameRate = w1 * avgIncomingFrameRate + w2 * _incomingFrameRate;
// Set the maximum transitional rate and image type:
// for up-sampled spatial dimensions.
// This is needed to get the transRate for going back up in
// spatial resolution (only 2x2 allowed in this version).
SetMaxRateForQM(2 * _width, 2 * _height);
WebRtc_UWord8 imageType2 = _imageType;
WebRtc_UWord32 maxRateQM2 = _maxRateQM;
// Set the maximum transitional rate and image type:
// for the encoder spatial dimensions.
SetMaxRateForQM(_width, _height);
// Compute class state of the content.
MotionNFD();
Spatial();
//
// Get transitional rate from table, based on image type and content class.
//
// Get image class and content class: for going down spatially
WebRtc_UWord8 imageClass = 1;
if (_imageType <= 3) imageClass = 0;
WebRtc_UWord8 contentClass = 3 * _motion.level + _spatial.level;
WebRtc_UWord8 tableIndex = imageClass * 9 + contentClass;
float scaleTransRate = kScaleTransRateQm[tableIndex];
// Get image class and content class: for going up spatially
WebRtc_UWord8 imageClass2 = 1;
if (imageType2 <= 3) imageClass2 = 0;
WebRtc_UWord8 tableIndex2 = imageClass2 * 9 + contentClass;
float scaleTransRate2 = kScaleTransRateQm[tableIndex2];
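// Illustrative example (the numeric values of the level enums are defined
// in qm_select.h and assumed here only for illustration): if kLow == 0 and
// kHigh == 2, then high motion with low texture on a large image
// (imageClass = 1) gives contentClass = 3 * 2 + 0 = 6 and
// tableIndex = 1 * 9 + 6 = 15.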
// Transitional rate for going down
WebRtc_UWord32 estimatedTransRateDown = static_cast<WebRtc_UWord32>
(_incomingFrameRate * scaleTransRate * _maxRateQM / 30);
// Transitional rate for going up temporally
WebRtc_UWord32 estimatedTransRateUpT = static_cast<WebRtc_UWord32>
(TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate *
scaleTransRate * _maxRateQM / 30);
// Transitional rate for going up spatially
WebRtc_UWord32 estimatedTransRateUpS = static_cast<WebRtc_UWord32>
(TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate *
scaleTransRate2 * maxRateQM2 / 30);
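// Illustrative example (hypothetical constants; the real values live in
// qm_select_data.h): at 15 fps, with scaleTransRate = 0.4 and
// _maxRateQM = 500 kbps, the down-transition rate is
// 15 * 0.4 * 500 / 30 = 100 kbps, i.e., the table rate is pro-rated by the
// actual frame rate relative to 30 fps. The up-transition rates apply the
// TRANS_RATE_SCALE_UP_* factors (and, for spatial, the scale and max rate
// of the 2x up-sampled size) as hysteresis against oscillating between
// resolutions.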
//
// Done with transitional rates
//
//
// CHECK FOR GOING BACK UP IN RESOLUTION
//
bool selectedUp = false;
// Check if native has been spatially down-sampled
if (_stateDecFactorSpatial > 1)
{
// Check conditions on buffer level and rate_mismatch
if ( (avgTargetRate > estimatedTransRateUpS) &&
(ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM))
{
// width/height scaled back up:
// setting 0 indicates scaling back to native
_qm->spatialHeightFact = 0;
_qm->spatialWidthFact = 0;
selectedUp = true;
}
}
// Check if native has been temporally down-sampled
if (_stateDecFactorTemp > 1)
{
if ( (avgTargetRate > estimatedTransRateUpT) &&
(ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM))
{
// temporal scale back up:
// setting 0 indicates scaling back to native
_qm->temporalFact = 0;
selectedUp = true;
}
}
// Leave QM selection if we chose to go back up in either the spatial or temporal dimension
if (selectedUp == true)
{
// Update down-sampling state
// Note: only temp reduction by 2 is allowed
if (_qm->temporalFact == 0)
{
_stateDecFactorTemp = _stateDecFactorTemp / 2;
}
// Update down-sampling state
// Note: only spatial reduction by 2x2 is allowed
if (_qm->spatialHeightFact == 0 && _qm->spatialWidthFact == 0 )
{
_stateDecFactorSpatial = _stateDecFactorSpatial / 4;
}
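// Example from the bookkeeping in this file: a single 2x2 spatial
// down-sample sets _stateDecFactorSpatial to 4 (width and height factors
// multiplied in further below), so scaling back up divides it by 4;
// a single temporal down-sample by 2 sets _stateDecFactorTemp to 2,
// so going back up divides it by 2, returning both states to 1 (native).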
*qm = _qm;
return VCM_OK;
}
//
// Done with checking for going back up in resolution
//
//
// CHECK FOR RESOLUTION REDUCTION
//
// Resolution reduction if:
// (1) target rate is lower than transitional rate, or
// (2) buffer level is not stable, or
// (3) rate mismatch is larger than threshold
// Bias down-sampling based on packet loss conditions
if (avgPacketLoss > LOSS_THR)
{
estimatedTransRateDown = LOSS_RATE_FAC * estimatedTransRateDown;
}
if ((avgTargetRate < estimatedTransRateDown ) ||
(ratioBufferLow > MAX_BUFFER_LOW)
|| (rateMisMatch > MAX_RATE_MM))
{
WebRtc_UWord8 spatialFact = 1;
WebRtc_UWord8 tempFact = 1;
// Get the action
spatialFact = kSpatialAction[contentClass];
tempFact = kTemporalAction[contentClass];
switch(spatialFact)
{
case 4:
_qm->spatialWidthFact = 2;
_qm->spatialHeightFact = 2;
break;
case 2:
// Default is 1x2 (H)
_qm->spatialWidthFact = 2;
_qm->spatialHeightFact = 1;
// Select 1x2,2x1, or back to 2x2
// Note: directional selection not used in this version
// SelectSpatialDirectionMode((float) estimatedTransRateDown);
break;
default:
_qm->spatialWidthFact = 1;
_qm->spatialHeightFact = 1;
break;
}
_qm->temporalFact = tempFact;
// Sanity check on the ST QM selection:
// override the settings if the image size or frame rate is too small.
// Also check the limit on the current down-sampling state.
// No spatial down-sampling if the image size is too small (QCIF).
if ( (_width * _height) <= MIN_IMAGE_SIZE ||
_stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT)
{
_qm->spatialWidthFact = 1;
_qm->spatialHeightFact = 1;
}
// No frame rate reduction below some point:
// use the (average) incoming frame rate
if ( avgIncomingFrameRate <= MIN_FRAME_RATE_QM ||
_stateDecFactorTemp >= MAX_TEMP_DOWN_FACT)
{
_qm->temporalFact = 1;
}
// No down-sampling if current downsampling state is above threshold
if (_stateDecFactorTemp * _stateDecFactorSpatial >=
MAX_SPATIAL_TEMP_DOWN_FACT)
{
_qm->spatialWidthFact = 1;
_qm->spatialHeightFact = 1;
_qm->temporalFact = 1;
}
//
// Done with sanity checks on ST QM selection
//
// Update down-sampling states
_stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact
* _qm->spatialHeightFact;
_stateDecFactorTemp = _stateDecFactorTemp * _qm->temporalFact;
if (_qm->spatialWidthFact != 1 || _qm->spatialHeightFact != 1 ||
_qm->temporalFact != 1)
{
WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1,
"Resolution reduction occurred. "
"Content metrics are: Motion = %d, Spatial = %d. "
"Rates are: Est. Trans. BR = %d, Avg. Target BR = %f",
_motion.level, _spatial.level,
estimatedTransRateDown, avgTargetRate);
}
}
else
{
*qm = _qm;
return VCM_OK;
}
// Done with checking for resolution reduction
*qm = _qm;
return VCM_OK;
}
WebRtc_Word32
VCMQmSelect::SelectSpatialDirectionMode(float transRate)
{
// Default is 1x2 (H)
// For bit rates well below transitional rate, we select 2x2
if ( _targetBitRate < transRate * RATE_RED_SPATIAL_2X2 )
{
_qm->spatialWidthFact = 2;
_qm->spatialHeightFact = 2;
return VCM_OK;
}
// Otherwise check prediction errors, aspect ratio, horizontalness
float spatialErr = _contentMetrics->spatialPredErr;
float spatialErrH = _contentMetrics->spatialPredErrH;
float spatialErrV = _contentMetrics->spatialPredErrV;
// Favor 1x2 if the aspect ratio is 16:9 or wider
if (_aspectRatio >= 16.0f / 9.0f )
{
// Check if 1x2 has the lowest prediction error
if (spatialErrH < spatialErr && spatialErrH < spatialErrV)
{
return VCM_OK;
}
}
// Check for 2x2 selection: favor 2x2 over 1x2 and 2x1
if (spatialErr < spatialErrH * (1.0f + SPATIAL_ERR_2X2_VS_H) &&
spatialErr < spatialErrV * (1.0f + SPATIAL_ERR_2X2_VS_V))
{
_qm->spatialWidthFact = 2;
_qm->spatialHeightFact = 2;
return VCM_OK;
}
// Check for 2x1 selection:
if (spatialErrV < spatialErrH * (1.0f - SPATIAL_ERR_V_VS_H) &&
spatialErrV < spatialErr * (1.0f - SPATIAL_ERR_2X2_VS_V))
{
_qm->spatialWidthFact = 1;
_qm->spatialHeightFact = 2;
return VCM_OK;
}
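// If none of the checks above fire, the default 1x2 (H) selection is kept.
// Illustrative example: for a 640x360 (16:9) source where the horizontal
// prediction error is the smallest of the three, the 16:9 check above keeps
// the default 1x2; if instead spatialErr is within the SPATIAL_ERR_2X2_VS_*
// margins of both directional errors, 2x2 is selected.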
return VCM_OK;
}
void
VCMQmSelect::Coherence()
{
float horizNZ = _contentMetrics->motionHorizontalness;
float distortionNZ = _contentMetrics->motionClusterDistortion;
// Coherence measure: combine horizontalness with cluster distortion
_coherence.value = COH_MAX;
if (distortionNZ > 0.)
{
_coherence.value = horizNZ / distortionNZ;
}
_coherence.value = VCM_MIN(COH_MAX, _coherence.value);
if (_coherence.value < COHERENCE_THR)
{
_coherence.level = kLow;
}
else
{
_coherence.level = kHigh;
}
}
void
VCMQmSelect::MotionNFD()
{
_motion.value = _contentMetrics->motionMagnitudeNZ;
// Determine motion level
if (_motion.value < LOW_MOTION_NFD)
{
_motion.level = kLow;
}
else if (_motion.value > HIGH_MOTION_NFD)
{
_motion.level = kHigh;
}
else
{
_motion.level = kDefault;
}
}
void
VCMQmSelect::Motion()
{
float sizeZeroMotion = _contentMetrics->sizeZeroMotion;
float motionMagNZ = _contentMetrics->motionMagnitudeNZ;
// Take product of size and magnitude with equal weight
_motion.value = (1.0f - sizeZeroMotion) * motionMagNZ;
// Stabilize: motionMagNZ could be large when only a
// few motion blocks are non-zero
_stationaryMotion = false;
if (sizeZeroMotion > HIGH_ZERO_MOTION_SIZE)
{
_motion.value = 0.0f;
_stationaryMotion = true;
}
// Determine motion level
if (_motion.value < LOW_MOTION)
{
_motion.level = kLow;
}
else if (_motion.value > HIGH_MOTION)
{
_motion.level = kHigh;
}
else
{
_motion.level = kDefault;
}
}
void
VCMQmSelect::Spatial()
{
float spatialErr = _contentMetrics->spatialPredErr;
float spatialErrH = _contentMetrics->spatialPredErrH;
float spatialErrV = _contentMetrics->spatialPredErrV;
// Spatial measure: take average of 3 prediction errors
_spatial.value = (spatialErr + spatialErrH + spatialErrV) / 3.0f;
float scale = 1.0f;
// Reduce thresholds for HD scenes
if (_imageType > 3)
{
scale = (float)SCALE_TEXTURE_HD;
}
if (_spatial.value > scale * HIGH_TEXTURE)
{
_spatial.level = kHigh;
}
else if (_spatial.value < scale * LOW_TEXTURE)
{
_spatial.level = kLow;
}
else
{
_spatial.level = kDefault;
}
}
WebRtc_Word32
VCMQmSelect::SetMaxRateForQM(WebRtc_UWord32 width, WebRtc_UWord32 height)
{
// Match image type
WebRtc_UWord32 imageSize = width * height;
if (imageSize < kFrameSizeTh[0])
{
_imageType = 0;
}
else if (imageSize < kFrameSizeTh[1])
{
_imageType = 1;
}
else if (imageSize < kFrameSizeTh[2])
{
_imageType = 2;
}
else if (imageSize < kFrameSizeTh[3])
{
_imageType = 3;
}
else if (imageSize < kFrameSizeTh[4])
{
_imageType = 4;
}
else if (imageSize < kFrameSizeTh[5])
{
_imageType = 5;
}
else
{
_imageType = 6;
}
// Set max rate based on image size
_maxRateQM = kMaxRateQm[_imageType];
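// Illustrative example (the threshold values live in qm_select_data.h and
// are not shown here): assuming kFrameSizeTh[0] is above the QCIF size,
// a QCIF input (176x144 = 25344 pixels) maps to _imageType 0 and thus to
// the smallest entry of kMaxRateQm.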
return VCM_OK;
}
} // namespace webrtc