Updates to qm_select: Function to update content state, and function for FEC rate adjustment.

Added packetLoss parameter to qm_select, and some code clean-up. Review URL: http://webrtc-codereview.appspot.com/44009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@128 4adac7df-926f-26a2-2b94-8c16560cd09d
2011-06-28 00:02:51 +00:00
parent 6cc3f000fc
commit e02b57e397
4 changed files with 367 additions and 256 deletions
--- a/modules/video_coding/main/source/media_optimization.cc
+++ b/modules/video_coding/main/source/media_optimization.cc
@@ -197,7 +197,8 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate,
    if (_enableQm)
    {
        //Update QM with rates
-        _qms->UpdateRates((float)_targetBitRate, _avgSentBitRateBps, _incomingFrameRate);
+        _qms->UpdateRates((float)_targetBitRate, _avgSentBitRateBps,
                          _incomingFrameRate, _fractionLost);
        //Check for QM selection
        bool selectQM = checkStatusForQMchange();
        if (selectQM)
@@ -537,9 +538,12 @@ VCMMediaOptimization::SelectQuality()
    // Reset quantities for QM select
    _qms->ResetQM();
    // Update QM with long-term averaged content metrics.
    _qms->UpdateContent(_content->LongTermAvgData());
    // Select quality mode
    VCMQualityMode* qm = NULL;
-    WebRtc_Word32 ret = _qms->SelectQuality(_content->LongTermAvgData(), &qm);
+    WebRtc_Word32 ret = _qms->SelectQuality(&qm);
    if (ret < 0)
    {
          return ret;
--- a/modules/video_coding/main/source/qm_select.cc
+++ b/modules/video_coding/main/source/qm_select.cc
@@ -23,12 +23,14 @@ namespace webrtc {
 // Construct the selector.
 // _qm is allocated here and owned by this object.
 // _contentMetrics is a non-owning pointer: UpdateContent() replaces it with
 // a pointer supplied by the caller, so nothing is allocated for it here
 // (an allocation would be leaked by the first UpdateContent() call).
 // It starts out as NULL, which SelectQuality() already treats as
 // "no content metrics available".
 VCMQmSelect::VCMQmSelect()
 {
     _qm = new VCMQualityMode();
     _contentMetrics = NULL;
     Reset();
 }
 // Release only what this object allocated.
 // _contentMetrics is deliberately not deleted: after UpdateContent() it
 // points at an object this class did not allocate, and deleting it here
 // would free memory that belongs to the caller.
 VCMQmSelect::~VCMQmSelect()
 {
     delete _qm;
 }
 void
@@ -50,10 +52,11 @@ void
 VCMQmSelect::ResetRates()
 {
    _sumEncodedBytes = 0;
-    _sumTargetRate = 0;
+    _sumTargetRate = 0.0f;
-    _sumIncomingFrameRate = 0;
+    _sumIncomingFrameRate = 0.0f;
-    _sumFrameRateMM = 0;
+    _sumFrameRateMM = 0.0f;
-    _sumSeqRateMM = 0;
+    _sumSeqRateMM = 0.0f;
    _sumPacketLoss = 0.0f;
    _frameCnt = 0;
    _frameCntDelta = 0;
    _lowBufferCnt = 0;
@@ -64,21 +67,25 @@ VCMQmSelect::ResetRates()
 void
 VCMQmSelect::Reset()
 {
-   _stateDecFactorSpatial = 1;
+    _stateDecFactorSpatial = 1;
-   _stateDecFactorTemp  = 1;
+    _stateDecFactorTemp  = 1;
-   _bufferLevel = 0;
+    _bufferLevel = 0.0f;
-   _targetBitRate = 0;
+    _targetBitRate = 0.0f;
-   _incomingFrameRate = 0;
+    _incomingFrameRate = 0.0f;
-   _userFrameRate = 0;
+    _userFrameRate = 0.0f;
-   _perFrameBandwidth =0;
+    _perFrameBandwidth =0.0f;
-    ResetQM();
+    _prevTotalRate = 0.0f;
-    ResetRates();
+    _prevRttTime = 0;
-    return;
+    _prevPacketLoss = 0;
     ResetQM();
     ResetRates();
     return;
 }
 //Initialize after reset of encoder
 WebRtc_Word32
-VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width, WebRtc_UWord32 height)
+VCMQmSelect::Initialize(float bitRate, float userFrameRate,
                        WebRtc_UWord32 width, WebRtc_UWord32 height)
 {
    if (userFrameRate == 0.0f || width == 0 || height == 0)
    {
@@ -86,11 +93,15 @@ VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width
    }
    _targetBitRate = bitRate;
    _userFrameRate = userFrameRate;
-    //Encoder width and height
+
    // Encoder width and height
    _width = width;
    _height = height;
-    //Initial buffer level
+
    // Initial buffer level
    _bufferLevel = INIT_BUFFER_LEVEL * _targetBitRate;
    // Per-frame bandwidth
    if ( _incomingFrameRate == 0 )
    {
        _perFrameBandwidth = _targetBitRate / _userFrameRate;
@@ -98,9 +109,11 @@ VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width
    }
    else
    {
-    //Take average: this is due to delay in update of new frame rate in encoder:
+    // Take average: this is due to delay in update of new encoder frame rate:
-    //userFrameRate is the new one, incomingFrameRate is the old one (based on previous ~ 1sec)
+    // userFrameRate is the new one,
-        _perFrameBandwidth = 0.5 *( _targetBitRate / _userFrameRate + _targetBitRate / _incomingFrameRate );
+    // incomingFrameRate is the old one (based on previous ~ 1sec/RTCP report)
        _perFrameBandwidth = 0.5 *( _targetBitRate / _userFrameRate +
            _targetBitRate / _incomingFrameRate );
    }
    _init  = true;
@@ -112,7 +125,7 @@ WebRtc_Word32
 VCMQmSelect::SetPreferences(WebRtc_Word8 resolPref)
 {
    // Preference setting for temporal over spatial resolution
-    // 100 means temporal, 0 means spatial, 50 is neutral (we decide)
+    // 100 means temporal, 0 means spatial, 50 is neutral
    _userResolutionPref = resolPref;
    return VCM_OK;
@@ -120,69 +133,84 @@ VCMQmSelect::SetPreferences(WebRtc_Word8 resolPref)
 //Update after every encoded frame
 void
-VCMQmSelect::UpdateEncodedSize(WebRtc_Word64 encodedSize, FrameType encodedFrameType)
+VCMQmSelect::UpdateEncodedSize(WebRtc_Word64 encodedSize,
                               FrameType encodedFrameType)
 {
-    //Update encoded size;
+    // Update encoded size;
    _sumEncodedBytes += encodedSize;
    _frameCnt++;
-    //Convert to Kbps
+    // Convert to Kbps
    float encodedSizeKbits = (float)((encodedSize * 8.0) / 1000.0);
-    //Update the buffer level: per_frame_BW is updated when encoder is updated, every ~1sec
+    // Update the buffer level:
    // per_frame_BW is updated when encoder is updated, every RTCP reports
    _bufferLevel += _perFrameBandwidth - encodedSizeKbits;
    // Mismatch here is based on difference of actual encoded frame size and
    // per-frame bandwidth, for delta frames
    // This is a much stronger condition on rate mismatch than sumSeqRateMM
    // Note: not used in this version
    /*
    const bool deltaFrame = (encodedFrameType != kVideoFrameKey &&
                             encodedFrameType != kVideoFrameGolden);
-    //Sum the frame mismatch:
+    // Sum the frame mismatch:
    //Mismatch here is based on difference of actual encoded frame size and per-frame bandwidth, for delta frames
    //This is a much stronger condition on rate mismatch than sumSeqRateMM
    // Note: not used in this version
    /*
    if (deltaFrame)
    {
         _frameCntDelta++;
         if (encodedSizeKbits > 0)
-            _sumFrameRateMM += (float) (fabs(encodedSizeKbits - _perFrameBandwidth) / encodedSizeKbits);
+            _sumFrameRateMM +=
            (float) (fabs(encodedSizeKbits - _perFrameBandwidth) /
            encodedSizeKbits);
    }
    */
-    //Counter for occurrences of low buffer level
+    // Counter for occurrences of low buffer level
    if (_bufferLevel <= PERC_BUFFER_THR * INIT_BUFFER_LEVEL * _targetBitRate)
    {
        _lowBufferCnt++;
    }
 }
-//Update after SetTargetRates in MediaOpt (every ~1sec)
+//Update various quantities after SetTargetRates in MediaOpt
 void
-VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate, float incomingFrameRate)
+VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate,
                         float incomingFrameRate, WebRtc_UWord8 packetLoss)
 {
-    //Sum the target bitrate and incoming frame rate: these values are the encoder rates (from previous ~1sec),
+    // Sum the target bitrate and incoming frame rate:
-    //i.e, before the update for next ~1sec
+    // these values are the encoder rates (from previous update ~1sec),
    // i.e, before the update for next ~1sec
    _sumTargetRate += _targetBitRate;
    _sumIncomingFrameRate  += _incomingFrameRate;
    _updateRateCnt++;
-    //Convert to kbps
+    // Sum the received (from RTCP reports) packet loss rates
    _sumPacketLoss += (float) packetLoss / 255.0f;
    // Convert average sent bitrate to kbps
    float avgSentBitRatekbps = avgSentBitRate / 1000.0f;
-    //Sum the sequence rate mismatch:
+    // Sum the sequence rate mismatch:
-    //Mismatch here is based on difference between target rate the encoder used (in previous ~1sec) and the average actual
+    // Mismatch here is based on difference between target rate the encoder
-    //encoding rate at current time
+    // used (in previous ~1sec) and the average actual
-    if (fabs(_targetBitRate - avgSentBitRatekbps) <  THRESH_SUM_MM && _targetBitRate > 0.0 )
+    // encoding rate measured at current time
-        _sumSeqRateMM += (float) (fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate );
+    if (fabs(_targetBitRate - avgSentBitRatekbps) <  THRESH_SUM_MM &&
        _targetBitRate > 0.0 )
    {
        _sumSeqRateMM += (float)
            (fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate );
    }
-    //Update QM with the current new target and frame rate: these values are ones the encoder will use for the current/next ~1sec
+    // Update QM with the current new target and frame rate:
    // these values are ones the encoder will use for the current/next ~1sec
    _targetBitRate =  targetBitRate;
    _incomingFrameRate = incomingFrameRate;
-    //Update QM with an (average) encoder per_frame_bandwidth: this is the per_frame_bw for the next ~1sec
+    // Update QM with an (average) encoder per_frame_bandwidth:
    // this is the per_frame_bw for the current/next ~1sec
    _perFrameBandwidth  = 0.0f;
    if (_incomingFrameRate > 0.0f)
    {
@@ -191,119 +219,164 @@ VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate, float incomi
 }
 // Adjust the FEC rate based on the content and the network state
 // (packet loss rate, total rate/bandwidth, round trip time).
 // Note that packetLoss here is the filtered loss value.
 //
 // Inputs:  codeRateDelta - FEC value to be adjusted
 //          totalRate     - stored as the "previous" total rate
 //          frameRate     - not used by the current implementation
 //          rttTime       - stored as the "previous" round trip time
 //          packetLoss    - stored as the "previous" packet loss
 // Return:  codeRateDelta scaled by the adjustment factor. The factor is
 //          fixed at 1.0 for now, so the input value is returned unchanged.
 WebRtc_UWord8
 VCMQmSelect::AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate,
                             float frameRate,WebRtc_UWord16 rttTime,
                             WebRtc_UWord8 packetLoss)
 {
    // Default: no adjustment
    WebRtc_UWord8 codeRateDeltaAdjust = codeRateDelta;
    // Scale factor applied to codeRateDelta; 1.0 leaves it unchanged
    float adjustFec =  1.0f;
    // TODO (marpan):
    // Set FEC adjustment factor
    // The product is converted back to the 8-bit return type
    codeRateDeltaAdjust = static_cast<WebRtc_UWord8>(codeRateDelta * adjustFec);
     // Keep track of previous values of network state:
     // adjustment may be also based on pattern of changes in network state
    _prevTotalRate = totalRate;
    _prevRttTime = rttTime;
    _prevPacketLoss = packetLoss;
    return codeRateDeltaAdjust;
 }
 // Point the selector at a new set of content metrics.
 // Only the pointer is stored (the metrics themselves are not copied), and
 // the pointer held before the call is overwritten without being freed here.
 // NOTE(review): presumably the pointed-to metrics must remain valid until
 // SelectQuality() has read them -- confirm against the callers.
 void
 VCMQmSelect::UpdateContent(const VideoContentMetrics*  contentMetrics)
 {
     _contentMetrics = contentMetrics;
 }
 // Select the resolution factors: frame size and frame rate change: (QM modes)
 // Selection is either for going back up in resolution or for going down.
 WebRtc_Word32
-VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQualityMode** qm)
+VCMQmSelect::SelectQuality(VCMQualityMode** qm)
 {
    if (!_init)
    {
        return VCM_UNINITIALIZED;
    }
-    if (contentMetrics == NULL)
+    if (_contentMetrics == NULL)
    {
        Reset(); //default values
        *qm =  _qm;
        return VCM_OK;
    }
-    //Default settings
+    // Default settings
    _qm->spatialWidthFact = 1;
    _qm->spatialHeightFact = 1;
    _qm->temporalFact = 1;
    _contentMetrics = contentMetrics;
-    //Update native values
+    // Update native values
    _nativeWidth = _contentMetrics->nativeWidth;
    _nativeHeight = _contentMetrics->nativeHeight;
    _nativeFrameRate = _contentMetrics->nativeFrameRate;
-    //Aspect ratio: used for selection of 1x2,2x1,2x2
+    // Aspect ratio: used for selection of 1x2,2x1,2x2
    _aspectRatio = (float)_width / (float)_height;
    float avgTargetRate = 0.0f;
    float avgIncomingFrameRate = 0.0f;
    float ratioBufferLow = 0.0f;
    float rateMisMatch = 0.0f;
    float avgPacketLoss = 0.0f;
    if (_frameCnt > 0)
    {
        ratioBufferLow = (float)_lowBufferCnt / (float)_frameCnt;
    }
    if (_updateRateCnt > 0)
    {
-        //use seq-rate mismatch for now
+        // Use seq-rate mismatch for now
        rateMisMatch = (float)_sumSeqRateMM / (float)_updateRateCnt;
        //rateMisMatch = (float)_sumFrameRateMM / (float)_frameCntDelta;
-        //average target and incoming frame rates
+        // Average target and incoming frame rates
        avgTargetRate = (float)_sumTargetRate / (float)_updateRateCnt;
-        avgIncomingFrameRate = (float)_sumIncomingFrameRate / (float)_updateRateCnt;
+        avgIncomingFrameRate = (float)_sumIncomingFrameRate /
            (float)_updateRateCnt;
        // Average received packet loss rate
        avgPacketLoss =  (float)_sumPacketLoss / (float)_updateRateCnt;
    }
-    //For qm selection below, may want to weight the average encoder rates with the current (for next ~1sec) rate values
+    // For QM selection below, may want to weight the average encoder rates
-    //uniform average for now:
+    // with the current (for next ~1sec) rate values.
    // Uniform average for now:
    float w1 = 0.5f;
    float w2 = 0.5f;
    avgTargetRate = w1 * avgTargetRate + w2 * _targetBitRate;
    avgIncomingFrameRate = w1 * avgIncomingFrameRate + w2 * _incomingFrameRate;
-    //Set the maximum transitional rate and image type: for up-sampled spatial dimensions
+    // Set the maximum transitional rate and image type:
-    //Needed to get the transRate for going back up in spatial resolution (only 2x2 allowed in this version)
+    // for up-sampled spatial dimensions.
    // This is needed to get the transRate for going back up in
    // spatial resolution (only 2x2 allowed in this version).
    SetMaxRateForQM(2 * _width, 2 * _height);
    WebRtc_UWord8  imageType2  = _imageType;
    WebRtc_UWord32 maxRateQM2 = _maxRateQM;
-    //Set the maximum transitional rate and image type: for the input/encoder spatial dimensions
+    // Set the maximum transitional rate and image type:
    // for the encoder spatial dimensions.
    SetMaxRateForQM(_width, _height);
-    //Compute metric features
+    // Compute class state of the content.
    MotionNFD();
    Spatial();
    //
-    //Get transitional rate from table, based on image type and content class
+    // Get transitional rate from table, based on image type and content class.
    //
-    //Get image size class: map _imageType to 2 classes
+    // Get image class and content class: for going down spatially
    WebRtc_UWord8 imageClass = 1;
    if (_imageType <= 3) imageClass = 0;
    WebRtc_UWord8 contentClass  = 3 * _motion.level + _spatial.level;
    WebRtc_UWord8 tableIndex = imageClass * 9 + contentClass;
    float scaleTransRate = kScaleTransRateQm[tableIndex];
-    // for transRate for going back up spatially
+    // Get image class and content class: for going up spatially
    WebRtc_UWord8 imageClass2 = 1;
    if (imageType2 <= 3) imageClass2 = 0;
    WebRtc_UWord8 tableIndex2 = imageClass2 * 9 + contentClass;
    float scaleTransRate2 = kScaleTransRateQm[tableIndex2];
    //
-    WebRtc_UWord32 estimatedTransRateDown = (WebRtc_UWord32) (_incomingFrameRate * scaleTransRate * _maxRateQM / 30);
-    // Transitional rate for going down
-    WebRtc_UWord32 estimatedTransRateUpT =  (WebRtc_UWord32) (TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate * scaleTransRate * _maxRateQM / 30);
+    WebRtc_UWord32 estimatedTransRateDown = static_cast<WebRtc_UWord32>
-    WebRtc_UWord32 estimatedTransRateUpS =  (WebRtc_UWord32) (TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate * scaleTransRate2 * maxRateQM2 / 30);
+        (_incomingFrameRate * scaleTransRate * _maxRateQM / 30);
    // Transitional rate for going up temporally
    WebRtc_UWord32 estimatedTransRateUpT = static_cast<WebRtc_UWord32>
        (TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate *
         scaleTransRate * _maxRateQM / 30);
   // Transitional rate for going up spatially
    WebRtc_UWord32 estimatedTransRateUpS = static_cast<WebRtc_UWord32>
        (TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate *
        scaleTransRate2 * maxRateQM2 / 30);
    //
-    //done with transitional rate
+    // Done with transitional rates
    //
    WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1,
                   "Content Metrics: Motion = %d , Spatial = %d, Est. Trans. BR = %d",
                   _motion.level, _spatial.level, estimatedTransRateDown);
    //
    //CHECK FOR GOING BACK UP IN RESOLUTION
    //
    bool selectedUp = false;
-    //Check if native has been spatially down-sampled
+    // Check if native has been spatially down-sampled
    if (_stateDecFactorSpatial > 1)
    {
-        //check conditions on frame_skip and rate_mismatch
+        // Check conditions on buffer level and rate_mismatch
        if ( (avgTargetRate > estimatedTransRateUpS) &&
-             (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM) )
+             (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM))
        {
-            //width/height scaled back up: setting 0 indicates scaling back to native
+            // width/height scaled back up:
            // setting 0 indicates scaling back to native
            _qm->spatialHeightFact = 0;
            _qm->spatialWidthFact = 0;
            selectedUp = true;
@@ -313,25 +386,26 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality
    if (_stateDecFactorTemp > 1)
    {
        if ( (avgTargetRate > estimatedTransRateUpT) &&
-             (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM) )
+             (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM))
        {
-            //temporal scale back up: setting 0 indicates scaling back to native
+            // temporal scale back up:
            // setting 0 indicates scaling back to native
            _qm->temporalFact = 0;
            selectedUp = true;
        }
    }
-    //leave QM if we selected to go back up in either spatial or temporal resolution
+    // Leave QM if we selected to go back up in either spatial or temporal
    if (selectedUp == true)
    {
-        //Update down-sampling state
+        // Update down-sampling state
-        //Note: only temp reduction by 2 is allowed
+        // Note: only temp reduction by 2 is allowed
        if (_qm->temporalFact == 0)
        {
            _stateDecFactorTemp = _stateDecFactorTemp / 2;
        }
-        //Update down-sampling state
+        // Update down-sampling state
-        //Note: only spatial reduction by 2x2 is allowed
+        // Note: only spatial reduction by 2x2 is allowed
        if (_qm->spatialHeightFact == 0 && _qm->spatialWidthFact == 0 )
        {
            _stateDecFactorSpatial = _stateDecFactorSpatial / 4;
@@ -341,73 +415,36 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality
    }
    //
-    //done with checking for going back up
+    // Done with checking for going back up in resolution
    //
    //
    //CHECK FOR RESOLUTION REDUCTION
    //
-    //ST QM extraction if:
+    // Resolution reduction if:
-    // (1) target rate is lower than transitional rate (with safety margin), or
+    // (1) target rate is lower than transitional rate, or
-    // (2) frame skip is larger than threshold, or
+    // (2) buffer level is not stable, or
    // (3) rate mismatch is larger than threshold
-    if ( (avgTargetRate < estimatedTransRateDown ) || (ratioBufferLow > MAX_BUFFER_LOW)
+    // Bias down-sampling based on packet loss conditions
-         || (rateMisMatch > MAX_RATE_MM) )
+    if (avgPacketLoss > LOSS_THR)
    {
        estimatedTransRateDown = LOSS_RATE_FAC * estimatedTransRateDown;
    }
    if ((avgTargetRate < estimatedTransRateDown ) ||
        (ratioBufferLow > MAX_BUFFER_LOW)
        || (rateMisMatch > MAX_RATE_MM))
    {
        WebRtc_UWord8 spatialFact = 1;
        WebRtc_UWord8 tempFact = 1;
-        //Get the Action:
+        // Get the action
-        //Note: only consider spatial by 2x2 OR temporal reduction by 2 in this version
+        spatialFact = kSpatialAction[contentClass];
-        if (_motion.level == kLow && _spatial.level == kLow)
+        tempFact = kTemporalAction[contentClass];
-        {
+
            spatialFact = 1;
            tempFact = 1;
        }
        else if (_motion.level == kLow && _spatial.level == kHigh)
        {
            spatialFact = 1;
            tempFact = 2;
        }
        else if (_motion.level == kLow && _spatial.level == kDefault)
        {
            spatialFact = 1;
            tempFact = 2;
        }
        else if (_motion.level == kHigh && _spatial.level == kLow)
        {
            spatialFact = 4;
            tempFact = 1;
        }
        else if (_motion.level == kHigh && _spatial.level == kHigh)
        {
            spatialFact = 1;
            tempFact = 2;
        }
        else if (_motion.level == kHigh && _spatial.level == kDefault)
        {
            spatialFact = 4;
            tempFact = 1;
        }
        else if (_motion.level == kDefault && _spatial.level == kLow)
        {
            spatialFact = 4;
            tempFact = 1;
        }
        else if (_motion.level == kDefault && _spatial.level == kHigh)
        {
            spatialFact = 1;
            tempFact = 2;
        }
        else if (_motion.level == kDefault && _spatial.level == kDefault)
        {
            spatialFact = 1;
            tempFact = 1;
        }
        //
        switch(spatialFact)
        {
        case 4:
@@ -418,9 +455,9 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality
             //default is 1x2 (H)
            _qm->spatialWidthFact = 2;
            _qm->spatialHeightFact = 1;
-            //Select 1x2,2x1, or back to 2x2: depends on prediction errors, aspect ratio, and horizontalness of motion
+            // Select 1x2,2x1, or back to 2x2
-            //Note: directional selection not used in this version
+            // Note: directional selection not used in this version
-            //SelectSpatialDirectionMode((float) estimatedTransRateDown);
+            // SelectSpatialDirectionMode((float) estimatedTransRateDown);
            break;
        default:
            _qm->spatialWidthFact = 1;
@@ -429,48 +466,62 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality
        }
        _qm->temporalFact = tempFact;
-        //Sanity check on ST QM selection: override the settings for too small image size and frame rate
+        // Sanity check on ST QM selection:
-        //Also check limit the current down-sampling state
+        // override the settings for too small image size and frame rate
        // Also check the limit on current down-sampling state
-        //No spatial sampling if image size is too small (QCIF)
+        // No spatial sampling if image size is too small (QCIF)
-        if ( (_width * _height) <= MIN_IMAGE_SIZE  || _stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT)
+        if ( (_width * _height) <= MIN_IMAGE_SIZE  ||
            _stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT)
        {
            _qm->spatialWidthFact = 1;
            _qm->spatialHeightFact = 1;
        }
-        //No frame rate reduction below some point: use the (average) incoming frame rate
+        // No frame rate reduction below some point:
-        if ( avgIncomingFrameRate <= MIN_FRAME_RATE_QM  || _stateDecFactorTemp >= MAX_TEMP_DOWN_FACT)
+        // use the (average) incoming frame rate
        if ( avgIncomingFrameRate <= MIN_FRAME_RATE_QM  ||
            _stateDecFactorTemp >= MAX_TEMP_DOWN_FACT)
        {
            _qm->temporalFact = 1;
        }
-        //No down-sampling if current spatial-temporal downsampling state is above threshold
+        // No down-sampling if current downsampling state is above threshold
-        if (_stateDecFactorTemp * _stateDecFactorSpatial >= MAX_SPATIAL_TEMP_DOWN_FACT)
+        if (_stateDecFactorTemp * _stateDecFactorSpatial >=
            MAX_SPATIAL_TEMP_DOWN_FACT)
        {
            _qm->spatialWidthFact = 1;
            _qm->spatialHeightFact = 1;
            _qm->temporalFact = 1;
        }
        //
-        //done with sanity checks on ST QM selection
+        // Done with sanity checks on ST QM selection
        //
-        //Note: to disable spatial down-sampling
+        // Update down-sampling states
-        // _qm->spatialWidthFact = 1;
+        _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact
-        // _qm->spatialHeightFact = 1;
+            * _qm->spatialHeightFact;
        //Update down-sampling states
        _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact * _qm->spatialHeightFact;
        _stateDecFactorTemp = _stateDecFactorTemp * _qm->temporalFact;
        if (_qm->spatialWidthFact != 1 || _qm->spatialHeightFact != 1 ||
            _qm->temporalFact != 1)
        {
            WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1,
                         "Resolution reduction occurred"
                         "Content Metrics are: Motion = %d , Spatial = %d, "
                         "Rates are: Est. Trans. BR = %d, Avg.Target BR = %f",
                         _motion.level, _spatial.level,
                         estimatedTransRateDown, avgTargetRate);
        }
    }
    else
    {
      *qm = _qm;
      return VCM_OK;
    }
-    // done with checking for resolution reduction
+    // Done with checking for resolution reduction
    *qm = _qm;
    return VCM_OK;
@@ -481,9 +532,9 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality
 WebRtc_Word32
 VCMQmSelect::SelectSpatialDirectionMode(float transRate)
 {
-    //Default is 1x2 (H)
+    // Default is 1x2 (H)
-    //For bit rates well below transitional rate, we select 2x2
+    // For bit rates well below transitional rate, we select 2x2
    if ( _targetBitRate < transRate * RATE_RED_SPATIAL_2X2 )
    {
        _qm->spatialWidthFact = 2;
@@ -491,13 +542,13 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate)
        return VCM_OK;
    }
-    //Otherwise check prediction errors, aspect ratio, horizonalness of motion
+    // Otherwise check prediction errors, aspect ratio, horizontalness
    float spatialErr = _contentMetrics->spatialPredErr;
    float spatialErrH = _contentMetrics->spatialPredErrH;
    float spatialErrV = _contentMetrics->spatialPredErrV;
-    //favor 1x2 if aspect_ratio is 16:9
+    // Favor 1x2 if aspect_ratio is 16:9
    if (_aspectRatio >= 16.0f / 9.0f )
    {
        //check if 1x2 has lowest prediction error
@@ -507,7 +558,7 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate)
        }
    }
-    //check for 2x2 selection: favor 2x2 over 1x2 and 2x1
+    // Check for 2x2 selection: favor 2x2 over 1x2 and 2x1
    if (spatialErr < spatialErrH * (1.0f + SPATIAL_ERR_2X2_VS_H) &&
        spatialErr < spatialErrV * (1.0f + SPATIAL_ERR_2X2_VS_V))
    {
@@ -516,7 +567,7 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate)
         return VCM_OK;
    }
-    //check for 2x1 selection:
+    // Check for 2x1 selection:
    if (spatialErrV < spatialErrH * (1.0f - SPATIAL_ERR_V_VS_H) &&
        spatialErrV < spatialErr * (1.0f - SPATIAL_ERR_2X2_VS_V))
    {
@@ -534,7 +585,7 @@ VCMQmSelect::Coherence()
    float horizNZ  = _contentMetrics->motionHorizontalness;
    float distortionNZ  = _contentMetrics->motionClusterDistortion;
-    //Coherence measure: combine horizontalness with cluster distortion
+    // Coherence measure: combine horizontalness with cluster distortion
    _coherence.value = COH_MAX;
    if (distortionNZ > 0.)
    {
@@ -558,7 +609,7 @@ VCMQmSelect::MotionNFD()
 {
    _motion.value = _contentMetrics->motionMagnitudeNZ;
-    // determine motion level
+    // Determine motion level
    if (_motion.value < LOW_MOTION_NFD)
    {
        _motion.level = kLow;
@@ -581,17 +632,18 @@ VCMQmSelect::Motion()
    float sizeZeroMotion = _contentMetrics->sizeZeroMotion;
    float motionMagNZ = _contentMetrics->motionMagnitudeNZ;
-    //take product of size and magnitude with equal weight for now
+    // Take product of size and magnitude with equal weight
    _motion.value = (1.0f - sizeZeroMotion) * motionMagNZ;
-    //stabilize: motionMagNZ could be large when only few motion blocks are non-zero
+    // Stabilize: motionMagNZ could be large when only a
    // few motion blocks are non-zero
    _stationaryMotion = false;
    if (sizeZeroMotion > HIGH_ZERO_MOTION_SIZE)
    {
        _motion.value = 0.0f;
        _stationaryMotion = true;
    }
-    // determine motion level
+    // Determine motion level
    if (_motion.value < LOW_MOTION)
    {
        _motion.level = kLow;
@@ -613,11 +665,11 @@ VCMQmSelect::Spatial()
    float spatialErr =  _contentMetrics->spatialPredErr;
    float spatialErrH = _contentMetrics->spatialPredErrH;
    float spatialErrV = _contentMetrics->spatialPredErrV;
-    //Spatial measure: take average of 3 prediction errors
+    // Spatial measure: take average of 3 prediction errors
    _spatial.value = (spatialErr + spatialErrH + spatialErrV) / 3.0f;
    float scale = 1.0f;
-    //Reduce thresholds for HD scenes
+    // Reduce thresholds for HD scenes
    if (_imageType > 3)
    {
        scale = (float)SCALE_TEXTURE_HD;
@@ -635,8 +687,6 @@ VCMQmSelect::Spatial()
    {
         _spatial.level = kDefault;
    }
 }
@@ -675,10 +725,10 @@ VCMQmSelect::SetMaxRateForQM(WebRtc_UWord32 width, WebRtc_UWord32 height)
        _imageType  = 6;
    }
-    // set max rate based on image size
+    // Set max rate based on image size
    _maxRateQM = kMaxRateQm[_imageType];
    return VCM_OK;
 }
-}
+} // end of namespace
--- a/modules/video_coding/main/source/qm_select.h
+++ b/modules/video_coding/main/source/qm_select.h
@@ -24,7 +24,8 @@ struct VideoContentMetrics;
 struct VCMQualityMode
 {
-    VCMQualityMode():spatialWidthFact(1), spatialHeightFact(1), temporalFact(1){}
+    VCMQualityMode():spatialWidthFact(1), spatialHeightFact(1),
        temporalFact(1){}
    void Reset()
    {
        spatialWidthFact = 1;
@@ -65,22 +66,33 @@ public:
    ~VCMQmSelect();
    // Initialize:
-    WebRtc_Word32 Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width, WebRtc_UWord32 height);
+    WebRtc_Word32 Initialize(float bitRate, float userFrameRate,
                             WebRtc_UWord32 width, WebRtc_UWord32 height);
    // Allow the user to set preferences: favor frame rate/resolution
    WebRtc_Word32 SetPreferences(WebRtc_Word8 resolPref);
-    // Extract ST QM behavior and make decision
+    // Extract ST (spatio-temporal) QM behavior and make decision
-    // Inputs: Content Metrics per frame (averaged over time)
+    // Inputs: qm: Reference to the quality modes pointer
-    //         qm: Reference to the quality modes pointer
+    WebRtc_Word32 SelectQuality(VCMQualityMode** qm);
    WebRtc_Word32 SelectQuality(const VideoContentMetrics* contentMetrics, VCMQualityMode** qm);
-    // Update QMselect with actual bit rate (size of the latest encoded frame) and frame type
+    // Update QM with actual bit rate
-    // -> update buffer level and frame-mismatch
+    // (size of the latest encoded frame) and frame type.
-    void UpdateEncodedSize(WebRtc_Word64 encodedSize, FrameType encodedFrameType);
+    void UpdateEncodedSize(WebRtc_Word64 encodedSize,
                           FrameType encodedFrameType);
    // Update QM with new bit/frame/loss rates from SetTargetRates
    void UpdateRates(float targetBitRate, float avgSentRate,
                     float incomingFrameRate, WebRtc_UWord8 packetLoss);
    // Update QM with the content metrics
    void UpdateContent(const VideoContentMetrics*  contentMetrics);
    // Adjust FEC rate based on content
    WebRtc_UWord8  AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate,
                                   float frameRate, WebRtc_UWord16 rttTime,
                                   WebRtc_UWord8 packetLoss);
    // Update QM with new rates from SetTargetRates
    void UpdateRates(float targetBitRate, float avgSentRate, float incomingFrameRate);
    // Select 1x2, 2x1, or 2x2 spatial sampling mode
    WebRtc_Word32 SelectSpatialDirectionMode(float transRate);
@@ -113,7 +125,7 @@ private:
    // Content Data
    const VideoContentMetrics*    _contentMetrics;
-    // Encoder stats/rate-control metrics
+    // Encoder rate control parameters, network parameters
    float                        _targetBitRate;
    float                        _userFrameRate;
    float                        _incomingFrameRate;
@@ -123,9 +135,13 @@ private:
    float                        _sumIncomingFrameRate;
    float                        _sumSeqRateMM;
    float                        _sumFrameRateMM;
    float                        _sumPacketLoss;
    float                        _prevTotalRate;
    WebRtc_UWord16               _prevRttTime;
    WebRtc_UWord8                _prevPacketLoss;
    WebRtc_Word64                _sumEncodedBytes;
-    //Encoder and native frame sizes
+    // Encoder and native frame sizes
    WebRtc_UWord32               _width;
    WebRtc_UWord32               _height;
    WebRtc_UWord32               _nativeWidth;
@@ -135,26 +151,26 @@ private:
    WebRtc_UWord32               _nativeFrameRate;
    WebRtc_UWord8                _stateDecFactorTemp;
-    //Counters
+    // Counters
    WebRtc_UWord32               _frameCnt;
    WebRtc_UWord32               _frameCntDelta;
    WebRtc_UWord32               _updateRateCnt;
    WebRtc_UWord32               _lowBufferCnt;
-    //Content L/M/H values
+    // Content L/M/H values
    VCMContFeature               _motion;
    VCMContFeature               _spatial;
    VCMContFeature               _coherence;
    bool                         _stationaryMotion;
-    //aspect ratio
+    // Aspect ratio
    float                        _aspectRatio;
-    //Max rate to saturate the transitionalRate
+    // Max rate to saturate the transitionalRate
    WebRtc_UWord32               _maxRateQM;
    WebRtc_UWord8                _imageType;
-    //User preference for resolution or qmax change
+    // User preference for resolution or qmax change
    WebRtc_UWord8                _userResolutionPref;
    bool                         _init;
    VCMQualityMode*              _qm;
--- a/modules/video_coding/main/source/qm_select_data.h
+++ b/modules/video_coding/main/source/qm_select_data.h
@@ -13,7 +13,7 @@
 /***************************************************************
 *QMSelectData.h
-* This file includes parameters used by the Quality Modes selection process
+* This file includes parameters for content-aware media optimization
 ****************************************************************/
 #include "typedefs.h"
@@ -21,38 +21,48 @@
 namespace webrtc
 {
-//Initial level of buffer in secs: should corresponds to wrapper settings
+//
 // PARAMETERS FOR RESOLUTION ADAPTATION
 //
 // Initial level of buffer in secs: should correspond to wrapper settings
 #define INIT_BUFFER_LEVEL 0.5
-//
+// Threshold of (max) buffer size below which we consider too low (underflow)
 //PARAMETERS FOR QM SELECTION
 //
 //Threshold of (max) buffer size below which we consider too low (underflow)
 #define PERC_BUFFER_THR  0.10
-//Threshold on rate mismatch
+// Threshold on rate mismatch
 #define MAX_RATE_MM  0.5
-//Threshold on the occurrences of low buffer levels
+// Avoid outliers in seq-rate MM
 #define THRESH_SUM_MM 1000
 // Threshold on the occurrences of low buffer levels
 #define MAX_BUFFER_LOW 0.5
-//Factor for transitional rate for going back up in resolution
+// Factor for transitional rate for going back up in resolution
 #define TRANS_RATE_SCALE_UP_SPATIAL     1.25
 #define TRANS_RATE_SCALE_UP_TEMP        1.25
-//Maximum possible transitional rate: (units in kbps), for 30fps
+// Threshold on packet loss rate, above which favor resolution reduction
 #define LOSS_THR 0.1
 // Factor for reducing transitional bitrate under packet loss
 #define LOSS_RATE_FAC 1.0
 // Maximum possible transitional rate for down-sampling:
 // (units in kbps), for 30fps
 // One entry per standard image size, listed from the smallest (QCIF) to the
 // largest (1080HD) resolution.
 // NOTE(review): presumably indexed by the image-type class (_imageType) --
 // confirm against the users of this table.
 const WebRtc_UWord16 kMaxRateQm[7] = {
    100,  //QCIF
-    500, //CIF
+    500,  //CIF
-    800, //VGA
+    800,  //VGA
    1500, //4CIF
    2000, //720 HD 4:3,
    2500, //720 HD 16:9
    3000  //1080HD
 };
-//Scale for transitional rate: based on content class
+// Scale for transitional rate: based on content class
 // motion=L/H/D,spatial==L/H/D: for low, high, middle levels
 const float kScaleTransRateQm[18] = {
    //4CIF and lower
@@ -63,7 +73,7 @@ const float kScaleTransRateQm[18] = {
    0.50f,       // H, H
    0.50f,       // H, D
    0.50f,       // D, L
-    0.625f,       // D, D
+    0.63f,       // D, D
    0.25f,       // D, H
    //over 4CIF: WHD, HD
@@ -74,62 +84,51 @@ const float kScaleTransRateQm[18] = {
    0.50f,       // H, H
    0.50f,       // H, D
    0.50f,       // D, L
-    0.625f,       // D, D
+    0.63f,       // D, D
    0.25f        // D, H
 };
-//Control the total amount of down-sampling allowed
+// Action for down-sampling:
 // motion=L/H/D,spatial==L/H/D: for low, high, middle levels
 // Spatial action per content-class pair; pair labels use L/H/D
 // (low / high / middle). Entries are stored in the order shown,
 // three pairs per row.
 // NOTE(review): values look like spatial down-sampling factors -- confirm.
 const WebRtc_UWord8 kSpatialAction[9] = {
     1, 1, 1,    // (L,L) (L,H) (L,D)
     4, 1, 4,    // (H,L) (H,H) (H,D)
     4, 1, 1     // (D,L) (D,D) (D,H)
 };
 // Temporal action per content-class pair; pair labels use L/H/D
 // (low / high / middle). Entries are stored in the order shown,
 // three pairs per row.
 // NOTE(review): values look like temporal down-sampling factors -- confirm.
 const WebRtc_UWord8 kTemporalAction[9] = {
     1, 2, 2,    // (L,L) (L,H) (L,D)
     1, 2, 1,    // (H,L) (H,H) (H,D)
     1, 2, 1     // (D,L) (D,D) (D,H)
 };
 // Control the total amount of down-sampling allowed
 #define MAX_SPATIAL_DOWN_FACT       4
 #define MAX_TEMP_DOWN_FACT          4
 #define MAX_SPATIAL_TEMP_DOWN_FACT  8
-//
+// Minimum image size for a spatial down-sampling:
-//
+// no spatial down-sampling if input size <= MIN_IMAGE_SIZE
 //
 //PARAMETERS FOR SETTING LOW/HIGH VALUES OF METRICS:
 //
 //Threshold to determine if high amount of zero_motion
 #define HIGH_ZERO_MOTION_SIZE 0.95
 //Thresholds for motion: motion level is derived from motion vectors: motion = size_nz*magn_nz
 #define HIGH_MOTION 0.7
 #define LOW_MOTION  0.4
 //Thresholds for motion: motion level is from NFD
 #define HIGH_MOTION_NFD 0.075
 #define LOW_MOTION_NFD  0.04
 //Thresholds for spatial prediction error: this is applied on the min(2x2,1x2,2x1)
 #define HIGH_TEXTURE 0.035
 #define LOW_TEXTURE  0.025
 //Used to reduce thresholds for HD scenes: correction factor since higher
 //correlation in HD scenes means lower spatial prediction error.
 //The definition deliberately has no trailing ';' so that the macro expands
 //to a plain constant and can be used inside larger expressions.
 #define SCALE_TEXTURE_HD 0.9
 //Thresholds for distortion and horizontalness: applied on product: horiz_nz/dist_nz
 #define COHERENCE_THR   1.0
 #define COH_MAX 10
 //
 //
 #define RATE_RED_SPATIAL_2X2    0.6  //percentage reduction in transitional bitrate where 2x2 is selected over 1x2/2x1
 #define SPATIAL_ERR_2X2_VS_H    0.1  //percentage to favor 2x2
 #define SPATIAL_ERR_2X2_VS_V    0.1  //percentage to favor 2x2 over V
 #define SPATIAL_ERR_V_VS_H      0.1  //percentage to favor H over V
 //Minimum image size for a spatial mode selection: no spatial down-sampling if input size <= MIN_IMAGE_SIZE
 #define MIN_IMAGE_SIZE  25344 //176*144
-//Minimum frame rate for temporal mode: no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE
+// Minimum frame rate for temporal down-sampling:
 // no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE
 #define MIN_FRAME_RATE_QM  8
-//Avoid outliers in seq-rate MM
+// Boundaries for the closest standard frame size
 #define THRESH_SUM_MM 1000
 const WebRtc_UWord32 kFrameSizeTh[6] = {
    // boundaries for the closest standard frame size
    63360,    //between 176*144 and 352*288
    204288,   //between 352*288 and 640*480
    356352,   //between 640*480 and 704*576
@@ -139,6 +138,48 @@ const WebRtc_UWord32 kFrameSizeTh[6] = {
 };
 //
 // PARAMETERS FOR FEC ADJUSTMENT: TODO (marpan)
 //
 //
 // PARAMETERS FOR SETTING LOW/HIGH STATES OF CONTENT METRICS:
 //
 // Threshold to determine if high amount of zero_motion
 #define HIGH_ZERO_MOTION_SIZE 0.95
 // Thresholds for motion:
 // motion level is derived from motion vectors: motion = size_nz*magn_nz
 #define HIGH_MOTION 0.7
 #define LOW_MOTION  0.4
 // Thresholds for motion: motion level is from NFD
 #define HIGH_MOTION_NFD 0.075
 #define LOW_MOTION_NFD  0.04
 // Thresholds for spatial prediction error:
 // this is applied on the min(2x2,1x2,2x1)
 #define HIGH_TEXTURE 0.035
 #define LOW_TEXTURE  0.025
 // Used to reduce thresholds for HD scenes: correction factor since higher
 // correlation in HD scenes means lower spatial prediction error.
 // The definition deliberately has no trailing ';' so that the macro expands
 // to a plain constant and can be used inside larger expressions.
 #define SCALE_TEXTURE_HD 0.9
 // Thresholds for distortion and horizontalness:
 // applied on product: horiz_nz/dist_nz
 #define COHERENCE_THR   1.0
 #define COH_MAX 10
 // percentage reduction in transitional bitrate for 2x2 selected over 1x2/2x1
 #define RATE_RED_SPATIAL_2X2    0.6
 #define SPATIAL_ERR_2X2_VS_H    0.1  //percentage to favor 2x2
 #define SPATIAL_ERR_2X2_VS_V    0.1  //percentage to favor 2x2 over V
 #define SPATIAL_ERR_V_VS_H      0.1  //percentage to favor H over V
 } // namespace webrtc
 #endif // WEBRTC_MODULES_VIDEO_CODING_SOURCE_QM_SELECT_DATA_H_