Updates for resolution adaptation.

1) added support for two additional modes: 
    -3/4 spatial down-sampling
    -2/3 frame rate reduction
2) updated unittest and added a few more tests
3) some code refactoring
Review URL: https://webrtc-codereview.appspot.com/429005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1854 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
marpan@webrtc.org 2012-03-07 17:16:10 +00:00
parent 9a065d1eae
commit accf607b3e
5 changed files with 1229 additions and 649 deletions

View File

@ -180,7 +180,7 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate,
// Update encoding rates following protection settings
_frameDropper->SetRates(static_cast<float>(_targetBitRate), 0);
if (_enableQm && _numLayers == 1)
if (_enableQm)
{
// Update QM with rates
_qmResolution->UpdateRates((float)_targetBitRate, sent_video_rate,
@ -291,7 +291,7 @@ VCMMediaOptimization::SetEncodingData(VideoCodecType sendCodecType,
_numLayers = (numLayers <= 1) ? 1 : numLayers; // Can also be zero.
WebRtc_Word32 ret = VCM_OK;
ret = _qmResolution->Initialize((float)_targetBitRate, _userFrameRate,
_codecWidth, _codecHeight);
_codecWidth, _codecHeight, _numLayers);
return ret;
}
@ -575,68 +575,44 @@ VCMMediaOptimization::checkStatusForQMchange()
}
bool
VCMMediaOptimization::QMUpdate(VCMResolutionScale* qm)
{
// Check for no change
if (qm->spatialHeightFact == 1 &&
qm->spatialWidthFact == 1 &&
qm->temporalFact == 1) {
return false;
}
bool VCMMediaOptimization::QMUpdate(VCMResolutionScale* qm) {
// Check for no change
if (!qm->change_resolution) {
return false;
}
// Temporal
WebRtc_UWord32 frameRate = static_cast<WebRtc_UWord32>
(_incomingFrameRate + 0.5f);
// Check for change in frame rate.
if (qm->temporal_fact != 1.0f) {
_incomingFrameRate = _incomingFrameRate / qm->temporal_fact + 0.5f;
memset(_incomingFrameTimes, -1, sizeof(_incomingFrameTimes));
}
// Check if go back up in temporal resolution
if (qm->temporalFact == 0) {
// Currently only allow for 1/2 frame rate reduction per action.
// TODO (marpan): allow for 2/3 reduction.
frameRate = (WebRtc_UWord32) 2 * _incomingFrameRate;
}
// go down in temporal resolution
else {
frameRate = (WebRtc_UWord32)(_incomingFrameRate / qm->temporalFact + 1);
}
// Reset _incomingFrameRate if temporal action was selected.
if (qm->temporalFact != 1) {
memset(_incomingFrameTimes, -1, sizeof(_incomingFrameTimes));
_incomingFrameRate = frameRate;
}
// Spatial
WebRtc_UWord32 height = _codecHeight;
WebRtc_UWord32 width = _codecWidth;
// Check if go back up in spatial resolution, and update frame sizes.
// Currently only allow for 2x2 spatial down-sampling.
// TODO (marpan): allow for 1x2, 2x1, and 4/3x4/3 (or 3/2x3/2).
if (qm->spatialHeightFact == 0 && qm->spatialWidthFact == 0) {
width = _codecWidth * 2;
height = _codecHeight * 2;
} else {
width = _codecWidth / qm->spatialWidthFact;
height = _codecHeight / qm->spatialHeightFact;
}
_codecWidth = width;
_codecHeight = height;
// New frame sizes should never exceed the original sizes
// from SetEncodingData().
// Check for change in frame size.
if (qm->spatial_height_fact != 1.0 || qm->spatial_width_fact != 1.0) {
_codecWidth = static_cast<uint16_t>(_codecWidth /
qm->spatial_width_fact);
_codecHeight = static_cast<uint16_t>(_codecHeight /
qm->spatial_height_fact);
// New frame sizes should not exceed original size from SetEncodingData().
assert(_codecWidth <= _initCodecWidth);
assert(_codecHeight <= _initCodecHeight);
// Check that new frame sizes are multiples of two.
assert(_codecWidth % 2 == 0);
assert(_codecHeight % 2 == 0);
}
WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideoCoding, _id,
WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideoCoding, _id,
"Quality Mode Update: W = %d, H = %d, FR = %f",
width, height, frameRate);
_codecWidth, _codecHeight, _incomingFrameRate);
// Update VPM with new target frame rate and size
_videoQMSettingsCallback->SetVideoQMSettings(frameRate, width, height);
// Update VPM with new target frame rate and size
_videoQMSettingsCallback->SetVideoQMSettings(_incomingFrameRate,
_codecWidth,
_codecHeight);
_content->UpdateFrameRate(frameRate);
_qmResolution->UpdateCodecFrameSize(width, height);
return true;
_content->UpdateFrameRate(_incomingFrameRate);
_qmResolution->UpdateCodecFrameSize(_codecWidth, _codecHeight);
return true;
}
void

File diff suppressed because it is too large Load Diff

View File

@ -23,15 +23,33 @@ struct VideoContentMetrics;
struct VCMResolutionScale {
VCMResolutionScale()
: spatialWidthFact(1),
spatialHeightFact(1),
temporalFact(1) {
: spatial_width_fact(1.0f),
spatial_height_fact(1.0f),
temporal_fact(1.0f),
change_resolution(false) {
}
uint8_t spatialWidthFact;
uint8_t spatialHeightFact;
uint8_t temporalFact;
float spatial_width_fact;
float spatial_height_fact;
float temporal_fact;
bool change_resolution;
};
// Standard frame-size classes, listed in order of increasing pixel count
// (see the per-entry resolutions below). kNumImageTypes is not a frame size:
// it is the number of entries and is used to size per-type tables such as
// kSizeOfImageType.
enum ImageType {
kQCIF = 0, // 176x144
kHCIF, // 264x216 = half(~3/4x3/4) CIF.
kQVGA, // 320x240 = quarter VGA.
kCIF, // 352x288
kHVGA, // 480x360 = half(~3/4x3/4) VGA.
kVGA, // 640x480
kQFULLHD, // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
kWHD, // 1280x720
kFULLHD, // 1920x1080
kNumImageTypes
};
// Pixel count (width * height) for each ImageType, in enum order:
// 176*144, 264*216, 320*240, 352*288, 480*360, 640*480, 960*540, 1280*720,
// 1920*1080.
const uint32_t kSizeOfImageType[kNumImageTypes] =
{ 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600 };
enum LevelClass {
kLow,
kHigh,
@ -51,12 +69,44 @@ struct VCMContFeature {
LevelClass level;
};
enum ResolutionAction {
kDownResolution,
enum UpDownAction {
kUpResolution,
kNoChangeResolution
kDownResolution
};
// Spatial down-sampling modes. kNumModesSpatial is the number of modes and
// sizes the kFactorWidthSpatial / kFactorHeightSpatial tables.
enum SpatialAction {
kNoChangeSpatial,
kOneHalfSpatialUniform, // 3/4 x 3/4: 9/16 ~1/2 pixel reduction.
kOneQuarterSpatialUniform, // 1/2 x 1/2: 1/4 pixel reduction.
kNumModesSpatial
};
// Temporal (frame rate) down-sampling modes. kNumModesTemporal is the number
// of modes and sizes the kFactorTemporal table.
enum TemporalAction {
kNoChangeTemporal,
kTwoThirdsTemporal, // 2/3 frame rate reduction
kOneHalfTemporal, // 1/2 frame rate reduction
kNumModesTemporal
};
// A combined resolution action: one spatial mode plus one temporal mode.
// Default-constructed to "no change" in both dimensions.
struct ResolutionAction {
ResolutionAction()
: spatial(kNoChangeSpatial),
temporal(kNoChangeTemporal) {
}
SpatialAction spatial;
TemporalAction temporal;
};
// Down-sampling factors for spatial (width and height), and temporal.
// Each table has one entry per mode (kNumModesSpatial / kNumModesTemporal).
// The first entry, 1.0, is no change; presumably the tables are indexed by
// the SpatialAction / TemporalAction value -- confirm against the users of
// these tables.
const float kFactorWidthSpatial[kNumModesSpatial] =
{ 1.0f, 4.0f / 3.0f, 2.0f };
const float kFactorHeightSpatial[kNumModesSpatial] =
{ 1.0f, 4.0f / 3.0f, 2.0f };
const float kFactorTemporal[kNumModesTemporal] =
{ 1.0f, 1.5f, 2.0f };
enum EncoderState {
kStableEncoding, // Low rate mis-match, stable buffer levels.
kStressedEncoding, // Significant over-shooting of target rate,
@ -79,7 +129,7 @@ class VCMQmMethod {
uint8_t ComputeContentClass();
// Update with the content metrics.
void UpdateContent(const VideoContentMetrics* contentMetrics);
void UpdateContent(const VideoContentMetrics* content_metrics);
// Compute spatial texture magnitude and level.
// Spatial texture is a spatial prediction error measure.
@ -90,29 +140,32 @@ class VCMQmMethod {
void ComputeMotionNFD();
// Get the imageType (CIF, VGA, HD, etc) for the system width/height.
uint8_t GetImageType(uint16_t width, uint16_t height);
ImageType GetImageType(uint16_t width, uint16_t height);
// Return the closest image type.
ImageType FindClosestImageType(uint16_t width, uint16_t height);
// Get the frame rate level.
LevelClass FrameRateLevel(float frame_rate);
protected:
// Content Data.
const VideoContentMetrics* _contentMetrics;
const VideoContentMetrics* content_metrics_;
// Encoder frame sizes and native frame sizes.
uint16_t _width;
uint16_t _height;
uint16_t _nativeWidth;
uint16_t _nativeHeight;
float _aspectRatio;
uint16_t width_;
uint16_t height_;
uint16_t native_width_;
uint16_t native_height_;
float aspect_ratio_;
// Image type and frame rate level, for the current encoder resolution.
uint8_t _imageType;
LevelClass _frameRateLevel;
ImageType image_type_;
LevelClass framerate_level_;
// Content class data.
VCMContFeature _motion;
VCMContFeature _spatial;
uint8_t _contentClass;
bool _init;
VCMContFeature motion_;
VCMContFeature spatial_;
uint8_t content_class_;
bool init_;
};
// Resolution settings class
@ -135,27 +188,35 @@ class VCMQmResolution : public VCMQmMethod {
EncoderState GetEncoderState();
// Initialize after SetEncodingData in media_opt.
int Initialize(float bitRate, float userFrameRate,
uint16_t width, uint16_t height);
int Initialize(float bitrate,
float user_framerate,
uint16_t width,
uint16_t height,
int num_layers);
// Update the encoder frame size.
void UpdateCodecFrameSize(uint16_t width, uint16_t height);
// Update with actual bit rate (size of the latest encoded frame)
// and frame type, after every encoded frame.
void UpdateEncodedSize(int encodedSize,
FrameType encodedFrameType);
void UpdateEncodedSize(int encoded_size,
FrameType encoded_frame_type);
// Update with new target bitrate, actual encoder sent rate, frame_rate,
// loss rate: every ~1 sec from SetTargetRates in media_opt.
void UpdateRates(float targetBitRate, float encoderSentRate,
float incomingFrameRate, uint8_t packetLoss);
void UpdateRates(float target_bitrate,
float encoder_sent_rate,
float incoming_framerate,
uint8_t packet_loss);
// Extract ST (spatio-temporal) resolution action.
// Inputs: qm: Reference to the quality modes pointer.
// Output: the spatial and/or temporal scale change.
int SelectResolution(VCMResolutionScale** qm);
// Set the default resolution action.
void SetDefaultAction();
// Compute rates for the selection of down-sampling action.
void ComputeRatesForSelection();
@ -171,57 +232,89 @@ class VCMQmResolution : public VCMQmMethod {
// Check the condition for going up in resolution by the scale factors:
// |facWidth|, |facHeight|, |facTemp|.
// |scaleFac| is a scale factor for the transition rate.
bool ConditionForGoingUp(uint8_t facWidth, uint8_t facHeight,
uint8_t facTemp,
float scaleFac);
bool ConditionForGoingUp(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Get the bitrate threshold for the resolution action.
// The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action.
// |scaleFac| is a scale factor for the transition rate.
float GetTransitionRate(uint8_t facWidth, uint8_t facHeight,
uint8_t facTemp, float scaleFac);
float GetTransitionRate(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Update the downsampling state.
void UpdateDownsamplingState(ResolutionAction action);
// Update the down-sampling state.
void UpdateDownsamplingState(UpDownAction up_down);
// Return a state based on average target rate relative transition rate.
uint8_t RateClass(float transition_rate);
// Adjust the action selected from the table.
void AdjustAction();
// Check if the new frame sizes are still divisible by 2.
void CheckForEvenFrameSize();
// Insert latest down-sampling action into the history list.
void InsertLatestDownAction();
// Remove the last (first element) down-sampling action from the list.
void RemoveLastDownAction();
// Check constraints on the amount of down-sampling allowed.
void ConstrainAmountOfDownSampling();
// For going up in resolution: pick spatial or temporal action,
// if both actions were separately selected.
void PickSpatialOrTemporal();
// Select the directional (1x2 or 2x1) spatial down-sampling action.
void SelectSpatialDirectionMode(float transRate);
void SelectSpatialDirectionMode(float transition_rate);
private:
VCMResolutionScale* _qm;
enum { kDownActionHistorySize = 10};
VCMResolutionScale* qm_;
// Encoder rate control parameters.
float _targetBitRate;
float _userFrameRate;
float _incomingFrameRate;
float _perFrameBandwidth;
float _bufferLevel;
float target_bitrate_;
float user_framerate_;
float incoming_framerate_;
float per_frame_bandwidth_;
float buffer_level_;
// Data accumulated every ~1sec from MediaOpt.
float _sumTargetRate;
float _sumIncomingFrameRate;
float _sumRateMM;
float _sumRateMMSgn;
float _sumPacketLoss;
float sum_target_rate_;
float sum_incoming_framerate_;
float sum_rate_MM_;
float sum_rate_MM_sgn_;
float sum_packet_loss_;
// Counters.
uint32_t _frameCnt;
uint32_t _frameCntDelta;
uint32_t _updateRateCnt;
uint32_t _lowBufferCnt;
uint32_t frame_cnt_;
uint32_t frame_cnt_delta_;
uint32_t update_rate_cnt_;
uint32_t low_buffer_cnt_;
// Resolution state parameters.
uint8_t _stateDecFactorSpatial;
uint8_t _stateDecFactorTemp;
float state_dec_factor_spatial_;
float state_dec_factor_temporal_;
// Quantities used for selection.
float _avgTargetRate;
float _avgIncomingFrameRate;
float _avgRatioBufferLow;
float _avgRateMisMatch;
float _avgRateMisMatchSgn;
float _avgPacketLoss;
EncoderState _encoderState;
float avg_target_rate_;
float avg_incoming_framerate_;
float avg_ratio_buffer_low_;
float avg_rate_mismatch_;
float avg_rate_mismatch_sgn_;
float avg_packet_loss_;
EncoderState encoder_state_;
ResolutionAction action_;
// Short history of the down-sampling actions from the Initialize() state.
// This is needed for going up in resolution. Since the total amount of
// down-sampling actions are constrained, the length of the list need not be
// large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
ResolutionAction down_action_history_[kDownActionHistorySize];
int num_layers_;
};
// Robustness settings class.
@ -235,24 +328,24 @@ class VCMQmRobustness : public VCMQmMethod {
// Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
// Returns an adjustment factor.
float AdjustFecFactor(uint8_t codeRateDelta,
float totalRate,
float frameRate,
uint32_t rttTime,
uint8_t packetLoss);
float AdjustFecFactor(uint8_t code_rate_delta,
float total_rate,
float framerate,
uint32_t rtt_time,
uint8_t packet_loss);
// Set the UEP protection on/off.
bool SetUepProtection(uint8_t codeRateDelta,
float totalRate,
uint8_t packetLoss,
bool frameType);
bool SetUepProtection(uint8_t code_rate_delta,
float total_rate,
uint8_t packet_loss,
bool frame_type);
private:
// Previous state of network parameters.
float _prevTotalRate;
uint32_t _prevRttTime;
uint8_t _prevPacketLoss;
uint8_t _prevCodeRateDelta;
float prev_total_rate_;
uint32_t prev_rtt_time_;
uint8_t prev_packet_loss_;
uint8_t prev_code_rate_delta_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_

View File

@ -23,11 +23,8 @@ namespace webrtc {
// PARAMETERS FOR RESOLUTION ADAPTATION
//
// Initial level of buffer in secs: should correspond to wrapper settings.
const float kInitBufferLevel = 0.5f;
// Optimal level of buffer in secs: should correspond to wrapper settings.
const float kOptBufferLevel = 0.6f;
const float kOptBufferLevel = 0.5f;
// Threshold of (max) buffer size below which we consider too low (underflow).
const float kPercBufferThr = 0.10f;
@ -42,6 +39,9 @@ const float kMaxRateMisMatch = 0.5f;
const float kRateOverShoot = 0.75f;
const float kRateUnderShoot = 0.75f;
// Factor to favor weighting the average rates with the current/last data.
const float kWeightRate = 0.70f;
// Factor for transitional rate for going back up in resolution.
const float kTransRateScaleUpSpatial = 1.25f;
const float kTransRateScaleUpTemp = 1.25f;
@ -53,16 +53,19 @@ const float kPacketLossThr = 0.1f;
// Factor for reducing transitional bitrate under packet loss.
const float kPacketLossRateFac = 1.0f;
// Maximum possible transitional rate for down-sampling:
// (units in kbps), for 30fps.
const uint16_t kMaxRateQm[7] = {
100, // QCIF
const uint16_t kMaxRateQm[9] = {
50, // QCIF
100, // kHCIF
175, // kQVGA
250, // CIF
350, // HVGA
500, // VGA
800, // 4CIF
1000, // 720 HD 4:3,
1500, // 720 HD 16:9
2000 // 1080HD
1000, // QFULLHD
1500, // WHD
2000 // FULLHD
};
// Frame rate scale for maximum transition rate.
@ -75,32 +78,37 @@ const float kFrameRateFac[3] = {
// Scale for transitional rate: based on content class
// motion=L/H/D,spatial==L/H/D: for low, high, middle levels
const float kScaleTransRateQm[18] = {
// 4CIF and lower
// VGA and lower
0.50f, // L, L
0.50f, // L, H
0.50f, // L, D
0.50f, // H ,L
0.25f, // H, H
0.25f, // H, D
0.35f, // H, H
0.35f, // H, D
0.50f, // D, L
0.50f, // D, D
0.25f, // D, H
0.35f, // D, H
// over 4CIF: WHD, HD
// over VGA
0.50f, // L, L
0.50f, // L, H
0.50f, // L, D
0.50f, // H ,L
0.25f, // H, H
0.25f, // H, D
0.35f, // H, H
0.35f, // H, D
0.50f, // D, L
0.50f, // D, D
0.25f, // D, H
0.35f, // D, H
};
// Threshold on the target rate relative to transitional rate.
const float kFacLowRate = 0.75f;
// Action for down-sampling:
// motion=L/H/D,spatial==L/H/D: for low, high, middle levels
const uint8_t kSpatialAction[9] = {
// motion=L/H/D,spatial==L/H/D, for low, high, middle levels;
// rate = 0/1/2, for target rate state relative to transition rate.
const uint8_t kSpatialAction[27] = {
// rateClass = 0:
1, // L, L
1, // L, H
1, // L, D
@ -109,25 +117,70 @@ const uint8_t kSpatialAction[9] = {
4, // H, D
4, // D, L
1, // D, H
1, // D, D
2, // D, D
// rateClass = 1:
1, // L, L
1, // L, H
1, // L, D
4, // H ,L
1, // H, H
2, // H, D
2, // D, L
1, // D, H
2, // D, D
// rateClass = 2:
1, // L, L
1, // L, H
1, // L, D
2, // H ,L
1, // H, H
2, // H, D
2, // D, L
1, // D, H
2, // D, D
};
const uint8_t kTemporalAction[9] = {
1, // L, L
const uint8_t kTemporalAction[27] = {
// rateClass = 0:
3, // L, L
2, // L, H
2, // L, D
1, // H ,L
2, // H, H
3, // H, H
1, // H, D
1, // D, L
2, // D, H
1, // D, D
// rateClass = 1:
3, // L, L
2, // L, H
3, // L, D
1, // H ,L
3, // H, H
1, // H, D
1, // D, L
3, // D, H
1, // D, D
// rateClass = 2:
1, // L, L
3, // L, H
3, // L, D
1, // H ,L
3, // H, H
1, // H, D
1, // D, L
3, // D, H
1, // D, D
};
// Control the total amount of down-sampling allowed.
const int kMaxSpatialDown = 16;
const int kMaxTempDown = 4;
const int kMaxDownSample = 16;
const float kMaxSpatialDown = 8.0f;
const float kMaxTempDown = 4.0f;
const float kMaxDownSample = 16.0f;
// Minimum image size for a spatial down-sampling.
const int kMinImageSize= 176 * 144;
@ -136,16 +189,6 @@ const int kMinImageSize= 176 * 144;
// no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE
const int kMinFrameRate = 8;
// Boundaries for the closest standard frame size
const uint32_t kFrameSizeTh[6] = {
63360, // between 176*144 and 352*288
204288, // between 352*288 and 640*480
356352, // between 640*480 and 704*576
548352, // between 704*576 and 960*720
806400, // between 960*720 and 1280*720
1497600, // between 1280*720 and 1920*1080
};
//
// PARAMETERS FOR FEC ADJUSTMENT: TODO (marpan)
//

File diff suppressed because it is too large Load Diff