From 65732c36a8c8f60907b62d24c097782a0a0d7e1c Mon Sep 17 00:00:00 2001 From: paulwilkins Date: Thu, 5 May 2016 11:37:04 +0100 Subject: [PATCH] Fixed 8K two pass encoder crash. Bug found by Yunqing relating to the correction for size at 8K and above in get_twopass_worst_quality(). The basis for the correction was changed to the linear size relative to 1080P as a baseline and the adjustment has been clamped to prevent problems at extreme image sizes. For 1080P the results on our test sets were neutral but the low res and mid res sets saw a small gain (0.1%-0.2% average). I would also expect some gains on 4k and larger content where the previous correction was overly aggressive. Change-Id: I30b026b5f4535e9601e3178d738066459d19c8fb --- vp10/encoder/firstpass.c | 28 ++++++++++++++++++++-------- vp9/encoder/vp9_firstpass.c | 27 +++++++++++++++++++-------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c index bc1ce001b..7c5d3c070 100644 --- a/vp10/encoder/firstpass.c +++ b/vp10/encoder/firstpass.c @@ -45,7 +45,6 @@ #define BOOST_BREAKOUT 12.5 #define BOOST_FACTOR 12.5 -#define ERR_DIVISOR 128.0 #define FACTOR_PT_LOW 0.70 #define FACTOR_PT_HIGH 0.90 #define FIRST_PASS_Q 10.0 @@ -231,6 +230,13 @@ static void subtract_stats(FIRSTPASS_STATS *section, section->duration -= frame->duration; } +// Calculate the linear size relative to a baseline of 1080P +#define BASE_SIZE 2073600.0 // 1920x1080 +static double get_linear_size_factor(const VP10_COMP *cpi) { + const double this_area = cpi->initial_width * cpi->initial_height; + return pow(this_area / BASE_SIZE, 0.5); +} + // Calculate an active area of the image that discounts formatting // bars and partially discounts other 0 energy areas. 
#define MIN_ACTIVE_AREA 0.5 @@ -1103,11 +1109,7 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -// Larger image formats are expected to be a little harder to code relatively -// given the same prediction error score. This in part at least relates to the -// increased size and hence coding cost of motion vectors. -#define EDIV_SIZE_FACTOR 800 - +#define ERR_DIVISOR 100.0 static int get_twopass_worst_quality(const VP10_COMP *cpi, const double section_err, double inactive_zone, @@ -1126,12 +1128,22 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi, const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; - const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR; + double ediv_size_correction; const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) / active_mbs; - int q; + // Larger image formats are expected to be a little harder to code + // relatively given the same prediction error score. This in part at + // least relates to the increased size and hence coding overheads of + // motion vectors. Some account of this is made through adjustment of + // the error divisor. + ediv_size_correction = + VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi))); + if (ediv_size_correction < 1.0) + ediv_size_correction = -(1.0 / ediv_size_correction); + ediv_size_correction *= 4.0; + // Try and pick a max Q that will be high enough to encode the // content at the given rate. 
for (q = rc->best_quality; q < rc->worst_quality; ++q) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e25b64202..29491c56d 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -45,7 +45,6 @@ #define BOOST_BREAKOUT 12.5 #define BOOST_FACTOR 12.5 -#define ERR_DIVISOR 128.0 #define FACTOR_PT_LOW 0.70 #define FACTOR_PT_HIGH 0.90 #define FIRST_PASS_Q 10.0 @@ -237,6 +236,13 @@ static void subtract_stats(FIRSTPASS_STATS *section, section->duration -= frame->duration; } +// Calculate the linear size relative to a baseline of 1080P +#define BASE_SIZE 2073600.0 // 1920x1080 +static double get_linear_size_factor(const VP9_COMP *cpi) { + const double this_area = cpi->initial_width * cpi->initial_height; + return pow(this_area / BASE_SIZE, 0.5); +} + // Calculate an active area of the image that discounts formatting // bars and partially discounts other 0 energy areas. #define MIN_ACTIVE_AREA 0.5 @@ -1241,11 +1247,7 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -// Larger image formats are expected to be a little harder to code relatively -// given the same prediction error score. This in part at least relates to the -// increased size and hence coding cost of motion vectors. 
-#define EDIV_SIZE_FACTOR 800 - +#define ERR_DIVISOR 100.0 static int get_twopass_worst_quality(const VP9_COMP *cpi, const double section_err, double inactive_zone, @@ -1267,16 +1269,25 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; - const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR; + double ediv_size_correction; const int target_norm_bits_per_mb = ((uint64_t)target_rate << BPER_MB_NORMBITS) / active_mbs; - int q; int is_svc_upper_layer = 0; if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) is_svc_upper_layer = 1; + // Larger image formats are expected to be a little harder to code + // relatively given the same prediction error score. This in part at + // least relates to the increased size and hence coding overheads of + // motion vectors. Some account of this is made through adjustment of + // the error divisor. + ediv_size_correction = + VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi))); + if (ediv_size_correction < 1.0) + ediv_size_correction = -(1.0 / ediv_size_correction); + ediv_size_correction *= 4.0; // Try and pick a max Q that will be high enough to encode the // content at the given rate.