From 2cd1c2855e0b53ce2177f5c8a555a0980fd15492 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Fri, 17 Jun 2011 14:19:51 -0400 Subject: [PATCH 1/3] Remove unnecessary bounds checking in motion search The starting points are always within the limits, and bounds checking on these points is not needed. For speed < 5, the encoded result changes a little because different treatment is taken while starting point equals the bounds. Change-Id: I09a402d310f51e305a3519f1601b1d17b05c6152 --- vp8/encoder/mcomp.c | 88 +++++++++++---------------------------------- 1 file changed, 20 insertions(+), 68 deletions(-) diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index d22fdb2e6..416948870 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -317,17 +317,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int whichdir ; int thismse; - - // Trap uncodable vectors - if ((abs((bestmv->as_mv.col << 3) - ref_mv->as_mv.col) > MAX_FULL_PEL_VAL) - || (abs((bestmv->as_mv.row << 3) - ref_mv->as_mv.row) > MAX_FULL_PEL_VAL)) - { - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - *distortion = INT_MAX; - return INT_MAX; - } - // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; @@ -628,16 +617,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, unsigned int sse; int thismse; - // Trap uncodable vectors - if ((abs((bestmv->as_mv.col << 3) - ref_mv->as_mv.col) > MAX_FULL_PEL_VAL) - || (abs((bestmv->as_mv.row << 3) - ref_mv->as_mv.row) > MAX_FULL_PEL_VAL)) - { - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; - *distortion = INT_MAX; - return INT_MAX; - } - // central mv bestmv->as_mv.row <<= 3; bestmv->as_mv.col <<= 3; @@ -1055,15 +1034,10 @@ int vp8_diamond_search_sad in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > 
x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - } + // Check the starting position + bestsad = fn_ptr->sdf(what, what_stride, in_what, + in_what_stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. @@ -1178,15 +1152,10 @@ int vp8_diamond_search_sadx4 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, - in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - } + // Check the starting position + bestsad = fn_ptr->sdf(what, what_stride, + in_what, in_what_stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
@@ -1329,17 +1298,10 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - - //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14)); - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - } + // Baseline value at the centre + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, + in_what_stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) @@ -1430,15 +1392,10 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - } + // Baseline value at the centre + bestsad = fn_ptr->sdf(what, what_stride, + bestaddress, in_what_stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) @@ -1566,15 +1523,10 @@ int 
vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); - } + // Baseline value at the centre + bestsad = fn_ptr->sdf(what, what_stride, + bestaddress, in_what_stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border if (col_min < x->mv_col_min) From 67a1f98c2c4b4f60df5150e350ea573cd401c612 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Mon, 20 Jun 2011 14:44:16 -0400 Subject: [PATCH 2/3] Improved vp8dx_decode_bool Relocated the vp8dx_bool_decoder_fill() call, allowing the compiler to produce better assembly code. Tests showed a 1 - 2 % performance boost (x86 using gcc) for the 720p clip used. 
Change-Id: Ic5a4eefed8777e6eefa007d4f12dfc7e64482732 --- vp8/decoder/dboolhuff.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h index 5f6b211ea..f729837f9 100644 --- a/vp8/decoder/dboolhuff.h +++ b/vp8/decoder/dboolhuff.h @@ -81,11 +81,14 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { int count; unsigned int range; + split = 1 + (((br->range - 1) * probability) >> 8); + + if(br->count < 0) + vp8dx_bool_decoder_fill(br); + value = br->value; count = br->count; - range = br->range; - split = 1 + (((range - 1) * probability) >> 8); bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); range = split; @@ -106,8 +109,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { br->value = value; br->count = count; br->range = range; - if(count < 0) - vp8dx_bool_decoder_fill(br); + return bit; } From 10ed60dc718aa6133508c2fb5183cfc7dc67e920 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 20 Jun 2011 16:30:26 -0700 Subject: [PATCH 3/3] adjusting the calculation of errorperbit RDMULT/RDDIV defines a bit's worth of distortion in terms of the sum of squared differences. This has also been used as errorperbit in subpixel motion search, where the distortion is computed as the variance of the difference. The variance of the differences differs from the sum of squared differences by the amount of DC squared. Typically, for inter-predicted MBs, this difference averages around 10% between the two distortions, so this patch introduces a 110% constant in deriving errorperbit from RDMULT/RDDIV. Tests on the CIF set show a small but positive gain in overall PSNR (.03%) and SSIM (.07%); the overall impact on average PSNR is 0.
Change-Id: I95425f922d037b4d96083064a10c7cdd4948ee62 --- vp8/encoder/encodeframe.c | 7 ++++--- vp8/encoder/rdopt.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 892284ed2..132e50858 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -335,7 +335,8 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); - x->errorperbit = x->rdmult/x->rddiv; + x->errorperbit = x->rdmult * 100 /(110 * x->rddiv); + x->errorperbit += (x->errorperbit==0); #else INT64 a; INT64 b; @@ -346,8 +347,8 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) b = (2*act) + cpi->activity_avg; x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a); - x->errorperbit = x->rdmult/x->rddiv; - + x->errorperbit = x->rdmult * 100 /(110 * x->rddiv); + x->errorperbit += (x->errorperbit==0); #endif // Activity based Zbin adjustment diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 5ce61a04e..e05ffdb1a 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -238,7 +238,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; } - cpi->mb.errorperbit = (cpi->RDMULT / 100); + cpi->mb.errorperbit = (cpi->RDMULT / 110); cpi->mb.errorperbit += (cpi->mb.errorperbit==0); vp8_set_speed_features(cpi);