From 2cd1c2855e0b53ce2177f5c8a555a0980fd15492 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Fri, 17 Jun 2011 14:19:51 -0400
Subject: [PATCH 1/3] Remove unnecessary bounds checking in motion search

The starting points are always within the limits, so bounds
checking on these points is not needed. For speed < 5, the
encoded result changes slightly because the starting point is
now treated differently when it is equal to one of the bounds.

Change-Id: I09a402d310f51e305a3519f1601b1d17b05c6152
---
 vp8/encoder/mcomp.c | 88 +++++++++++----------------------------------
 1 file changed, 20 insertions(+), 68 deletions(-)

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index d22fdb2e6..416948870 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -317,17 +317,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     int whichdir ;
     int thismse;
 
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->as_mv.col << 3) - ref_mv->as_mv.col) > MAX_FULL_PEL_VAL)
-        || (abs((bestmv->as_mv.row << 3) - ref_mv->as_mv.row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->as_mv.row <<= 3;
-        bestmv->as_mv.col <<= 3;
-        *distortion = INT_MAX;
-        return INT_MAX;
-    }
-
     // central mv
     bestmv->as_mv.row <<= 3;
     bestmv->as_mv.col <<= 3;
@@ -628,16 +617,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
     unsigned int sse;
     int thismse;
 
-    // Trap uncodable vectors
-    if ((abs((bestmv->as_mv.col << 3) - ref_mv->as_mv.col) > MAX_FULL_PEL_VAL)
-        || (abs((bestmv->as_mv.row << 3) - ref_mv->as_mv.row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->as_mv.row <<= 3;
-        bestmv->as_mv.col <<= 3;
-        *distortion = INT_MAX;
-        return INT_MAX;
-    }
-
     // central mv
     bestmv->as_mv.row <<= 3;
     bestmv->as_mv.col <<= 3;
@@ -1055,15 +1034,10 @@ int vp8_diamond_search_sad
     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
     best_address = in_what;
 
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what,
-                              in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
-    }
+    // Check the starting position
+    bestsad = fn_ptr->sdf(what, what_stride, in_what,
+                          in_what_stride, 0x7fffffff)
+              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 
     // search_param determines the length of the initial step and hence the number of iterations
     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1178,15 +1152,10 @@ int vp8_diamond_search_sadx4
     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
     best_address = in_what;
 
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride,
-                              in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
-    }
+    // Check the starting position
+    bestsad = fn_ptr->sdf(what, what_stride,
+                          in_what, in_what_stride, 0x7fffffff)
+              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 
     // search_param determines the length of the initial step and hence the number of iterations
     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1329,17 +1298,10 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     best_mv->as_mv.row = ref_row;
     best_mv->as_mv.col = ref_col;
 
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Baseline value at the centre
-
-        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
-                              in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
-    }
+    // Baseline value at the centre
+    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
+                          in_what_stride, 0x7fffffff)
+              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
     if (col_min < x->mv_col_min)
@@ -1430,15 +1392,10 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     best_mv->as_mv.row = ref_row;
     best_mv->as_mv.col = ref_col;
 
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride,
-                              bestaddress, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
-    }
+    // Baseline value at the centre
+    bestsad = fn_ptr->sdf(what, what_stride,
+                          bestaddress, in_what_stride, 0x7fffffff)
+              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
     if (col_min < x->mv_col_min)
@@ -1566,15 +1523,10 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     best_mv->as_mv.row = ref_row;
     best_mv->as_mv.col = ref_col;
 
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride,
-                              bestaddress, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
-    }
+    // Baseline value at the centre
+    bestsad = fn_ptr->sdf(what, what_stride,
+                          bestaddress, in_what_stride, 0x7fffffff)
+              + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
 
     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
     if (col_min < x->mv_col_min)

From 67a1f98c2c4b4f60df5150e350ea573cd401c612 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Mon, 20 Jun 2011 14:44:16 -0400
Subject: [PATCH 2/3] Improved vp8dx_decode_bool

Relocated the vp8dx_bool_decoder_fill() call from the end of
vp8dx_decode_bool() to near its start, allowing the compiler
to produce better assembly code. Tests showed a 1-2% performance
boost (x86 using gcc) for the 720p clip used.

Change-Id: Ic5a4eefed8777e6eefa007d4f12dfc7e64482732
---
 vp8/decoder/dboolhuff.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 5f6b211ea..f729837f9 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -81,11 +81,14 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
     int count;
     unsigned int range;
 
+    split = 1 + (((br->range - 1) * probability) >> 8);
+
+    if(br->count < 0)
+        vp8dx_bool_decoder_fill(br);
+
     value = br->value;
     count = br->count;
-    range = br->range;
 
-    split = 1 + (((range - 1) * probability) >> 8);
     bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
 
     range = split;
@@ -106,8 +109,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
     br->value = value;
     br->count = count;
     br->range = range;
-    if(count < 0)
-        vp8dx_bool_decoder_fill(br);
+
     return bit;
 }
 

From 10ed60dc718aa6133508c2fb5183cfc7dc67e920 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Mon, 20 Jun 2011 16:30:26 -0700
Subject: [PATCH 3/3] adjusting the calculation of errorperbit

RDMULT/RDDIV defines a bit's worth of distortion in terms of the
sum of squared differences. This value has also been used as
errorperbit in subpixel motion search, where the distortion is
computed as the variance of the difference. The variance differs
from the sum of squared differences by the amount of the DC squared.
Typically, for inter predicted MBs, the two distortions differ by
around 10% on average, so this patch introduces a 110% constant in
deriving errorperbit from RDMULT/RDDIV.

Tests on the CIF set show a small but positive gain in overall PSNR
(0.03%) and SSIM (0.07%); the overall impact on average PSNR is 0.

Change-Id: I95425f922d037b4d96083064a10c7cdd4948ee62
---
 vp8/encoder/encodeframe.c | 7 ++++---
 vp8/encoder/rdopt.c       | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 892284ed2..132e50858 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -335,7 +335,8 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 {
 #if USE_ACT_INDEX
     x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
-    x->errorperbit = x->rdmult/x->rddiv;
+    x->errorperbit = x->rdmult * 100 /(110 * x->rddiv);
+    x->errorperbit += (x->errorperbit==0);
 #else
     INT64 a;
     INT64 b;
@@ -346,8 +347,8 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
     b = (2*act) + cpi->activity_avg;
 
     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
-    x->errorperbit = x->rdmult/x->rddiv;
-
+    x->errorperbit = x->rdmult * 100 /(110 * x->rddiv);
+    x->errorperbit += (x->errorperbit==0);
 #endif
 
     // Activity based Zbin adjustment
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 5ce61a04e..e05ffdb1a 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -238,7 +238,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
                 (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
     }
 
-    cpi->mb.errorperbit = (cpi->RDMULT / 100);
+    cpi->mb.errorperbit = (cpi->RDMULT / 110);
     cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
 
     vp8_set_speed_features(cpi);