From 1faf2887981736b6e4c67ec7421caeb1dbfcc069 Mon Sep 17 00:00:00 2001
From: Jingning Han <jingning@google.com>
Date: Thu, 9 Jun 2016 11:20:26 -0700
Subject: [PATCH] Rework transform quantization pipeline

This commit reworks the transform and quantization unit. It enables
the use of adaptive quantization for intra modes. This further
improves the compression performance:
lowres 0.36%
midres 0.79%
hdres  0.73%

The key frame coding performance is improved:
lowres 1.7%
midres 1.9%
hdres  3.3%

The overall coding gains are:
lowres 1.1%
midres 1.8%
hdres  2.3%

Change-Id: Iaec1a3a4c1d5eac883ab526ed076d957060479dd
---
 vp10/encoder/encodemb.c | 57 +++++++++++++++++++++--------------------
 vp10/encoder/encodemb.h |  2 ++
 vp10/encoder/rdopt.c    | 13 +++++-----
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 835cc4393..bd9dc559e 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -60,8 +60,10 @@ typedef struct vp10_token_state {
   tran_low_t    qc;
 } vp10_token_state;
 
-// TODO(jimbankoski): experiment to find optimal RD numbers.
-static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] ={ {9, 7}, {8, 5}, };
+// RD multipliers per [ref type][plane type]; values obtained empirically.
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
+    {10, 6}, {8, 5},
+};
 
 #define UPDATE_RD_COST()\
 {\
@@ -898,7 +900,6 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
   struct encode_b_args *const args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
-  struct optimize_ctx *const ctx = args->ctx;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -910,8 +911,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
   const int bwl = b_width_log2_lookup[plane_bsize];
 #endif
   dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
-  a = &ctx->ta[plane][blk_col];
-  l = &ctx->tl[plane][blk_row];
+  a = &args->ta[blk_col];
+  l = &args->tl[blk_row];
 
   // TODO(jingning): per transformed block zero forcing only enabled for
   // luma component. will integrate chroma components as well.
@@ -1153,7 +1154,7 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+  struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL};
   int plane;
 
   mbmi->skip = 1;
@@ -1177,6 +1178,9 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
 #endif
     vp10_subtract_plane(x, bsize, plane);
 
+    arg.ta = ctx.ta[plane];
+    arg.tl = ctx.tl[plane];
+
     if (x->optimize) {
 #if CONFIG_VAR_TX
       vp10_get_entropy_contexts(bsize, TX_4X4, pd,
@@ -1209,7 +1213,7 @@ void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+  struct encode_b_args arg = {x, &ctx, &mbmi->skip, NULL, NULL};
   int plane;
 
   mbmi->skip = 1;
@@ -1222,6 +1226,8 @@ void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
     vp10_subtract_plane(x, bsize, plane);
     vp10_get_entropy_contexts(bsize, tx_size, pd,
                               ctx.ta[plane], ctx.tl[plane]);
+    arg.ta = ctx.ta[plane];
+    arg.tl = ctx.tl[plane];
     vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                            &arg);
   }
@@ -1250,8 +1256,8 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
   const int tx1d_size = get_tx1d_size(tx_size);
-
   INV_TXFM_PARAM inv_txfm_param;
+  ENTROPY_CONTEXT *a, *l;
 
   dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
   src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
@@ -1278,21 +1284,16 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
                        tx_size);
 #else  // CONFIG_NEW_QUANT
   vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                   VP10_XFORM_QUANT_B);
+                   VP10_XFORM_QUANT_FP);
 #endif  // CONFIG_NEW_QUANT
-
-  if (args->ctx != NULL) {
-    struct optimize_ctx *const ctx = args->ctx;
-    ENTROPY_CONTEXT *a, *l;
-    a = &ctx->ta[plane][blk_col];
-    l = &ctx->tl[plane][blk_row];
-    if (x->optimize && p->eobs[block]) {
-      int ctx;
-      ctx = combine_entropy_contexts(*a, *l);
-      *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
-    } else {
-      *a = *l = p->eobs[block] > 0;
-    }
+  a = &args->ta[blk_col];
+  l = &args->tl[blk_row];
+  if (x->optimize && p->eobs[block]) {
+    int ctx;
+    ctx = combine_entropy_contexts(*a, *l);
+    *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+  } else {
+    *a = *l = p->eobs[block] > 0;
   }
 
   if (*eob) {
@@ -1319,18 +1320,18 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
 void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                    int enable_optimize_b) {
   const MACROBLOCKD *const xd = &x->e_mbd;
-  struct optimize_ctx ctx;
-  struct encode_b_args arg = {x, &ctx, &xd->mi[0]->mbmi.skip};
+  ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
+  ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
+  // Zeroed: vp10_encode_block_intra may read contexts that are not loaded below.
+  struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip, ta, tl};
 
   if (enable_optimize_b && x->optimize) {
     const struct macroblockd_plane* const pd = &xd->plane[plane];
     const TX_SIZE tx_size = plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) :
         xd->mi[0]->mbmi.tx_size;
-    vp10_get_entropy_contexts(bsize, tx_size, pd,
-                              ctx.ta[plane], ctx.tl[plane]);
-  } else {
-    arg.ctx = NULL;
+    vp10_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
   }
+  // Run vp10_encode_block_intra on every transform block of this plane.
   vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                           vp10_encode_block_intra, &arg);
 }
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index cef6cccbd..c241b003c 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -22,6 +22,8 @@ struct encode_b_args {
   MACROBLOCK *x;
   struct optimize_ctx *ctx;
   int8_t *skip;
+  ENTROPY_CONTEXT *ta;
+  ENTROPY_CONTEXT *tl;
 };
 
 typedef enum VP10_XFORM_QUANT {
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 01b4e782f..7079b2b38 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1213,18 +1213,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   int rate;
   int64_t dist;
   int64_t sse;
-#if !CONFIG_NEW_QUANT
   ENTROPY_CONTEXT coeff_ctx = combine_entropy_contexts(
       *(args->t_above + blk_col), *(args->t_left + blk_row));
-#endif
 
   if (args->exit_early)
     return;
 
   if (!is_inter_block(mbmi)) {
-    struct encode_b_args arg = {x, NULL, &mbmi->skip};
+    struct encode_b_args intra_arg = {x, NULL, &mbmi->skip, args->t_above,
+                                      args->t_left};
     vp10_encode_block_intra(plane, block, blk_row, blk_col,
-                            plane_bsize, tx_size, &arg);
+                            plane_bsize, tx_size, &intra_arg);
 
     if (args->cpi->sf.use_transform_domain_distortion) {
       dist_block(args->cpi, x, plane, block, blk_row, blk_col,
@@ -1269,9 +1268,9 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
 #else
       vp10_xform_quant(x, plane, block, blk_row, blk_col,
                        plane_bsize, tx_size, VP10_XFORM_QUANT_FP);
+#endif  // CONFIG_NEW_QUANT
       if (x->plane[plane].eobs[block])
         vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
-#endif  // CONFIG_NEW_QUANT
       dist_block(args->cpi, x, plane, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
     } else if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
@@ -1325,9 +1324,9 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
 #else
     vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                      VP10_XFORM_QUANT_FP);
+#endif  // CONFIG_NEW_QUANT
     if (x->plane[plane].eobs[block])
       vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
-#endif  // CONFIG_NEW_QUANT
     dist_block(args->cpi, x, plane, block, blk_row, blk_col,
                tx_size, &dist, &sse);
   }
@@ -9044,7 +9043,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
-      vp10_encode_intra_block_plane(x, bsize, 0, 0);
+      vp10_encode_intra_block_plane(x, bsize, 0, 1);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         x->recon_variance =