From b41dee8428a7cc943cf05f1707c64029ac313883 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rbultje@google.com>
Date: Thu, 7 Mar 2013 10:45:25 -0800
Subject: [PATCH] Add support for tx_select in i8x8 encoding in keyframes.

Also enable tx_select for keyframes.

Change-Id: Iadb1231d9fa7af0c8dce3d9b41830b93a302479e
---
 vp9/encoder/vp9_encodeframe.c |  31 ++---
 vp9/encoder/vp9_rdopt.c       | 217 ++++++++++++++++++++--------------
 2 files changed, 135 insertions(+), 113 deletions(-)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f2be96dd7..ad4ce3b59 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1558,30 +1558,15 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       txfm_type = ONLY_4X4;
       cpi->mb.e_mbd.lossless = 1;
     } else
-    /* FIXME (rbultje)
-     * this is a hack (no really), basically to work around the complete
-     * nonsense coefficient cost prediction for keyframes. The probabilities
-     * are reset to defaults, and thus we basically have no idea how expensive
-     * a 4x4 vs. 8x8 will really be. The result is that any estimate at which
-     * of the two is better is utterly bogus.
-     * I'd like to eventually remove this hack, but in order to do that, we
-     * need to move the frame reset code from the frame encode init to the
-     * bitstream write code, or alternatively keep a backup of the previous
-     * keyframe's probabilities as an estimate of what the current keyframe's
-     * coefficient cost distributions may look like. */
-    if (frame_type == 0) {
-      txfm_type = ALLOW_32X32;
-    } else
 #if 0
-    /* FIXME (rbultje)
-     * this code is disabled for a similar reason as the code above; the
-     * problem is that each time we "revert" to 4x4 only (or even 8x8 only),
-     * the coefficient probabilities for 16x16 (and 8x8) start lagging behind,
-     * thus leading to them lagging further behind and not being chosen for
-     * subsequent frames either. This is essentially a local minimum problem
-     * that we can probably fix by estimating real costs more closely within
-     * a frame, perhaps by re-calculating costs on-the-fly as frame encoding
-     * progresses. */
+    /* FIXME (rbultje): this code is disabled until we support cost updates
+     * while a frame is being encoded; the problem is that each time we
+     * "revert" to 4x4 only (or even 8x8 only), the coefficient probabilities
+     * for 16x16 (and 8x8) start lagging behind, thus leading to them lagging
+     * further behind and not being chosen for subsequent frames either. This
+     * is essentially a local minimum problem that we can probably fix by
+     * estimating real costs more closely within a frame, perhaps by
+     * recalculating costs on-the-fly as frame encoding progresses. */
     if (cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
             cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] &&
         cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8778f7028..69c3de6e2 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1643,6 +1643,79 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
 }
 
+/* Picks the I8X8_PRED block modes together with the transform size.
+ * Runs rd_pick_intra8x8mby_modes() once with TX_4X4 and once with TX_8X8,
+ * stores the RD cost of each pass in txfm_cache[] (indexed by transform
+ * mode) and keeps the pass dictated by cm->txfm_mode. Under TX_MODE_SELECT
+ * the cheaper pass wins and the transform-size bit cost is included in
+ * *rate, *rate_y and the returned RD cost. On return mbmi->txfm_size and
+ * mode8x8[0..3] describe the kept pass. */
+static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x,
+                                                  int *rate, int *rate_y,
+                                                  int *distortion,
+                                                  int *mode8x8,
+                                                  int64_t best_yrd,
+                                                  int64_t *txfm_cache) {
+  /* bmi[] entries the four per-8x8-block modes are read from. */
+  static const int i8x8_bmi[4] = { 0, 2, 8, 10 };
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  const int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
+  const int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
+  int64_t tmp_rd_4x4s, tmp_rd_8x8s, tmp_rd_4x4, tmp_rd_8x8;
+  int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
+  int use_8x8, tx_cost = 0, i;
+
+  mbmi->txfm_size = TX_4X4;
+  tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
+                                         &d4x4, best_yrd);
+  /* Snapshot the 4x4-pass modes before the 8x8 pass runs. */
+  for (i = 0; i < 4; i++)
+    mode8x8[i] = xd->mode_info_context->bmi[i8x8_bmi[i]].as_mode.first;
+  mbmi->txfm_size = TX_8X8;
+  tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
+                                         &d8x8, best_yrd);
+  /* Only 4x4 and 8x8 are tried, so every mode allowing 8x8 or larger maps to
+   * the 8x8 pass; ALLOW_32X32 included, since callers read all entries. */
+  txfm_cache[ONLY_4X4] = tmp_rd_4x4;
+  txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
+  txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
+  txfm_cache[ALLOW_32X32] = tmp_rd_8x8;
+  tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
+  tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
+  txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ?
+                               tmp_rd_4x4s : tmp_rd_8x8s;
+
+  if (cm->txfm_mode == TX_MODE_SELECT) {
+    /* Per-macroblock choice: the transform-size bit has to be paid for. */
+    use_8x8 = !(tmp_rd_4x4s < tmp_rd_8x8s);
+    tx_cost = use_8x8 ? cost1 : cost0;
+    tmp_rd_4x4 = tmp_rd_4x4s;
+    tmp_rd_8x8 = tmp_rd_8x8s;
+  } else {
+    /* Frame-level transform mode: no per-macroblock bit cost is added. */
+    use_8x8 = cm->txfm_mode != ONLY_4X4;
+  }
+
+  if (!use_8x8) {
+    *rate = r4x4 + tx_cost;
+    *rate_y = tok4x4 + tx_cost;
+    *distortion = d4x4;
+    mbmi->txfm_size = TX_4X4;
+    return tmp_rd_4x4;
+  }
+
+  *rate = r8x8 + tx_cost;
+  *rate_y = tok8x8 + tx_cost;
+  *distortion = d8x8;
+  mbmi->txfm_size = TX_8X8;
+  /* Report the 8x8-pass modes, read back from bmi[] after that pass. */
+  for (i = 0; i < 4; i++)
+    mode8x8[i] = xd->mode_info_context->bmi[i8x8_bmi[i]].as_mode.first;
+  return tmp_rd_8x8;
+}
+
 static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
   int b;
   int cost = 0;
@@ -4397,65 +4470,11 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         }
         break;
         case I8X8_PRED: {
-          int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
-          int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
-          int64_t tmp_rd_4x4s, tmp_rd_8x8s;
-          int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
-          int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
-          mbmi->txfm_size = TX_4X4;
-          tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
-                                                 &d4x4, best_yrd);
-          mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-          mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-          mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-          mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-          mbmi->txfm_size = TX_8X8;
-          tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
-                                                 &d8x8, best_yrd);
-          txfm_cache[ONLY_4X4]  = tmp_rd_4x4;
-          txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
-          txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
-          tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
-          tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
-          txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
-          if (cm->txfm_mode == TX_MODE_SELECT) {
-            if (tmp_rd_4x4s < tmp_rd_8x8s) {
-              rate = r4x4 + cost0;
-              rate_y = tok4x4 + cost0;
-              distortion = d4x4;
-              mbmi->txfm_size = TX_4X4;
-              tmp_rd = tmp_rd_4x4s;
-            } else {
-              rate = r8x8 + cost1;
-              rate_y = tok8x8 + cost1;
-              distortion = d8x8;
-              mbmi->txfm_size = TX_8X8;
-              tmp_rd = tmp_rd_8x8s;
-
-              mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-              mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-              mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-              mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-            }
-          } else if (cm->txfm_mode == ONLY_4X4) {
-            rate = r4x4;
-            rate_y = tok4x4;
-            distortion = d4x4;
-            mbmi->txfm_size = TX_4X4;
-            tmp_rd = tmp_rd_4x4;
-          } else {
-            rate = r8x8;
-            rate_y = tok8x8;
-            distortion = d8x8;
-            mbmi->txfm_size = TX_8X8;
-            tmp_rd = tmp_rd_8x8;
-
-            mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
-            mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
-            mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
-            mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
-          }
+          int64_t tmp_rd;
 
+          tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y,
+                                                      &distortion, mode8x8,
+                                                      best_yrd, txfm_cache);
           rate2 += rate;
           rate2 += intra_cost_penalty;
           distortion2 += distortion;
@@ -5040,10 +5059,10 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int mode16x16;
   int mode8x8[4];
   int dist;
-  int modeuv, uv_intra_skippable, uv_intra_skippable_8x8;
+  int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;
   int y_intra16x16_skippable = 0;
-  int64_t txfm_cache[NB_TXFM_MODES];
-  TX_SIZE txfm_size_16x16;
+  int64_t txfm_cache[2][NB_TXFM_MODES];
+  TX_SIZE txfm_size_16x16, txfm_size_8x8;
   int i;
 
   mbmi->ref_frame = INTRA_FRAME;
@@ -5054,64 +5073,82 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
   if (cpi->common.txfm_mode != ONLY_4X4) {
     rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly,
                                 &distuv8x8, &uv_intra_skippable_8x8);
+    modeuv8x8 = mbmi->uv_mode;
   } else {
     uv_intra_skippable_8x8 = uv_intra_skippable;
     rateuv8x8 = rateuv;
     distuv8x8 = distuv;
     rateuv8x8_tokenonly = rateuv_tokenonly;
+    modeuv8x8 = modeuv;
   }
 
   // current macroblock under rate-distortion optimization test loop
   error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16,
                                           &rate16x16_tokenonly, &dist16x16,
-                                          &y_intra16x16_skippable, txfm_cache);
+                                          &y_intra16x16_skippable,
+                                          txfm_cache[1]);
   mode16x16 = mbmi->mode;
   txfm_size_16x16 = mbmi->txfm_size;
+  if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+      ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+       (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
+    error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
+    rate16x16 -= rate16x16_tokenonly;
+  }
+  for (i = 0; i < NB_TXFM_MODES; i++) {
+    txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] +
+                       txfm_cache[1][i];
+  }
 
-  // FIXME(rbultje) support transform-size selection
-  mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
-  error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly,
-                                       &dist8x8, error16x16);
-  mode8x8[0]= xd->mode_info_context->bmi[0].as_mode.first;
-  mode8x8[1]= xd->mode_info_context->bmi[2].as_mode.first;
-  mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first;
-  mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first;
+  error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8,
+                                                &rate8x8_tokenonly,
+                                                &dist8x8, mode8x8,
+                                                error16x16, txfm_cache[1]);
+  txfm_size_8x8 = mbmi->txfm_size;
+  for (i = 0; i < NB_TXFM_MODES; i++) {
+    int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i];
+    if (tmp_rd < txfm_cache[0][i])
+      txfm_cache[0][i] = tmp_rd;
+  }
 
   mbmi->txfm_size = TX_4X4;
   error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
                                        &rate4x4, &rate4x4_tokenonly,
                                        &dist4x4, error16x16);
+  for (i = 0; i < NB_TXFM_MODES; i++) {
+    if (error4x4 < txfm_cache[0][i])
+      txfm_cache[0][i] = error4x4;
+  }
 
   mbmi->mb_skip_coeff = 0;
-  if (cpi->common.mb_no_coeff_skip &&
-      y_intra16x16_skippable && uv_intra_skippable_8x8) {
+  if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+      ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+       (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
     mbmi->mb_skip_coeff = 1;
     mbmi->mode = mode16x16;
-    mbmi->uv_mode = modeuv;
-    rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly +
-           vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
-    dist = dist16x16 + (distuv8x8 >> 2);
+    mbmi->uv_mode = (cm->txfm_mode == ONLY_4X4) ? modeuv : modeuv8x8;
+    rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
+    dist = dist16x16;
+    if (cm->txfm_mode == ONLY_4X4) {
+      rate += rateuv - rateuv_tokenonly;
+      dist += (distuv >> 2);
+    } else {
+      rate += rateuv8x8 - rateuv8x8_tokenonly;
+      dist += (distuv8x8 >> 2);
+    }
 
     mbmi->txfm_size = txfm_size_16x16;
-    memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
-           sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
   } else if (error8x8 > error16x16) {
     if (error4x4 < error16x16) {
       rate = rateuv + rate4x4;
       mbmi->mode = B_PRED;
       mbmi->txfm_size = TX_4X4;
       dist = dist4x4 + (distuv >> 2);
-      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     } else {
       mbmi->txfm_size = txfm_size_16x16;
       mbmi->mode = mode16x16;
       rate = rate16x16 + rateuv8x8;
       dist = dist16x16 + (distuv8x8 >> 2);
-      for (i = 0; i < NB_TXFM_MODES; i++) {
-        x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
-            error16x16 - txfm_cache[i];
-      }
     }
     if (cpi->common.mb_no_coeff_skip)
       rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -5121,22 +5158,22 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
       mbmi->mode = B_PRED;
       mbmi->txfm_size = TX_4X4;
       dist = dist4x4 + (distuv >> 2);
-      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     } else {
-      // FIXME(rbultje) support transform-size selection
       mbmi->mode = I8X8_PRED;
-      mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
+      mbmi->txfm_size = txfm_size_8x8;
       set_i8x8_block_modes(x, mode8x8);
       rate = rate8x8 + rateuv;
       dist = dist8x8 + (distuv >> 2);
-      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     }
     if (cpi->common.mb_no_coeff_skip)
       rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
   }
 
+  for (i = 0; i < NB_TXFM_MODES; i++) {
+    x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
+        txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i];
+  }
+
   *returnrate = rate;
   *returndist = dist;
 }