Reuse inter prediction result in real-time speed 6
In real-time speed 6, no partition search is done, so the inter prediction results obtained during mode picking can be reused in the subsequent encoding process. A speed feature, reuse_inter_pred_sby, is added to enable this reuse only in speed 6. This patch doesn't change the encoding result. RTC set tests showed an encoding speed gain of 2% - 5%. Change-Id: I3884780f64ef95dd8be10562926542528713b92c
This commit is contained in:
		@@ -3364,7 +3364,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
 | 
			
		||||
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
 | 
			
		||||
                           &xd->block_refs[ref]->sf);
 | 
			
		||||
    }
 | 
			
		||||
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
 | 
			
		||||
    if (!cpi->sf.reuse_inter_pred_sby)
 | 
			
		||||
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
 | 
			
		||||
 | 
			
		||||
    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
 | 
			
		||||
 | 
			
		||||
    if (!x->skip) {
 | 
			
		||||
      mbmi->skip = 1;
 | 
			
		||||
 
 | 
			
		||||
@@ -23,6 +23,7 @@
 | 
			
		||||
#include "vp9/common/vp9_reconintra.h"
 | 
			
		||||
 | 
			
		||||
#include "vp9/encoder/vp9_encoder.h"
 | 
			
		||||
#include "vp9/encoder/vp9_pickmode.h"
 | 
			
		||||
#include "vp9/encoder/vp9_ratectrl.h"
 | 
			
		||||
#include "vp9/encoder/vp9_rdopt.h"
 | 
			
		||||
 | 
			
		||||
@@ -183,6 +184,22 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
 | 
			
		||||
  *out_dist_sum += dist << 4;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Claims the first unused entry among the first `len` elements of `p`.
// The chosen entry is flagged as in use and its index is returned. When every
// entry is already taken (or `len` is not positive) the function returns -1,
// so the result should be checked before it is used as an array index.
static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int idx = 0;

  while (idx < len) {
    PRED_BUFFER *const slot = &p[idx];

    if (slot->in_use == 0) {
      slot->in_use = 1;
      return idx;
    }
    ++idx;
  }

  // No free entry was found.
  return -1;
}
 | 
			
		||||
 | 
			
		||||
// Releases a prediction buffer by clearing its in-use flag, which makes it
// eligible to be handed out again by get_pred_buffer(). The pixel data and
// stride are left untouched.
static void free_pred_buffer(PRED_BUFFER *p) {
  (*p).in_use = 0;
}
 | 
			
		||||
 | 
			
		||||
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
 | 
			
		||||
// this needs various further optimizations. to be continued..
 | 
			
		||||
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
@@ -229,6 +246,31 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
  const int pred_filter_search = (((mi_row + mi_col) >> bsl) +
 | 
			
		||||
                                      get_chessboard_index(cm)) % 2;
 | 
			
		||||
 | 
			
		||||
  // For speed 6, the result of interp filter is reused later in actual encoding
 | 
			
		||||
  // process.
 | 
			
		||||
  int bh = num_4x4_blocks_high_lookup[bsize] << 2;
 | 
			
		||||
  int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
 | 
			
		||||
  int pixels_in_block = bh * bw;
 | 
			
		||||
  // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
 | 
			
		||||
  PRED_BUFFER tmp[4];
 | 
			
		||||
  DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
 | 
			
		||||
  struct buf_2d orig_dst = pd->dst;
 | 
			
		||||
  PRED_BUFFER *best_pred = NULL;
 | 
			
		||||
  PRED_BUFFER *this_mode_pred = NULL;
 | 
			
		||||
  int i;
 | 
			
		||||
 | 
			
		||||
  if (cpi->sf.reuse_inter_pred_sby) {
 | 
			
		||||
    for (i = 0; i < 3; i++) {
 | 
			
		||||
      tmp[i].data = &pred_buf[pixels_in_block * i];
 | 
			
		||||
      tmp[i].stride = bw;
 | 
			
		||||
      tmp[i].in_use = 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    tmp[3].data = pd->dst.buf;
 | 
			
		||||
    tmp[3].stride = pd->dst.stride;
 | 
			
		||||
    tmp[3].in_use = 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 | 
			
		||||
 | 
			
		||||
  x->skip = 0;
 | 
			
		||||
@@ -324,6 +366,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
      // Search for the best prediction filter type, when the resulting
 | 
			
		||||
      // motion vector is at sub-pixel accuracy level for luma component, i.e.,
 | 
			
		||||
      // the last three bits are not all zeros.
 | 
			
		||||
      if (cpi->sf.reuse_inter_pred_sby) {
 | 
			
		||||
        if (this_mode == NEARESTMV) {
 | 
			
		||||
          this_mode_pred = &tmp[3];
 | 
			
		||||
        } else {
 | 
			
		||||
          this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
 | 
			
		||||
          pd->dst.buf = this_mode_pred->data;
 | 
			
		||||
          pd->dst.stride = bw;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
 | 
			
		||||
          pred_filter_search &&
 | 
			
		||||
          ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
 | 
			
		||||
@@ -334,6 +386,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
        unsigned int pf_sse[3];
 | 
			
		||||
        int64_t best_cost = INT64_MAX;
 | 
			
		||||
        INTERP_FILTER best_filter = SWITCHABLE, filter;
 | 
			
		||||
        PRED_BUFFER *current_pred = this_mode_pred;
 | 
			
		||||
 | 
			
		||||
        for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
 | 
			
		||||
          int64_t cost;
 | 
			
		||||
@@ -348,8 +401,24 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
            best_filter = filter;
 | 
			
		||||
            best_cost = cost;
 | 
			
		||||
            skip_txfm = x->skip_txfm;
 | 
			
		||||
 | 
			
		||||
            if (cpi->sf.reuse_inter_pred_sby) {
 | 
			
		||||
              if (this_mode_pred != current_pred) {
 | 
			
		||||
                free_pred_buffer(this_mode_pred);
 | 
			
		||||
                this_mode_pred = current_pred;
 | 
			
		||||
              }
 | 
			
		||||
 | 
			
		||||
              if (filter < EIGHTTAP_SHARP) {
 | 
			
		||||
                current_pred = &tmp[get_pred_buffer(tmp, 3)];
 | 
			
		||||
                pd->dst.buf = current_pred->data;
 | 
			
		||||
                pd->dst.stride = bw;
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
 | 
			
		||||
          free_pred_buffer(current_pred);
 | 
			
		||||
 | 
			
		||||
        mbmi->interp_filter = best_filter;
 | 
			
		||||
        rate = pf_rate[mbmi->interp_filter];
 | 
			
		||||
@@ -451,6 +520,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
        best_pred_filter = mbmi->interp_filter;
 | 
			
		||||
        best_ref_frame = ref_frame;
 | 
			
		||||
        skip_txfm = x->skip_txfm;
 | 
			
		||||
 | 
			
		||||
        if (cpi->sf.reuse_inter_pred_sby) {
 | 
			
		||||
          if (best_pred != NULL)
 | 
			
		||||
            free_pred_buffer(best_pred);
 | 
			
		||||
 | 
			
		||||
          best_pred = this_mode_pred;
 | 
			
		||||
        }
 | 
			
		||||
      } else {
 | 
			
		||||
        if (cpi->sf.reuse_inter_pred_sby)
 | 
			
		||||
          free_pred_buffer(this_mode_pred);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if (x->skip)
 | 
			
		||||
@@ -458,6 +537,19 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // If best prediction is not in dst buf, then copy the prediction block from
 | 
			
		||||
  // temp buf to dst buf.
 | 
			
		||||
  if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) {
 | 
			
		||||
    uint8_t *copy_from, *copy_to;
 | 
			
		||||
 | 
			
		||||
    pd->dst = orig_dst;
 | 
			
		||||
    copy_to = pd->dst.buf;
 | 
			
		||||
 | 
			
		||||
    copy_from = best_pred->data;
 | 
			
		||||
 | 
			
		||||
    vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0,
 | 
			
		||||
                      bw, bh);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  mbmi->mode = best_mode;
 | 
			
		||||
  mbmi->interp_filter = best_pred_filter;
 | 
			
		||||
@@ -471,12 +563,21 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
  if (!x->skip && best_rd > inter_mode_thresh &&
 | 
			
		||||
      bsize <= cpi->sf.max_intra_bsize) {
 | 
			
		||||
    for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
 | 
			
		||||
      if (cpi->sf.reuse_inter_pred_sby) {
 | 
			
		||||
        pd->dst.buf = tmp[0].data;
 | 
			
		||||
        pd->dst.stride = bw;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
 | 
			
		||||
                              mbmi->tx_size, this_mode,
 | 
			
		||||
                              &p->src.buf[0], p->src.stride,
 | 
			
		||||
                              &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
 | 
			
		||||
 | 
			
		||||
      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
 | 
			
		||||
 | 
			
		||||
      if (cpi->sf.reuse_inter_pred_sby)
 | 
			
		||||
        pd->dst = orig_dst;
 | 
			
		||||
 | 
			
		||||
      rate += cpi->mbmode_cost[this_mode];
 | 
			
		||||
      rate += intra_cost_penalty;
 | 
			
		||||
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 | 
			
		||||
@@ -494,6 +595,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#if CONFIG_DENOISING
 | 
			
		||||
  vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -17,6 +17,12 @@
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Descriptor for one candidate luma prediction buffer used while picking the
// inter mode.
typedef struct {
  uint8_t *data;  // Start of the prediction pixels.
  int stride;     // Row stride to use when addressing `data`.
  int in_use;     // Non-zero while the buffer is claimed; 0 when it is free.
} PRED_BUFFER;
 | 
			
		||||
 | 
			
		||||
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 | 
			
		||||
                            const struct TileInfo *const tile,
 | 
			
		||||
                            int mi_row, int mi_col,
 | 
			
		||||
 
 | 
			
		||||
@@ -274,6 +274,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
 | 
			
		||||
    // is checked for a partition block. Later, we can try to allow large
 | 
			
		||||
    // partitions to do intra mode checking.
 | 
			
		||||
    sf->max_intra_bsize = BLOCK_8X8;
 | 
			
		||||
 | 
			
		||||
    // This feature is only enabled when partition search is disabled.
 | 
			
		||||
    sf->reuse_inter_pred_sby = 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (speed >= 7) {
 | 
			
		||||
@@ -339,6 +342,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 | 
			
		||||
  for (i = 0; i < BLOCK_SIZES; ++i)
 | 
			
		||||
    sf->inter_mode_mask[i] = INTER_ALL;
 | 
			
		||||
  sf->max_intra_bsize = BLOCK_64X64;
 | 
			
		||||
  sf->reuse_inter_pred_sby = 0;
 | 
			
		||||
  // This setting only takes effect when partition_search_type is set
 | 
			
		||||
  // to FIXED_PARTITION.
 | 
			
		||||
  sf->always_this_block_size = BLOCK_16X16;
 | 
			
		||||
 
 | 
			
		||||
@@ -353,6 +353,11 @@ typedef struct SPEED_FEATURES {
 | 
			
		||||
 | 
			
		||||
  // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
 | 
			
		||||
  unsigned int source_var_thresh;
 | 
			
		||||
 | 
			
		||||
  // When partition is pre-set, the inter prediction result from pick_inter_mode
 | 
			
		||||
  // can be reused in final block encoding process. It is enabled only for real-
 | 
			
		||||
  // time mode speed 6.
 | 
			
		||||
  int reuse_inter_pred_sby;
 | 
			
		||||
} SPEED_FEATURES;
 | 
			
		||||
 | 
			
		||||
struct VP9_COMP;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user