Synchronize the encoding process and tokenization handling

The encoding and tokenization processes now support the recursive transform block partition coding scheme. Change-Id: I47283cc6ee9c383059950623ece60a0fcce82e00
2015-04-20 10:43:36 -07:00 · 2015-04-20 10:43:36 -07:00 · df2042dc1e
commit df2042dc1e
parent a15cf9a5b7
8 changed files with 304 additions and 88 deletions
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@ -300,7 +300,7 @@ void vp9_foreach_transformed_block(
    const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
    foreach_transformed_block_visitor visit, void *arg);
-static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
+static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
                                            TX_SIZE tx_size, int block,
                                            int *x, int *y) {
  const int bwl = b_width_log2_lookup[plane_bsize];
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@ -338,7 +338,7 @@ struct inter_args {
  const int16_t *const uv_dequant;
 };
-static void decode_reconstruct_tx(int mi_row, int mi_col,
+static void decode_reconstruct_tx(int blk_row, int blk_col,
                                  int plane, int block,
                                  TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                                  void *arg) {
@ -351,33 +351,38 @@ static void decode_reconstruct_tx(int mi_row, int mi_col,
      get_uv_tx_size_impl(mbmi->tx_size, plane_bsize,
                          0, 0) : mbmi->tx_size;
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;
  if (tx_size == plane_tx_size) {
    const int16_t *const dequant = (plane == 0) ? args->y_dequant
                                                : args->uv_dequant;
-    int x, y, eob;
+    int eob;
    txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
    eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block,
-                                  plane_bsize, x, y, tx_size, args->r, dequant);
+                                  plane_bsize, blk_col, blk_row,
                                  tx_size, args->r, dequant);
    inverse_transform_block(xd, plane, block, tx_size,
-                            &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
+                            &pd->dst.buf[4 * blk_row * pd->dst.stride +
                                         4 * blk_col],
                            pd->dst.stride, eob);
    *args->eobtotal += eob;
  } else {
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-    int bh = num_8x8_blocks_high_lookup[bsize];
+    int bh = num_4x4_blocks_high_lookup[bsize];
    int max_blocks_high = cm->mi_rows;
    int max_blocks_wide = cm->mi_cols;
    int step = 1 << (2 *(tx_size - 1));
    int i;
    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) * bh / 2;
      int offsetc = (i & 0x01) * bh / 2;
-      if ((mi_row + offsetr < max_blocks_high) &&
+      decode_reconstruct_tx(blk_row + offsetr, blk_col + offsetc,
          (mi_col + offsetc < max_blocks_wide))
        decode_reconstruct_tx(mi_row + offsetr, mi_col + offsetc,
                            plane, block + i * step, tx_size - 1,
                            plane_bsize, arg);
    }
@ -485,8 +490,8 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
        for (idy = 0; idy < mi_height; idy += bh) {
          for (idx = 0; idx < mi_width; idx += bh) {
-            decode_reconstruct_tx(mi_row + idy / 2, mi_col + idx / 2,
+            decode_reconstruct_tx(idy, idx, plane, block,
-                                  plane, block, max_txsize_lookup[plane_bsize],
+                                  max_txsize_lookup[plane_bsize],
                                  plane_bsize, &arg);
            block += step;
          }
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@ -4084,9 +4084,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
    vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
-    vp9_tokenize_sb_inter(cpi, td, t, !output_enabled, mi_row, mi_col,
+    vp9_tokenize_sb_inter(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
                          MAX(bsize, BLOCK_8X8));
 //    vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
  }
  if (output_enabled) {
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@ -530,6 +530,94 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
  }
 }
 /* Forward transform and quantization of one transform block that is located
  * by its (blk_row, blk_col) position rather than derived from `block`.
  *
  * x            encoder macroblock state; the residual is read from
  *              x->plane[plane].src_diff.
  * plane        index into x->plane[] and xd->plane[].
  * block        index used with BLOCK_OFFSET to select the coeff / qcoeff /
  *              dqcoeff slices and the eobs[] slot that receive the results.
  * blk_row,
  * blk_col      position of the transform block; both are scaled by 4 when
  *              indexing the residual buffer.
  * plane_bsize  plane block size; four times its width in 4x4 units is used
  *              as the residual stride.
  * tx_size      selects the transform and the coefficient count passed to the
  *              quantizer (1024, 256, 64 or 16).
  */
 void vp9_xform_quant_inter(MACROBLOCK *x, int plane, int block,
                            int blk_row, int blk_col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const mb_plane = &x->plane[plane];
  const struct macroblockd_plane *const xd_plane = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(mb_plane->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(mb_plane->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(xd_plane->dqcoeff, block);
  uint16_t *const eob_ptr = &mb_plane->eobs[block];
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  // Top-left residual sample of this transform block.
  const int16_t *const residual =
      &mb_plane->src_diff[4 * (blk_row * stride + blk_col)];

 #if CONFIG_VP9_HIGHBITDEPTH
  // High bit-depth buffers use the highbd transform/quantizer variants and
  // return before reaching the regular path below.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, residual, coeff, stride);
        vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block,
                                    mb_plane->zbin, mb_plane->round,
                                    mb_plane->quant, mb_plane->quant_shift,
                                    qcoeff, dqcoeff, xd_plane->dequant,
                                    eob_ptr, so->scan, so->iscan);
        break;
      case TX_16X16:
        vp9_highbd_fdct16x16(residual, coeff, stride);
        vp9_highbd_quantize_b(coeff, 256, x->skip_block,
                              mb_plane->zbin, mb_plane->round,
                              mb_plane->quant, mb_plane->quant_shift,
                              qcoeff, dqcoeff, xd_plane->dequant,
                              eob_ptr, so->scan, so->iscan);
        break;
      case TX_8X8:
        vp9_highbd_fdct8x8(residual, coeff, stride);
        vp9_highbd_quantize_b(coeff, 64, x->skip_block,
                              mb_plane->zbin, mb_plane->round,
                              mb_plane->quant, mb_plane->quant_shift,
                              qcoeff, dqcoeff, xd_plane->dequant,
                              eob_ptr, so->scan, so->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(residual, coeff, stride);
        vp9_highbd_quantize_b(coeff, 16, x->skip_block,
                              mb_plane->zbin, mb_plane->round,
                              mb_plane->quant, mb_plane->quant_shift,
                              qcoeff, dqcoeff, xd_plane->dequant,
                              eob_ptr, so->scan, so->iscan);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
 #endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, residual, coeff, stride);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block,
                           mb_plane->zbin, mb_plane->round,
                           mb_plane->quant, mb_plane->quant_shift,
                           qcoeff, dqcoeff, xd_plane->dequant,
                           eob_ptr, so->scan, so->iscan);
      break;
    case TX_16X16:
      vp9_fdct16x16(residual, coeff, stride);
      vp9_quantize_b(coeff, 256, x->skip_block,
                     mb_plane->zbin, mb_plane->round,
                     mb_plane->quant, mb_plane->quant_shift,
                     qcoeff, dqcoeff, xd_plane->dequant,
                     eob_ptr, so->scan, so->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8(residual, coeff, stride);
      vp9_quantize_b(coeff, 64, x->skip_block,
                     mb_plane->zbin, mb_plane->round,
                     mb_plane->quant, mb_plane->quant_shift,
                     qcoeff, dqcoeff, xd_plane->dequant,
                     eob_ptr, so->scan, so->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(residual, coeff, stride);
      vp9_quantize_b(coeff, 16, x->skip_block,
                     mb_plane->zbin, mb_plane->round,
                     mb_plane->quant, mb_plane->quant_shift,
                     qcoeff, dqcoeff, xd_plane->dequant,
                     eob_ptr, so->scan, so->iscan);
      break;
    default:
      assert(0);
      break;
  }
 }
 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
@ -619,7 +707,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
  }
 }
-static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
+static void encode_block_b(int blk_row, int blk_col, int plane,
                           int block, BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
@ -628,55 +717,24 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int i, j;
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+  dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
-  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
+  a = &ctx->ta[plane][blk_col];
-  a = &ctx->ta[plane][i];
+  l = &ctx->tl[plane][blk_row];
  l = &ctx->tl[plane][j];
  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
-  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+//  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
-    p->eobs[block] = 0;
+//    p->eobs[block] = 0;
-    *a = *l = 0;
+//    *a = *l = 0;
-    return;
+//    return;
-  }
+//  }
-  if (!x->skip_recode) {
+  vp9_xform_quant_inter(x, plane, block, blk_row, blk_col,
-    if (x->quant_fp) {
+                        plane_bsize, tx_size);
      // Encoding process for rtc mode
      if (x->skip_txfm[0] == 1 && plane == 0) {
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
      } else {
        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == 0) {
          // full forward transform and quantization
          vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index]== 2) {
          // fast path forward transform and quantization
          vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
        } else {
          // skip forward transform
          p->eobs[block] = 0;
          *a = *l = 0;
          return;
        }
      } else {
        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      }
    }
  }
-  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+  if (x->optimize) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
@ -739,6 +797,44 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
  }
 }
 /* Walks the transform partition of one plane block.
  *
  * When tx_size equals the transform size in use for this plane, the block at
  * (blk_row, blk_col) is handed to encode_block_b.  Otherwise the four
  * quadrants are visited recursively at tx_size - 1, each with its own
  * coefficient block index.  Positions at or beyond the limits derived from
  * xd->mb_to_bottom_edge / xd->mb_to_right_edge are skipped.
  *
  * arg is a struct encode_b_args * and is forwarded unchanged.
  */
 static void encode_block_inter(int blk_row, int blk_col,
                                int plane, int block, BLOCK_SIZE plane_bsize,
                                TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const enc_args = arg;
  MACROBLOCK *const mb = enc_args->x;
  MACROBLOCKD *const xd = &mb->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int rows_limit = num_4x4_blocks_high_lookup[plane_bsize];
  int cols_limit = num_4x4_blocks_wide_lookup[plane_bsize];
  TX_SIZE plane_tx_size;
  int quad;

  // Plane 0 uses the block's transform size directly; other planes derive
  // theirs from it.
  if (plane)
    plane_tx_size = get_uv_tx_size_impl(mbmi->tx_size, plane_bsize, 0, 0);
  else
    plane_tx_size = mbmi->tx_size;

  // A negative edge distance shrinks the number of 4x4 rows / columns that
  // are processed.
  if (xd->mb_to_bottom_edge < 0)
    rows_limit += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    cols_limit += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  if (blk_row >= rows_limit || blk_col >= cols_limit)
    return;

  if (tx_size == plane_tx_size) {
    encode_block_b(blk_row, blk_col, plane, block, plane_bsize, tx_size, arg);
    return;
  }

  {
    // Quadrant offset in 4x4 units and coefficient-index stride of one
    // quadrant at the next smaller transform size.
    const int half =
        num_4x4_blocks_high_lookup[txsize_to_bsize[tx_size]] / 2;
    const int quad_step = 1 << (2 * (tx_size - 1));

    for (quad = 0; quad < 4; ++quad) {
      const int row_off = (quad >> 1) ? half : 0;
      const int col_off = (quad & 0x01) ? half : 0;
      encode_block_inter(blk_row + row_off, blk_col + col_off,
                         plane, block + quad * quad_step, plane_bsize,
                         tx_size - 1, arg);
    }
  }
 }
 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
@ -783,18 +879,32 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
    return;
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    if (!x->skip_recode)
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
    BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
    int bh = num_4x4_blocks_wide_lookup[txb_size];
    int idx, idy;
    int block = 0;
    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
    vp9_subtract_plane(x, bsize, plane);
-    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+    if (x->optimize) {
      const struct macroblockd_plane* const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd,
                               ctx.ta[plane], ctx.tl[plane]);
    }
-    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+    for (idy = 0; idy < mi_height; idy += bh) {
-                                           &arg);
+      for (idx = 0; idx < mi_width; idx += bh) {
        encode_block_inter(idy, idx, plane, block, plane_bsize,
                           max_txsize_lookup[plane_bsize], &arg);
        block += step;
      }
    }
  }
 }
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@ -29,6 +29,9 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp9_xform_quant_inter(MACROBLOCK *x, int plane, int block,
                           int blk_row, int blk_col,
                           BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
 void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@ -188,4 +188,5 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
    lf->filter_level = search_filter_level(sd, cpi,
                                           method == LPF_PICK_FROM_SUBIMAGE);
  }
  lf->filter_level = 0;
 }
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@ -442,6 +442,20 @@ struct tokenize_b_args {
  TOKENEXTRA **tp;
 };
 /* Updates the entropy contexts for the transform block at
  * (blk_row, blk_col) without touching the token stream: the flag passed to
  * vp9_set_contexts is whether the block's end-of-block position is
  * greater than zero.
  *
  * arg is a struct tokenize_b_args *; only its td member is read here.
  */
 static void set_entropy_context_b_inter(int plane, int block,
                                         BLOCK_SIZE plane_bsize,
                                         int blk_row, int blk_col,
                                         TX_SIZE tx_size, void *arg) {
  struct tokenize_b_args *const tok_args = arg;
  ThreadData *const thread = tok_args->td;
  MACROBLOCK *const mb = &thread->mb;
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblockd_plane *const xd_plane = &xd->plane[plane];
  const int has_coeffs = mb->plane[plane].eobs[block] > 0;

  // vp9_set_contexts takes the column before the row.
  vp9_set_contexts(xd, xd_plane, plane_bsize, tx_size, has_coeffs,
                   blk_col, blk_row);
 }
 static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
                                  TX_SIZE tx_size, void *arg) {
  struct tokenize_b_args* const args = arg;
@ -486,6 +500,85 @@ static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
  return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
 }
 /* Emits the coefficient tokens of one transform block located at
  * (blk_row, blk_col) and updates the entropy contexts afterwards.
  *
  * plane, block   select the plane and the qcoeff / eobs entry to tokenize.
  * plane_bsize    forwarded to vp9_set_contexts.
  * blk_row/col    offsets applied to pd->left_context / pd->above_context
  *                when deriving the initial context, and passed on to
  *                vp9_set_contexts.
  * tx_size        selects the probability / counter tables, band table and
  *                scan order.
  * arg            struct tokenize_b_args *; cpi, td and tp are read, and *tp
  *                is advanced past the tokens written here.
  */
 static void tokenize_b_inter(int plane, int block, BLOCK_SIZE plane_bsize,
                             int blk_row, int blk_col,
                             TX_SIZE tx_size, void *arg) {
  struct tokenize_b_args* const args = arg;
  VP9_COMP *cpi = args->cpi;
  ThreadData *const td = args->td;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  TOKENEXTRA **tp = args->tp;
  // Not initialized here; entries are written as positions are tokenized.
  // NOTE(review): presumably get_coef_context only reads entries that were
  // already written -- confirm.
  uint8_t token_cache[32 * 32];
  struct macroblock_plane *p = &x->plane[plane];
  struct macroblockd_plane *pd = &xd->plane[plane];
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  int pt; /* near block/prev token context index */
  int c;
  TOKENEXTRA *t = *tp;        /* store tokens starting here */
  int eob = p->eobs[block];
  const PLANE_TYPE type = pd->plane_type;
  const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const int segment_id = mbmi->segment_id;
  const int16_t *scan, *nb;
  const scan_order *so;
  const int ref = is_inter_block(mbmi);
  // Counter and probability tables, all indexed by [tx_size][type][ref] and
  // then by [band][context] below.
  unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
      td->rd_counts.coef_counts[tx_size][type][ref];
  vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
      cpi->common.fc->coef_probs[tx_size][type][ref];
  unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
      td->counts->eob_branch[tx_size][type][ref];
  const uint8_t *const band = get_band_translate(tx_size);
  const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
  int16_t token;
  EXTRABIT extra;
  // Initial context comes from the above/left contexts at this block's
  // column/row offset.
  pt = get_entropy_context(tx_size, pd->above_context + blk_col,
                           pd->left_context + blk_row);
  so = get_scan(xd, tx_size, type, block);
  scan = so->scan;
  nb = so->neighbors;
  c = 0;
  // One iteration per nonzero coefficient: first the run of zeros leading up
  // to it, then the coefficient's own token.
  while (c < eob) {
    int v = 0;
    int skip_eob = 0;
    v = qcoeff[scan[c]];
    // NOTE(review): this loop does not test c against eob; it presumably
    // relies on a nonzero coefficient existing before eob -- confirm.
    while (!v) {
      add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
                         counts[band[c]][pt]);
      // Only the first token of this iteration (skip_eob still 0) is counted
      // in eob_branch.
      eob_branch[band[c]][pt] += !skip_eob;
      skip_eob = 1;
      token_cache[scan[c]] = 0;
      ++c;
      pt = get_coef_context(nb, token_cache, c);
      v = qcoeff[scan[c]];
    }
    vp9_get_token_extra(v, &token, &extra);
    add_token(&t, coef_probs[band[c]][pt], extra, (uint8_t)token,
              (uint8_t)skip_eob, counts[band[c]][pt]);
    eob_branch[band[c]][pt] += !skip_eob;
    // Record this token's energy class so later positions can derive their
    // context from it.
    token_cache[scan[c]] = vp9_pt_energy_class[token];
    ++c;
    pt = get_coef_context(nb, token_cache, c);
  }
  // An explicit EOB token is written only when tokenization stopped before
  // seg_eob.
  if (c < seg_eob) {
    add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
                       counts[band[c]][pt]);
    ++eob_branch[band[c]][pt];
  }
  // Publish the advanced token pointer and mark the contexts according to
  // whether any position was tokenized.
  *tp = t;
  vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, blk_col, blk_row);
 }
 static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, void *arg) {
  struct tokenize_b_args* const args = arg;
@ -609,47 +702,55 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
 void tokenize_tx(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                 int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
-                 int mi_row, int mi_col, int block, int plane,
+                 int blk_row, int blk_col, int block, int plane,
                 void *arg) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_SIZE plane_tx_size = plane ?
      get_uv_tx_size_impl(mbmi->tx_size, plane_bsize,
                          0, 0) : mbmi->tx_size;
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;
  if (tx_size == plane_tx_size) {
    if (!dry_run)
-      tokenize_b(plane, block, plane_bsize, tx_size, arg);
+      tokenize_b_inter(plane, block, plane_bsize,
                       blk_row, blk_col, tx_size, arg);
    else
-      set_entropy_context_b(plane, block, plane_bsize, tx_size, arg);
+      set_entropy_context_b_inter(plane, block, plane_bsize,
                                  blk_row, blk_col, tx_size, arg);
  } else {
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-    int bh = num_8x8_blocks_high_lookup[bsize];
+    int bh = num_4x4_blocks_wide_lookup[bsize];
    int max_blocks_high = cm->mi_rows;
    int max_blocks_wide = cm->mi_cols;
    int i;
    assert(num_4x4_blocks_high_lookup[bsize] ==
           num_4x4_blocks_wide_lookup[bsize]);
    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) * bh / 2;
      int offsetc = (i & 0x01) * bh / 2;
      int step = 1 << (2 *(tx_size - 1));
      if ((mi_row + offsetr < max_blocks_high) &&
          (mi_col + offsetc < max_blocks_wide))
      tokenize_tx(cpi, td, t, dry_run, tx_size - 1, plane_bsize,
-                    mi_row + offsetr, mi_col + offsetc,
+                  blk_row + offsetr, blk_col + offsetc,
                  block + i * step, plane, arg);
    }
  }
 }
 void vp9_tokenize_sb_inter(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                           int dry_run, int mi_row, int mi_col,
+                           int dry_run, BLOCK_SIZE bsize) {
                           BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
@ -690,8 +791,7 @@ void vp9_tokenize_sb_inter(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bh) {
        tokenize_tx(cpi, td, t, dry_run, max_txsize_lookup[plane_bsize],
-                    plane_bsize, mi_row + idy / 2, mi_col + idx / 2,
+                    plane_bsize, idy, idx, block, plane, &arg);
                    block, plane, &arg);
        block += step;
      }
    }
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@ -52,8 +52,7 @@ struct VP9_COMP;
 struct ThreadData;
 void vp9_tokenize_sb_inter(struct VP9_COMP *cpi, struct ThreadData *td,
-                           TOKENEXTRA **t, int dry_run,
+                           TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
                           int mi_row, int mi_col, BLOCK_SIZE bsize);
 void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td,
                     TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);