Move dst to per-plane MACROBLOCKD data

First in a series of commits moving the framebuffer pointers to per-plane data, so that they can be indexed numerically rather than by name. Change-Id: I6e0d60fd4d51e6375c384eb7321776564df21775
2013-04-19 15:52:17 -07:00 · 2013-04-19 15:52:17 -07:00 · d12376aa2c
commit d12376aa2c
parent 9ec0f658a1
14 changed files with 238 additions and 175 deletions
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@ -338,6 +338,11 @@ struct scale_factors {

 enum { MAX_MB_PLANE = 3 };

+struct buf_2d {  /* One 2-D sample buffer: a base pointer plus its row stride. */
+  uint8_t *buf;  /* First sample of the region this view addresses. */
+  int stride;  /* Offset, in uint8_t elements, from one row to the next. */
+};
+
 struct mb_plane {
  DECLARE_ALIGNED(16, int16_t,  qcoeff[64 * 64]);
  DECLARE_ALIGNED(16, int16_t,  dqcoeff[64 * 64]);
@ -346,6 +351,8 @@ struct mb_plane {
  PLANE_TYPE plane_type;
  int subsampling_x;
  int subsampling_y;
+  struct buf_2d dst;
+  struct buf_2d pre[2];
 };

 #define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
@ -366,7 +373,6 @@ typedef struct macroblockd {

  YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
  YV12_BUFFER_CONFIG second_pre;
-  YV12_BUFFER_CONFIG dst;
  struct scale_factors scale_factor[2];
  struct scale_factors scale_factor_uv[2];

--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@ -36,9 +36,9 @@ static void setup_macroblock(MACROBLOCKD *mb, BLOCKSET bs) {
  int i, stride;

  if (bs == DEST) {
-    y = &mb->dst.y_buffer;
-    u = &mb->dst.u_buffer;
-    v = &mb->dst.v_buffer;
+    y = &mb->plane[0].dst.buf;
+    u = &mb->plane[1].dst.buf;
+    v = &mb->plane[2].dst.buf;

    y2 = NULL;
    u2 = NULL;
@ -54,14 +54,14 @@ static void setup_macroblock(MACROBLOCKD *mb, BLOCKSET bs) {
  }

  // luma
-  stride = mb->dst.y_stride;
+  stride = mb->plane[0].dst.stride;
  for (i = 0; i < 16; ++i) {
    const int offset = (i >> 2) * 4 * stride + (i & 3) * 4;
    setup_block(&blockd[i], y, y2, stride, offset, bs);
  }

  // chroma
-  stride = mb->dst.uv_stride;
+  stride = mb->plane[1].dst.stride;
  for (i = 16; i < 20; i++) {
    const int offset = ((i - 16) >> 1) * 4 * stride + (i & 1) * 4;
    setup_block(&blockd[i],     u, u2, stride, offset, bs);
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@ -53,8 +53,8 @@ void vp9_recon2b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr,
 void vp9_recon_sby_c(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  const int bw = 16 << mb_width_log2(bsize), bh = 16 << mb_height_log2(bsize);
  int x, y;
-  const int stride = mb->dst.y_stride;
-  uint8_t *dst = mb->dst.y_buffer;
+  const int stride = mb->plane[0].dst.stride;
+  uint8_t *dst = mb->plane[0].dst.buf;
  const int16_t *diff = mb->plane[0].diff;

  for (y = 0; y < bh; y++) {
@ -70,9 +70,9 @@ void vp9_recon_sbuv_c(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
  const int bw = 8 << bwl, bh = 8 << bhl;
  int x, y;
-  const int stride =  mb->dst.uv_stride;
-  uint8_t *u_dst = mb->dst.u_buffer;
-  uint8_t *v_dst = mb->dst.v_buffer;
+  const int stride =  mb->plane[1].dst.stride;
+  uint8_t *u_dst = mb->plane[1].dst.buf;
+  uint8_t *v_dst = mb->plane[2].dst.buf;
  const int16_t *u_diff = mb->plane[1].diff;
  const int16_t *v_diff = mb->plane[2].diff;

--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@ -614,11 +614,11 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
 void vp9_build_inter_predictors_sb(MACROBLOCKD *xd,
                                   int mb_row, int mb_col,
                                   BLOCK_SIZE_TYPE bsize) {
-  uint8_t *const y = xd->dst.y_buffer;
-  uint8_t *const u = xd->dst.u_buffer;
-  uint8_t *const v = xd->dst.v_buffer;
-  const int y_stride = xd->dst.y_stride;
-  const int uv_stride = xd->dst.uv_stride;
+  uint8_t *const y = xd->plane[0].dst.buf;
+  uint8_t *const u = xd->plane[1].dst.buf;
+  uint8_t *const v = xd->plane[2].dst.buf;
+  const int y_stride = xd->plane[0].dst.stride;
+  const int uv_stride = xd->plane[1].dst.stride;

  vp9_build_inter_predictors_sby(xd, y, y_stride, mb_row, mb_col, bsize);
  vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col, bsize);
@ -670,8 +670,8 @@ static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
 static int64_t get_consistency_metric(MACROBLOCKD *xd,
                                      uint8_t *tmp_y, int tmp_ystride) {
  int block_size = 16 <<  xd->mode_info_context->mbmi.sb_type;
-  uint8_t *rec_y = xd->dst.y_buffer;
-  int rec_ystride = xd->dst.y_stride;
+  uint8_t *rec_y = xd->plane[0].dst.buf;
+  int rec_ystride = xd->plane[0].dst.stride;
  int64_t metric = 0;
  int i;
  if (xd->up_available) {
@ -1182,11 +1182,11 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
 void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
                                   int mb_row, int mb_col,
                                   BLOCK_SIZE_TYPE bsize) {
-  uint8_t *const y = mb->dst.y_buffer;
-  uint8_t *const u = mb->dst.u_buffer;
-  uint8_t *const v = mb->dst.v_buffer;
-  const int y_stride = mb->dst.y_stride;
-  const int uv_stride = mb->dst.uv_stride;
+  uint8_t *const y = mb->plane[0].dst.buf;
+  uint8_t *const u = mb->plane[1].dst.buf;
+  uint8_t *const v = mb->plane[2].dst.buf;
+  const int y_stride = mb->plane[0].dst.stride;
+  const int uv_stride = mb->plane[1].dst.stride;

  vp9_build_inter_predictors_sby(mb, y, y_stride, mb_row, mb_col, bsize);
  vp9_build_inter_predictors_sbuv(mb, u, v, uv_stride, mb_row, mb_col, bsize);
@ -1233,9 +1233,9 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
 /*encoder only*/
 void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
                                        int mb_row, int mb_col) {
-  uint8_t *const u = xd->dst.u_buffer;
-  uint8_t *const v = xd->dst.v_buffer;
-  const int uv_stride = xd->dst.uv_stride;
+  uint8_t *const u = xd->plane[1].dst.buf;
+  uint8_t *const v = xd->plane[2].dst.buf;
+  const int uv_stride = xd->plane[1].dst.stride;

  vp9_build_inter_predictors_sbuv(xd, u, v, uv_stride, mb_row, mb_col,
                                  BLOCK_SIZE_MB16X16);
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@ -83,8 +83,39 @@ static int scaled_buffer_offset(int x_offset,
                                int y_offset,
                                int stride,
                                const struct scale_factors *scale) {
-  return scale->scale_value_y(y_offset, scale) * stride +
-      scale->scale_value_x(x_offset, scale);
+  if (scale)
+    return scale->scale_value_y(y_offset, scale) * stride +
+        scale->scale_value_x(x_offset, scale);
+  return y_offset * stride + x_offset;
+}
+
+static void setup_pred_plane(struct buf_2d *dst,  /* out: plane view to fill in */
+                             uint8_t *src, int stride,  /* plane base pointer and its row stride */
+                             int mb_row, int mb_col,  /* position in units of 16 samples */
+                             const struct scale_factors *scale,  /* NULL selects the unscaled offset path */
+                             int subsampling_x, int subsampling_y) {  /* right-shifts applied to x and y */
+  const int x = (16 * mb_col) >> subsampling_x;  /* column offset within this plane */
+  const int y = (16 * mb_row) >> subsampling_y;  /* row offset within this plane */
+  dst->buf = src + scaled_buffer_offset(x, y, stride, scale);  /* advance from the plane base to (x, y) */
+  dst->stride = stride;  /* stride is passed through unchanged */
+}
+
+// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col
+static void setup_dst_planes(MACROBLOCKD *xd,  /* Points xd->plane[0..2].dst at src's Y, U and V buffers. */
+                             const YV12_BUFFER_CONFIG *src,  /* frame buffer supplying the three planes */
+                             int mb_row, int mb_col) {  /* position forwarded to setup_pred_plane */
+  setup_pred_plane(&xd->plane[0].dst,  /* plane 0 takes the Y buffer */
+                   src->y_buffer, src->y_stride,
+                   mb_row, mb_col, NULL,  /* NULL scale: offset is computed without scaling */
+                   xd->plane[0].subsampling_x, xd->plane[0].subsampling_y);
+  setup_pred_plane(&xd->plane[1].dst,  /* plane 1 takes the U buffer */
+                   src->u_buffer, src->uv_stride,  /* U and V share uv_stride */
+                   mb_row, mb_col, NULL,
+                   xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
+  setup_pred_plane(&xd->plane[2].dst,  /* plane 2 takes the V buffer */
+                   src->v_buffer, src->uv_stride,
+                   mb_row, mb_col, NULL,
+                   xd->plane[2].subsampling_x, xd->plane[2].subsampling_y);
 }

 static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@ -609,7 +609,7 @@ void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
                                               int ystride) {
  uint8_t intrapredictor[256];
  vp9_build_intra_predictors(
-      xd->dst.y_buffer, xd->dst.y_stride,
+      xd->plane[0].dst.buf, xd->plane[0].dst.stride,
      intrapredictor, 16,
      xd->mode_info_context->mbmi.interintra_mode, 16, 16,
      xd->up_available, xd->left_available, xd->right_available);
@ -624,12 +624,12 @@ void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
  uint8_t uintrapredictor[64];
  uint8_t vintrapredictor[64];
  vp9_build_intra_predictors(
-      xd->dst.u_buffer, xd->dst.uv_stride,
+      xd->plane[1].dst.buf, xd->plane[1].dst.stride,
      uintrapredictor, 8,
      xd->mode_info_context->mbmi.interintra_uv_mode, 8, 8,
      xd->up_available, xd->left_available, xd->right_available);
  vp9_build_intra_predictors(
-      xd->dst.v_buffer, xd->dst.uv_stride,
+      xd->plane[2].dst.buf, xd->plane[1].dst.stride,
      vintrapredictor, 8,
      xd->mode_info_context->mbmi.interintra_uv_mode, 8, 8,
      xd->up_available, xd->left_available, xd->right_available);
@ -644,7 +644,7 @@ void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd,
                                               int ystride) {
  uint8_t intrapredictor[1024];
  vp9_build_intra_predictors(
-      xd->dst.y_buffer, xd->dst.y_stride,
+      xd->plane[0].dst.buf, xd->plane[0].dst.stride,
      intrapredictor, 32,
      xd->mode_info_context->mbmi.interintra_mode, 32, 32,
      xd->up_available, xd->left_available, xd->right_available);
@ -659,12 +659,12 @@ void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd,
  uint8_t uintrapredictor[256];
  uint8_t vintrapredictor[256];
  vp9_build_intra_predictors(
-      xd->dst.u_buffer, xd->dst.uv_stride,
+      xd->plane[1].dst.buf, xd->plane[1].dst.stride,
      uintrapredictor, 16,
      xd->mode_info_context->mbmi.interintra_uv_mode, 16, 16,
      xd->up_available, xd->left_available, xd->right_available);
  vp9_build_intra_predictors(
-      xd->dst.v_buffer, xd->dst.uv_stride,
+      xd->plane[2].dst.buf, xd->plane[1].dst.stride,
      vintrapredictor, 16,
      xd->mode_info_context->mbmi.interintra_uv_mode, 16, 16,
      xd->up_available, xd->left_available, xd->right_available);
@ -689,7 +689,7 @@ void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd,
                                               int ystride) {
  uint8_t intrapredictor[4096];
  const int mode = xd->mode_info_context->mbmi.interintra_mode;
-  vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_intra_predictors(xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                             intrapredictor, 64, mode, 64, 64,
                             xd->up_available, xd->left_available,
                             xd->right_available);
@ -704,11 +704,11 @@ void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd,
  uint8_t uintrapredictor[1024];
  uint8_t vintrapredictor[1024];
  const int mode = xd->mode_info_context->mbmi.interintra_uv_mode;
-  vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                             uintrapredictor, 32, mode, 32, 32,
                             xd->up_available, xd->left_available,
                             xd->right_available);
-  vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                             vintrapredictor, 32, mode, 32, 32,
                             xd->up_available, xd->left_available,
                             xd->right_available);
@ -734,8 +734,8 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd,
  const int bwl = b_width_log2(bsize),  bw = 4 << bwl;
  const int bhl = b_height_log2(bsize), bh = 4 << bhl;

-  vp9_build_intra_predictors(xd->dst.y_buffer, xd->dst.y_stride,
-                             xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_intra_predictors(xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                             xd->mode_info_context->mbmi.mode,
                             bw, bh,
                             xd->up_available, xd->left_available,
@ -747,13 +747,13 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd,
  const int bwl = b_width_log2(bsize)  - 1, bw = 4 << bwl;
  const int bhl = b_height_log2(bsize) - 1, bh = 4 << bhl;

-  vp9_build_intra_predictors(xd->dst.u_buffer, xd->dst.uv_stride,
-                             xd->dst.u_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                             xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                             xd->mode_info_context->mbmi.uv_mode,
                             bw, bh, xd->up_available,
                             xd->left_available, xd->right_available);
-  vp9_build_intra_predictors(xd->dst.v_buffer, xd->dst.uv_stride,
-                             xd->dst.v_buffer, xd->dst.uv_stride,
+  vp9_build_intra_predictors(xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                             xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                             xd->mode_info_context->mbmi.uv_mode,
                             bw, bh, xd->up_available,
                             xd->left_available, xd->right_available);
--- a/vp9/common/x86/vp9_recon_wrapper_sse2.c
+++ b/vp9/common/x86/vp9_recon_wrapper_sse2.c
@ -35,7 +35,7 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
                                            build_intra_pred_mbuv_fn_t ho_fn) {
  int mode = xd->mode_info_context->mbmi.uv_mode;
  build_intra_pred_mbuv_fn_t fn;
-  int src_stride = xd->dst.uv_stride;
+  int src_stride = xd->plane[1].dst.stride;

  switch (mode) {
    case  V_PRED:
@ -68,34 +68,34 @@ static void build_intra_predictors_mbuv_x86(MACROBLOCKD *xd,
      return;
  }

-  fn(dst_u, dst_stride, xd->dst.u_buffer, src_stride);
-  fn(dst_v, dst_stride, xd->dst.v_buffer, src_stride);
+  fn(dst_u, dst_stride, xd->plane[1].dst.buf, src_stride);
+  fn(dst_v, dst_stride, xd->plane[2].dst.buf, src_stride);
 }

 void vp9_build_intra_predictors_mbuv_sse2(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                  vp9_intra_pred_uv_tm_sse2,
                                  vp9_intra_pred_uv_ho_mmx2);
 }

 void vp9_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                  vp9_intra_pred_uv_tm_ssse3,
                                  vp9_intra_pred_uv_ho_ssse3);
 }

 void vp9_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                  vp9_intra_pred_uv_tm_sse2,
                                  vp9_intra_pred_uv_ho_mmx2);
 }

 void vp9_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *xd) {
-  build_intra_predictors_mbuv_x86(xd, xd->dst.u_buffer,
-                                  xd->dst.v_buffer, xd->dst.uv_stride,
+  build_intra_predictors_mbuv_x86(xd, xd->plane[1].dst.buf,
+                                  xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                                  vp9_intra_pred_uv_tm_ssse3,
                                  vp9_intra_pred_uv_ho_ssse3);
 }
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@ -250,15 +250,15 @@ static void decode_16x16(MACROBLOCKD *xd) {
  const TX_TYPE tx_type = get_tx_type_16x16(xd, 0);

  vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
-                              xd->block[0].dequant, xd->dst.y_buffer,
-                              xd->dst.y_stride, xd->plane[0].eobs[0]);
+                              xd->block[0].dequant, xd->plane[0].dst.buf,
+                              xd->plane[0].dst.stride, xd->plane[0].eobs[0]);

  vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
-                           xd->dst.u_buffer, xd->dst.uv_stride,
+                           xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                           xd->plane[1].eobs[0]);

  vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[20].dequant,
-                           xd->dst.v_buffer, xd->dst.uv_stride,
+                           xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                           xd->plane[2].eobs[0]);
 }

@ -275,7 +275,7 @@ static void decode_8x8(MACROBLOCKD *xd) {
      int16_t *q  = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
      int16_t *dq = xd->block[0].dequant;
      uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
-      int stride = xd->dst.y_stride;
+      int stride = xd->plane[0].dst.stride;
      if (mode == I8X8_PRED) {
        BLOCKD *b = &xd->block[ib];
        int i8x8mode = b->bmi.as_mode.first;
@ -287,8 +287,8 @@ static void decode_8x8(MACROBLOCKD *xd) {
    }
  } else {
    vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
-                                     xd->block[0].dequant, xd->dst.y_buffer,
-                                     xd->dst.y_stride, xd);
+                                     xd->block[0].dequant, xd->plane[0].dst.buf,
+                                     xd->plane[0].dst.stride, xd);
  }

  // chroma
@ -315,16 +315,16 @@ static void decode_8x8(MACROBLOCKD *xd) {
    }
  } else if (mode == SPLITMV) {
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-         xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+         xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-         xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+         xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
  } else {
    vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
-                             xd->dst.u_buffer, xd->dst.uv_stride,
+                             xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                             xd->plane[1].eobs[0]);

    vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
-                             xd->dst.v_buffer, xd->dst.uv_stride,
+                             xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                             xd->plane[2].eobs[0]);
  }
 }
@ -396,27 +396,27 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) {
 #endif
    vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-         xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+         xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-         xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+         xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
  } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
    xd->itxm_add_y_block(xd->plane[0].qcoeff,
                          xd->block[0].dequant,
-                          xd->dst.y_buffer, xd->dst.y_stride, xd);
+                          xd->plane[0].dst.buf, xd->plane[0].dst.stride, xd);
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-         xd->dst.u_buffer, xd->dst.uv_stride, xd->plane[1].eobs);
+         xd->plane[1].dst.buf, xd->plane[1].dst.stride, xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-         xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
+         xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].eobs);
  } else {
    for (i = 0; i < 16; i++) {
      tx_type = get_tx_type_4x4(xd, i);
      dequant_add_y(xd, tx_type, i);
    }
    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
-                          xd->dst.u_buffer, xd->dst.uv_stride,
+                          xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                          xd->plane[1].eobs);
    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
-                          xd->dst.v_buffer, xd->dst.uv_stride,
+                          xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                          xd->plane[2].eobs);
  }
 }
@ -430,10 +430,11 @@ static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < y_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32);
+    const int y_offset = (y_idx * 32) * mb->plane[0].dst.stride + (x_idx * 32);
    vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024),
                               mb->block[0].dequant ,
-                               mb->dst.y_buffer + y_offset, mb->dst.y_stride,
+                               mb->plane[0].dst.buf + y_offset,
+                               mb->plane[0].dst.stride,
                               mb->plane[0].eobs[n * 64]);
  }
 }
@ -446,15 +447,18 @@ static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < uv_count; n++) {
     const int x_idx = n & (bw - 1);
     const int y_idx = n >> (bwl - 1);
-     const int uv_offset = (y_idx * 32) * mb->dst.uv_stride + (x_idx * 32);
+     const int uv_offset = (y_idx * 32) * mb->plane[1].dst.stride +
+         (x_idx * 32);
     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024),
                                mb->block[16].dequant,
-                                mb->dst.u_buffer + uv_offset,
-                                mb->dst.uv_stride, mb->plane[1].eobs[n * 64]);
+                                mb->plane[1].dst.buf + uv_offset,
+                                mb->plane[1].dst.stride,
+                                mb->plane[1].eobs[n * 64]);
     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024),
                                mb->block[20].dequant,
-                                mb->dst.v_buffer + uv_offset,
-                                mb->dst.uv_stride, mb->plane[2].eobs[n * 64]);
+                                mb->plane[2].dst.buf + uv_offset,
+                                mb->plane[1].dst.stride,
+                                mb->plane[2].eobs[n * 64]);
  }
 }

@ -467,14 +471,14 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < y_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
+    const int y_offset = (y_idx * 16) * mb->plane[0].dst.stride + (x_idx * 16);
    const TX_TYPE tx_type = get_tx_type_16x16(mb,
                                (y_idx * (4 * bw) + x_idx) * 4);
    vp9_dequant_iht_add_16x16_c(tx_type,
                                BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                mb->block[0].dequant,
-                                mb->dst.y_buffer + y_offset,
-                                mb->dst.y_stride,
+                                mb->plane[0].dst.buf + y_offset,
+                                mb->plane[0].dst.stride,
                                mb->plane[0].eobs[n * 16]);
  }
 }
@ -490,14 +494,16 @@ static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < uv_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
+    const int uv_offset = (y_idx * 16) * mb->plane[1].dst.stride + (x_idx * 16);
    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
                               mb->block[16].dequant,
-                               mb->dst.u_buffer + uv_offset, mb->dst.uv_stride,
+                               mb->plane[1].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
                               mb->plane[1].eobs[n * 16]);
    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
                               mb->block[20].dequant,
-                               mb->dst.v_buffer + uv_offset, mb->dst.uv_stride,
+                               mb->plane[2].dst.buf + uv_offset,
+                               mb->plane[1].dst.stride,
                               mb->plane[2].eobs[n * 16]);
  }
 }
@ -512,14 +518,15 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < y_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
+    const int y_offset = (y_idx * 8) * xd->plane[0].dst.stride + (x_idx * 8);
    const TX_TYPE tx_type = get_tx_type_8x8(xd,
                                            (y_idx * (2 * bw) + x_idx) * 2);

    vp9_dequant_iht_add_8x8_c(tx_type,
                              BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
                              xd->block[0].dequant,
-                              xd->dst.y_buffer + y_offset, xd->dst.y_stride,
+                              xd->plane[0].dst.buf + y_offset,
+                              xd->plane[0].dst.stride,
                              xd->plane[0].eobs[n * 4]);
  }
 }
@ -534,14 +541,16 @@ static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < uv_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
+    const int uv_offset = (y_idx * 8) * xd->plane[1].dst.stride + (x_idx * 8);
    vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
                             xd->block[16].dequant,
-                             xd->dst.u_buffer + uv_offset, xd->dst.uv_stride,
+                             xd->plane[1].dst.buf + uv_offset,
+                             xd->plane[1].dst.stride,
                             xd->plane[1].eobs[n * 4]);
    vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
                             xd->block[20].dequant,
-                             xd->dst.v_buffer + uv_offset, xd->dst.uv_stride,
+                             xd->plane[2].dst.buf + uv_offset,
+                             xd->plane[1].dst.stride,
                             xd->plane[2].eobs[n * 4]);
  }
 }
@ -555,18 +564,19 @@ static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < y_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> bwl;
-    const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
+    const int y_offset = (y_idx * 4) * xd->plane[0].dst.stride + (x_idx * 4);
    const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
    if (tx_type == DCT_DCT) {
      xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
                   xd->block[0].dequant,
-                   xd->dst.y_buffer + y_offset, xd->dst.y_stride,
+                   xd->plane[0].dst.buf + y_offset, xd->plane[0].dst.stride,
                   xd->plane[0].eobs[n]);
    } else {
      vp9_dequant_iht_add_c(tx_type,
                            BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
-                            xd->block[0].dequant, xd->dst.y_buffer + y_offset,
-                            xd->dst.y_stride, xd->plane[0].eobs[n]);
+                            xd->block[0].dequant,
+                            xd->plane[0].dst.buf + y_offset,
+                            xd->plane[0].dst.stride, xd->plane[0].eobs[n]);
    }
  }
 }
@ -580,13 +590,15 @@ static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
  for (n = 0; n < uv_count; n++) {
    const int x_idx = n & (bw - 1);
    const int y_idx = n >> (bwl - 1);
-    const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
+    const int uv_offset = (y_idx * 4) * xd->plane[1].dst.stride + (x_idx * 4);
    xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
        xd->block[16].dequant,
-        xd->dst.u_buffer + uv_offset, xd->dst.uv_stride, xd->plane[1].eobs[n]);
+        xd->plane[1].dst.buf + uv_offset, xd->plane[1].dst.stride,
+        xd->plane[1].eobs[n]);
    xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
        xd->block[20].dequant,
-        xd->dst.v_buffer + uv_offset, xd->dst.uv_stride, xd->plane[2].eobs[n]);
+        xd->plane[2].dst.buf + uv_offset, xd->plane[1].dst.stride,
+        xd->plane[2].eobs[n]);
  }
 }

@ -758,21 +770,21 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
    printf("final y\n");
    for (i = 0; i < 16; i++) {
      for (j = 0; j < 16; j++)
-        printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+        printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
      printf("\n");
    }
    printf("\n");
    printf("final u\n");
    for (i = 0; i < 8; i++) {
      for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
      printf("\n");
    }
    printf("\n");
    printf("final v\n");
    for (i = 0; i < 8; i++) {
      for (j = 0; j < 8; j++)
-        printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+        printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
      printf("\n");
    }
    fflush(stdout);
@ -820,9 +832,9 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize,
  set_mb_row(cm, xd, mb_row, bh);
  set_mb_col(cm, xd, mb_col, bw);

-  xd->dst.y_buffer = dst_fb->y_buffer + recon_yoffset;
-  xd->dst.u_buffer = dst_fb->u_buffer + recon_uvoffset;
-  xd->dst.v_buffer = dst_fb->v_buffer + recon_uvoffset;
+  xd->plane[0].dst.buf = dst_fb->y_buffer + recon_yoffset;
+  xd->plane[1].dst.buf = dst_fb->u_buffer + recon_uvoffset;
+  xd->plane[2].dst.buf = dst_fb->v_buffer + recon_uvoffset;
 }

 static void set_refs(VP9D_COMP *pbi, int mb_row, int mb_col) {
@ -1651,8 +1663,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
  // Initialize xd pointers. Any reference should do for xd->pre, so use 0.
  vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]],
             sizeof(YV12_BUFFER_CONFIG));
-  vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx],
-             sizeof(YV12_BUFFER_CONFIG));
+  setup_dst_planes(xd, &pc->yv12_fb[pc->new_fb_idx], 0, 0);

  // Create the segmentation map structure and set to 0
  if (!pc->last_frame_seg_map)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@ -278,7 +278,7 @@ static void build_activity_map(VP9_COMP *cpi) {
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
 #if ALT_ACT_MEASURE
-      xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
      xd->left_available = (mb_col != 0);
      recon_yoffset += 16;
 #endif
@ -305,8 +305,8 @@ static void build_activity_map(VP9_COMP *cpi) {

 #if ALT_ACT_MEASURE
    // extend the recon for intra prediction
-    vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
-                      xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+    vp9_extend_mb_row(new_yv12, xd->plane[0].dst.buf + 16,
+                      xd->plane[1].dst.buf + 8, xd->plane[2].dst.buf + 8);
 #endif

  }
@ -565,9 +565,7 @@ static void set_offsets(VP9_COMP *cpi,
  xd->prev_mode_info_context = cm->prev_mi + idx_str;

  // Set up destination pointers
-  setup_pred_block(&xd->dst,
-                   &cm->yv12_fb[dst_fb_idx],
-                   mb_row, mb_col, NULL, NULL);
+  setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mb_row, mb_col);

  /* Set up limit values for MV components to prevent them from
   * extending beyond the UMV borders assuming 16x16 block size */
@ -1244,7 +1242,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  // Copy data over into macro block data structures.
  x->src = *cpi->Source;
  xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
-  xd->dst = cm->yv12_fb[cm->new_fb_idx];
+  setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);

  // set up frame for intra coded blocks
  vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
@ -2131,11 +2129,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
 #if CONFIG_COMP_INTERINTRA_PRED
      if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
        vp9_build_interintra_16x16_predictors_mb(xd,
-                                                 xd->dst.y_buffer,
-                                                 xd->dst.u_buffer,
-                                                 xd->dst.v_buffer,
-                                                 xd->dst.y_stride,
-                                                 xd->dst.uv_stride);
+                                                 xd->plane[0].dst.buf,
+                                                 xd->plane[1].dst.buf,
+                                                 xd->plane[2].dst.buf,
+                                                 xd->plane[0].dst.stride,
+                                                 xd->plane[1].dst.stride);
      }
 #endif
    }
@ -2173,21 +2171,21 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
      printf("final y\n");
      for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j++)
-          printf("%3d ", xd->dst.y_buffer[i * xd->dst.y_stride + j]);
+          printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
        printf("\n");
      }
      printf("\n");
      printf("final u\n");
      for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
-          printf("%3d ", xd->dst.u_buffer[i * xd->dst.uv_stride + j]);
+          printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
        printf("\n");
      }
      printf("\n");
      printf("final v\n");
      for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
-          printf("%3d ", xd->dst.v_buffer[i * xd->dst.uv_stride + j]);
+          printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
        printf("\n");
      }
      fflush(stdout);
@ -2245,13 +2243,13 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const uint8_t *src = x->src.y_buffer;
-  uint8_t *dst = xd->dst.y_buffer;
+  uint8_t *dst = xd->plane[0].dst.buf;
  const uint8_t *usrc = x->src.u_buffer;
-  uint8_t *udst = xd->dst.u_buffer;
+  uint8_t *udst = xd->plane[1].dst.buf;
  const uint8_t *vsrc = x->src.v_buffer;
-  uint8_t *vdst = xd->dst.v_buffer;
-  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
-  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  uint8_t *vdst = xd->plane[2].dst.buf;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->plane[0].dst.stride;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->plane[1].dst.stride;
  int n;
  MODE_INFO *mi = x->e_mbd.mode_info_context;
  unsigned int segment_id = mi->mbmi.segment_id;
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@ -88,7 +88,7 @@ void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
  vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16);
  vp9_subtract_sby_s_c(x->src_diff,
                       x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                       BLOCK_SIZE_MB16X16);

  switch (tx_size) {
@ -125,7 +125,8 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
  vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
  vp9_subtract_sbuv_s_c(x->src_diff,
                        x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
-                        xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
+                        xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+                        xd->plane[1].dst.stride,
                        BLOCK_SIZE_MB16X16);

  switch (tx_size) {
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@ -107,11 +107,12 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
 static void subtract_mb(MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                       BLOCK_SIZE_MB16X16);
  vp9_subtract_sbuv_s_c(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                        x->src.uv_stride,
-                        xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride,
+                        xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+                        xd->plane[1].dst.stride,
                        BLOCK_SIZE_MB16X16);
 }

@ -913,10 +914,11 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
 void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
  MACROBLOCKD *xd = &x->e_mbd;

-  vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_inter_predictors_sby(xd, xd->plane[0].dst.buf,
+                                 xd->plane[0].dst.stride,
                                 mb_row, mb_col, BLOCK_SIZE_MB16X16);
  vp9_subtract_sby_s_c(x->src_diff, x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                       BLOCK_SIZE_MB16X16);

  vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@ -32,6 +32,8 @@
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "./vpx_scale_rtcd.h"
+// TODO(jkoleszar): for setup_dst_planes
+#include "vp9/common/vp9_reconinter.h"

 #define OUTPUT_FPF 0

@ -484,7 +486,7 @@ void vp9_first_pass(VP9_COMP *cpi) {

  x->src = * cpi->Source;
  xd->pre = *lst_yv12;
-  xd->dst = *new_yv12;
+  setup_dst_planes(xd, new_yv12, 0, 0);

  x->partition_info = x->pi;

@ -533,9 +535,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
      int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);

      set_mb_col(cm, xd, mb_col, 1 << mb_height_log2(BLOCK_SIZE_MB16X16));
-      xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
-      xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
-      xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
+      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
+      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
+      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
      xd->left_available = (mb_col != 0);

      // do intra 16x16 prediction
@ -700,8 +702,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
    x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;

    // extend the recon for intra prediction
-    vp9_extend_mb_row(new_yv12, xd->dst.y_buffer + 16,
-                      xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+    vp9_extend_mb_row(new_yv12, xd->plane[0].dst.buf + 16,
+                      xd->plane[1].dst.buf + 8, xd->plane[2].dst.buf + 8);
    vp9_clear_system_state();  // __asm emms;
  }

--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@ -71,10 +71,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
  }

  vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
-  vp9_build_inter_predictors_sby(xd, xd->dst.y_buffer, xd->dst.y_stride,
+  vp9_build_inter_predictors_sby(xd, xd->plane[0].dst.buf,
+                                 xd->plane[0].dst.stride,
                                 mb_row, mb_col, BLOCK_SIZE_MB16X16);
  best_err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
-                          xd->dst.y_buffer, xd->dst.y_stride, INT_MAX);
+                          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                          INT_MAX);

  /* restore UMV window */
  x->mv_col_min = tmp_col_min;
@ -189,13 +191,13 @@ static int find_best_16x16_intra

    xd->mode_info_context->mbmi.mode = mode;
    vp9_build_intra_predictors(x->src.y_buffer, x->src.y_stride,
-                               xd->dst.y_buffer, xd->dst.y_stride,
+                               xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                               xd->mode_info_context->mbmi.mode,
                               bw, bh,
                               xd->up_available, xd->left_available,
                               xd->right_available);
    err = vp9_sad16x16(x->src.y_buffer, x->src.y_stride,
-                       xd->dst.y_buffer, xd->dst.y_stride, best_err);
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);

    // find best
    if (err < best_err) {
@ -234,8 +236,8 @@ static void update_mbgraph_mb_stats
  x->src.y_buffer = buf->y_buffer + mb_y_offset;
  x->src.y_stride = buf->y_stride;

-  xd->dst.y_buffer = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset;
-  xd->dst.y_stride = cm->yv12_fb[cm->new_fb_idx].y_stride;
+  xd->plane[0].dst.buf = cm->yv12_fb[cm->new_fb_idx].y_buffer + mb_y_offset;
+  xd->plane[0].dst.stride = cm->yv12_fb[cm->new_fb_idx].y_stride;

  // do intra 16x16 prediction
  intra_error = find_best_16x16_intra(cpi, buf, mb_y_offset,
@ -303,9 +305,9 @@ static void update_mbgraph_frame_stats
  x->mv_row_max     = (cm->mb_rows - 1) * 16 + VP9BORDERINPIXELS
                      - 16 - VP9_INTERP_EXTEND;
  xd->up_available  = 0;
-  xd->dst.y_stride  = buf->y_stride;
+  xd->plane[0].dst.stride  = buf->y_stride;
  xd->pre.y_stride  = buf->y_stride;
-  xd->dst.uv_stride = buf->uv_stride;
+  xd->plane[1].dst.stride = buf->uv_stride;
  xd->mode_info_context = &mi_local;

  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@ -835,8 +835,8 @@ static void super_block_yrd(VP9_COMP *cpi,
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
-  uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
-  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+  uint8_t *src = x->src.y_buffer, *dst = xd->plane[0].dst.buf;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->plane[0].dst.stride;

  vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, bs);

@ -1519,9 +1519,9 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                             BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
-  uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
-  uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
-  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  uint8_t *usrc = x->src.u_buffer, *udst = xd->plane[1].dst.buf;
+  uint8_t *vsrc = x->src.v_buffer, *vdst = xd->plane[2].dst.buf;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->plane[1].dst.stride;

  vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
                        udst, vdst, dst_uv_stride, bsize);
@ -3035,7 +3035,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        int tmp_dist_y, tmp_dist_u, tmp_dist_v;
        vp9_build_inter_predictors_sb(xd, mb_row, mb_col, bsize);
        var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
-                                         xd->dst.y_buffer, xd->dst.y_stride,
+                                         xd->plane[0].dst.buf,
+                                         xd->plane[0].dst.stride,
                                         &sse);
        // Note our transform coeffs are 8 times an orthogonal transform.
        // Hence quantizer step is also 8 times. To get effective quantizer
@ -3044,13 +3045,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 xd->block[0].dequant[1] >> 3,
                                 &tmp_rate_y, &tmp_dist_y);
        var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
-                                            xd->dst.u_buffer, xd->dst.uv_stride,
+                                            xd->plane[1].dst.buf,
+                                            xd->plane[1].dst.stride,
                                            &sse);
        model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
                                 xd->block[16].dequant[1] >> 3,
                                 &tmp_rate_u, &tmp_dist_u);
        var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
-                                            xd->dst.v_buffer, xd->dst.uv_stride,
+                                            xd->plane[2].dst.buf,
+                                            xd->plane[1].dst.stride,
                                            &sse);
        model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
                                 xd->block[20].dequant[1] >> 3,
@ -3079,15 +3082,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        int i;
        for (i = 0; i < 16 * bh; ++i)
          vpx_memcpy(tmp_ybuf + i * 16 * bw,
-                     xd->dst.y_buffer + i * xd->dst.y_stride,
+                     xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                     sizeof(unsigned char) * 16 * bw);
        for (i = 0; i < 8 * bh; ++i)
          vpx_memcpy(tmp_ubuf + i * 8 * bw,
-                     xd->dst.u_buffer + i * xd->dst.uv_stride,
+                     xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                     sizeof(unsigned char) * 8 * bw);
        for (i = 0; i < 8 * bh; ++i)
          vpx_memcpy(tmp_vbuf + i * 8 * bw,
-                     xd->dst.v_buffer + i * xd->dst.uv_stride,
+                     xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                     sizeof(unsigned char) * 8 * bw);
        pred_exists = 1;
      }
@ -3120,18 +3123,21 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        int tmp_dist_y, tmp_dist_u, tmp_dist_v;
        vp9_build_inter_predictors_sb(xd, mb_row, mb_col, BLOCK_SIZE_MB16X16);
        var = vp9_variance16x16(*(b->base_src), b->src_stride,
-                                xd->dst.y_buffer, xd->dst.y_stride, &sse);
+                                xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                                &sse);
        // Note our transform coeffs are 8 times an orthogonal transform.
        // Hence quantizer step is also 8 times. To get effective quantizer
        // we need to divide by 8 before sending to modeling function.
        model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
                                 &tmp_rate_y, &tmp_dist_y);
        var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
-                              xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+                              xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                              &sse);
        model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
                                 &tmp_rate_u, &tmp_dist_u);
        var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
-                              xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+                              xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                              &sse);
        model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
                                 &tmp_rate_v, &tmp_dist_v);
        rd = RDCOST(x->rdmult, x->rddiv,
@ -3158,15 +3164,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        int i;
        for (i = 0; i < 16 * bh; ++i)
          vpx_memcpy(tmp_ybuf + i * 16 * bw,
-                     xd->dst.y_buffer + i * xd->dst.y_stride,
+                     xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                     sizeof(unsigned char) * 16 * bw);
        for (i = 0; i < 8 * bh; ++i)
          vpx_memcpy(tmp_ubuf + i * 8 * bw,
-                     xd->dst.u_buffer + i * xd->dst.uv_stride,
+                     xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                     sizeof(unsigned char) * 8 * bw);
        for (i = 0; i < 8 * bh; ++i)
          vpx_memcpy(tmp_vbuf + i * 8 * bw,
-                     xd->dst.v_buffer + i * xd->dst.uv_stride,
+                     xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                     sizeof(unsigned char) * 8 * bw);
        pred_exists = 1;
      }
@ -3185,13 +3191,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
  if (pred_exists) {
    // FIXME(rbultje): mb code still predicts into xd->predictor
    for (i = 0; i < bh * 16; ++i)
-      vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride,
+      vpx_memcpy(xd->plane[0].dst.buf + i * xd->plane[0].dst.stride,
                 tmp_ybuf + i * bw * 16, sizeof(unsigned char) * bw * 16);
    for (i = 0; i < bh * 8; ++i)
-      vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride,
+      vpx_memcpy(xd->plane[1].dst.buf + i * xd->plane[1].dst.stride,
                 tmp_ubuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
    for (i = 0; i < bh * 8; ++i)
-      vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride,
+      vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
                 tmp_vbuf + i * bw * 8, sizeof(unsigned char) * bw * 8);
  } else {
    // Handles the special case when a filter that is not in the
@ -3217,11 +3223,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

    if (bsize != BLOCK_SIZE_MB16X16) {
      var = cpi->fn_ptr[block_size].vf(*(b->base_src), b->src_stride,
-                                       xd->dst.y_buffer, xd->dst.y_stride,
+                                       xd->plane[0].dst.buf,
+                                       xd->plane[0].dst.stride,
                                       &sse);
    } else {
      var = vp9_variance16x16(*(b->base_src), b->src_stride,
-                              xd->dst.y_buffer, xd->dst.y_stride, &sse);
+                              xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                              &sse);
    }

    if ((int)sse < threshold) {
@ -3237,18 +3245,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
          unsigned int sse2u, sse2v;
          // FIXME(rbultje): mb predictors predict into xd->predictor
          var = cpi->fn_ptr[uv_block_size].vf(x->src.u_buffer, x->src.uv_stride,
-                                              xd->dst.u_buffer,
-                                              xd->dst.uv_stride, &sse2u);
+                                              xd->plane[1].dst.buf,
+                                              xd->plane[1].dst.stride, &sse2u);
          var = cpi->fn_ptr[uv_block_size].vf(x->src.v_buffer, x->src.uv_stride,
-                                              xd->dst.v_buffer,
-                                              xd->dst.uv_stride, &sse2v);
+                                              xd->plane[2].dst.buf,
+                                              xd->plane[1].dst.stride, &sse2v);
          sse2 = sse2u + sse2v;
        } else {
          unsigned int sse2u, sse2v;
          var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
-                                xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
+                                xd->plane[1].dst.buf, xd->plane[1].dst.stride,
+                                &sse2u);
          var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
-                                xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+                                xd->plane[2].dst.buf, xd->plane[1].dst.stride,
+                                &sse2v);
          sse2 = sse2u + sse2v;
        }

@ -3764,8 +3774,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
        vp9_subtract_sbuv_s_c(x->src_diff,
                              x->src.u_buffer,
                              x->src.v_buffer, x->src.uv_stride,
-                              xd->dst.u_buffer,
-                              xd->dst.v_buffer, xd->dst.uv_stride,
+                              xd->plane[1].dst.buf,
+                              xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                              BLOCK_SIZE_MB16X16);

        super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv,