diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 40ad680b0..83c110264 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -29,7 +29,7 @@ typedef struct {
     B_PREDICTION_MODE mode;
     int_mv mv;
     int_mv second_mv;
-  } bmi[16];
+  } bmi[16 >> (2 * CONFIG_SB8X8)];
 } PARTITION_INFO;
 
 // Structure to hold snapshot of coding context during the mode picking process
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 95bba21a9..2a50a1550 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -395,17 +395,6 @@ static void update_state(VP9_COMP *cpi,
         x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].mv.as_int;
     mbmi->mv[1].as_int =
         x->partition_info->bmi[15 >> (CONFIG_SB8X8 * 2)].second_mv.as_int;
-#if CONFIG_SB8X8
-    vpx_memcpy(x->partition_info + mis, &ctx->partition_info,
-               sizeof(PARTITION_INFO));
-    vpx_memcpy(x->partition_info + 1, &ctx->partition_info,
-               sizeof(PARTITION_INFO));
-    vpx_memcpy(x->partition_info + mis + 1, &ctx->partition_info,
-               sizeof(PARTITION_INFO));
-    xd->mode_info_context[1].mbmi =
-    xd->mode_info_context[mis].mbmi =
-    xd->mode_info_context[1 + mis].mbmi = *mbmi;
-#endif
   }
 
   x->skip = ctx->skip;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 2c2d49b67..cf4b1e8e8 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -4870,6 +4870,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                              cpi->common.y_dc_delta_q);
 #if CONFIG_SB8X8
   int_mv seg_mvs[4][MAX_REF_FRAMES - 1];
+  union b_mode_info best_bmodes[4];
+  PARTITION_INFO best_partition;
 #endif
 
 #if CONFIG_SB8X8
@@ -5411,6 +5413,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         *returndistortion = distortion2;
         best_rd = this_rd;
         vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+#if CONFIG_SB8X8
+        vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
+
+        if (this_mode == I4X4_PRED || this_mode == SPLITMV) {
+          for (i = 0; i < 4; i++) {
+            best_bmodes[i] = xd->mode_info_context->bmi[i];
+          }
+        }
+#endif
       }
 #if 0
       // Testing this mode gave rise to an improvement in best error score.
@@ -5577,7 +5588,28 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
   // macroblock modes
   vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+#if CONFIG_SB8X8
+  if (best_mbmode.mode == I4X4_PRED) {
+    for (i = 0; i < 4; i++) {
+      xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
+    }
+  }
 
+  if (best_mbmode.mode == SPLITMV) {
+    for (i = 0; i < 4; i++)
+      xd->mode_info_context->bmi[i].as_mv[0].as_int =
+          best_bmodes[i].as_mv[0].as_int;
+    if (mbmi->second_ref_frame > 0)
+      for (i = 0; i < 4; i++)
+        xd->mode_info_context->bmi[i].as_mv[1].as_int =
+            best_bmodes[i].as_mv[1].as_int;
+
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+
+    mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
+    mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
+  }
+#endif
   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
     if (best_pred_rd[i] == INT64_MAX)
       best_pred_diff[i] = INT_MIN;
@@ -5599,7 +5631,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  end:
   set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
                     scale_factor);
-  store_coding_context(x, ctx, best_mode_index, NULL,
+  store_coding_context(x, ctx, best_mode_index,
+#if CONFIG_SB8X8
+                       &best_partition,
+#else
+                       NULL,
+#endif
                        &mbmi->ref_mvs[mbmi->ref_frame][0],
                        &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
                                       mbmi->second_ref_frame][0],