diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index ae9f0aaa7..b4c06f5dd 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -143,6 +143,11 @@ struct macroblock {
   int rd_search;
   int skip_encode;
 
+  // Used to store the sub-partitions' choices.
+  int fast_ms;
+  int_mv pred_mv;
+  int subblock_ref;
+
   // TODO(jingning): Need to refactor the structure arrays that buffers the
   // coding mode decisions of each partition type.
   PICK_MODE_CONTEXT ab4x4_context[4][4][4];
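Note: a minimal sketch of what the three new macroblock fields carry, for readers of the patch. The int_mv_sketch stand-in and the small_partition_hint wrapper are illustrative only; in the patch the fields live directly in struct macroblock and use libvpx's int_mv.

#include <stdint.h>

/* Stand-in for libvpx's int_mv union (row/col in 1/8-pel units). */
typedef union {
  uint32_t as_int;
  struct { int16_t row, col; } as_mv;
} int_mv_sketch;

/* Roles of the added fields:
 *   fast_ms      0 = no hint; 1..3 = increasingly strong agreement between
 *                the four subblocks' motion search results
 *   pred_mv      average (sum >> 2) of the four subblock MVs, stored
 *                sign-normalized and used to seed the larger partition's
 *                motion search
 *   subblock_ref the subblocks' common reference frame, only trusted when
 *                fast_ms == 3 */
typedef struct {
  int fast_ms;
  int_mv_sketch pred_mv;
  int subblock_ref;
} small_partition_hint;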
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 48c1b3361..3dd235a1f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1466,6 +1466,138 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
     }
   }
+
+  x->fast_ms = 0;
+  x->pred_mv.as_int = 0;
+  x->subblock_ref = 0;
+
+  // Use 4 subblocks' motion estimation results to speed up the current
+  // partition's checking.
+  if (cpi->sf.using_small_partition_info) {
+    // Only use the 8x8 result for non-HD videos.
+    // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
+    int use_8x8 = 1;
+
+    if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
+        ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
+        bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+      int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+
+      if (bsize == BLOCK_SIZE_MB16X16) {
+        ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
+            ref_frame[0];
+        ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
+            ref_frame[0];
+        ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
+            ref_frame[0];
+        ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
+            ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB32X32) {
+        ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
+        ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
+        ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
+        ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+      } else if (bsize == BLOCK_SIZE_SB64X64) {
+        ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
+        ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
+        ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
+        ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+      }
+
+      // Currently, only consider 4 inter ref frames.
+      if (ref0 && ref1 && ref2 && ref3) {
+        int16_t mvr0 = 0, mvc0 = 0, mvr1 = 0, mvc1 = 0, mvr2 = 0, mvc2 = 0,
+            mvr3 = 0, mvc3 = 0;
+        int d01, d23, d02, d13;  // motion vector distance between 2 blocks
+
+        // Get each subblock's motion vectors.
+        if (bsize == BLOCK_SIZE_MB16X16) {
+          mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+              as_mv.col;
+          mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.row;
+          mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+              as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB32X32) {
+          mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
+        } else if (bsize == BLOCK_SIZE_SB64X64) {
+          mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
+          mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
+          mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
+          mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
+          mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
+          mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
+          mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
+          mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
+        }
+
+        // Adjust sign if ref is alt_ref.
+        if (cm->ref_frame_sign_bias[ref0]) {
+          mvr0 *= -1;
+          mvc0 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref1]) {
+          mvr1 *= -1;
+          mvc1 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref2]) {
+          mvr2 *= -1;
+          mvc2 *= -1;
+        }
+
+        if (cm->ref_frame_sign_bias[ref3]) {
+          mvr3 *= -1;
+          mvc3 *= -1;
+        }
+
+        // Calculate mv distances.
+        d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
+        d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
+        d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
+        d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
+
+        if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
+          // Set fast motion search level.
+          x->fast_ms = 1;
+
+          // Calculate prediction MV.
+          x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
+          x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
+
+          if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
+              d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
+            // Set fast motion search level.
+            x->fast_ms = 2;
+
+            if (!d01 && !d23 && !d02 && !d13) {
+              x->fast_ms = 3;
+              x->subblock_ref = ref0;
+            }
+          }
+        }
+      }
+    }
+  }
+
   if (!cpi->sf.use_partitions_less_than
       || (cpi->sf.use_partitions_less_than
           && bsize <= cpi->sf.less_than_block_size)) {
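Note: the heuristic above is easier to audit as a pure function. The sketch below mirrors its classification logic, assuming all four subblocks are inter coded and their MVs already sign-normalized; mv_dist() and classify_subblocks() are hypothetical names, while the 24 and 2 thresholds and the four compared pairs (0-1, 2-3, 0-2, 1-3) are taken from the diff.

#include <stdint.h>
#include <stdlib.h>

typedef struct { int16_t row, col; } mv_sketch;

/* Chebyshev distance between two MVs, i.e. MAX(|drow|, |dcol|). */
static int mv_dist(mv_sketch a, mv_sketch b) {
  const int dr = abs(a.row - b.row), dc = abs(a.col - b.col);
  return dr > dc ? dr : dc;
}

/* Returns the fast_ms level for four subblock search results. */
static int classify_subblocks(const mv_sketch mv[4], const int ref[4]) {
  const int d01 = mv_dist(mv[0], mv[1]), d23 = mv_dist(mv[2], mv[3]);
  const int d02 = mv_dist(mv[0], mv[2]), d13 = mv_dist(mv[1], mv[3]);
  if (d01 >= 24 || d23 >= 24 || d02 >= 24 || d13 >= 24)
    return 0;  /* subblocks disagree: keep the normal full search */
  if (ref[0] != ref[1] || ref[1] != ref[2] || ref[2] != ref[3] ||
      d01 >= 2 || d23 >= 2 || d02 >= 2 || d13 >= 2)
    return 1;  /* roughly coherent: seed the search with the mean MV */
  return (d01 | d23 | d02 | d13) ? 2 : 3;  /* near-exact or exact match */
}

Level 1 only reseeds the search; level 2 additionally skips the intra checks in vp9_rd_pick_inter_mode_sb(); level 3 further pins the mode loop to subblock_ref, which is what the vp9_rdopt.c hunks below implement.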
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 5ba9a41de..9042111f8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -723,7 +723,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
   sf->use_uv_intra_rd_estimate = 0;
-
+  sf->using_small_partition_info = 0;
   // Skip any mode not chosen at size < X for all sizes > X
   // Hence BLOCK_SIZE_SB64X64 (skip is off)
   sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
@@ -795,6 +795,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->use_rd_breakout = 1;
     sf->skip_encode_sb = 1;
     sf->use_uv_intra_rd_estimate = 1;
+    sf->using_small_partition_info = 1;
   }
   if (speed == 3) {
     sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index f63800c8d..19b1e3af5 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -268,6 +268,7 @@ typedef struct {
   int adjust_partitioning_from_last_frame;
   int last_partitioning_redo_frequency;
  int disable_splitmv;
+  int using_small_partition_info;
 
   // Implements various heuristics to skip searching modes
   // The heuristics selected are based on flags
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7d44250a2..eac7b2a19 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2334,6 +2334,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
+  VP9_COMMON *cm = &cpi->common;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
   int bestsme = INT_MAX;
@@ -2364,18 +2365,37 @@
 
   vp9_clamp_mv_min_max(x, &ref_mv);
 
-  // Work out the size of the first step in the mv step search.
-  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
-  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
-    step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
-  } else {
-    step_param = vp9_init_search_range(
-        cpi, MIN(cpi->common.width, cpi->common.height));
-  }
+  // Adjust search parameters based on small partitions' result.
+  if (x->fast_ms) {
+    // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
+    // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
+    // Adjust search range.
+    step_param = 6;
+    if (x->fast_ms > 1)
+      step_param = 8;
 
-  // mvp_full.as_int = ref_mv[0].as_int;
-  mvp_full.as_int =
-      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+    // Get prediction MV.
+    mvp_full.as_int = x->pred_mv.as_int;
+
+    // Adjust MV sign if needed.
+    if (cm->ref_frame_sign_bias[ref]) {
+      mvp_full.as_mv.col *= -1;
+      mvp_full.as_mv.row *= -1;
+    }
+  } else {
+    // Work out the size of the first step in the mv step search.
+    // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+    if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+      step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+    } else {
+      step_param = vp9_init_search_range(
+          cpi, MIN(cpi->common.width, cpi->common.height));
+    }
+
+    // mvp_full.as_int = ref_mv[0].as_int;
+    mvp_full.as_int =
+        mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+  }
 
   mvp_full.as_mv.col >>= 3;
   mvp_full.as_mv.row >>= 3;
@@ -3113,9 +3133,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   // If intra is not masked off then get uv intra mode rd.
-  if (!cpi->sf.use_avoid_tested_higherror
+  if (x->fast_ms < 2 && (!cpi->sf.use_avoid_tested_higherror
       || (cpi->sf.use_avoid_tested_higherror
-          && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+          && (ref_frame_mask & (1 << INTRA_FRAME))))) {
     // Note that the enumerator TXFM_MODE "matches" TX_SIZE.
     // Eg. ONLY_4X4 = TX_4X4, ALLOW_8X8 = TX_8X8 etc such that the MIN
     // operation below correctly constrains max_uvtxfm_size.
@@ -3194,6 +3214,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     x->skip = 0;
 
+    // Skip some checking based on small partitions' result.
+    if (x->fast_ms > 1 && !ref_frame)
+      continue;
+    if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
+      continue;
+
     if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
       if (!(ref_frame_mask & (1 << ref_frame))) {
         continue;
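Note: taken together, the vp9_rdopt.c changes turn the full motion search into a seeded refinement. Below is a minimal compilable sketch of just the seeding step. It assumes, per the comment moved into the else branch, that step_param N makes the first diamond-search step MAX >> N, so the values 6 and 8 start far tighter than the auto-sized default; choose_search_seed() is a hypothetical helper, not a libvpx function.

#include <stdint.h>

typedef union {
  uint32_t as_int;
  struct { int16_t row, col; } as_mv;
} int_mv_sketch;

/* Pick the starting MV and first-step size for the large partition's
 * search from the subblock hint. */
static int_mv_sketch choose_search_seed(int fast_ms, int_mv_sketch pred_mv,
                                        int ref_sign_bias, int *step_param) {
  int_mv_sketch seed = pred_mv;
  *step_param = (fast_ms > 1) ? 8 : 6;  /* tighter when subblocks agree */
  if (ref_sign_bias) {
    /* pred_mv was sign-normalized in rd_pick_partition(); flip it back
     * for sign-biased (e.g. alt-ref) references. */
    seed.as_mv.row = (int16_t)-seed.as_mv.row;
    seed.as_mv.col = (int16_t)-seed.as_mv.col;
  }
  return seed;
}

As in the existing code path, the caller still shifts the seed's row and col right by 3 to convert 1/8-pel MV units to the full-pel units the search expects.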