Merge changes I922f8602,I0ac3343d into experimental

* changes:
  Use 256-byte aligned filter tables
  Set scale factors consistently for SPLITMV
2013-02-27 10:08:53 -08:00 · 2013-02-27 10:08:53 -08:00 · d8e68bd14b
commit d8e68bd14b
parent 350ba5f30e 6fd7dd1a70
5 changed files with 93 additions and 176 deletions
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@ -13,6 +13,7 @@ extern "C" {
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
 }
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/acm_random.h"
@ -430,60 +431,61 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
  }
 }

+DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
+    { 0,   0,   0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0, 128},
+    { 0,   0,   0, 128},
+    { 0,   0, 128},
+    { 0, 128},
+    { 128},
+    { 0,   0,   0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0,   0, 128},
+    { 0,   0,   0,   0, 128},
+    { 0,   0,   0, 128},
+    { 0,   0, 128},
+    { 0, 128},
+    { 128}
+};
+
 TEST_P(ConvolveTest, ChangeFilterWorks) {
  uint8_t* const in = input();
  uint8_t* const out = output();

-  const int16_t filters[][8] = {
-    { 0,   0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0, 128},
-    { 0,   0,   0, 128},
-    { 0,   0, 128},
-    { 0, 128},
-    { 128},
-    { 0,   0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0, 128},
-    { 0,   0,   0, 128},
-    { 0,   0, 128},
-    { 0, 128},
-    { 128},
-    { 0,   0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0,   0, 128},
-    { 0,   0,   0,   0, 128},
-    { 0,   0,   0, 128},
-    { 0,   0, 128},
-    { 0, 128},
-    { 128},
-  };
-
  REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
-                                 filters[0], 17, filters[4], 16,
+                                 kChangeFilters[8], 17, kChangeFilters[4], 16,
                                 Width(), Height()));

-  for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
-    ASSERT_EQ(in[4], out[x]) << "x == " << x;
+  for (int x = 0; x < Width(); ++x) {
+    if (x < 8)
+      ASSERT_EQ(in[4], out[x]) << "x == " << x;
+    else
+      ASSERT_EQ(in[12], out[x]) << "x == " << x;
  }

  REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
-                                 filters[4], 16, filters[0], 17,
+                                 kChangeFilters[4], 16, kChangeFilters[8], 17,
                                 Width(), Height()));

-  for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
-    ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+  for (int y = 0; y < Height(); ++y) {
+    if (y < 8)
+      ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+    else
+      ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;
  }

  REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                  filters[0], 17, filters[0], 17,
+                                  kChangeFilters[8], 17, kChangeFilters[8], 17,
                                  Width(), Height()));

-  for (int y = 0; y < (Height() > 4 ? 8 : 4); ++y) {
-    for (int x = 0; x < (Width() > 4 ? 8 : 4); ++x) {
-      ASSERT_EQ(in[4 * kInputStride + 4], out[y * kOutputStride + x])
+  for (int y = 0; y < Height(); ++y) {
+    for (int x = 0; x < Width(); ++x) {
+      const int ref_x = x < 8 ? 4 : 12;
+      const int ref_y = y < 8 ? 4 : 12;
+
+      ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
          << "x == " << x << ", y == " << y;
    }
  }
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@ -19,7 +19,6 @@

 #define VP9_FILTER_WEIGHT 128
 #define VP9_FILTER_SHIFT  7
-#define ALIGN_FILTERS_256 0

 /* Assume a bank of 16 filters to choose from. There are two implementations
 * for filter wrapping behavior, since we want to be able to pick which filter
@ -34,8 +33,11 @@
 *    always 256 byte aligned.
 *
 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2
- * parameters, and switching between them is trivial.
+ * parameters, and switching between them is trivial, with the
+ * ALIGN_FILTERS_256 macro, below.
 */
+ #define ALIGN_FILTERS_256 1
+
 static void convolve_horiz_c(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             const int16_t *filter_x0, int x_step_q4,
@ -56,11 +58,12 @@ static void convolve_horiz_c(const uint8_t *src, int src_stride,
    const int16_t *filter_x = filter_x0;

    /* Initial phase offset */
-    int x_q4 = (filter_x - filter_x_base) / taps;
+    int x0_q4 = (filter_x - filter_x_base) / taps;
+    int x_q4 = x0_q4;

    for (x = 0; x < w; ++x) {
      /* Per-pixel src offset */
-      int src_x = x_q4 >> 4;
+      int src_x = (x_q4 - x0_q4) >> 4;

      for (sum = 0, k = 0; k < taps; ++k) {
        sum += src[src_x + k] * filter_x[k];
@ -97,11 +100,12 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
    const int16_t *filter_x = filter_x0;

    /* Initial phase offset */
-    int x_q4 = (filter_x - filter_x_base) / taps;
+    int x0_q4 = (filter_x - filter_x_base) / taps;
+    int x_q4 = x0_q4;

    for (x = 0; x < w; ++x) {
      /* Per-pixel src offset */
-      int src_x = x_q4 >> 4;
+      int src_x = (x_q4 - x0_q4) >> 4;

      for (sum = 0, k = 0; k < taps; ++k) {
        sum += src[src_x + k] * filter_x[k];
@ -138,11 +142,12 @@ static void convolve_vert_c(const uint8_t *src, int src_stride,
    const int16_t *filter_y = filter_y0;

    /* Initial phase offset */
-    int y_q4 = (filter_y - filter_y_base) / taps;
+    int y0_q4 = (filter_y - filter_y_base) / taps;
+    int y_q4 = y0_q4;

    for (y = 0; y < h; ++y) {
      /* Per-pixel src offset */
-      int src_y = y_q4 >> 4;
+      int src_y = (y_q4 - y0_q4) >> 4;

      for (sum = 0, k = 0; k < taps; ++k) {
        sum += src[(src_y + k) * src_stride] * filter_y[k];
@ -179,11 +184,12 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
    const int16_t *filter_y = filter_y0;

    /* Initial phase offset */
-    int y_q4 = (filter_y - filter_y_base) / taps;
+    int y0_q4 = (filter_y - filter_y_base) / taps;
+    int y_q4 = y0_q4;

    for (y = 0; y < h; ++y) {
      /* Per-pixel src offset */
-      int src_y = y_q4 >> 4;
+      int src_y = (y_q4 - y0_q4) >> 4;

      for (sum = 0, k = 0; k < taps; ++k) {
        sum += src[(src_y + k) * src_stride] * filter_y[k];
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@ -15,26 +15,7 @@
 #include "vp9_rtcd.h"
 #include "vp9/common/vp9_common.h"

-/* TODO(jkoleszar): We can avoid duplicating these tables 2X by forcing 256
- * byte alignment of the table's base address.
- */
-DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS*2][8]) = {
-  { 0, 0, 0, 128,   0, 0, 0, 0 },
-  { 0, 0, 0, 120,   8, 0, 0, 0 },
-  { 0, 0, 0, 112,  16, 0, 0, 0 },
-  { 0, 0, 0, 104,  24, 0, 0, 0 },
-  { 0, 0, 0,  96,  32, 0, 0, 0 },
-  { 0, 0, 0,  88,  40, 0, 0, 0 },
-  { 0, 0, 0,  80,  48, 0, 0, 0 },
-  { 0, 0, 0,  72,  56, 0, 0, 0 },
-  { 0, 0, 0,  64,  64, 0, 0, 0 },
-  { 0, 0, 0,  56,  72, 0, 0, 0 },
-  { 0, 0, 0,  48,  80, 0, 0, 0 },
-  { 0, 0, 0,  40,  88, 0, 0, 0 },
-  { 0, 0, 0,  32,  96, 0, 0, 0 },
-  { 0, 0, 0,  24, 104, 0, 0, 0 },
-  { 0, 0, 0,  16, 112, 0, 0, 0 },
-  { 0, 0, 0,   8, 120, 0, 0, 0 },
+DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
  { 0, 0, 0, 128,   0, 0, 0, 0 },
  { 0, 0, 0, 120,   8, 0, 0, 0 },
  { 0, 0, 0, 112,  16, 0, 0, 0 },
@ -55,7 +36,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS*2][8]) = {

 #define FILTER_ALPHA       0
 #define FILTER_ALPHA_SHARP 1
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8])
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8])
    = {
 #if FILTER_ALPHA == 0
  /* Lagrangian interpolation filter */
@ -74,23 +55,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8])
  { -1,   4, -11,  37, 112, -16,   4, -1},
  { -1,   3,  -9,  27, 118, -13,   4, -1},
  { 0,   2,  -6,  18, 122, -10,   3, -1},
-  { 0,   1,  -3,   8, 126,  -5,   1,  0},
-  { 0,   0,   0, 128,   0,   0,   0,  0},
-  { 0,   1,  -5, 126,   8,  -3,   1,  0},
-  { -1,   3, -10, 122,  18,  -6,   2,  0},
-  { -1,   4, -13, 118,  27,  -9,   3, -1},
-  { -1,   4, -16, 112,  37, -11,   4, -1},
-  { -1,   5, -18, 105,  48, -14,   4, -1},
-  { -1,   5, -19,  97,  58, -16,   5, -1},
-  { -1,   6, -19,  88,  68, -18,   5, -1},
-  { -1,   6, -19,  78,  78, -19,   6, -1},
-  { -1,   5, -18,  68,  88, -19,   6, -1},
-  { -1,   5, -16,  58,  97, -19,   5, -1},
-  { -1,   4, -14,  48, 105, -18,   5, -1},
-  { -1,   4, -11,  37, 112, -16,   4, -1},
-  { -1,   3,  -9,  27, 118, -13,   4, -1},
-  { 0,   2,  -6,  18, 122, -10,   3, -1},
  { 0,   1,  -3,   8, 126,  -5,   1,  0}
+
 #elif FILTER_ALPHA == 50
  /* Generated using MATLAB:
   * alpha = 0.5;
@ -118,7 +84,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8])
 #endif  /* FILTER_ALPHA */
 };

-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8])
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
    = {
 #if FILTER_ALPHA_SHARP == 1
  /* dct based filter */
@ -137,23 +103,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8])
  {-2,   6, -13,  37, 115, -20,   9, -4},
  {-2,   5, -10,  27, 121, -17,   7, -3},
  {-1,   3,  -6,  17, 125, -13,   5, -2},
-  {0,   1,  -3,   8, 127,  -7,   3, -1},
-  {0,   0,   0, 128,   0,   0,   0, 0},
-  {-1,   3,  -7, 127,   8,  -3,   1, 0},
-  {-2,   5, -13, 125,  17,  -6,   3, -1},
-  {-3,   7, -17, 121,  27, -10,   5, -2},
-  {-4,   9, -20, 115,  37, -13,   6, -2},
-  {-4,  10, -23, 108,  48, -16,   8, -3},
-  {-4,  10, -24, 100,  59, -19,   9, -3},
-  {-4,  11, -24,  90,  70, -21,  10, -4},
-  {-4,  11, -23,  80,  80, -23,  11, -4},
-  {-4,  10, -21,  70,  90, -24,  11, -4},
-  {-3,   9, -19,  59, 100, -24,  10, -4},
-  {-3,   8, -16,  48, 108, -23,  10, -4},
-  {-2,   6, -13,  37, 115, -20,   9, -4},
-  {-2,   5, -10,  27, 121, -17,   7, -3},
-  {-1,   3,  -6,  17, 125, -13,   5, -2},
  {0,   1,  -3,   8, 127,  -7,   3, -1}
+
 #elif FILTER_ALPHA_SHARP == 75
  /* alpha = 0.75 */
  {0,   0,   0, 128,   0,   0,   0, 0},
@ -175,8 +126,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8])
 #endif  /* FILTER_ALPHA_SHARP */
 };

-DECLARE_ALIGNED(16, const int16_t,
-                vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8]) = {
+DECLARE_ALIGNED(256, const int16_t,
+                vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
  /* 8-tap lowpass filter */
  /* Hamming window */
  {-1, -7, 32, 80, 32, -7, -1,  0},
@ -194,26 +145,10 @@ DECLARE_ALIGNED(16, const int16_t,
  { 1, -3, -4, 50, 76, 16, -8,  0},
  { 1, -3, -5, 45, 78, 20, -8,  0},
  { 1, -2, -7, 41, 79, 24, -8,  0},
-  { 1, -2, -7, 37, 80, 28, -8, -1},
-  {-1, -7, 32, 80, 32, -7, -1,  0},
-  {-1, -8, 28, 80, 37, -7, -2,  1},
-  { 0, -8, 24, 79, 41, -7, -2,  1},
-  { 0, -8, 20, 78, 45, -5, -3,  1},
-  { 0, -8, 16, 76, 50, -4, -3,  1},
-  { 0, -7, 13, 74, 54, -3, -4,  1},
-  { 1, -7,  9, 71, 58, -1, -4,  1},
-  { 1, -6,  6, 68, 62,  1, -5,  1},
-  { 1, -6,  4, 65, 65,  4, -6,  1},
-  { 1, -5,  1, 62, 68,  6, -6,  1},
-  { 1, -4, -1, 58, 71,  9, -7,  1},
-  { 1, -4, -3, 54, 74, 13, -7,  0},
-  { 1, -3, -4, 50, 76, 16, -8,  0},
-  { 1, -3, -5, 45, 78, 20, -8,  0},
-  { 1, -2, -7, 41, 79, 24, -8,  0},
  { 1, -2, -7, 37, 80, 28, -8, -1}
 };

-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8])
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8])
    = {
  {0, 0,   0, 128,   0,   0, 0,  0},
  {0, 1,  -5, 125,   8,  -2, 1,  0},
@ -230,21 +165,5 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8])
  {0, 2, -10,  37, 110, -14, 3,  0},
  {0, 2,  -8,  27, 116, -11, 2,  0},
  {0, 1,  -5,  17, 122,  -8, 1,  0},
-  {0, 1,  -2,   8, 125,  -5, 1,  0},
-  {0, 0,   0, 128,   0,   0, 0,  0},
-  {0, 1,  -5, 125,   8,  -2, 1,  0},
-  {0, 1,  -8, 122,  17,  -5, 1,  0},
-  {0, 2, -11, 116,  27,  -8, 2,  0},
-  {0, 3, -14, 110,  37, -10, 2,  0},
-  {0, 3, -15, 103,  47, -12, 2,  0},
-  {0, 3, -16,  95,  57, -14, 3,  0},
-  {0, 3, -16,  86,  67, -15, 3,  0},
-  {0, 3, -16,  77,  77, -16, 3,  0},
-  {0, 3, -15,  67,  86, -16, 3,  0},
-  {0, 3, -14,  57,  95, -16, 3,  0},
-  {0, 2, -12,  47, 103, -15, 3,  0},
-  {0, 2, -10,  37, 110, -14, 3,  0},
-  {0, 2,  -8,  27, 116, -11, 2,  0},
-  {0, 1,  -5,  17, 122,  -8, 1,  0},
  {0, 1,  -2,   8, 125,  -5, 1,  0}
 };
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@ -21,11 +21,11 @@

 #define SUBPEL_SHIFTS 16

-extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS*2][8];
-extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8];
-extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8];
-extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8];
-extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8];
+extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8];

 // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
 // filter kernel as a 2 tap filter.
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@ -128,6 +128,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd,
      break;
 #endif
  }
+  assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
 }

 void vp9_copy_mem16x16_c(const uint8_t *src,
@ -314,12 +315,15 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
 }

 static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
-                                      const struct scale_factors *scale,
+                                      struct scale_factors *scale,
                                      int block_size, int stride, int which_mv,
-                                      const struct subpix_fn_table *subpix) {
+                                      const struct subpix_fn_table *subpix,
+                                      int row, int col) {
  assert(d1->predictor - d0->predictor == block_size);
  assert(d1->pre == d0->pre + block_size);

+  set_scaled_offsets(&scale[which_mv], row, col);
+
  if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
    uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;

@ -342,6 +346,9 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
                              &scale[which_mv],
                              block_size, block_size, which_mv,
                              subpix);
+
+    set_scaled_offsets(&scale[which_mv], row, col + block_size);
+
    vp9_build_inter_predictor(*base_pre1 + d1->pre,
                              d1->pre_stride,
                              d1->predictor, stride,
@ -441,11 +448,8 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
    BLOCKD *d1 = &blockd[i + 1];

    for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
-      set_scaled_offsets(&xd->scale_factor_uv[which_mv],
-                         mb_row * 8 + y, mb_col * 8 + x);
-
      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
-                                &xd->subpix);
+                                &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
    }
  }
 }
@ -747,7 +751,8 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
 #endif
 }

-static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
+static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
+                                         int mb_row, int mb_col) {
  int i;
  MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
  BLOCKD *blockd = xd->block;
@ -758,6 +763,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
    for (i = 0; i < 16; i += 8) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 2];
+      const int y = i & 8;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
@ -768,44 +774,25 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
          clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
        }

-        /* TODO(jkoleszar): Enabling this for EIGHTTAP_SMOOTH changes the
-         * result slightly, for reasons that are not immediately obvious to me.
-         * It probably makes sense to enable this for all filter types to be
-         * consistent with the way we do 8x4 below. Leaving disabled for now.
-         */
-        if (mbmi->interp_filter != EIGHTTAP_SMOOTH) {
-          build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16,
-                                    which_mv, &xd->subpix);
-        } else {
-          uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
-          uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
-
-          vp9_build_inter_predictor(*base_pre0 + d0->pre,
-                                    d0->pre_stride,
-                                    d0->predictor, 16,
-                                    &d0->bmi.as_mv[which_mv],
-                                    &xd->scale_factor[which_mv],
-                                    8, 8, which_mv, &xd->subpix);
-          vp9_build_inter_predictor(*base_pre1 + d1->pre,
-                                    d1->pre_stride,
-                                    d1->predictor, 16,
-                                    &d1->bmi.as_mv[which_mv],
-                                    &xd->scale_factor[which_mv],
-                                    8, 8, which_mv, &xd->subpix);
-        }
+        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16,
+                                  which_mv, &xd->subpix,
+                                  mb_row * 16 + y, mb_col * 16);
      }
    }
  } else {
    for (i = 0; i < 16; i += 2) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 1];
+      const int x = (i & 3) * 4;
+      const int y = (i >> 2) * 4;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];

      for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
        build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16,
-                                  which_mv, &xd->subpix);
+                                  which_mv, &xd->subpix,
+                                  mb_row * 16 + y, mb_col * 16 + x);
      }
    }
  }
@ -813,10 +800,13 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
  for (i = 16; i < 24; i += 2) {
    BLOCKD *d0 = &blockd[i];
    BLOCKD *d1 = &blockd[i + 1];
+    const int x = 4 * (i & 1);
+    const int y = ((i - 16) >> 1) * 4;

    for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
      build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8,
-                                which_mv, &xd->subpix);
+                                which_mv, &xd->subpix,
+                                mb_row * 8 + y, mb_col * 8 + x);
    }
  }
 }
@ -945,6 +935,6 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
 #endif
  } else {
    build_4x4uvmvs(xd);
-    build_inter4x4_predictors_mb(xd);
+    build_inter4x4_predictors_mb(xd, mb_row, mb_col);
  }
 }