Compare commits

...

27 Commits

Author SHA1 Message Date
Yue Chen
a5a74224d3 Redesigned recursive filters adapted to block-sizes
Recursive intra filters for 4x4 and 8x8 blocks are separately designed.
Fixed bugs in the RD loop.

Change-Id: Id0b1752769f596ce8ea850863cadbc6a739804be
2013-11-04 12:08:19 -08:00
Yue Chen
769ce06eeb Fixed a bug in recursive extrapolation filter for intra prediction
Estimation of local mean, which is used to get zero-mean signals
before linear filtering, is corrected.

Change-Id: If73d0ae479201fc60a34baa3f15d61e5aecb1162
2013-10-12 18:06:34 -07:00
John Koleszar
fd44975bc8 Make Rand8Extremes more extreme
Previous code didn't do what was expected; we want numbers within 16 of
the extrema.

Change-Id: I20c18627c482ec66e8405ddad74ca9276c0c65dc
2013-09-17 17:45:34 -07:00
Yue Chen
6ce9f36322 New flags for masked compound inter-inter/inter-intra
Masked inter-inter will be enabled when CONFIG_MASKED_INTERINTER is
on. Masked inter-intra will be enabled only when both
CONFIG_MASKED_INTERINTRA and CONFIG_INTERINTRA are on.

Change-Id: I57efcfe6a3ef2d53129ef703030366503dfa3762
2013-09-03 17:15:25 -07:00
Yue Chen
8b05d6a248 Masked joint spatio-temporal prediction
Exploit the wedge partition in joint spatio-temporal prediction: one
slice is intra predicted and the other slice is inter predicted.

Bit-rate reduction:
+0.583% derf        (+0.307% on top of interintra)
+1.298% stdhdraw250 (+0.367% on top of interintra)

Change-Id: Iec4bba5a47d0419778458c25b550574a42b3a250
2013-08-27 16:39:08 -07:00
Yue Chen
1a9ef5bcd0 Improved joint spatio-temporal prediction
Switch to the correct reference to generate the intra component of joint
predictions.

Change-Id: Ibec72cf53b3be3f7333fe5a29c57e41239b30820
2013-08-19 16:44:47 -07:00
Deb Mukherjee
92fb82a980 Adds sb-type context to probs in interintra expt
Adds sb_type context to the probabilities in the interintra
experiment.

Change-Id: I5dec4318fb859a550ad5e7ed83378e17ba48e8ed
2013-08-15 11:17:50 -07:00
Yue Chen
1306c1b09b Masked Compound Inter Prediction
The masked compound motion compensation has mask types separating a
block into wedges at specific angles and offsets. The mask is used to
weight pixels from the first and second predictors to obtain the final
predictor. The weighting is smooth near the partition boundaries but
becomes a selection farther away.

Bit-rate reduction: +0.960%(derfraw300) +0.651%(stdhdraw250)

Change-Id: I1327d22d3fc585b72ffa0e03abd90f3980f0876a
2013-08-14 16:56:53 -07:00
Yue Chen
f99fbcd682 Recursive extrapolation filter
The recursive intra filter is implemented. Six extrapolation intra
filters are added as extra modes for 4x4 and 8x8 blocks.
Signaling bits are added at the block level to indicate whether a normal
intra mode is switched to a recursive intra filter mode. They are
entropy coded by maintaining a backward-adaptive probability table
tracking the usage of recursive filters at different block sizes and
different intra modes.

Bit-rate reduction: +0.458% (derf)

Change-Id: I1b8e00405ea1494002ca40de1db52c51259012c4
2013-08-14 15:59:11 -07:00
Yue Chen
0207fa679f Merge "Improved joint spatio-temporal prediction" into experimental 2013-08-14 13:47:01 -07:00
Yue Chen
e2bb669a3d Improved joint spatio-temporal prediction
A full search for the optimal interintra mode is performed instead of
inferring the interintra mode from the optimal intra mode.

Bit-rate reduction:
+0.811% stdhdraw250
+0.238% derf

Change-Id: I80e905a51fba0e9fb7eb00a3342d21f452825377
2013-08-14 11:33:40 -07:00
John Koleszar
151ae7ae50 Merge branch 'master' into experimental
Conflicts:
	configure
	vp9/common/vp9_entropymode.c
	vp9/common/vp9_onyxc_int.h
	vp9/common/vp9_reconinter.c
	vp9/common/vp9_reconintra.c
	vp9/common/x86/vp9_idct_intrin_sse2.c
	vp9/decoder/vp9_decodemv.c
	vp9/decoder/vp9_decodframe.c
	vp9/encoder/vp9_bitstream.c
	vp9/encoder/vp9_encodeframe.c
	vp9/encoder/vp9_onyx_if.c
	vp9/encoder/vp9_onyx_int.h
	vp9/encoder/vp9_rdopt.c

Change-Id: I2191e8cf074677d6def890720a6b095457efce18
2013-08-07 14:44:06 -07:00
Yue Chen
4dd3b07478 Merge origin/master into experimental
Change-Id: I53dc16716ff02f35089df1aeb3e5eeb825271dab
2013-08-07 13:43:49 -07:00
Yue Chen
35abb70353 Joint Spatio-temporal Prediction: Updated
A new prediction scheme is implemented within the July 3 experimental branch,
exploiting both motion compensation and intra prediction in inter frames.

Bit-rate reduction:
derf:  +0.147% (on July 3  head)
stdhd: +0.591% (on June 25 head)

Change-Id: Iec3d97afaad6fa99881187228971a405c3d2ec88
2013-07-30 09:18:43 -07:00
John Koleszar
5a70b23158 Merge remote-tracking branch 'origin/master' into experimental
Change-Id: Ifda4ce2647cd79b87e8450fbaf79c59165b8388f
2013-07-03 11:32:58 -07:00
John Koleszar
d9879e2c73 Merge remote-tracking branch 'origin/master' into experimental
Conflicts:
	vp9/common/vp9_rtcd_defs.sh
	vp9/encoder/vp9_encodeframe.c

Change-Id: I365fb9e78a550c68aa9caca7fff84af43526b439
2013-06-28 09:42:40 -07:00
Ronald S. Bultje
9536db22cd Merge "Only do metrics on cropped (visible) area of picture." into experimental 2013-06-25 12:02:27 -07:00
Ronald S. Bultje
3904505d8e Merge "Don't skip right/bottom border pixels in SSIM calculations." into experimental 2013-06-25 12:02:23 -07:00
sujee
01c43e86d9 Trivial change to add crude timing information for encoding section
Change-Id: I84e07c2f1240b95d5de083df06eb3d581bfb9b68
2013-06-18 14:58:31 -07:00
Scott LaVarnway
885d8a4397 Eliminated prev_mip memsets/memcpys in encoder
This patch swaps pointers instead of copying, and uses the
last show_frame flag instead of setting the entire buffer
to zero. On the decode side, prev_mi is forced to always point
to a valid mode info buffer, as required by the next frame.

Change-Id: I90441eaf087868e9f9bc368e15e0becc848c4a51
2013-06-18 14:42:31 -04:00
Jingning Han
8d139a5d29 Merge "Enable sse2 version of sad8x4/4x8" into experimental 2013-06-14 13:16:13 -07:00
Christian Duvivier
54f86290b6 Remove copy stages in vp9_short_idct32x32.
Add another set of internal variables to allow removal of all the copy stages.

Change-Id: I2e1cf36b7d057fbb7515fce737f7eee391edf842
2013-06-13 16:12:21 -07:00
Ronald S. Bultje
538e97ffd8 Fix row tiling.
Change-Id: I57be4eeaea6e4402f6a0cc04f5c6b7a5d9aedf9b
2013-06-12 10:30:06 -07:00
Ronald S. Bultje
70dd502153 Merge "Implement SSE version for sad4x8x4d and SSE2 version for sad8x4x4d." into experimental 2013-06-11 18:33:31 -07:00
Ronald S. Bultje
10eb64ab9f Implement SSE version for sad4x8x4d and SSE2 version for sad8x4x4d.
Encoding time of crew (CIF, first 50 frames) @ 1500kbps goes from 4min56
to 4min42.

Change-Id: I92c0c8b32980d2ae7c6dafc8b883a2c7fcd14a9f
2013-06-11 15:34:21 -07:00
Ronald S. Bultje
df50e5c01a Only do metrics on cropped (visible) area of picture.
The part where we align it by 8 or 16 is an implementation detail that
shouldn't matter to the outside world.

Change-Id: I9edd6f08b51b31c839c0ea91f767640bccb08d53
2013-06-10 11:47:22 -07:00
Ronald S. Bultje
2ec602c8e2 Don't skip right/bottom border pixels in SSIM calculations.
Change-Id: I75acb55ade54bef6ad7703ed5e691581fa2f8fe1
2013-06-10 11:36:04 -07:00
30 changed files with 4157 additions and 15 deletions

configure (vendored)
View File

@@ -250,6 +250,10 @@ EXPERIMENT_LIST="
multiple_arf
non420
alpha
interintra
filterintra
masked_interintra
masked_interinter
"
CONFIG_LIST="
external_build

View File

@@ -38,7 +38,7 @@ class ACMRandom {
// Returns a random value near 0 or near 255, to better exercise
// saturation behavior.
const uint8_t r = Rand8();
- return r < 128 ? r << 4 : r >> 4;
+ return r <= 128 ? 255 - (r >> 4) : r >> 4;
}
int PseudoUniform(int range) {

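To make the intent of the fd44975bc8 change concrete, here is a minimal
standalone sketch (rand8() is a hypothetical stand-in for
ACMRandom::Rand8(), not the library's code). With the new expression,
r <= 128 yields values in [247, 255] and r > 128 yields values in
[0, 15], so every result lands within 16 of an extremum:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for ACMRandom::Rand8(). */
static uint8_t rand8(void) { return (uint8_t)(rand() & 0xff); }

static uint8_t rand8_extremes(void) {
  const uint8_t r = rand8();
  /* r <= 128: r >> 4 is in [0, 8], so 255 - (r >> 4) is in [247, 255].
   * r  > 128: r >> 4 is in [8, 15]. */
  return r <= 128 ? 255 - (r >> 4) : r >> 4;
}

int main(void) {
  int i;
  for (i = 0; i < 8; ++i)
    printf("%d\n", rand8_extremes());
  return 0;
}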
View File

@@ -18,6 +18,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
@@ -137,6 +138,8 @@ int main(int argc, char **argv) {
int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
int flag_periodicity;
int max_intra_size_pct;
clock_t before;
clock_t after;
/* Check usage and arguments */
if (argc < 9)
@@ -639,6 +642,7 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
max_intra_size_pct);
before = clock();
frame_avail = 1;
while (frame_avail || got_data) {
vpx_codec_iter_t iter = NULL;
@@ -660,8 +664,8 @@ int main(int argc, char **argv) {
got_data = 1;
switch (pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT:
- for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
- i<cfg.ts_number_layers; i++)
+ for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
+ i < cfg.ts_number_layers; i++)
{
write_ivf_frame_header(outfile[i], pkt);
(void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
@@ -676,9 +680,13 @@ int main(int argc, char **argv) {
frame_cnt++;
pts += frame_duration;
}
+ after = clock();
+ printf("Processed %d frames in %ld ms.\n", frame_cnt-1,
+ (int) (after - before) / (CLOCKS_PER_SEC / 1000));
fclose (infile);
- printf ("Processed %d frames.\n",frame_cnt-1);
if (vpx_codec_destroy(&codec))
die_codec (&codec, "Failed to destroy codec");

View File

@@ -89,6 +89,15 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
#if CONFIG_FILTERINTRA
static INLINE int is_filter_allowed(MB_PREDICTION_MODE mode) {
return mode != DC_PRED &&
mode != D45_PRED &&
mode != D27_PRED &&
mode != D63_PRED;
}
#endif
#define VP9_INTRA_MODES (TM_PRED + 1)
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
@@ -130,8 +139,80 @@ static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
return mi_height_log2_lookup[sb_type];
}
#if CONFIG_INTERINTRA
static INLINE TX_SIZE intra_size_log2_for_interintra(int bs) {
switch (bs) {
case 4:
return TX_4X4;
break;
case 8:
return TX_8X8;
break;
case 16:
return TX_16X16;
break;
case 32:
return TX_32X32;
break;
default:
return TX_32X32;
break;
}
}
static INLINE int is_interintra_allowed(BLOCK_SIZE_TYPE sb_type) {
return ((sb_type >= BLOCK_8X8) && (sb_type < BLOCK_64X64));
}
#if CONFIG_MASKED_INTERINTRA
#define MASK_BITS_SML_INTERINTRA 3
#define MASK_BITS_MED_INTERINTRA 4
#define MASK_BITS_BIG_INTERINTRA 5
#define MASK_NONE_INTERINTRA -1
static INLINE int get_mask_bits_interintra(BLOCK_SIZE_TYPE sb_type) {
if (sb_type == BLOCK_4X4)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML_INTERINTRA;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED_INTERINTRA;
else
return MASK_BITS_BIG_INTERINTRA;
}
#endif
#endif
#if CONFIG_MASKED_INTERINTER
#define MASK_BITS_SML 3
#define MASK_BITS_MED 4
#define MASK_BITS_BIG 5
#define MASK_NONE -1
static inline int get_mask_bits(BLOCK_SIZE_TYPE sb_type) {
if (sb_type == BLOCK_4X4)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED;
else
return MASK_BITS_BIG;
}
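/* Illustration (not part of the patch): the mask index is coded with
 * get_mask_bits(sb_type) bits, so 8x8-class blocks choose among
 * 1 << 3 = 8 wedge orientations, 16x16/32x32-class blocks among 16, and
 * larger blocks among 32; 4x4 blocks get no wedge at all. These counts
 * match the sizes of the mask_params_* tables in vp9_reconinter.c later
 * in this change. */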
#endif
typedef struct {
MB_PREDICTION_MODE mode, uv_mode;
#if CONFIG_INTERINTRA
MB_PREDICTION_MODE interintra_mode, interintra_uv_mode;
#if CONFIG_MASKED_INTERINTRA
int interintra_mask_index;
int interintra_uv_mask_index;
int use_masked_interintra;
#endif
#endif
#if CONFIG_FILTERINTRA
int filterbit, uv_filterbit;
#endif
MV_REFERENCE_FRAME ref_frame[2];
TX_SIZE txfm_size;
int_mv mv[2]; // for each reference frame used
@@ -154,10 +235,18 @@ typedef struct {
INTERPOLATIONFILTERTYPE interp_filter;
BLOCK_SIZE_TYPE sb_type;
#if CONFIG_MASKED_INTERINTER
int use_masked_compound;
int mask_index;
#endif
} MB_MODE_INFO;
typedef struct {
MB_MODE_INFO mbmi;
#if CONFIG_FILTERINTRA
int b_filter_info[4];
#endif
union b_mode_info bmi[4];
} MODE_INFO;
@@ -651,6 +740,43 @@ static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
*y = raster_mb >> tx_cols_log2 << (txwl);
}
#if CONFIG_INTERINTRA
static void extend_for_interintra(MACROBLOCKD* const xd,
BLOCK_SIZE_TYPE bsize) {
int bh = 4 << b_height_log2(bsize), bw = 4 << b_width_log2(bsize);
int ystride = xd->plane[0].dst.stride, uvstride = xd->plane[1].dst.stride;
uint8_t *pixel_y, *pixel_u, *pixel_v;
int ymargin, uvmargin;
if (xd->mb_to_bottom_edge < 0) {
int r;
ymargin = 0 - xd->mb_to_bottom_edge / 8;
uvmargin = 0 - xd->mb_to_bottom_edge / 16;
pixel_y = xd->plane[0].dst.buf - 1 + (bh - ymargin -1) * ystride;
pixel_u = xd->plane[1].dst.buf - 1 + (bh / 2 - uvmargin - 1) * uvstride;
pixel_v = xd->plane[2].dst.buf - 1 + (bh / 2 - uvmargin - 1) * uvstride;
for (r = 0; r < ymargin; r++)
xd->plane[0].dst.buf[-1 + (bh - r -1) * ystride] = *pixel_y;
for (r = 0; r < uvmargin; r++) {
xd->plane[1].dst.buf[-1 + (bh / 2 - r -1) * uvstride] = *pixel_u;
xd->plane[2].dst.buf[-1 + (bh / 2 - r -1) * uvstride] = *pixel_v;
}
}
if (xd->mb_to_right_edge < 0) {
ymargin = 0 - xd->mb_to_right_edge / 8;
uvmargin = 0 - xd->mb_to_right_edge / 16;
pixel_y = xd->plane[0].dst.buf + bw - ymargin - 1 - ystride;
pixel_u = xd->plane[1].dst.buf + bw / 2 - uvmargin - 1 - uvstride;
pixel_v = xd->plane[2].dst.buf + bw / 2 - uvmargin - 1 - uvstride;
vpx_memset(xd->plane[0].dst.buf + bw - ymargin - ystride,
*pixel_y, ymargin);
vpx_memset(xd->plane[1].dst.buf + bw / 2 - uvmargin - uvstride,
*pixel_u, uvmargin);
vpx_memset(xd->plane[2].dst.buf + bw / 2 - uvmargin - uvstride,
*pixel_v, uvmargin);
}
}
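/* Note (illustration only, assuming the usual VP9 conventions):
 * mb_to_bottom_edge / mb_to_right_edge are in 1/8-pel units, so dividing
 * by 8 gives the luma overhang in pixels and dividing by 16 the 4:2:0
 * chroma overhang. The code replicates the last valid sample of the
 * left-neighbor column (bottom overhang) and of the above-neighbor row
 * (right overhang) so interintra prediction sees fully populated
 * borders. */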
#endif
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
const int bw = plane_block_width(bsize, &xd->plane[plane]);

View File

@@ -214,6 +214,16 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
}
};
#if CONFIG_FILTERINTRA
const vp9_prob vp9_default_filterintra_prob[TX_SIZES][VP9_INTRA_MODES] = {
// DC V H D45 D135 D117 D153 D27 D63 TM
{160, 153, 171, 160, 140, 117, 115, 160, 160, 116}, // TX_4X4
{180, 151, 191, 180, 118, 66, 97, 180, 180, 120}, // TX_8X8
{200, 200, 200, 200, 200, 200, 200, 200, 200, 200}, // TX_16X16
{220, 220, 220, 220, 220, 220, 220, 220, 220, 220}, // TX_32X32
};
#endif
static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
[VP9_INTER_MODES - 1] = {
{2, 173, 34}, // 0 = both zero mv
@@ -325,6 +335,24 @@ static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
{ 149, 144, },
};
#if CONFIG_INTERINTRA
static const vp9_prob default_interintra_prob[BLOCK_SIZE_TYPES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#if CONFIG_MASKED_INTERINTRA
static const vp9_prob default_masked_interintra_prob[BLOCK_SIZE_TYPES] = {
// 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
#endif
#if CONFIG_MASKED_INTERINTER
static const vp9_prob default_masked_interinter_prob[BLOCK_SIZE_TYPES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.uv_mode_prob, default_if_uv_probs);
vp9_copy(cm->fc.y_mode_prob, default_if_y_probs);
@@ -336,6 +364,18 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.single_ref_prob, default_single_ref_p);
cm->fc.tx_probs = default_tx_probs;
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
#if CONFIG_INTERINTRA
vp9_copy(cm->fc.interintra_prob, default_interintra_prob);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(cm->fc.masked_interintra_prob, default_masked_interintra_prob);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_copy(cm->fc.filterintra_prob, vp9_default_filterintra_prob);
#endif
#if CONFIG_MASKED_INTERINTER
vp9_copy(cm->fc.masked_compound_prob, default_masked_interinter_prob);
#endif
}
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
@@ -457,6 +497,42 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
for (i = 0; i < MBSKIP_CONTEXTS; ++i)
fc->mbskip_probs[i] = update_ct2(pre_fc->mbskip_probs[i],
counts->mbskip[i]);
#if CONFIG_INTERINTRA
if (cm->use_interintra) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (is_interintra_allowed(i))
fc->interintra_prob[i] = update_ct2(pre_fc->interintra_prob[i],
counts->interintra[i]);
}
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i))
fc->masked_interintra_prob[i] = update_ct2(
pre_fc->masked_interintra_prob[i],
counts->masked_interintra[i]);
}
}
#endif
}
#endif
#if CONFIG_FILTERINTRA
for (i = 0; i < TX_SIZES; ++i)
for (j = 0; j < VP9_INTRA_MODES; ++j)
fc->filterintra_prob[i][j] = update_ct2(pre_fc->filterintra_prob[i][j],
counts->filterintra[i][j]);
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->use_masked_compound) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (get_mask_bits(i))
fc->masked_compound_prob[i] = update_ct2
(pre_fc->masked_compound_prob[i],
counts->masked_compound[i]);
}
}
#endif
}
static void set_default_lf_deltas(struct loopfilter *lf) {

View File

@@ -19,6 +19,18 @@
#define VP9_MODE_UPDATE_PROB 252
#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters
#if CONFIG_INTERINTRA
#define VP9_UPD_INTERINTRA_PROB 248
#define SEPARATE_INTERINTRA_UV 0
#if CONFIG_MASKED_INTERINTRA
#define VP9_UPD_MASKED_INTERINTRA_PROB 248
#endif
#endif
#if CONFIG_MASKED_INTERINTER
#define VP9_UPD_MASKED_COMPOUND_PROB 248
#endif
// #define MODE_STATS
struct VP9Common;

View File

@@ -16,6 +16,7 @@
#define LOG2_MI_SIZE 3
#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE) // 64 = 2^6
#define MAX_BLOCK_SIZE (1 << 6) // max block size in pixel
#define MI_SIZE (1 << LOG2_MI_SIZE) // pixels per mi-unit
#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE) // mi-units per max block

View File

@@ -53,6 +53,18 @@ typedef struct frame_contexts {
struct tx_probs tx_probs;
vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
nmv_context nmvc;
#if CONFIG_INTERINTRA
vp9_prob interintra_prob[BLOCK_SIZE_TYPES];
#if CONFIG_MASKED_INTERINTRA
vp9_prob masked_interintra_prob[BLOCK_SIZE_TYPES];
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_prob filterintra_prob[TX_SIZES][VP9_INTRA_MODES];
#endif
#if CONFIG_MASKED_INTERINTER
vp9_prob masked_compound_prob[BLOCK_SIZE_TYPES];
#endif
} FRAME_CONTEXT;
typedef struct {
@@ -72,6 +84,18 @@ typedef struct {
struct tx_counts tx;
unsigned int mbskip[MBSKIP_CONTEXTS][2];
nmv_context_counts mv;
#if CONFIG_INTERINTRA
unsigned int interintra[BLOCK_SIZE_TYPES][2];
#if CONFIG_MASKED_INTERINTRA
unsigned int masked_interintra[BLOCK_SIZE_TYPES][2];
#endif
#endif
#if CONFIG_FILTERINTRA
unsigned int filterintra[TX_SIZES][VP9_INTRA_MODES][2];
#endif
#if CONFIG_MASKED_INTERINTER
unsigned int masked_compound[BLOCK_SIZE_TYPES][2];
#endif
} FRAME_COUNTS;
@@ -202,6 +226,17 @@ typedef struct VP9Common {
struct postproc_state postproc_state;
#endif
#if CONFIG_INTERINTRA
int use_interintra;
#if CONFIG_MASKED_INTERINTRA
int use_masked_interintra;
#endif
#endif
#if CONFIG_MASKED_INTERINTER
int use_masked_compound;
#endif
int error_resilient_mode;
int frame_parallel_decoding_mode;

View File

@@ -261,6 +261,302 @@ MV clamp_mv_to_umv_border_sb(const MV *src_mv,
return clamped_mv;
}
#if CONFIG_MASKED_INTERINTER
#define MASK_WEIGHT_BITS 6
static int get_masked_weight(int m) {
#define SMOOTHER_LEN 32
static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 2, 2, 3, 4, 5, 6,
8, 9, 12, 14, 17, 21, 24, 28,
32,
36, 40, 43, 47, 50, 52, 55, 56,
58, 59, 60, 61, 62, 62, 63, 63,
63, 63, 63, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
};
if (m < -SMOOTHER_LEN)
return 0;
else if (m > SMOOTHER_LEN)
return (1 << MASK_WEIGHT_BITS);
else
return smoothfn[m + SMOOTHER_LEN];
}
static int get_hard_mask(int m) {
return m > 0;
}
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
// The soft mask is obtained by computing f(x, y) and then calling
// get_masked_weight(f(x, y)).
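/* Worked example (illustration only, not part of the patch): for an 8x8
 * block with a = {-1, 2, 2, 2}, f(x, y) = -(x - 4) + 2 * (y - 4).
 * At (x, y) = (0, 7): f = 4 + 6 = 10, so get_masked_weight(10) = 59 and
 * the first predictor gets weight 59/64. At (7, 0): f = -3 - 8 = -11,
 * giving weight 4/64, i.e. the second predictor dominates. Pixels far
 * from the line f = 0 saturate to 0 or 64, matching the hard mask. */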
static const int mask_params_sml[1 << MASK_BITS_SML][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
};
static const int mask_params_med_hgtw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
};
static const int mask_params_med_hltw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
};
static const int mask_params_med_heqw[1 << MASK_BITS_MED][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_hgtw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
};
static const int mask_params_big_hltw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_heqw[1 << MASK_BITS_BIG][4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int *get_mask_params(int mask_index,
BLOCK_SIZE_TYPE sb_type,
int h, int w) {
const int *a;
const int mask_bits = get_mask_bits(sb_type);
if (mask_index == MASK_NONE)
return NULL;
if (mask_bits == MASK_BITS_SML) {
a = mask_params_sml[mask_index];
} else if (mask_bits == MASK_BITS_MED) {
if (h > w)
a = mask_params_med_hgtw[mask_index];
else if (h < w)
a = mask_params_med_hltw[mask_index];
else
a = mask_params_med_heqw[mask_index];
} else if (mask_bits == MASK_BITS_BIG) {
if (h > w)
a = mask_params_big_hgtw[mask_index];
else if (h < w)
a = mask_params_big_hltw[mask_index];
else
a = mask_params_big_heqw[mask_index];
} else {
assert(0);
}
return a;
}
void vp9_generate_masked_weight(int mask_index,
BLOCK_SIZE_TYPE sb_type,
int h, int w,
uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_masked_weight(m);
}
}
void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE_TYPE sb_type,
int h, int w, uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_hard_mask(m);
}
}
static void build_masked_compound(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int mask_index, BLOCK_SIZE_TYPE sb_type,
int h, int w) {
int i, j;
uint8_t mask[4096];
vp9_generate_masked_weight(mask_index, sb_type, h, w, mask, 64);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int m = mask[i * 64 + j];
dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
dst2[i * dst2_stride + j] *
((1 << MASK_WEIGHT_BITS) - m) +
(1 << (MASK_WEIGHT_BITS - 1))) >>
MASK_WEIGHT_BITS;
}
}
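/* Note (illustration only): the blend above is a convex combination with
 * 6-bit weights -- dst gets m/64, dst2 gets (64 - m)/64 -- and the added
 * (1 << (MASK_WEIGHT_BITS - 1)) term rounds to nearest before the final
 * shift. The fixed stride-64 scratch mask matches the 64x64 maximum
 * block size. */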
#endif
struct build_inter_predictors_args {
MACROBLOCKD *xd;
int x;
@@ -320,11 +616,31 @@ static void build_inter_predictors(int plane, int block,
xd->mb_to_right_edge,
xd->mb_to_bottom_edge);
scale->set_scaled_offsets(scale, arg->y + y, arg->x + x);
#if CONFIG_MASKED_INTERINTER
if (which_mv && xd->mode_info_context->mbmi.use_masked_compound) {
uint8_t tmp_dst[4096];
vp9_build_inter_predictor(pre, pre_stride,
tmp_dst, 64,
&res_mv, &xd->scale_factor[which_mv],
4 << pred_w, 4 << pred_h, 0,
&xd->subpix, MV_PRECISION_Q4);
build_masked_compound(dst, arg->dst_stride[plane],
tmp_dst, 64,
xd->mode_info_context->mbmi.mask_index,
xd->mode_info_context->mbmi.sb_type,
(4 << pred_h), (4 << pred_w));
} else {
#endif
vp9_build_inter_predictor(pre, pre_stride,
dst, arg->dst_stride[plane],
&res_mv, &xd->scale_factor[which_mv],
4 << pred_w, 4 << pred_h, which_mv,
&xd->subpix, MV_PRECISION_Q4);
#if CONFIG_MASKED_INTERINTER
}
#endif
}
}
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
@@ -375,8 +691,23 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd,
int mi_row, int mi_col,
BLOCK_SIZE_TYPE bsize) {
#if CONFIG_INTERINTRA
uint8_t *const y = xd->plane[0].dst.buf;
uint8_t *const u = xd->plane[1].dst.buf;
uint8_t *const v = xd->plane[2].dst.buf;
const int y_stride = xd->plane[0].dst.stride;
const int uv_stride = xd->plane[1].dst.stride;
#endif
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
#if CONFIG_INTERINTRA
if (xd->mode_info_context->mbmi.ref_frame[1] == INTRA_FRAME
&& is_interintra_allowed(xd->mode_info_context->mbmi.sb_type)) {
xd->right_available = 0;
vp9_build_interintra_predictors(xd, y, u, v,
y_stride, uv_stride, bsize);
}
#endif
}
// TODO(dkovalev: find better place for this function)

View File

@@ -107,4 +107,11 @@ static void set_scale_factors(MACROBLOCKD *xd, int ref0, int ref1,
void vp9_setup_scale_factors(VP9_COMMON *cm, int i);
#if CONFIG_MASKED_INTERINTER
void vp9_generate_masked_weight(int mask_index, BLOCK_SIZE_TYPE sb_type,
int h, int w, uint8_t *mask, int stride);
void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE_TYPE sb_type,
int h, int w, uint8_t *mask, int stride);
#endif
#endif // VP9_COMMON_VP9_RECONINTER_H_

View File

@@ -401,11 +401,123 @@ static void build_intra_predictors(uint8_t *src, int src_stride,
}
}
#if CONFIG_FILTERINTRA
static void filter_intra_predictors(uint8_t *ypred_ptr, int y_stride, int bs,
uint8_t *yabove_row, uint8_t *yleft_col,
int mode) {
static const int prec_bits = 10;
static const int round_val = 511;
static const int taps[10][3] = {
{ 0, 0, 0}, // DC
{ 972, 563, -534}, // V
{ 441, 975, -417}, // H
{ 0, 0, 0}, // D45
{ 502, 546, -48}, // D135
{ 744, 523, -259}, // D117
{ 379, 760, -73}, // D153
{ 0, 0, 0}, // D27
{ 0, 0, 0}, // D63
{ 783, 839, -687}, // TM
};
static const int taps8x8[10][3] = {
{ 0, 0, 0}, // DC
{991, 655, -637}, // V
{522, 987, -493}, // H
{ 0, 0, 0}, // d45
{551, 608, -193}, // d135
{762, 612, -392}, // d117
{492, 781, -260}, // d153
{ 0, 0, 0}, // d27
{ 0, 0, 0}, // d63
{823, 873, -715}, // TM
};
int k, r, c;
int pred[17][17];
int mean, ipred;
const int c1 = (bs == 4) ? taps[mode][0]: taps8x8[mode][0];
const int c2 = (bs == 4) ? taps[mode][1]: taps8x8[mode][1];
const int c3 = (bs == 4) ? taps[mode][2]: taps8x8[mode][2];
k = 0;
mean = 0;
while (k < bs) {
mean = mean + (int)yleft_col[k];
mean = mean + (int)yabove_row[k];
++k;
}
mean = (mean + bs) / (2 * bs);
for (r = 0; r < bs; r++)
pred[r + 1][0] = (int)yleft_col[r] - mean;
for (c = 0; c < bs + 1; c++)
pred[0][c] = (int)yabove_row[c - 1] - mean;
for (r = 1; r < bs + 1; r++)
for (c = 1; c < bs + 1; c++) {
ipred = c1 * pred[r - 1][c] + c2 * pred[r][c - 1]
+ c3 * pred[r - 1][c - 1];
pred[r][c] = ipred < 0 ? -((-ipred + round_val) >> prec_bits) :
((ipred + round_val) >> prec_bits);
}
for (r = 0; r < bs; r++) {
for (c = 0; c < bs; c++) {
ipred = pred[r + 1][c + 1] + mean;
ypred_ptr[c] = clip_pixel(ipred);
}
ypred_ptr += y_stride;
}
}
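/* Worked example (illustration only): the taps are Q10 fixed point
 * (prec_bits = 10). For 4x4 V_PRED, taps = {972, 563, -534}; with
 * mean-removed neighbors pred[r-1][c] = 20, pred[r][c-1] = -4 and
 * pred[r-1][c-1] = 16:
 *   ipred = 972 * 20 + 563 * (-4) - 534 * 16 = 8644
 *   pred[r][c] = (8644 + 511) >> 10 = 8
 * The local mean is added back and the result clipped only when the
 * prediction is written out. */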
static void build_filter_intra_predictors(uint8_t *src, int src_stride,
uint8_t *pred_ptr, int stride,
MB_PREDICTION_MODE mode, TX_SIZE txsz,
int up_available, int left_available,
int right_available) {
int i;
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, yabove_data, 128 + 16);
uint8_t *above_row = yabove_data + 16;
const int bs = 4 << txsz;
if (left_available) {
for (i = 0; i < bs; i++)
left_col[i] = src[i * src_stride - 1];
} else {
vpx_memset(left_col, 129, bs);
}
if (up_available) {
uint8_t *above_ptr = src - src_stride;
if (bs == 4 && right_available && left_available) {
above_row = above_ptr;
} else {
vpx_memcpy(above_row, above_ptr, bs);
if (bs == 4 && right_available)
vpx_memcpy(above_row + bs, above_ptr + bs, bs);
else
vpx_memset(above_row + bs, above_row[bs - 1], bs);
above_row[-1] = left_available ? above_ptr[-1] : 129;
}
} else {
vpx_memset(above_row, 127, bs * 2);
above_row[-1] = 127;
}
filter_intra_predictors(pred_ptr, stride, bs, above_row, left_col, mode);
}
#endif
void vp9_predict_intra_block(MACROBLOCKD *xd,
int block_idx,
int bwl_in,
TX_SIZE tx_size,
int mode,
#if CONFIG_FILTERINTRA
int filterbit,
#endif
uint8_t *reference, int ref_stride,
uint8_t *predictor, int pre_stride) {
const int bwl = bwl_in - tx_size;
@@ -413,12 +525,685 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
const int have_top = (block_idx >> bwl) || xd->up_available;
const int have_left = (block_idx & wmask) || xd->left_available;
const int have_right = ((block_idx & wmask) != wmask);
#if CONFIG_FILTERINTRA
int filterflag = is_filter_allowed(mode) && (tx_size <= TX_8X8) && filterbit;
#endif
assert(bwl >= 0);
#if CONFIG_FILTERINTRA
if (!filterflag) {
#endif
build_intra_predictors(reference, ref_stride,
predictor, pre_stride,
mode,
tx_size,
have_top, have_left,
have_right);
#if CONFIG_FILTERINTRA
} else {
build_filter_intra_predictors(reference, ref_stride,
predictor, pre_stride,
mode,
tx_size,
have_top, have_left,
have_right);
}
#endif
}
#if CONFIG_INTERINTRA
// Intra predictor for the second square block in interintra prediction.
// Prediction of the first block (in pred_ptr) will be used to generate half of
// the boundary values.
static void build_intra_predictors_for_2nd_block_interintra
(uint8_t *src, int src_stride,
uint8_t *pred_ptr, int stride,
MB_PREDICTION_MODE mode, TX_SIZE txsz,
int up_available, int left_available,
int right_available, int bwltbh) {
int i;
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, yabove_data, 128 + 16);
uint8_t *above_row = yabove_data + 16;
const int bs = 4 << txsz;
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
once(init_intra_pred_fn_ptrs);
if (left_available) {
for (i = 0; i < bs; i++) {
if (bwltbh)
left_col[i] = src[i * src_stride - 1];
else
left_col[i] = pred_ptr[i * stride - 1];
}
} else {
vpx_memset(left_col, 129, bs);
}
if (up_available) {
uint8_t *above_ptr;
if (bwltbh)
above_ptr = pred_ptr - stride;
else
above_ptr = src - src_stride;
if (bs == 4 && right_available && left_available) {
above_row = above_ptr;
} else {
vpx_memcpy(above_row, above_ptr, bs);
if (bs == 4 && right_available)
vpx_memcpy(above_row + bs, above_ptr + bs, bs);
else
vpx_memset(above_row + bs, above_row[bs - 1], bs);
above_row[-1] = left_available ? above_ptr[-1] : 129;
}
} else {
vpx_memset(above_row, 127, bs * 2);
above_row[-1] = 127;
}
if (mode == DC_PRED) {
dc_pred[left_available][up_available][txsz](pred_ptr, stride,
above_row, left_col);
} else {
pred[mode][txsz](pred_ptr, stride, above_row, left_col);
}
}
#if CONFIG_MASKED_INTERINTRA
#define MASK_WEIGHT_BITS_INTERINTRA 6
static int get_masked_weight_interintra(int m) {
#define SMOOTHER_LEN_INTERINTRA 32
static const uint8_t smoothfn[2 * SMOOTHER_LEN_INTERINTRA + 1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 2, 2, 3, 4, 5, 6,
8, 9, 12, 14, 17, 21, 24, 28,
32,
36, 40, 43, 47, 50, 52, 55, 56,
58, 59, 60, 61, 62, 62, 63, 63,
63, 63, 63, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
};
if (m < -SMOOTHER_LEN_INTERINTRA)
return 0;
else if (m > SMOOTHER_LEN_INTERINTRA)
return (1 << MASK_WEIGHT_BITS_INTERINTRA);
else
return smoothfn[m + SMOOTHER_LEN_INTERINTRA];
}
static int get_hard_mask_interintra(int m) {
return m > 0;
}
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
// The soft mask is obtained by computing f(x, y) and then calling
// get_masked_weight(f(x, y)).
static const int mask_params_sml_interintra[1 << MASK_BITS_SML_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
};
static const int mask_params_med_hgtw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
};
static const int mask_params_med_hltw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
};
static const int mask_params_med_heqw_interintra[1 << MASK_BITS_MED_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_hgtw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
};
static const int mask_params_big_hltw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 2},
{ 0, -2, 0, 2},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 2, 0},
{-2, 0, 2, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int mask_params_big_heqw_interintra[1 << MASK_BITS_BIG_INTERINTRA]
[4] = {
{-1, 2, 2, 2},
{ 1, -2, 2, 2},
{-2, 1, 2, 2},
{ 2, -1, 2, 2},
{ 2, 1, 2, 2},
{-2, -1, 2, 2},
{ 1, 2, 2, 2},
{-1, -2, 2, 2},
{-1, 2, 2, 1},
{ 1, -2, 2, 1},
{-1, 2, 2, 3},
{ 1, -2, 2, 3},
{ 1, 2, 2, 1},
{-1, -2, 2, 1},
{ 1, 2, 2, 3},
{-1, -2, 2, 3},
{-2, 1, 1, 2},
{ 2, -1, 1, 2},
{-2, 1, 3, 2},
{ 2, -1, 3, 2},
{ 2, 1, 1, 2},
{-2, -1, 1, 2},
{ 2, 1, 3, 2},
{-2, -1, 3, 2},
{ 0, 2, 0, 1},
{ 0, -2, 0, 1},
{ 0, 2, 0, 3},
{ 0, -2, 0, 3},
{ 2, 0, 1, 0},
{-2, 0, 1, 0},
{ 2, 0, 3, 0},
{-2, 0, 3, 0},
};
static const int *get_mask_params_interintra(int mask_index,
BLOCK_SIZE_TYPE sb_type,
int h, int w) {
const int *a;
const int mask_bits = get_mask_bits_interintra(sb_type);
if (mask_index == MASK_NONE_INTERINTRA)
return NULL;
if (mask_bits == MASK_BITS_SML_INTERINTRA) {
a = mask_params_sml_interintra[mask_index];
} else if (mask_bits == MASK_BITS_MED_INTERINTRA) {
if (h > w)
a = mask_params_med_hgtw_interintra[mask_index];
else if (h < w)
a = mask_params_med_hltw_interintra[mask_index];
else
a = mask_params_med_heqw_interintra[mask_index];
} else if (mask_bits == MASK_BITS_BIG_INTERINTRA) {
if (h > w)
a = mask_params_big_hgtw_interintra[mask_index];
else if (h < w)
a = mask_params_big_hltw_interintra[mask_index];
else
a = mask_params_big_heqw_interintra[mask_index];
} else {
assert(0);
}
return a;
}
void vp9_generate_masked_weight_interintra(int mask_index,
BLOCK_SIZE_TYPE sb_type,
int h, int w,
uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params_interintra(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_masked_weight_interintra(m);
}
}
void vp9_generate_hard_mask_interintra(int mask_index, BLOCK_SIZE_TYPE sb_type,
int h, int w, uint8_t *mask, int stride) {
int i, j;
const int *a = get_mask_params_interintra(mask_index, sb_type, h, w);
if (!a) return;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int x = (j - (a[2] * w) / 4);
int y = (i - (a[3] * h) / 4);
int m = a[0] * x + a[1] * y;
mask[i * stride + j] = get_hard_mask_interintra(m);
}
}
#endif
static void combine_interintra(MB_PREDICTION_MODE mode,
#if CONFIG_MASKED_INTERINTRA
int use_masked_interintra,
int mask_index,
BLOCK_SIZE_TYPE bsize,
#endif
uint8_t *interpred,
int interstride,
uint8_t *intrapred,
int intrastride,
int bw, int bh) {
static const int scale_bits = 8;
static const int scale_max = 256;
static const int scale_round = 127;
static const int weights1d[64] = {
128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84,
83, 82, 81, 81, 80, 79, 78, 78,
77, 76, 76, 75, 75, 74, 74, 73,
73, 72, 72, 71, 71, 71, 70, 70,
70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67,
};
int size = MAX(bw, bh);
int size_scale = (size >= 64 ? 1 :
size == 32 ? 2 :
size == 16 ? 4 :
size == 8 ? 8 : 16);
int i, j;
#if CONFIG_MASKED_INTERINTRA
uint8_t mask[4096];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
vp9_generate_masked_weight_interintra(mask_index, bsize, bh, bw, mask, bw);
#endif
switch (mode) {
case V_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = weights1d[i * size_scale];
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case H_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = weights1d[j * size_scale];
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D63_PRED:
case D117_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = (weights1d[i * size_scale] * 3 +
weights1d[j * size_scale]) >> 2;
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D27_PRED:
case D153_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = (weights1d[j * size_scale] * 3 +
weights1d[i * size_scale]) >> 2;
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D135_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = weights1d[(i < j ? i : j) * size_scale];
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case D45_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
int scale = (weights1d[i * size_scale] +
weights1d[j * size_scale]) >> 1;
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
>> scale_bits;
}
}
break;
case TM_PRED:
case DC_PRED:
default:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int k = i * interstride + j;
#if CONFIG_MASKED_INTERINTRA
int m = mask[i * bw + j];
if (use_masked_interintra && get_mask_bits_interintra(bsize))
interpred[k] = (intrapred[i * intrastride + j] * m +
interpred[k] *
((1 << MASK_WEIGHT_BITS_INTERINTRA) - m) +
(1 << (MASK_WEIGHT_BITS_INTERINTRA - 1))) >>
MASK_WEIGHT_BITS_INTERINTRA;
else
#endif
interpred[k] = (interpred[k] + intrapred[i * intrastride + j]) >> 1;
}
}
break;
}
}
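/* Illustration (not part of the patch): weights1d is sampled at stride
 * size_scale so that any block edge maps onto the 64-entry table. For
 * V_PRED the intra predictor gets weight weights1d[i] / 256 on row i:
 * 128/256 (an even blend) on the row adjacent to the top boundary,
 * decaying to 67/256 at the far edge. The directional cases mix row and
 * column weights, and DC/TM fall back to a uniform average. */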
// Break down rectangular intra prediction for joint spatio-temporal prediction
// into two square intra predictions.
static void build_intra_predictors_for_interintra(uint8_t *src, int src_stride,
uint8_t *pred_ptr, int stride,
MB_PREDICTION_MODE mode,
int bw, int bh,
int up_available, int left_available,
int right_available) {
if (bw == bh) {
build_intra_predictors(src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bw),
up_available, left_available, right_available);
} else if (bw < bh) {
uint8_t *src_bottom = src + bw * src_stride;
uint8_t *pred_ptr_bottom = pred_ptr + bw * stride;
build_intra_predictors(src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bw),
up_available, left_available, right_available);
build_intra_predictors_for_2nd_block_interintra(src_bottom, src_stride,
pred_ptr_bottom, stride,
mode, intra_size_log2_for_interintra(bw),
1, left_available, right_available, 1);
} else {
uint8_t *src_right = src + bh;
uint8_t *pred_ptr_right = pred_ptr + bh;
build_intra_predictors(src, src_stride, pred_ptr, stride,
mode, intra_size_log2_for_interintra(bh),
up_available, left_available, right_available);
build_intra_predictors_for_2nd_block_interintra(src_right, src_stride,
pred_ptr_right, stride,
mode, intra_size_log2_for_interintra(bh),
up_available, 1, right_available, 0);
}
}
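/* Illustration (not part of the patch): VP9's rectangular blocks have a
 * 2:1 aspect ratio, so e.g. a 32x16 block (bw > bh) is covered by two
 * 16x16 squares. The left square is predicted from real neighbors; the
 * right square takes its above row from real pixels but its left column
 * from the first square's prediction (bwltbh = 0). For tall blocks the
 * roles are mirrored: the bottom square borrows its above row from the
 * top square's prediction (bwltbh = 1). */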
void vp9_build_interintra_predictors_sby(MACROBLOCKD *xd,
uint8_t *ypred,
int ystride,
BLOCK_SIZE_TYPE bsize) {
const struct macroblockd_plane* const pd = &xd->plane[0];
const int bw = plane_block_width(bsize, pd);
const int bh = plane_block_height(bsize, pd);
uint8_t intrapredictor[4096];
build_intra_predictors_for_interintra(
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
intrapredictor, bw,
xd->mode_info_context->mbmi.interintra_mode, bw, bh,
xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mode_info_context->mbmi.use_masked_interintra,
xd->mode_info_context->mbmi.interintra_mask_index,
bsize,
#endif
ypred, ystride, intrapredictor, bw, bw, bh);
}
void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd,
uint8_t *upred,
uint8_t *vpred,
int uvstride,
BLOCK_SIZE_TYPE bsize) {
int bwl = b_width_log2(bsize), bw = 2 << bwl;
int bhl = b_height_log2(bsize), bh = 2 << bhl;
uint8_t uintrapredictor[1024];
uint8_t vintrapredictor[1024];
build_intra_predictors_for_interintra(
xd->plane[1].dst.buf, xd->plane[1].dst.stride,
uintrapredictor, bw,
xd->mode_info_context->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, xd->right_available);
build_intra_predictors_for_interintra(
xd->plane[2].dst.buf, xd->plane[1].dst.stride,
vintrapredictor, bw,
xd->mode_info_context->mbmi.interintra_uv_mode, bw, bh,
xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mode_info_context->mbmi.use_masked_interintra,
xd->mode_info_context->mbmi.interintra_uv_mask_index,
bsize,
#endif
upred, uvstride, uintrapredictor, bw, bw, bh);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
#if CONFIG_MASKED_INTERINTRA
xd->mode_info_context->mbmi.use_masked_interintra,
xd->mode_info_context->mbmi.interintra_uv_mask_index,
bsize,
#endif
vpred, uvstride, vintrapredictor, bw, bw, bh);
}
void vp9_build_interintra_predictors(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride, int uvstride,
BLOCK_SIZE_TYPE bsize) {
vp9_build_interintra_predictors_sby(xd, ypred, ystride, bsize);
vp9_build_interintra_predictors_sbuv(xd, upred, vpred, uvstride, bsize);
}
#endif

View File

@@ -25,6 +25,26 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
int block_idx,
int bwl_in,
TX_SIZE tx_size,
- int mode, uint8_t *ref, int ref_stride,
+ int mode,
+#if CONFIG_FILTERINTRA
+ int filterbit,
+#endif
+ uint8_t *ref, int ref_stride,
uint8_t *predictor, int pre_stride);
#if CONFIG_INTERINTRA
void vp9_build_interintra_predictors(MACROBLOCKD *xd,
uint8_t *ypred,
uint8_t *upred,
uint8_t *vpred,
int ystride,
int uvstride,
BLOCK_SIZE_TYPE bsize);
#if CONFIG_MASKED_INTERINTRA
void vp9_generate_masked_weight_interintra(int mask_index,
BLOCK_SIZE_TYPE sb_type,
int h, int w,
uint8_t *mask, int stride);
#endif
#endif
#endif // VP9_COMMON_VP9_RECONINTRA_H_

View File

@@ -557,6 +557,113 @@ specialize vp9_sad4x8_avg $sse_x86inc
prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
specialize vp9_sad4x4_avg $sse_x86inc
if [ "$CONFIG_MASKED_INTERINTER" = "yes" ] || ([ "$CONFIG_MASKED_INTERINTRA" = "yes" ] && [ "$CONFIG_INTERINTRA" = "yes" ]); then
prototype int vp9_masked_diamond_search_sad "struct macroblock *x, uint8_t *mask, int mask_stride, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv, int is_second"
specialize vp9_masked_diamond_search_sad
prototype int vp9_masked_refining_search_sad "struct macroblock *x, uint8_t *mask, int mask_stride, union int_mv *ref_mv, int sad_per_bit, int distance, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv, int is_second"
specialize vp9_masked_refining_search_sad
prototype unsigned int vp9_masked_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance32x16
prototype unsigned int vp9_masked_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance16x32
prototype unsigned int vp9_masked_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance64x32
prototype unsigned int vp9_masked_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance32x64
prototype unsigned int vp9_masked_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance32x32
prototype unsigned int vp9_masked_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance64x64
prototype unsigned int vp9_masked_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance16x16
prototype unsigned int vp9_masked_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance16x8
prototype unsigned int vp9_masked_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance8x16
prototype unsigned int vp9_masked_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance8x8
prototype unsigned int vp9_masked_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_variance4x4
prototype unsigned int vp9_masked_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance64x64
prototype unsigned int vp9_masked_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance32x64
prototype unsigned int vp9_masked_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance64x32
prototype unsigned int vp9_masked_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance32x16
prototype unsigned int vp9_masked_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance16x32
prototype unsigned int vp9_masked_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance32x32
prototype unsigned int vp9_masked_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance16x16
prototype unsigned int vp9_masked_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance8x16
prototype unsigned int vp9_masked_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance16x8
prototype unsigned int vp9_masked_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance8x8
prototype unsigned int vp9_masked_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int *sse"
specialize vp9_masked_sub_pixel_variance4x4
prototype unsigned int vp9_masked_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad64x64
prototype unsigned int vp9_masked_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad32x64
prototype unsigned int vp9_masked_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad64x32
prototype unsigned int vp9_masked_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad32x16
prototype unsigned int vp9_masked_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad16x32
prototype unsigned int vp9_masked_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad32x32
prototype unsigned int vp9_masked_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad16x16
prototype unsigned int vp9_masked_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad16x8
prototype unsigned int vp9_masked_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad8x16
prototype unsigned int vp9_masked_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad8x8
prototype unsigned int vp9_masked_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *msk_ptr, int msk_stride, unsigned int max_sad"
specialize vp9_masked_sad4x4
fi
prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc

View File

@@ -35,4 +35,31 @@ static INLINE unsigned int sad_mx_n_c(const uint8_t *src_ptr,
return sad;
}
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
static INLINE unsigned int masked_sad_mx_n_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
int m,
int n) {
int r, c;
unsigned int sad = 0;
for (r = 0; r < n; r++) {
for (c = 0; c < m; c++) {
sad += (msk_ptr[c]) * abs(src_ptr[c] - ref_ptr[c]);
}
src_ptr += src_stride;
ref_ptr += ref_stride;
msk_ptr += msk_stride;
}
sad = (sad + 31) >> 6;
return sad;
}
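/* Note (illustration only): msk_ptr values are assumed to be the 6-bit
 * weights (0..64) produced by vp9_generate_masked_weight(), so the
 * accumulated SAD carries an extra factor of 2^6; the final
 * (sad + 31) >> 6 removes it with rounding. */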
#endif
#endif // VP9_COMMON_VP9_SADMXN_H_

View File

@@ -39,6 +39,40 @@ static void variance(const uint8_t *src_ptr,
}
}
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
static void masked_variance(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
int w,
int h,
unsigned int *sse,
int *sum) {
int i, j;
int diff;
*sum = 0;
*sse = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
diff = (src_ptr[j] - ref_ptr[j]) * (msk_ptr[j]);
*sum += diff;
*sse += diff * diff;
}
src_ptr += src_stride;
ref_ptr += ref_stride;
msk_ptr += msk_stride;
}
*sum = (*sum >= 0) ? ((*sum + 31) >> 6) : -((-*sum + 31) >> 6);
*sse = (*sse + 2047) >> 12;
}
#endif
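The two normalizations in masked_variance follow directly from the 64-scaled mask: the sum carries one factor of 64 (hence the symmetric rounded >> 6) while the squared terms carry 64 * 64 = 4096 (hence the >> 12), so both values come back out in pixel units. A small self-contained check under that assumption:
#include <assert.h>
int main(void) {
  /* Two pixels with signed diffs +3 and -1 under a full-weight (64) mask. */
  const int d[2] = { 3, -1 };
  int sum = 0, i;
  unsigned int sse = 0;
  for (i = 0; i < 2; i++) {
    const int wd = d[i] * 64;     /* what masked_variance accumulates */
    sum += wd;
    sse += wd * wd;
  }
  sum = (sum >= 0) ? ((sum + 31) >> 6) : -((-sum + 31) >> 6);
  sse = (sse + 2047) >> 12;
  assert(sum == 2);               /* 3 + (-1), back in pixel units */
  assert(sse == 10);              /* 9 + 1, back in pixel units */
  return 0;
}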
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass

View File

@@ -169,6 +169,13 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
const MB_PREDICTION_MODE L = xd->left_available ?
left_block_mode(m, 0) : DC_PRED;
mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]);
#if CONFIG_FILTERINTRA
if ((mbmi->txfm_size <= TX_8X8) && is_filter_allowed(mbmi->mode))
mbmi->filterbit = vp9_read(r,
cm->fc.filterintra_prob[mbmi->txfm_size][mbmi->mode]);
else
mbmi->filterbit = 0;
#endif
} else {
// Only 4x4, 4x8, 8x4 blocks
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
@@ -188,13 +195,34 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m,
m->bmi[ib + 2].as_mode = b_mode;
if (num_4x4_w == 2)
m->bmi[ib + 1].as_mode = b_mode;
#if CONFIG_FILTERINTRA
if (is_filter_allowed(b_mode))
m->b_filter_info[ib] = vp9_read(r,
cm->fc.filterintra_prob[0][b_mode]);
else
m->b_filter_info[ib] = 0;
if (num_4x4_h == 2)
m->b_filter_info[ib + 2] = m->b_filter_info[ib];
if (num_4x4_w == 2)
m->b_filter_info[ib + 1] = m->b_filter_info[ib];
#endif
}
}
mbmi->mode = m->bmi[3].as_mode;
#if CONFIG_FILTERINTRA
mbmi->filterbit = m->b_filter_info[3];
#endif
}
mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]);
#if CONFIG_FILTERINTRA
if ((get_uv_tx_size(mbmi) <= TX_8X8) && is_filter_allowed(mbmi->uv_mode))
mbmi->uv_filterbit = vp9_read(r,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
else
mbmi->uv_filterbit = 0;
#endif
}
static int read_mv_component(vp9_reader *r,
@@ -390,6 +418,15 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
const int size_group = size_group_lookup[bsize];
mbmi->mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]);
cm->counts.y_mode[size_group][mbmi->mode]++;
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->mode) && (mbmi->txfm_size <= TX_8X8)) {
mbmi->filterbit = vp9_read(r,
cm->fc.filterintra_prob[mbmi->txfm_size][mbmi->mode]);
cm->counts.filterintra[mbmi->txfm_size][mbmi->mode][mbmi->filterbit]++;
} else {
mbmi->filterbit = 0;
}
#endif
} else {
// Only 4x4, 4x8, 8x4 blocks
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
@@ -407,13 +444,40 @@ static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
mi->bmi[ib + 2].as_mode = b_mode;
if (num_4x4_w == 2)
mi->bmi[ib + 1].as_mode = b_mode;
#if CONFIG_FILTERINTRA
if (is_filter_allowed(b_mode)) {
mi->b_filter_info[ib] = vp9_read(r,
cm->fc.filterintra_prob[0][b_mode]);
cm->counts.filterintra[0][b_mode][mi->b_filter_info[ib]]++;
} else {
mi->b_filter_info[ib] = 0;
}
if (num_4x4_h == 2)
mi->b_filter_info[ib + 2] = mi->b_filter_info[ib];
if (num_4x4_w == 2)
mi->b_filter_info[ib + 1] = mi->b_filter_info[ib];
#endif
}
}
mbmi->mode = mi->bmi[3].as_mode;
#if CONFIG_FILTERINTRA
mbmi->filterbit = mi->b_filter_info[3];
#endif
}
mbmi->uv_mode = read_intra_mode(r, cm->fc.uv_mode_prob[mbmi->mode]);
cm->counts.uv_mode[mbmi->mode][mbmi->uv_mode]++;
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mbmi->uv_mode) && (get_uv_tx_size(mbmi) <= TX_8X8)) {
mbmi->uv_filterbit = vp9_read(r,
cm->fc.filterintra_prob[get_uv_tx_size(mbmi)][mbmi->uv_mode]);
cm->counts.filterintra[get_uv_tx_size(mbmi)]
[mbmi->uv_mode][mbmi->uv_filterbit]++;
} else {
mbmi->uv_filterbit = 0;
}
#endif
}
static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
@@ -447,6 +511,11 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
uint8_t inter_mode_ctx;
MV_REFERENCE_FRAME ref0, ref1;
#if CONFIG_MASKED_INTERINTER
mbmi->use_masked_compound = 0;
mbmi->mask_index = MASK_NONE;
#endif
read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame);
ref0 = mbmi->ref_frame[0];
ref1 = mbmi->ref_frame[1];
@@ -486,6 +555,45 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
}
}
#if CONFIG_INTERINTRA
if ((cm->use_interintra)
&& is_interintra_allowed(bsize)
&& is_inter_mode(mbmi->mode)
&& (mbmi->ref_frame[1] == NONE)
) {
mbmi->ref_frame[1] = (vp9_read(r, cm->fc.interintra_prob[bsize]) ?
INTRA_FRAME : NONE);
cm->counts.interintra[bsize][mbmi->ref_frame[1] == INTRA_FRAME]++;
#if CONFIG_MASKED_INTERINTRA
mbmi->use_masked_interintra = 0;
#endif
if (mbmi->ref_frame[1] == INTRA_FRAME) {
int bsg = MIN(MIN(b_width_log2(bsize), b_height_log2(bsize)), 3);
mbmi->interintra_mode = read_intra_mode(r, cm->fc.y_mode_prob[bsg]);
cm->counts.y_mode[bsg][mbmi->interintra_mode]++;
#if SEPARATE_INTERINTRA_UV
mbmi->interintra_uv_mode = read_intra_mode(r,
cm->fc.uv_mode_prob[mbmi->interintra_mode]);
cm->counts.uv_mode[mbmi->interintra_mode][mbmi->interintra_uv_mode]++;
#else
mbmi->interintra_uv_mode = mbmi->interintra_mode;
#endif
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra && get_mask_bits_interintra(bsize)) {
mbmi->use_masked_interintra = vp9_read(r,
cm->fc.masked_interintra_prob[bsize]);
cm->counts.masked_interintra[bsize][mbmi->use_masked_interintra]++;
if (mbmi->use_masked_interintra) {
mbmi->interintra_mask_index = vp9_read_literal(r,
get_mask_bits_interintra(bsize));
mbmi->interintra_uv_mask_index = mbmi->interintra_mask_index;
}
}
#endif
}
}
#endif
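The size-group index bsg reuses the regular y_mode_prob contexts for the interintra mode. Assuming b_width_log2()/b_height_log2() count 4-pixel units as elsewhere in VP9, the clamp works out as in this illustrative helper:
/* Hypothetical restatement of
 * MIN(MIN(b_width_log2(bsize), b_height_log2(bsize)), 3): */
static int size_group(int bwl, int bhl) {
  const int m = bwl < bhl ? bwl : bhl;
  return m < 3 ? m : 3;
}
/* BLOCK_8X8   -> size_group(1, 1) == 1
 * BLOCK_16X8  -> size_group(2, 1) == 1   (the narrower dimension wins)
 * BLOCK_32X32 -> size_group(3, 3) == 3
 * BLOCK_64X64 -> size_group(4, 4) == 3   (clamped by the outer MIN) */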
if (bsize < BLOCK_8X8) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
@@ -585,6 +693,21 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
assert(!"Invalid inter mode value");
}
}
#if CONFIG_MASKED_INTERINTER
mbmi->use_masked_compound = 0;
if (pbi->common.use_masked_compound &&
pbi->common.comp_pred_mode != SINGLE_PREDICTION_ONLY &&
is_inter_mode(mbmi->mode) &&
get_mask_bits(mi->mbmi.sb_type) &&
mbmi->ref_frame[1] > INTRA_FRAME) {
mbmi->use_masked_compound =
vp9_read(r, pbi->common.fc.masked_compound_prob[bsize]);
pbi->common.counts.masked_compound[bsize][mbmi->use_masked_compound]++;
if (mbmi->use_masked_compound) {
mbmi->mask_index = vp9_read_literal(r, get_mask_bits(mi->mbmi.sb_type));
}
}
#endif
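The decoder only looks for a masked-compound bit when every gate passes; the writer applies the identical test, so decoder and encoder stay in lockstep. A hedged restatement of that gate as a predicate (the helper itself is illustrative, the fields are the ones used above):
static int masked_compound_is_coded(const VP9_COMMON *cm,
                                    const MB_MODE_INFO *mbmi) {
  return cm->use_masked_compound &&                      /* frame-level flag */
         cm->comp_pred_mode != SINGLE_PREDICTION_ONLY && /* compound enabled */
         is_inter_mode(mbmi->mode) &&
         get_mask_bits(mbmi->sb_type) &&                 /* size has masks */
         mbmi->ref_frame[1] > INTRA_FRAME;               /* two-ref compound */
}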
}
static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi,
@@ -618,6 +741,21 @@ static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
#if CONFIG_MASKED_INTERINTER
if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
cm->use_masked_compound = vp9_read_bit(r);
if (cm->use_masked_compound) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (get_mask_bits(i))
if (vp9_read(r, VP9_UPD_MASKED_COMPOUND_PROB))
vp9_diff_update_prob(r, &cm->fc.masked_compound_prob[i]);
}
}
} else {
cm->use_masked_compound = 0;
}
#endif
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
for (i = 0; i < REF_CONTEXTS; i++) {
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
@@ -652,6 +790,27 @@ void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
if (cm->mcomp_filter_type == SWITCHABLE)
read_switchable_interp_probs(&cm->fc, r);
#if CONFIG_INTERINTRA
if (cm->use_interintra) {
int b;
for (b = 0; b < BLOCK_SIZE_TYPES; ++b) {
if (is_interintra_allowed(b))
if (vp9_read(r, VP9_UPD_INTERINTRA_PROB))
vp9_diff_update_prob(r, &cm->fc.interintra_prob[b]);
}
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra) {
int k;
for (k = 0; k < BLOCK_SIZE_TYPES; ++k) {
if (is_interintra_allowed(k) && get_mask_bits_interintra(k))
if (vp9_read(r, VP9_UPD_MASKED_INTERINTRA_PROB))
vp9_diff_update_prob(r, &cm->fc.masked_interintra_prob[k]);
}
}
#endif
}
#endif
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);

View File

@@ -140,12 +140,22 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int tx_ib = raster_block >> tx_size;
const int mode = plane == 0 ? mi->mbmi.mode
: mi->mbmi.uv_mode;
#if CONFIG_FILTERINTRA
int fbit = 0;
#endif
if (plane == 0 && mi->mbmi.sb_type < BLOCK_8X8) {
assert(bsize == BLOCK_8X8);
b_mode = mi->bmi[raster_block].as_mode;
#if CONFIG_FILTERINTRA
fbit = mi->b_filter_info[raster_block];
#endif
} else {
b_mode = mode;
#if CONFIG_FILTERINTRA
if (tx_size <= TX_8X8)
fbit = plane == 0? mi->mbmi.filterbit: mi->mbmi.uv_filterbit;
#endif
}
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
@@ -153,6 +163,9 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
plane_b_size = b_width_log2(bsize) - pd->subsampling_x;
vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride,
dst, pd->dst.stride);
@@ -255,6 +268,11 @@ static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col,
set_ref(pbi, 1, mi_row, mi_col);
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
#if CONFIG_INTERINTRA
if (cm->use_interintra && (mbmi->ref_frame[1] == INTRA_FRAME)) {
extend_for_interintra(xd, bsize);
}
#endif
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
eobtotal = decode_tokens(pbi, bsize, r);
if (less8x8) {
@@ -877,6 +895,17 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
xd->allow_high_precision_mv = vp9_rb_read_bit(rb);
cm->mcomp_filter_type = read_interp_filter_type(rb);
#if CONFIG_INTERINTRA
cm->use_interintra = vp9_rb_read_bit(rb);
#if CONFIG_MASKED_INTERINTRA
if (cm->use_interintra) {
cm->use_masked_interintra = vp9_rb_read_bit(rb);
} else {
cm->use_masked_interintra = 0;
}
#endif
#endif
for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i)
vp9_setup_scale_factors(cm, i);

View File

@@ -447,6 +447,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
if (bsize >= BLOCK_8X8) {
write_intra_mode(bc, mode, pc->fc.y_mode_prob[size_group_lookup[bsize]]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mode) && (mi->txfm_size <= TX_8X8)) {
vp9_write(bc, mi->filterbit,
pc->fc.filterintra_prob[mi->txfm_size][mode]);
}
#endif
} else {
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -455,10 +461,22 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode;
write_intra_mode(bc, bm, pc->fc.y_mode_prob[0]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(bm)) {
vp9_write(bc, m->b_filter_info[idy * 2 + idx],
pc->fc.filterintra_prob[0][bm]);
}
#endif
}
}
}
write_intra_mode(bc, mi->uv_mode, pc->fc.uv_mode_prob[mode]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(mi->uv_mode) && (get_uv_tx_size(mi) <= TX_8X8)) {
vp9_write(bc, mi->uv_filterbit,
pc->fc.filterintra_prob[get_uv_tx_size(mi)][mi->uv_mode]);
}
#endif
} else {
vp9_prob *mv_ref_p;
encode_ref_frame(cpi, bc);
@@ -486,6 +504,36 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
assert(mi->interp_filter == pc->mcomp_filter_type);
}
#if CONFIG_INTERINTRA
if ((pc->use_interintra)
&& is_interintra_allowed(bsize)
&& is_inter_mode(mode)
&& (mi->ref_frame[1] <= INTRA_FRAME)) {
vp9_write(bc, mi->ref_frame[1] == INTRA_FRAME,
pc->fc.interintra_prob[bsize]);
if (mi->ref_frame[1] == INTRA_FRAME) {
const int bwl = b_width_log2(bsize),
bhl = b_height_log2(bsize);
write_intra_mode(bc, mi->interintra_mode,
pc->fc.y_mode_prob[MIN(3, MIN(bwl, bhl))]);
#if SEPARATE_INTERINTRA_UV
write_intra_mode(bc, mi->interintra_uv_mode,
pc->fc.uv_mode_prob[mi->interintra_mode]);
#endif
#if CONFIG_MASKED_INTERINTRA
if (get_mask_bits_interintra(mi->sb_type) &&
pc->use_masked_interintra) {
vp9_write(bc, mi->use_masked_interintra,
pc->fc.masked_interintra_prob[bsize]);
if (mi->use_masked_interintra) {
vp9_write_literal(bc, mi->interintra_mask_index,
get_mask_bits_interintra(mi->sb_type));
}
}
#endif
}
}
#endif
if (bsize < BLOCK_8X8) {
int j;
MB_PREDICTION_MODE blockmode;
@@ -528,6 +576,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) {
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
nmvc, allow_hp);
}
#if CONFIG_MASKED_INTERINTER
if (cpi->common.use_masked_compound &&
cpi->common.comp_pred_mode != SINGLE_PREDICTION_ONLY &&
is_inter_mode(mode) &&
get_mask_bits(mi->sb_type) &&
mi->ref_frame[1] > INTRA_FRAME) {
vp9_write(bc, mi->use_masked_compound, pc->fc.masked_compound_prob[bsize]);
if (mi->use_masked_compound) {
vp9_write_literal(bc, mi->mask_index, get_mask_bits(mi->sb_type));
}
}
#endif
}
}
@@ -552,6 +612,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m,
const MB_PREDICTION_MODE L = xd->left_available ?
left_block_mode(m, 0) : DC_PRED;
write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(ym) && (m->mbmi.txfm_size <= TX_8X8))
vp9_write(bc, m->mbmi.filterbit,
c->fc.filterintra_prob[m->mbmi.txfm_size][ym]);
#endif
} else {
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type];
@@ -567,11 +632,21 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO *m,
++intra_mode_stats[A][L][bm];
#endif
write_intra_mode(bc, bm, vp9_kf_y_mode_prob[A][L]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(bm))
vp9_write(bc, m->b_filter_info[i], c->fc.filterintra_prob[0][bm]);
#endif
}
}
}
write_intra_mode(bc, m->mbmi.uv_mode, vp9_kf_uv_mode_prob[ym]);
#if CONFIG_FILTERINTRA
if (is_filter_allowed(m->mbmi.uv_mode) &&
(get_uv_tx_size(&(m->mbmi)) <= TX_8X8))
vp9_write(bc, m->mbmi.uv_filterbit,
c->fc.filterintra_prob[get_uv_tx_size(&(m->mbmi))][m->mbmi.uv_mode]);
#endif
}
static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
@@ -1332,6 +1407,40 @@ static void write_uncompressed_header(VP9_COMP *cpi,
fix_mcomp_filter_type(cpi);
write_interp_filter_type(cm->mcomp_filter_type, wb);
#if CONFIG_INTERINTRA
if (!cpi->dummy_packing && cm->use_interintra) {
int b;
cm->use_interintra = 0;
for (b = 0; b < BLOCK_SIZE_TYPES; ++b) {
if (is_interintra_allowed(b) && (cpi->interintra_count[b][1] > 0)) {
cm->use_interintra = 1;
break;
}
}
}
vp9_wb_write_bit(wb, cm->use_interintra);
if (!cm->use_interintra)
vp9_zero(cpi->interintra_count);
#if CONFIG_MASKED_INTERINTRA
if (!cpi->dummy_packing && cm->use_interintra
&& cm->use_masked_interintra) {
int k;
cm->use_masked_interintra = 0;
for (k = 0; k < BLOCK_SIZE_TYPES; ++k) {
if (is_interintra_allowed(k) && get_mask_bits_interintra(k) &&
(cpi->masked_interintra_count[k][1] > 0)) {
cm->use_masked_interintra = 1;
break;
}
}
}
if (cm->use_interintra) {
vp9_wb_write_bit(wb, cm->use_masked_interintra);
if (!cm->use_masked_interintra)
vp9_zero(cpi->masked_interintra_count);
}
#endif
#endif
}
}
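A pattern worth calling out in the header writer above: during the real (non-dummy) packing pass, each frame-level tool flag is demoted to 0 if no block actually used the tool, based on the counts gathered while encoding; during dummy packing (the trial pass used for rate estimation) the flags are left alone so the counts can keep accumulating. A condensed sketch of that demotion, ignoring the per-size is_interintra_allowed()/get_mask_bits_interintra() gates for brevity:
/* Illustrative helper only: returns 1 if any block size recorded a use. */
static int any_tool_use(const unsigned int counts[][2], int num_sizes) {
  int b;
  for (b = 0; b < num_sizes; ++b)
    if (counts[b][1] > 0) return 1;
  return 0;
}
/* if (!cpi->dummy_packing && cm->use_interintra)
 *   cm->use_interintra = any_tool_use(cpi->interintra_count, BLOCK_SIZE_TYPES); */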
@@ -1382,6 +1491,31 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
if (cm->mcomp_filter_type == SWITCHABLE)
update_switchable_interp_probs(cpi, &header_bc);
#if CONFIG_INTERINTRA
if (cm->use_interintra) {
int b;
for (b = 0; b < BLOCK_SIZE_TYPES; ++b) {
if (is_interintra_allowed(b))
vp9_cond_prob_diff_update(&header_bc,
&cm->fc.interintra_prob[b],
VP9_UPD_INTERINTRA_PROB,
cpi->interintra_count[b]);
}
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra) {
int k;
for (k = 0; k < BLOCK_SIZE_TYPES; ++k) {
if (is_interintra_allowed(k) && get_mask_bits_interintra(k))
vp9_cond_prob_diff_update(&header_bc,
&cm->fc.masked_interintra_prob[k],
VP9_UPD_MASKED_INTERINTRA_PROB,
cpi->masked_interintra_count[k]);
}
}
#endif
}
#endif
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i],
VP9_MODE_UPDATE_PROB,
@@ -1401,6 +1535,35 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
VP9_MODE_UPDATE_PROB,
cpi->comp_inter_count[i]);
}
#if CONFIG_MASKED_INTERINTER
if (use_compound_pred) {
if (!cpi->dummy_packing && cm->use_masked_compound) {
cm->use_masked_compound = 0;
for (i = 0; i < BLOCK_SIZE_TYPES; i++) {
if (get_mask_bits(i) && (cpi->masked_compound_counts[i][1] > 0)) {
cm->use_masked_compound = 1;
break;
}
}
}
vp9_write_bit(&header_bc, cm->use_masked_compound);
if (cm->use_masked_compound) {
for (i = 0; i < BLOCK_SIZE_TYPES; i++) {
if (get_mask_bits(i))
vp9_cond_prob_diff_update(&header_bc,
&fc->masked_compound_prob[i],
VP9_UPD_MASKED_COMPOUND_PROB,
cpi->masked_compound_counts[i]);
}
} else {
vp9_zero(cpi->masked_compound_counts);
}
} else {
if (!cpi->dummy_packing)
cm->use_masked_compound = 0;
vp9_zero(cpi->masked_compound_counts);
}
#endif
}
if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {

View File

@@ -434,6 +434,41 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
xd->mode_info_context[mis * j + i].mbmi = *mbmi;
}
#if CONFIG_INTERINTRA
if (cm->use_interintra
&& is_interintra_allowed(mbmi->sb_type)
&& mbmi->mode >= NEARESTMV && mbmi->mode <= NEWMV &&
mbmi->ref_frame[1] <= INTRA_FRAME) {
if (mbmi->ref_frame[1] == INTRA_FRAME) {
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
const int bsl = MIN(bwl, bhl);
++cpi->y_mode_count[MIN(bsl, 3)][mbmi->interintra_mode];
++cpi->interintra_count[mbmi->sb_type][1];
#if SEPARATE_INTERINTRA_UV
++cpi->uv_mode_count[mbmi->interintra_mode][mbmi->interintra_uv_mode];
#endif
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra &&
get_mask_bits_interintra(mbmi->sb_type))
++cpi->masked_interintra_count[mbmi->sb_type]
[mbmi->use_masked_interintra];
#endif
} else {
++cpi->interintra_count[mbmi->sb_type][0];
}
}
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->use_masked_compound &&
cm->comp_pred_mode != SINGLE_PREDICTION_ONLY &&
is_inter_mode(mbmi->mode) &&
get_mask_bits(mbmi->sb_type) &&
mbmi->ref_frame[1] > INTRA_FRAME) {
++cpi->masked_compound_counts[bsize][mbmi->use_masked_compound];
}
#endif
if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
@@ -505,14 +540,12 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
// Set up destination pointers
setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col);
- /* Set up limit values for MV components to prevent them from
- * extending beyond the UMV borders assuming 16x16 block size */
- x->mv_row_min = -((mi_row * MI_SIZE) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
- x->mv_col_min = -((mi_col * MI_SIZE) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
- x->mv_row_max = ((cm->mi_rows - mi_row) * MI_SIZE
- + (VP9BORDERINPIXELS - MI_SIZE * mi_height - VP9_INTERP_EXTEND));
- x->mv_col_max = ((cm->mi_cols - mi_col) * MI_SIZE
- + (VP9BORDERINPIXELS - MI_SIZE * mi_width - VP9_INTERP_EXTEND));
+ // Set up limit values for MV components;
+ // MVs beyond this range do not produce a new/different prediction block.
+ x->mv_row_min = -((mi_row * MI_SIZE) + MAX_BLOCK_SIZE - VP9_INTERP_EXTEND);
+ x->mv_col_min = -((mi_col * MI_SIZE) + MAX_BLOCK_SIZE - VP9_INTERP_EXTEND);
+ x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
+ x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
// Set up distance of MB to edge of frame in 1/8th pel units
assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
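To make the tightened bounds concrete, here are worked numbers assuming MI_SIZE = 8, MAX_BLOCK_SIZE = 64 and VP9_INTERP_EXTEND = 4 (their usual values in this tree):
/* mi_row = 0:                   mv_row_min = -((0 * 8) + 64 - 4) = -60
 * mi_row = 4, cm->mi_rows = 16: mv_row_max = (16 - 4) * 8 + 4    = 100
 * i.e. a motion vector may reach exactly as far as the largest (64-pixel)
 * block plus the interpolation fringe can still overlap the frame; anything
 * beyond that range could not change the prediction. */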
@@ -1990,6 +2023,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
#if CONFIG_FILTERINTRA
xd->mode_info_context->mbmi.filterbit = 0;
xd->mode_info_context->mbmi.uv_filterbit = 0;
#endif
vp9_zero(cpi->y_mode_count)
vp9_zero(cpi->y_uv_mode_count)
@@ -2001,6 +2038,21 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vp9_zero(cpi->comp_ref_count);
vp9_zero(cm->counts.tx);
vp9_zero(cm->counts.mbskip);
#if CONFIG_INTERINTRA
vp9_zero(cpi->interintra_count);
vp9_zero(cpi->interintra_select_count);
#if CONFIG_MASKED_INTERINTRA
vp9_zero(cpi->masked_interintra_count);
vp9_zero(cpi->masked_interintra_select_count);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_zero(cm->counts.filterintra);
#endif
#if CONFIG_MASKED_INTERINTER
vp9_zero(cpi->masked_compound_counts);
vp9_zero(cpi->masked_compound_select_counts);
#endif
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
@@ -2492,13 +2544,30 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
const MACROBLOCKD *xd = &x->e_mbd;
const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
#if CONFIG_FILTERINTRA
const int uv_fbit = xd->mode_info_context->mbmi.uv_filterbit;
int fbit = xd->mode_info_context->mbmi.filterbit;
#endif
++cpi->y_uv_mode_count[m][uvm];
#if CONFIG_FILTERINTRA
if (is_filter_allowed(uvm) &&
(get_uv_tx_size(&(xd->mode_info_context->mbmi)) <= TX_8X8))
++cpi->common.counts.filterintra
[get_uv_tx_size(&(xd->mode_info_context->mbmi))]
[uvm][uv_fbit];
#endif
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_8X8) {
const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
const int bsl = MIN(bwl, bhl);
++cpi->y_mode_count[MIN(bsl, 3)][m];
#if CONFIG_FILTERINTRA
if (is_filter_allowed(m) &&
(xd->mode_info_context->mbmi.txfm_size <= TX_8X8))
++cpi->common.counts.filterintra[xd->mode_info_context->mbmi.txfm_size]
[m][fbit];
#endif
} else {
int idx, idy;
int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[
@@ -2509,6 +2578,12 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode;
++cpi->y_mode_count[0][m];
#if CONFIG_FILTERINTRA
if (is_filter_allowed(m)) {
fbit = xd->mode_info_context->b_filter_info[idy * 2 + idx];
++cpi->common.counts.filterintra[0][m][fbit];
}
#endif
}
}
}
@@ -2609,6 +2684,12 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
#if CONFIG_INTERINTRA
if (cm->use_interintra
&& (mbmi->ref_frame[1] == INTRA_FRAME)) {
extend_for_interintra(xd, bsize);
}
#endif
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
}

View File

@@ -640,6 +640,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int16_t *scan, *iscan;
TX_TYPE tx_type;
MB_PREDICTION_MODE mode;
#if CONFIG_FILTERINTRA
int fbit = 0;
#endif
const int bwl = b_width_log2(bsize) - pd->subsampling_x, bw = 1 << bwl;
const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
int xoff, yoff;
@@ -660,6 +663,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = vp9_default_scan_32x32;
iscan = vp9_default_iscan_32x32;
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
#if CONFIG_FILTERINTRA
fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit;
#endif
block >>= 6;
xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl);
@@ -667,6 +673,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
@@ -685,6 +694,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = get_scan_16x16(tx_type);
iscan = get_iscan_16x16(tx_type);
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
#if CONFIG_FILTERINTRA
fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit;
#endif
block >>= 4;
xoff = 16 * (block & twmask);
yoff = 16 * (block >> twl);
@@ -692,6 +704,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(16, 16, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
@@ -714,6 +729,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
scan = get_scan_8x8(tx_type);
iscan = get_iscan_8x8(tx_type);
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
#if CONFIG_FILTERINTRA
fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit;
#endif
block >>= 2;
xoff = 8 * (block & twmask);
yoff = 8 * (block >> twl);
@@ -721,6 +739,9 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(8, 8, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
@@ -747,12 +768,22 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
else
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
#if CONFIG_FILTERINTRA
if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
fbit = xd->mode_info_context->b_filter_info[block];
else
fbit = plane == 0 ? mbmi->filterbit : mbmi->uv_filterbit;
#endif
xoff = 4 * (block & twmask);
yoff = 4 * (block >> twl);
dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
src = p->src.buf + yoff * p->src.stride + xoff;
src_diff = p->src_diff + 4 * bw * yoff + xoff;
vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
#if CONFIG_FILTERINTRA
fbit,
#endif
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(4, 4, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);

View File

@@ -146,7 +146,13 @@ static int find_best_16x16_intra(VP9_COMP *cpi,
unsigned int err;
xd->mode_info_context->mbmi.mode = mode;
#if CONFIG_FILTERINTRA
xd->mode_info_context->mbmi.filterbit = 0;
#endif
vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
#if CONFIG_FILTERINTRA
0,
#endif
x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,

View File

@@ -614,8 +614,11 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
#undef DIST
#undef IFMVCV
#undef CHECK_BETTER
#if !((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
#undef MIN
#undef MAX
#endif
#undef SP
@@ -1165,6 +1168,504 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x,
mvcost, x->errorperbit);
}
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
#define MVC(r, c) \
(mvcost ? \
((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
error_per_bit + 4096) >> 13 : 0)
#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
// for svf calc
#define IFMVCV(r, c, s, e) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
s \
else \
e;
/* pointer to predictor base of a motion vector */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->msvf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, \
mask, mask_stride, &sse)
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
IFMVCV(r, c, { \
thismse = (DIST(r, c)); \
if ((v = MVC(r, c) + thismse) < besterr) { \
besterr = v; \
br = r; \
bc = c; \
*distortion = thismse; \
*sse1 = sse; \
} \
}, \
v = INT_MAX;)
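MVC folds the table-driven rate of an MV residual into the same units as the distortion: the rate estimate is multiplied by error_per_bit and rounded into place with (+ 4096) >> 13. A toy evaluation with made-up table values, just to show the arithmetic:
/* Suppose the joint + component cost tables charge 300 units for this
 * residual and error_per_bit is 60 (both numbers invented): */
int rate = 300, error_per_bit = 60;
int cost = (rate * error_per_bit + 4096) >> 13;  /* (18000 + 4096) / 8192 = 2 */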
int vp9_find_best_masked_sub_pixel_step_iteratively(
MACROBLOCK *x, uint8_t *mask, int mask_stride, int_mv *bestmv,
int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
int is_second) {
uint8_t *z = x->plane[0].src.buf;
int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
int rr, rc, br, bc, hstep;
int tr, tc;
unsigned int besterr = INT_MAX;
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
unsigned int eighthiters = 4;
int thismse;
int maxc, minc, maxr, minr;
int y_stride;
int offset;
int usehp = xd->allow_high_precision_mv;
uint8_t *y = xd->plane[0].pre[is_second].buf +
(bestmv->as_mv.row) * xd->plane[0].pre[is_second].stride +
bestmv->as_mv.col;
y_stride = xd->plane[0].pre[is_second].stride;
rr = ref_mv->as_mv.row;
rc = ref_mv->as_mv.col;
br = bestmv->as_mv.row << 3;
bc = bestmv->as_mv.col << 3;
hstep = 4;
minc = MAX(x->mv_col_min << 3,
(ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
maxc = MIN(x->mv_col_max << 3,
(ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
minr = MAX(x->mv_row_min << 3,
(ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
maxr = MIN(x->mv_row_max << 3,
(ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
tr = br;
tc = bc;
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// central mv
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
// TODO(*): Each subsequent iteration checks at least one point in common
// with the last iteration; it could be two if the diagonal was selected.
while (--halfiters) {
// 1/2 pel
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
// TODO(*): Each subsequent iteration checks at least one point in common
// with the last iteration; it could be two if the diagonal was selected.
// 1/4 pel
hstep >>= 1;
while (--quarteriters) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
if (xd->allow_high_precision_mv) {
usehp = vp9_use_mv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
if (usehp) {
hstep >>= 1;
while (--eighthiters) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
}
bestmv->as_mv.row = br;
bestmv->as_mv.col = bc;
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
#undef MVC
#undef PRE
#undef DIST
#undef IFMVCV
#undef CHECK_BETTER
#undef MIN
#undef MAX
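A reading aid for the three loops above: coordinates are kept in 1/8-pel units (note the << 3 on entry), so hstep = 4 probes half-pel positions, the first hstep >>= 1 moves to quarter-pel, and the final halving (taken only when vp9_use_mv_hp() allows it) reaches eighth-pel. Each round probes a plus shape and then one diagonal picked from the winning sides; the switch can be restated as a small table (illustrative only):
static const int diag_row_sign[4] = { -1, -1, +1, +1 };  /* indexed by whichdir */
static const int diag_col_sign[4] = { -1, +1, -1, +1 };
/* whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
 * diagonal = (tr + diag_row_sign[whichdir] * hstep,
 *             tc + diag_col_sign[whichdir] * hstep) */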
int vp9_masked_refining_search_sad_c(MACROBLOCK *x,
uint8_t * mask, int mask_stride,
int_mv *ref_mv, int error_per_bit,
int search_range,
vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
int_mv *center_mv, int is_second) {
const MACROBLOCKD* const xd = &x->e_mbd;
MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
int i, j;
int this_row_offset, this_col_offset;
int what_stride = x->plane[0].src.stride;
int in_what_stride = xd->plane[0].pre[is_second].stride;
uint8_t *what = x->plane[0].src.buf;
uint8_t *best_address = xd->plane[0].pre[is_second].buf +
(ref_mv->as_mv.row * xd->plane[0].pre[is_second].stride) +
ref_mv->as_mv.col;
uint8_t *check_here;
unsigned int thissad;
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int_mv fcenter_mv;
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
bestsad = fn_ptr->msdf(what, what_stride, best_address, in_what_stride,
mask, mask_stride, 0x7fffffff) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
for (j = 0; j < 4; j++) {
this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
if ((this_col_offset > x->mv_col_min) &&
(this_col_offset < x->mv_col_max) &&
(this_row_offset > x->mv_row_min) &&
(this_row_offset < x->mv_row_max)) {
check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
best_address;
thissad = fn_ptr->msdf(what, what_stride, check_here, in_what_stride,
mask, mask_stride, bestsad);
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = j;
}
}
}
}
if (best_site == -1) {
break;
} else {
ref_mv->as_mv.row += neighbors[best_site].row;
ref_mv->as_mv.col += neighbors[best_site].col;
best_address += (neighbors[best_site].row) * in_what_stride +
neighbors[best_site].col;
}
}
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
if (bestsad < INT_MAX)
return
fn_ptr->mvf(what, what_stride, best_address, in_what_stride,
mask, mask_stride,
(unsigned int *)(&thissad)) +
mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
int vp9_masked_diamond_search_sad_c(MACROBLOCK *x,
uint8_t *mask, int mask_stride,
int_mv *ref_mv, int_mv *best_mv,
int search_param, int sad_per_bit,
int *num00, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
int_mv *center_mv, int is_second) {
int i, j, step;
const MACROBLOCKD* const xd = &x->e_mbd;
uint8_t *what = x->plane[0].src.buf;
int what_stride = x->plane[0].src.stride;
uint8_t *in_what;
int in_what_stride = xd->plane[0].pre[0].stride;
uint8_t *best_address;
int tot_steps;
int_mv this_mv;
int bestsad = INT_MAX;
int best_site = 0;
int last_site = 0;
int ref_row, ref_col;
int this_row_offset, this_col_offset;
search_site *ss;
uint8_t *check_here;
int thissad;
int_mv fcenter_mv;
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
clamp_mv(&ref_mv->as_mv, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->as_mv.row;
ref_col = ref_mv->as_mv.col;
*num00 = 0;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
// Work out the start point for the search
in_what = (uint8_t *)(xd->plane[0].pre[is_second].buf +
(ref_row * (xd->plane[0].pre[is_second].stride)) +
ref_col);
best_address = in_what;
// Check the starting position
bestsad = fn_ptr->msdf(what, what_stride, in_what,
in_what_stride, mask, mask_stride, 0x7fffffff)
+ mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
sad_per_bit);
// search_param determines the length of the initial step and
// hence the number of iterations
// 0 = initial step (MAX_FIRST_STEP) pel,
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel... etc.
ss = &x->ss[search_param * x->searches_per_step];
tot_steps = (x->ss_count / x->searches_per_step) - search_param;
i = 1;
for (step = 0; step < tot_steps; step++) {
for (j = 0; j < x->searches_per_step; j++) {
// Trap illegal vectors
this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
if ((this_col_offset > x->mv_col_min) &&
(this_col_offset < x->mv_col_max) &&
(this_row_offset > x->mv_row_min) &&
(this_row_offset < x->mv_row_max)) {
check_here = ss[i].offset + best_address;
thissad = fn_ptr->msdf(what, what_stride, check_here, in_what_stride,
mask, mask_stride, bestsad);
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
}
}
}
i++;
}
if (best_site != last_site) {
best_mv->as_mv.row += ss[best_site].mv.row;
best_mv->as_mv.col += ss[best_site].mv.col;
best_address += ss[best_site].offset;
last_site = best_site;
} else if (best_address == in_what) {
(*num00)++;
}
}
this_mv.as_mv.row = best_mv->as_mv.row << 3;
this_mv.as_mv.col = best_mv->as_mv.col << 3;
if (bestsad == INT_MAX)
return INT_MAX;
return
fn_ptr->mvf(what, what_stride, best_address, in_what_stride,
mask, mask_stride, (unsigned int *)(&thissad)) +
mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
}
int vp9_masked_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *mask, int mask_stride,
int_mv *mvp_full, int step_param,
int sadpb, int further_steps,
int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
int_mv *ref_mv, int_mv *dst_mv,
int is_second) {
int_mv temp_mv;
int thissme, n, num00, bestsme;
bestsme = vp9_masked_diamond_search_sad_c(x, mask, mask_stride, mvp_full,
&temp_mv, step_param, sadpb, &num00,
fn_ptr, x->nmvjointcost,
x->mvcost, ref_mv, is_second);
dst_mv->as_int = temp_mv.as_int;
n = num00;
num00 = 0;
/* If there won't be any more n-step searches,
 * check whether a refining search is needed. */
if (n > further_steps)
do_refine = 0;
while (n < further_steps) {
n++;
if (num00) {
num00--;
} else {
thissme = vp9_masked_diamond_search_sad_c(x, mask, mask_stride,
mvp_full, &temp_mv,
step_param + n, sadpb,
&num00,
fn_ptr, x->nmvjointcost,
x->mvcost,
ref_mv, is_second);
/* check to see if refining search is needed. */
if (num00 > (further_steps - n))
do_refine = 0;
if (thissme < bestsme) {
bestsme = thissme;
dst_mv->as_int = temp_mv.as_int;
}
}
}
/* final 1-away diamond refining search */
if (do_refine == 1) {
int search_range = 8;
int_mv best_mv;
best_mv.as_int = dst_mv->as_int;
thissme = vp9_masked_refining_search_sad_c(x, mask, mask_stride,
&best_mv, sadpb,
search_range,
fn_ptr, x->nmvjointcost,
x->mvcost,
ref_mv, is_second);
if (thissme < bestsme) {
bestsme = thissme;
dst_mv->as_int = best_mv.as_int;
}
}
return bestsme;
}
#endif
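For orientation, this is roughly how the driver above would be invoked from the RD loop, mirroring how the unmasked vp9_full_pixel_diamond is used elsewhere; the call site and parameter values are illustrative, and mask, mvp_full, ref_mv, sadpb and bsize are assumed to be in scope:
int_mv dst_mv;
int bestsme = vp9_masked_full_pixel_diamond(
    cpi, x, mask, mask_stride, &mvp_full,
    /*step_param=*/2, sadpb, /*further_steps=*/6,
    /*do_refine=*/1, &cpi->fn_ptr[bsize], &ref_mv, &dst_mv,
    /*is_second=*/0);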
int vp9_diamond_search_sadx4(MACROBLOCK *x,
int_mv *ref_mv, int_mv *best_mv, int search_param,
int sad_per_bit, int *num00,
@@ -1718,7 +2219,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
}
}
- while ((c + 2) < col_max) {
+ while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
int i;
fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

View File

@@ -120,4 +120,19 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
int_mv *center_mv, const uint8_t *second_pred,
int w, int h);
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
int vp9_find_best_masked_sub_pixel_step_iteratively(
MACROBLOCK *x, uint8_t *mask, int mask_stride, int_mv *bestmv,
int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse,
int is_second);
int vp9_masked_full_pixel_diamond(
struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *mask, int mask_stride,
int_mv *mvp_full, int step_param, int sadpb, int further_steps,
int do_refine, vp9_variance_fn_ptr_t *fn_ptr, int_mv *ref_mv,
int_mv *dst_mv, int is_second);
#endif
#endif // VP9_ENCODER_VP9_MCOMP_H_

View File

@@ -619,6 +619,24 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_D27_PRED] += speed_multiplier * 2500;
sf->thresh_mult[THR_D63_PRED] += speed_multiplier * 2500;
#if CONFIG_INTERINTRA
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] += speed_multiplier * 1500;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] += speed_multiplier * 2000;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] += speed_multiplier * 2000;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] += speed_multiplier * 2000;
#endif
if (cpi->sf.skip_lots_of_modes) {
for (i = 0; i < MAX_MODES; ++i)
sf->thresh_mult[i] = INT_MAX;
@@ -648,6 +666,12 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
#if CONFIG_INTERINTRA
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
#endif
}
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
@@ -655,6 +679,12 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_NEARG ] = INT_MAX;
sf->thresh_mult[THR_NEWG ] = INT_MAX;
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
#if CONFIG_INTERINTRA
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = INT_MAX;
#endif
}
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
@@ -662,6 +692,12 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_NEARA ] = INT_MAX;
sf->thresh_mult[THR_NEWA ] = INT_MAX;
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
#if CONFIG_INTERINTRA
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = INT_MAX;
#endif
}
if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
@@ -1690,6 +1726,37 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
#define MBFP(BT, MSDF, MVF, MSVF) \
cpi->fn_ptr[BT].msdf = MSDF; \
cpi->fn_ptr[BT].mvf = MVF; \
cpi->fn_ptr[BT].msvf = MSVF;
MBFP(BLOCK_64X64, vp9_masked_sad64x64, vp9_masked_variance64x64,
vp9_masked_sub_pixel_variance64x64)
MBFP(BLOCK_64X32, vp9_masked_sad64x32, vp9_masked_variance64x32,
vp9_masked_sub_pixel_variance64x32)
MBFP(BLOCK_32X64, vp9_masked_sad32x64, vp9_masked_variance32x64,
vp9_masked_sub_pixel_variance32x64)
MBFP(BLOCK_32X32, vp9_masked_sad32x32, vp9_masked_variance32x32,
vp9_masked_sub_pixel_variance32x32)
MBFP(BLOCK_32X16, vp9_masked_sad32x16, vp9_masked_variance32x16,
vp9_masked_sub_pixel_variance32x16)
MBFP(BLOCK_16X32, vp9_masked_sad16x32, vp9_masked_variance16x32,
vp9_masked_sub_pixel_variance16x32)
MBFP(BLOCK_16X16, vp9_masked_sad16x16, vp9_masked_variance16x16,
vp9_masked_sub_pixel_variance16x16)
MBFP(BLOCK_16X8, vp9_masked_sad16x8, vp9_masked_variance16x8,
vp9_masked_sub_pixel_variance16x8)
MBFP(BLOCK_8X16, vp9_masked_sad8x16, vp9_masked_variance8x16,
vp9_masked_sub_pixel_variance8x16)
MBFP(BLOCK_8X8, vp9_masked_sad8x8, vp9_masked_variance8x8,
vp9_masked_sub_pixel_variance8x8)
MBFP(BLOCK_4X4, vp9_masked_sad4x4, vp9_masked_variance4x4,
vp9_masked_sub_pixel_variance4x4)
#endif
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
cpi->refining_search_sad = vp9_refining_search_sad;
@@ -2453,6 +2520,44 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
cm->subsampling_x, cm->subsampling_y);
}
#if CONFIG_INTERINTRA
static void select_interintra_mode(VP9_COMP *cpi) {
static const double threshold = 0.007;
VP9_COMMON *cm = &cpi->common;
int sum = cpi->interintra_select_count[1] + cpi->interintra_select_count[0];
if (sum) {
double fraction = (double)cpi->interintra_select_count[1] / (double)sum;
cm->use_interintra = (fraction > threshold);
}
}
#if CONFIG_MASKED_INTERINTRA
static void select_masked_interintra_mode(VP9_COMP *cpi) {
static const double threshold = 1/100.0;
VP9_COMMON *cm = &cpi->common;
int sum = cpi->masked_interintra_select_count[1] +
cpi->masked_interintra_select_count[0];
if (sum) {
double fraction = (double) cpi->masked_interintra_select_count[1] / sum;
cm->use_masked_interintra = (fraction > threshold);
}
}
#endif
#endif
#if CONFIG_MASKED_INTERINTER
static void select_masked_compound_mode(VP9_COMP *cpi) {
static const double threshold = 1/128.0;
VP9_COMMON *cm = &cpi->common;
int sum = cpi->masked_compound_select_counts[1] +
cpi->masked_compound_select_counts[0];
if (sum) {
double fraction = (double) cpi->masked_compound_select_counts[1] / sum;
cm->use_masked_compound = (fraction > threshold);
}
}
#endif
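All three selectors implement the same hysteresis: a tool stays enabled for the next frame only while the fraction of blocks that picked it clears a small floor. A worked example for select_masked_compound_mode with invented counts:
/* masked_compound_select_counts = { 1267, 12 }: sum = 1279,
 *   fraction = 12 / 1279 ~= 0.94% > 1/128 ~= 0.78%  -> stays enabled.
 * With { 1270, 9 }: fraction ~= 0.70% < 0.78%       -> disabled next frame. */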
static void scale_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int i;
@@ -2857,6 +2962,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
set_mvcost(&cpi->mb);
}
#if CONFIG_INTERINTRA
if (cm->current_video_frame == 0) {
cm->use_interintra = 1;
#if CONFIG_MASKED_INTERINTRA
cm->use_masked_interintra = 1;
#endif
}
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->current_video_frame == 0) {
cm->use_masked_compound = 0;
}
#endif
#if CONFIG_POSTPROC
if (cpi->oxcf.noise_sensitivity > 0) {
@@ -3194,6 +3314,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_copy(counts->single_ref, cpi->single_ref_count);
vp9_copy(counts->comp_ref, cpi->comp_ref_count);
counts->mv = cpi->NMVcount;
#if CONFIG_INTERINTRA
vp9_copy(counts->interintra, cpi->interintra_count);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(counts->masked_interintra, cpi->masked_interintra_count);
#endif
#endif
#if CONFIG_MASKED_INTERINTER
vp9_copy(counts->masked_compound, cpi->masked_compound_counts);
#endif
if (!cpi->common.error_resilient_mode &&
!cpi->common.frame_parallel_decoding_mode) {
vp9_adapt_mode_probs(&cpi->common);
@@ -3201,6 +3330,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
#if CONFIG_INTERINTRA
if (cm->frame_type != KEY_FRAME) {
select_interintra_mode(cpi);
#if CONFIG_MASKED_INTERINTRA
if (cpi->common.use_interintra)
select_masked_interintra_mode(cpi);
else
cpi->common.use_masked_interintra = 0;
#endif
}
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->frame_type != KEY_FRAME)
select_masked_compound_mode(cpi);
#endif
#ifdef ENTROPY_STATS
vp9_update_mode_context_stats(cpi);
#endif

View File

@@ -47,7 +47,11 @@
#define KEY_FRAME_CONTEXT 5
#if CONFIG_INTERINTRA
#define MAX_MODES 48
#else
#define MAX_MODES 36
#endif
#define MIN_THRESHMULT 32
#define MAX_THRESHMULT 512
@@ -91,6 +95,19 @@ typedef struct {
struct tx_probs tx_probs;
vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
#if CONFIG_INTERINTRA
vp9_prob interintra_prob[BLOCK_SIZE_TYPES];
#if CONFIG_MASKED_INTERINTRA
vp9_prob masked_interintra_prob[BLOCK_SIZE_TYPES];
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_prob filterintra_prob[TX_SIZES][VP9_INTRA_MODES];
#endif
#if CONFIG_MASKED_INTERINTER
vp9_prob masked_compound_prob[BLOCK_SIZE_TYPES];
#endif
} CODING_CONTEXT;
typedef struct {
@@ -187,6 +204,23 @@ typedef enum {
THR_D63_PRED,
THR_D117_PRED,
THR_D45_PRED,
#if CONFIG_INTERINTRA
THR_COMP_INTERINTRA_ZEROL,
THR_COMP_INTERINTRA_NEARESTL,
THR_COMP_INTERINTRA_NEARL,
THR_COMP_INTERINTRA_NEWL,
THR_COMP_INTERINTRA_ZEROG,
THR_COMP_INTERINTRA_NEARESTG,
THR_COMP_INTERINTRA_NEARG,
THR_COMP_INTERINTRA_NEWG,
THR_COMP_INTERINTRA_ZEROA,
THR_COMP_INTERINTRA_NEARESTA,
THR_COMP_INTERINTRA_NEARA,
THR_COMP_INTERINTRA_NEWA,
#endif
} THR_MODES;
typedef enum {
@@ -468,6 +502,19 @@ typedef struct VP9_COMP {
int y_uv_mode_count[VP9_INTRA_MODES][VP9_INTRA_MODES];
unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
#if CONFIG_INTERINTRA
unsigned int interintra_count[BLOCK_SIZE_TYPES][2];
unsigned int interintra_select_count[2];
#if CONFIG_MASKED_INTERINTRA
unsigned int masked_interintra_count[BLOCK_SIZE_TYPES][2];
unsigned int masked_interintra_select_count[2];
#endif
#endif
#if CONFIG_MASKED_INTERINTER
unsigned int masked_compound_counts[BLOCK_SIZE_TYPES][2];
unsigned int masked_compound_select_counts[2];
#endif
nmv_context_counts NMVcount;
vp9_coeff_count coef_counts[TX_SIZES][BLOCK_TYPES];

View File

@@ -106,6 +106,18 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
cc->tx_probs = cm->fc.tx_probs;
vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs);
#if CONFIG_INTERINTRA
vp9_copy(cc->interintra_prob, cm->fc.interintra_prob);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(cc->masked_interintra_prob, cm->fc.masked_interintra_prob);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_copy(cc->filterintra_prob, cm->fc.filterintra_prob);
#endif
#if CONFIG_MASKED_INTERINTER
vp9_copy(cc->masked_compound_prob, cm->fc.masked_compound_prob);
#endif
}
void vp9_restore_coding_context(VP9_COMP *cpi) {
@@ -145,6 +157,18 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
cm->fc.tx_probs = cc->tx_probs;
vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs);
#if CONFIG_INTERINTRA
vp9_copy(cm->fc.interintra_prob, cc->interintra_prob);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(cm->fc.masked_interintra_prob, cc->masked_interintra_prob);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_copy(cm->fc.filterintra_prob, cc->filterintra_prob);
#endif
#if CONFIG_MASKED_INTERINTER
vp9_copy(cm->fc.masked_compound_prob, cc->masked_compound_prob);
#endif
}
void vp9_setup_key_frame(VP9_COMP *cpi) {

File diff suppressed because it is too large

View File

@@ -613,3 +613,127 @@ void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
ref_ptr[3], ref_stride, 0x7fffffff);
}
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
unsigned int vp9_masked_sad64x64_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 64, 64);
}
unsigned int vp9_masked_sad64x32_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 64, 32);
}
unsigned int vp9_masked_sad32x64_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 32, 64);
}
unsigned int vp9_masked_sad32x32_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 32, 32);
}
unsigned int vp9_masked_sad16x32_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 16, 32);
}
unsigned int vp9_masked_sad32x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 32, 16);
}
unsigned int vp9_masked_sad16x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 16, 16);
}
unsigned int vp9_masked_sad8x16_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 8, 16);
}
unsigned int vp9_masked_sad16x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 16, 8);
}
unsigned int vp9_masked_sad8x8_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 8, 8);
}
unsigned int vp9_masked_sad4x4_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad) {
return masked_sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride,
msk_ptr, msk_stride, 4, 4);
}
#endif

View File

@@ -67,6 +67,32 @@ typedef unsigned int (*vp9_subp_avg_variance_fn_t)(const uint8_t *src_ptr,
unsigned int *sse,
const uint8_t *second_pred);
#if CONFIG_MASKED_INTERINTRA || CONFIG_MASKED_INTERINTER
typedef unsigned int(*vp9_masked_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int max_sad);
typedef unsigned int (*vp9_masked_variance_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int *sse);
typedef unsigned int (*vp9_masked_subpixvariance_fn_t)(const uint8_t *src_ptr,
int source_stride,
int xoffset,
int yoffset,
const uint8_t *ref_ptr,
int Refstride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int *sse);
#endif
typedef void (*vp9_ssimpf_fn_t)(uint8_t *s, int sp, uint8_t *r,
int rp, unsigned long *sum_s,
unsigned long *sum_r, unsigned long *sum_sq_s,
@@ -92,6 +118,12 @@ typedef struct vp9_variance_vtable {
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi1_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
vp9_masked_sad_fn_t msdf;
vp9_masked_variance_fn_t mvf;
vp9_masked_subpixvariance_fn_t msvf;
#endif
} vp9_variance_fn_ptr_t;
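With msdf/mvf/msvf added to the vtable, search code stays block-size agnostic and only ever dispatches through the table, as the masked motion search above already does. A minimal usage sketch (the wrapper itself is hypothetical):
static unsigned int masked_sad_for_bsize(const vp9_variance_fn_ptr_t *fp,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *msk, int msk_stride) {
  /* 0x7fffffff disables the early-out bound, as in the C search code. */
  return fp->msdf(src, src_stride, ref, ref_stride, msk, msk_stride,
                  0x7fffffff);
}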
static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,

View File

@@ -955,3 +955,449 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
#if ((CONFIG_MASKED_INTERINTRA && CONFIG_INTERINTRA) || \
CONFIG_MASKED_INTERINTER)
unsigned int vp9_masked_variance64x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
const uint8_t *msk_ptr,
int msk_stride,
unsigned int *sse) {
unsigned int var;
int avg;
masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
msk_ptr, msk_stride, 64, 64, &var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 12));
}
unsigned int vp9_masked_variance64x32_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 64, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

unsigned int vp9_masked_variance32x64_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 32, 64, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

unsigned int vp9_masked_variance32x32_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 32, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 10));
}

unsigned int vp9_masked_variance32x16_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 32, 16, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_masked_variance16x32_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 16, 32, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_masked_variance16x16_c(const uint8_t *src_ptr,
                                        int source_stride,
                                        const uint8_t *ref_ptr,
                                        int recon_stride,
                                        const uint8_t *msk_ptr,
                                        int msk_stride,
                                        unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 16, 16, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 8));
}

unsigned int vp9_masked_variance16x8_c(const uint8_t *src_ptr,
                                       int source_stride,
                                       const uint8_t *ref_ptr,
                                       int recon_stride,
                                       const uint8_t *msk_ptr,
                                       int msk_stride,
                                       unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 16, 8, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 7));
}

unsigned int vp9_masked_variance8x16_c(const uint8_t *src_ptr,
                                       int source_stride,
                                       const uint8_t *ref_ptr,
                                       int recon_stride,
                                       const uint8_t *msk_ptr,
                                       int msk_stride,
                                       unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 8, 16, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 7));
}

unsigned int vp9_masked_variance8x8_c(const uint8_t *src_ptr,
                                      int source_stride,
                                      const uint8_t *ref_ptr,
                                      int recon_stride,
                                      const uint8_t *msk_ptr,
                                      int msk_stride,
                                      unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 8, 8, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 6));
}

unsigned int vp9_masked_variance4x4_c(const uint8_t *src_ptr,
                                      int source_stride,
                                      const uint8_t *ref_ptr,
                                      int recon_stride,
                                      const uint8_t *msk_ptr,
                                      int msk_stride,
                                      unsigned int *sse) {
  unsigned int var;
  int avg;

  masked_variance(src_ptr, source_stride, ref_ptr, recon_stride,
                  msk_ptr, msk_stride, 4, 4, &var, &avg);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 4));
}
unsigned int vp9_masked_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);

  return vp9_masked_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}
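Each subpel variant runs the same two-pass pipeline: a horizontal bilinear pass producing h + 1 intermediate rows of uint16_t, a vertical pass back down to h rows of uint8_t, then the full-pel masked variance above on the filtered block. A hedged usage sketch of the 64x64 path follows; the buffers and the half-pel offsets are illustrative placeholders, not encoder state.

#include <stdint.h>

// Illustrative only: probe the 64x64 masked variance at the half-pel
// position (xoffset = yoffset = 4, in eighth-pel units). The source
// buffer carries extra slack because the bilinear taps read one pixel
// past the block on each axis.
static unsigned int example_masked_subpel(const uint8_t *mask) {
  static uint8_t src[66 * 64];   // 64x64 block + filter-tap slack
  static uint8_t pred[64 * 64];  // full-pel prediction to compare against
  unsigned int sse;
  return vp9_masked_sub_pixel_variance64x64_c(src, 64, 4, 4,
                                              pred, 64, mask, 64, &sse);
}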
unsigned int vp9_masked_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 64, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);

  return vp9_masked_variance64x32_c(temp2, 64, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}
unsigned int vp9_masked_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 65, 32, hfilter);
  // The first pass packs its output at the 32-pixel block width, so the
  // second pass and the variance call must use stride 32, not 64.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);

  return vp9_masked_variance32x64_c(temp2, 32, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}
unsigned int vp9_masked_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);

  return vp9_masked_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}

unsigned int vp9_masked_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 32, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);

  return vp9_masked_variance32x16_c(temp2, 32, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}
unsigned int vp9_masked_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 33, 16, hfilter);
  // The first pass packs its output at the 16-pixel block width, so the
  // second pass and the variance call must use stride 16, not 32.
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);

  return vp9_masked_variance16x32_c(temp2, 16, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}
unsigned int vp9_masked_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                                  int src_pixels_per_line,
                                                  int xoffset,
                                                  int yoffset,
                                                  const uint8_t *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  const uint8_t *msk_ptr,
                                                  int msk_stride,
                                                  unsigned int *sse) {
  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);

  return vp9_masked_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line,
                                    msk_ptr, msk_stride, sse);
}

unsigned int vp9_masked_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                                 int src_pixels_per_line,
                                                 int xoffset,
                                                 int yoffset,
                                                 const uint8_t *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 const uint8_t *msk_ptr,
                                                 int msk_stride,
                                                 unsigned int *sse) {
  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 16, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);

  return vp9_masked_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line,
                                   msk_ptr, msk_stride, sse);
}

unsigned int vp9_masked_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                                 int src_pixels_per_line,
                                                 int xoffset,
                                                 int yoffset,
                                                 const uint8_t *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 const uint8_t *msk_ptr,
                                                 int msk_stride,
                                                 unsigned int *sse) {
  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 17, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);

  return vp9_masked_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line,
                                   msk_ptr, msk_stride, sse);
}

unsigned int vp9_masked_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                                int src_pixels_per_line,
                                                int xoffset,
                                                int yoffset,
                                                const uint8_t *dst_ptr,
                                                int dst_pixels_per_line,
                                                const uint8_t *msk_ptr,
                                                int msk_stride,
                                                unsigned int *sse) {
  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 9, 8, hfilter);
  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);

  return vp9_masked_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line,
                                  msk_ptr, msk_stride, sse);
}
unsigned int vp9_masked_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                                int src_pixels_per_line,
                                                int xoffset,
                                                int yoffset,
                                                const uint8_t *dst_ptr,
                                                int dst_pixels_per_line,
                                                const uint8_t *msk_ptr,
                                                int msk_stride,
                                                unsigned int *sse) {
  uint8_t temp2[20 * 16];
  const int16_t *hfilter, *vfilter;
  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering

  hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1-D horizontally
  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                    1, 5, 4, hfilter);
  // Then filter vertically
  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);

  return vp9_masked_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line,
                                  msk_ptr, msk_stride, sse);
}
#endif
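One cheap consistency check follows directly from the sketches above: if a weight of 64 means full strength, a uniform all-64 mask must reproduce the plain (unmasked) variance. Below is a hedged test sketch; vp9_variance16x16 is the existing unmasked C function, and the equivalence only holds if the committed helper really scales by mask/64.

#include <assert.h>
#include <stdint.h>
#include <string.h>

// Hypothetical sanity check: an all-64 mask should make the masked
// variance agree with the plain variance, catching stride or
// normalization slips in the wrappers above.
static void check_uniform_mask(const uint8_t *src, const uint8_t *ref) {
  uint8_t mask[16 * 16];
  unsigned int sse_masked, sse_plain;
  memset(mask, 64, sizeof(mask));
  assert(vp9_masked_variance16x16_c(src, 16, ref, 16,
                                    mask, 16, &sse_masked) ==
         vp9_variance16x16(src, 16, ref, 16, &sse_plain));
  assert(sse_masked == sse_plain);
}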