From db5dd4999602fc4615ec61d16b22bd58a4e1dd0c Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Mon, 12 Jan 2015 18:09:10 -0800 Subject: [PATCH] Adds wedge-partitions for compound prediction Results with this experiment only: +0.642% on derflr. With other experiments: +4.733% Change-Id: Ieb2022f8e49ac38a7e7129e261a6bf69ae9666b9 --- configure | 1 + vp9/common/vp9_blockd.h | 22 ++ vp9/common/vp9_entropymode.c | 19 +- vp9/common/vp9_entropymode.h | 6 + vp9/common/vp9_reconinter.c | 577 +++++++++++++++++++++++++++++++++- vp9/common/vp9_reconinter.h | 24 ++ vp9/common/vp9_rtcd_defs.pl | 118 +++++++ vp9/decoder/vp9_decodeframe.c | 19 +- vp9/decoder/vp9_decodemv.c | 35 ++- vp9/encoder/vp9_bitstream.c | 20 ++ vp9/encoder/vp9_encodeframe.c | 37 ++- vp9/encoder/vp9_encoder.c | 34 ++ vp9/encoder/vp9_mcomp.c | 351 +++++++++++++++++++++ vp9/encoder/vp9_mcomp.h | 20 ++ vp9/encoder/vp9_rdopt.c | 306 +++++++++++++++++- vp9/encoder/vp9_sad.c | 44 +++ vp9/encoder/vp9_variance.c | 95 ++++++ vp9/encoder/vp9_variance.h | 30 ++ 18 files changed, 1731 insertions(+), 27 deletions(-) diff --git a/configure b/configure index fd21538f8..bd2011360 100755 --- a/configure +++ b/configure @@ -289,6 +289,7 @@ EXPERIMENT_LIST=" supertx copy_mode interintra + wedge_partition " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 05b017fe3..a5befac5b 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -165,6 +165,10 @@ typedef struct { PREDICTION_MODE interintra_mode; PREDICTION_MODE interintra_uv_mode; #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + int use_wedge_interinter; + int wedge_index; +#endif // CONFIG_WEDGE_PARTITION } MB_MODE_INFO; typedef struct MODE_INFO { @@ -439,6 +443,24 @@ static INLINE int is_interintra_allowed(BLOCK_SIZE sb_type) { } #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION +#define WEDGE_BITS_SML 3 +#define WEDGE_BITS_MED 4 +#define WEDGE_BITS_BIG 5 +#define WEDGE_NONE -1 + +static inline int get_wedge_bits(BLOCK_SIZE sb_type) { + if (sb_type < BLOCK_8X8) + return 0; + if (sb_type <= BLOCK_8X8) + return WEDGE_BITS_SML; + else if (sb_type <= BLOCK_32X32) + return WEDGE_BITS_MED; + else + return WEDGE_BITS_BIG; +} +#endif // CONFIG_WEDGE_PARTITION + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index e5a0b3e2c..0d9b64b44 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -13,6 +13,12 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_seg_common.h" +#if CONFIG_WEDGE_PARTITION +static const vp9_prob default_wedge_interinter_prob[BLOCK_SIZES] = { + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 +}; +#endif // CONFIG_WEDGE_PARTITION + #if CONFIG_INTERINTRA static const vp9_prob default_interintra_prob[BLOCK_SIZES] = { 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 @@ -179,7 +185,7 @@ static const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = { {175, 203, 213, 86, 45, 71, 41, 150, 125, 154}, {235, 230, 154, 202, 154, 205, 37, 128, 0, 202} }; -#endif +#endif // CONFIG_FILTERINTRA const vp9_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { @@ -466,6 +472,9 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc) { #if CONFIG_INTERINTRA vp9_copy(fc->interintra_prob, default_interintra_prob); #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + vp9_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob); +#endif // CONFIG_WEDGE_PARTITION } 
const vp9_tree_index vp9_switchable_interp_tree @@ -615,6 +624,14 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { counts->interintra[i]); } #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + for (i = 0; i < BLOCK_SIZES; ++i) { + if (get_wedge_bits(i)) + fc->wedge_interinter_prob[i] = adapt_prob + (pre_fc->wedge_interinter_prob[i], + counts->wedge_interinter[i]); + } +#endif // CONFIG_WEDGE_PARTITION } static void set_default_lf_deltas(struct loopfilter *lf) { diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 2b7a0c06e..f34656309 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -77,6 +77,9 @@ typedef struct frame_contexts { #if CONFIG_INTERINTRA vp9_prob interintra_prob[BLOCK_SIZES]; #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + vp9_prob wedge_interinter_prob[BLOCK_SIZES]; +#endif // CONFIG_WEDGE_PARTITION } FRAME_CONTEXT; typedef struct { @@ -118,6 +121,9 @@ typedef struct { #if CONFIG_INTERINTRA unsigned int interintra[BLOCK_SIZES][2]; #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + unsigned int wedge_interinter[BLOCK_SIZES][2]; +#endif // CONFIG_WEDGE_PARTITION } FRAME_COUNTS; extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 2f92c113c..3d3494f54 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -257,9 +257,349 @@ static MV average_split_mvs(const struct macroblockd_plane *pd, return res; } +#if CONFIG_WEDGE_PARTITION +#define WEDGE_WEIGHT_BITS 6 + +static int get_masked_weight(int m) { + #define SMOOTHER_LEN 32 + static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 2, 2, 3, 4, 5, 6, + 8, 9, 12, 14, 17, 21, 24, 28, + 32, + 36, 40, 43, 47, 50, 52, 55, 56, + 58, 59, 60, 61, 62, 62, 63, 63, + 63, 63, 63, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + }; + if (m < -SMOOTHER_LEN) + return 0; + else if (m > SMOOTHER_LEN) + return (1 << WEDGE_WEIGHT_BITS); + else + return smoothfn[m + SMOOTHER_LEN]; +} + +static int get_hard_mask(int m) { + return 1 << WEDGE_WEIGHT_BITS * (m > 0); +} + +// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0 +// The soft mask is obtained by computing f(x, y) and then calling +// get_masked_weight(f(x, y)). 
+static const int wedge_params_sml[1 << WEDGE_BITS_SML][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, +}; + +static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + { 1, 2, 2, 1}, + {-1, -2, 2, 1}, + { 1, 2, 2, 3}, + {-1, -2, 2, 3}, +}; + +static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + { 2, 1, 1, 2}, + {-2, -1, 1, 2}, + { 2, 1, 3, 2}, + {-2, -1, 3, 2}, +}; + +static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + { 0, 2, 0, 1}, + { 0, -2, 0, 1}, + { 0, 2, 0, 3}, + { 0, -2, 0, 3}, + { 2, 0, 1, 0}, + {-2, 0, 1, 0}, + { 2, 0, 3, 0}, + {-2, 0, 3, 0}, +}; + +static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + { 1, 2, 2, 1}, + {-1, -2, 2, 1}, + { 1, 2, 2, 3}, + {-1, -2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + { 2, 1, 1, 2}, + {-2, -1, 1, 2}, + { 2, 1, 3, 2}, + {-2, -1, 3, 2}, + + { 0, 2, 0, 1}, + { 0, -2, 0, 1}, + { 0, 2, 0, 2}, + { 0, -2, 0, 2}, + { 0, 2, 0, 3}, + { 0, -2, 0, 3}, + { 2, 0, 2, 0}, + {-2, 0, 2, 0}, +}; + +static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + { 1, 2, 2, 1}, + {-1, -2, 2, 1}, + { 1, 2, 2, 3}, + {-1, -2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + { 2, 1, 1, 2}, + {-2, -1, 1, 2}, + { 2, 1, 3, 2}, + {-2, -1, 3, 2}, + + { 0, 2, 0, 2}, + { 0, -2, 0, 2}, + { 2, 0, 1, 0}, + {-2, 0, 1, 0}, + { 2, 0, 2, 0}, + {-2, 0, 2, 0}, + { 2, 0, 3, 0}, + {-2, 0, 3, 0}, +}; + +static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-2, -1, 2, 2}, + { 1, 2, 2, 2}, + {-1, -2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + { 1, 2, 2, 1}, + {-1, -2, 2, 1}, + { 1, 2, 2, 3}, + {-1, -2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + { 2, 1, 1, 2}, + {-2, -1, 1, 2}, + { 2, 1, 3, 2}, + {-2, -1, 3, 2}, + + { 0, 2, 0, 1}, + { 0, -2, 0, 1}, + { 0, 2, 0, 3}, + { 0, -2, 0, 3}, + { 2, 0, 1, 0}, + {-2, 0, 1, 0}, + { 2, 0, 3, 0}, + {-2, 0, 3, 0}, +}; + +static const int *get_wedge_params(int wedge_index, + BLOCK_SIZE sb_type, + int h, int w) { + const int *a = NULL; + const int wedge_bits = get_wedge_bits(sb_type); + + if (wedge_index == WEDGE_NONE) + return NULL; + + if (wedge_bits == WEDGE_BITS_SML) { + a = wedge_params_sml[wedge_index]; + } else if (wedge_bits == WEDGE_BITS_MED) { + if (h > w) + a = 
wedge_params_med_hgtw[wedge_index]; + else if (h < w) + a = wedge_params_med_hltw[wedge_index]; + else + a = wedge_params_med_heqw[wedge_index]; + } else if (wedge_bits == WEDGE_BITS_BIG) { + if (h > w) + a = wedge_params_big_hgtw[wedge_index]; + else if (h < w) + a = wedge_params_big_hltw[wedge_index]; + else + a = wedge_params_big_heqw[wedge_index]; + } else { + assert(0); + } + return a; +} + +void vp9_generate_masked_weight(int wedge_index, + BLOCK_SIZE sb_type, + int h, int w, + uint8_t *mask, int stride) { + int i, j; + const int *a = get_wedge_params(wedge_index, sb_type, h, w); + if (!a) return; + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (j - (a[2] * w) / 4); + int y = (i - (a[3] * h) / 4); + int m = a[0] * x + a[1] * y; + mask[i * stride + j] = get_masked_weight(m); + } +} + +void vp9_generate_hard_mask(int wedge_index, BLOCK_SIZE sb_type, + int h, int w, uint8_t *mask, int stride) { + int i, j; + const int *a = get_wedge_params(wedge_index, sb_type, h, w); + if (!a) return; + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (j - (a[2] * w) / 4); + int y = (i - (a[3] * h) / 4); + int m = a[0] * x + a[1] * y; + mask[i * stride + j] = get_hard_mask(m); + } +} + +static void build_masked_compound(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int wedge_index, BLOCK_SIZE sb_type, + int h, int w) { + int i, j; + uint8_t mask[4096]; + vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, 64); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * 64 + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} + +#if CONFIG_SUPERTX +void generate_masked_weight_extend(int wedge_index, int plane, + BLOCK_SIZE sb_type, int h, int w, + int wedge_offset_x, int wedge_offset_y, + uint8_t *mask, int stride) { + int i, j; + int subh = (plane ? 2 : 4) << b_height_log2_lookup[sb_type]; + int subw = (plane ? 
2 : 4) << b_width_log2_lookup[sb_type]; + const int *a = get_wedge_params(wedge_index, sb_type, subh, subw); + if (!a) return; + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (j - (a[2] * subw) / 4 - wedge_offset_x); + int y = (i - (a[3] * subh) / 4 - wedge_offset_y); + int m = a[0] * x + a[1] * y; + mask[i * stride + j] = get_masked_weight(m); + } +} + +static void build_masked_compound_extend(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int plane, + int wedge_index, BLOCK_SIZE sb_type, + int wedge_offset_x, int wedge_offset_y, + int h, int w) { + int i, j; + uint8_t mask[4096]; + generate_masked_weight_extend(wedge_index, plane, sb_type, h, w, + wedge_offset_x, wedge_offset_y, mask, 64); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * 64 + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} +#endif // CONFIG_SUPERTX +#endif // CONFIG_WEDGE_PARTITION + static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0].src_mi; @@ -306,6 +646,38 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv.col >> SUBPEL_BITS); +#if CONFIG_WEDGE_PARTITION + if (ref && get_wedge_bits(mi->mbmi.sb_type) + && mi->mbmi.use_wedge_interinter) { + uint8_t tmp_dst[4096]; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); + } else { + inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_SUPERTX + // TODO(debargha): Need high bitdepth versions + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.wedge_index, mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); +#else + build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.wedge_index, mi->mbmi.sb_type, h, w); +#endif // CONFIG_SUPERTX + } else { + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else // CONFIG_WEDGE_PARTITION + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, @@ -319,6 +691,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_WEDGE_PARTITION } } @@ -342,10 +715,18 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); + 4 * x, 4 * y, 4, 4, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + 0, 0, +#endif + mi_x, mi_y); } else { 
build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + 0, 0, +#endif + mi_x, mi_y); } } } @@ -483,6 +864,39 @@ void vp9_build_masked_inter_predictor_complex( } } +#if CONFIG_WEDGE_PARTITION +void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0].src_mi->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, + wedge_offset_x, wedge_offset_y, mi_x, mi_y); + } else { + build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, + wedge_offset_x, wedge_offset_y, mi_x, mi_y); + } + } +} +#endif // CONFIG_WEDGE_PARTITION + void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, int mi_row, int mi_col, int mi_row_ori, @@ -491,6 +905,10 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, PARTITION_TYPE partition) { const int mi_x = mi_col_ori * MI_SIZE; const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_WEDGE_PARTITION + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif // CONFIG_WEDGE_PARTITION uint8_t *orig_dst; int orig_dst_stride; int bw = 4 << b_width_log2_lookup[top_bsize]; @@ -502,6 +920,9 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, orig_dst = xd->plane[0].dst.buf; orig_dst_stride = xd->plane[0].dst.stride; build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf; @@ -509,22 +930,37 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, switch (partition) { case PARTITION_HORZ: build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; case PARTITION_VERT: build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; case PARTITION_SPLIT: build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf1; xd->plane[0].dst.stride = MAXTXLEN; build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf2; xd->plane[0].dst.stride = MAXTXLEN; build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; default: @@ -563,12 +999,19 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, } void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_WEDGE_PARTITION + int mi_row, 
int mi_col, +#endif int mi_row_ori, int mi_col_ori, BLOCK_SIZE top_bsize) { int plane; const int mi_x = mi_col_ori * MI_SIZE; const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_WEDGE_PARTITION + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif for (plane = 1; plane < MAX_MB_PLANE; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize, &xd->plane[plane]); @@ -578,6 +1021,9 @@ void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, const int bh = 4 * num_4x4_h; build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); } } @@ -588,6 +1034,9 @@ void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + int wedge_offset_x, int wedge_offset_y, +#endif int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0].src_mi; @@ -748,6 +1197,50 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } } +#if CONFIG_WEDGE_PARTITION + if (ref && get_wedge_bits(mi->mbmi.sb_type) + && mi->mbmi.use_wedge_interinter) { + uint8_t tmp_dst[4096]; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, ref, kernel, + xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_SUPERTX + // TODO(debargha): highbitdepth versions + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.wedge_index, mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); +#else + build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + mi->mbmi.wedge_index, mi->mbmi.sb_type, h, w); +#endif // CONFIG_SUPERTX + } else { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, + xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH + } + +#else // CONFIG_WEDGE_PARTITION + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, @@ -760,6 +1253,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_WEDGE_PARTITION } } @@ -782,10 +1276,18 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) dec_build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); + 4 * x, 4 * y, 4, 4, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + 0, 0, +#endif + mi_x, 
mi_y); } else { dec_build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_WEDGE_PARTITION + 0, 0, +#endif + mi_x, mi_y); } } #if CONFIG_INTERINTRA @@ -800,6 +1302,41 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, } #if CONFIG_SUPERTX +#if CONFIG_WEDGE_PARTITION +void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0].src_mi->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, + wedge_offset_x, wedge_offset_y, + mi_x, mi_y); + } else { + dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, + wedge_offset_x, wedge_offset_y, + mi_x, mi_y); + } + } +} +#endif // CONFIG_WEDGE_PARTITION + void vp9_dec_build_inter_predictors_sby_sub8x8_extend( MACROBLOCKD *xd, int mi_row, int mi_col, @@ -809,6 +1346,10 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend( PARTITION_TYPE partition) { const int mi_x = mi_col_ori * MI_SIZE; const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_WEDGE_PARTITION + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif uint8_t *orig_dst; int orig_dst_stride; int bw = 4 << b_width_log2_lookup[top_bsize]; @@ -820,6 +1361,9 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend( orig_dst = xd->plane[0].dst.buf; orig_dst_stride = xd->plane[0].dst.stride; dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf; @@ -827,22 +1371,37 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend( switch (partition) { case PARTITION_HORZ: dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; case PARTITION_VERT: dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; case PARTITION_SPLIT: dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf1; xd->plane[0].dst.stride = MAXTXLEN; dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); xd->plane[0].dst.buf = tmp_buf2; xd->plane[0].dst.stride = MAXTXLEN; dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); break; default: @@ -881,12 +1440,19 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend( } void 
vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_WEDGE_PARTITION + int mi_row, int mi_col, +#endif int mi_row_ori, int mi_col_ori, BLOCK_SIZE top_bsize) { int plane; const int mi_x = mi_col_ori * MI_SIZE; const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_WEDGE_PARTITION + const int wedge_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int wedge_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif for (plane = 1; plane < MAX_MB_PLANE; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize, &xd->plane[plane]); @@ -896,6 +1462,9 @@ void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, const int bh = 4 * num_4x4_h; dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif mi_x, mi_y); } } diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 922395311..d3faa26e0 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -76,6 +76,13 @@ void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf); +#if CONFIG_WEDGE_PARTITION +void vp9_generate_masked_weight(int wedge_index, BLOCK_SIZE sb_type, + int h, int w, uint8_t *mask, int stride); +void vp9_generate_hard_mask(int wedge_index, BLOCK_SIZE sb_type, + int h, int w, uint8_t *mask, int stride); +#endif // CONFIG_WEDGE_PARTITION + #if CONFIG_SUPERTX struct macroblockd_plane; void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, @@ -85,6 +92,9 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, BLOCK_SIZE top_bsize, PARTITION_TYPE partition); void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_WEDGE_PARTITION + int mi_row, int mi_col, +#endif int mi_row_ori, int mi_col_ori, BLOCK_SIZE top_bsize); @@ -100,9 +110,23 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, BLOCK_SIZE top_bsize, PARTITION_TYPE p); void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_WEDGE_PARTITION + int mi_row, int mi_col, +#endif int mi_row_ori, int mi_col_ori, BLOCK_SIZE top_bsize); + +#if CONFIG_WEDGE_PARTITION +void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize); +void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize); +#endif // CONFIG_WEDGE_PARTITION #endif // CONFIG_SUPERTX #ifdef __cplusplus diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index d2d836b4c..cc03a87b3 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1220,6 +1220,124 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_highbd_avg_8x8/; } +if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") { + add_proto qw/unsigned int vp9_masked_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance32x16/; + + add_proto qw/unsigned int vp9_masked_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masdctked_variance16x32/; + + add_proto qw/unsigned int vp9_masked_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t 
*ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance64x32/; + + add_proto qw/unsigned int vp9_masked_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance32x64/; + + add_proto qw/unsigned int vp9_masked_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance32x32/; + + add_proto qw/unsigned int vp9_masked_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance64x64/; + + add_proto qw/unsigned int vp9_masked_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance16x16/; + + add_proto qw/unsigned int vp9_masked_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance16x8/; + + add_proto qw/unsigned int vp9_masked_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance8x16/; + + add_proto qw/unsigned int vp9_masked_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance8x8/; + + add_proto qw/unsigned int vp9_masked_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance8x4/; + + add_proto qw/unsigned int vp9_masked_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance4x8/; + + add_proto qw/unsigned int vp9_masked_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance4x4/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance64x64/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance32x64/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance64x32/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, 
int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance32x16/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance16x32/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance32x32/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance16x16/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance8x16/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance16x8/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance8x8/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance8x4/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance4x8/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance4x4/; + + add_proto qw/unsigned int vp9_masked_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad64x64/; + + add_proto qw/unsigned int vp9_masked_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad32x64/; + + add_proto qw/unsigned int vp9_masked_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad64x32/; + + add_proto qw/unsigned int vp9_masked_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad32x16/; + + 
add_proto qw/unsigned int vp9_masked_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad16x32/; + + add_proto qw/unsigned int vp9_masked_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad32x32/; + + add_proto qw/unsigned int vp9_masked_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad16x16/; + + add_proto qw/unsigned int vp9_masked_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad16x8/; + + add_proto qw/unsigned int vp9_masked_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad8x16/; + + add_proto qw/unsigned int vp9_masked_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad8x8/; + + add_proto qw/unsigned int vp9_masked_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad8x4/; + + add_proto qw/unsigned int vp9_masked_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad4x8/; + + add_proto qw/unsigned int vp9_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad4x4/; +} # ENCODEMB INVOKE add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 12c72b5bc..a1963edd9 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -786,7 +786,12 @@ static void dec_predict_b_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(&xd->mi[0].mbmi)) set_ref(cm, xd, 1, mi_row_ori, mi_col_ori); mbmi->tx_size = b_width_log2_lookup[top_bsize]; +#if CONFIG_WEDGE_PARTITION + vp9_dec_build_inter_predictors_sb_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, top_bsize); +#else vp9_dec_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, top_bsize); +#endif // CONFIG_WEDGE_PARTITION } static void dec_predict_b_sub8x8_extend(VP9_COMMON *const cm, @@ -806,7 +811,11 @@ static void dec_predict_b_sub8x8_extend(VP9_COMMON *const cm, vp9_dec_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col, mi_row_ori, mi_col_ori, top_bsize, partition); - vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(xd, mi_row_ori, mi_col_ori, + vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(xd, +#if CONFIG_WEDGE_PARTITION + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, top_bsize); } @@ -2253,6 +2262,14 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, } } #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 0; i < BLOCK_SIZES; i++) { + if (get_wedge_bits(i)) + vp9_diff_update_prob(&r, 
&fc->wedge_interinter_prob[i]); + } + } +#endif // CONFIG_WEDGE_PARTITION } return vp9_reader_has_error(&r); diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index edf4e9098..473c18616 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -747,8 +747,22 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, nearestmv, nearmv, is_compound, allow_hp, r); } #if CONFIG_TX_SKIP - mbmi->uv_mode = mbmi->mode; + mbmi->uv_mode = mbmi->mode; #endif +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; + if (cm->reference_mode != SINGLE_REFERENCE && + is_inter_mode(mbmi->mode) && + get_wedge_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) { + mbmi->use_wedge_interinter = + vp9_read(r, cm->fc.wedge_interinter_prob[bsize]); + cm->counts.wedge_interinter[bsize][mbmi->use_wedge_interinter]++; + if (mbmi->use_wedge_interinter) { + mbmi->wedge_index = vp9_read_literal(r, get_wedge_bits(bsize)); + } + } +#endif // CONFIG_WEDGE_PARTITION } static void read_inter_frame_mode_info(VP9_COMMON *const cm, @@ -792,23 +806,26 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, COPY_MODE copy_mode_backup = mbmi->copy_mode; #if CONFIG_SUPERTX TX_SIZE tx_size_backup = mbmi->tx_size; -#endif +#endif // CONFIG_SUPERTX #if CONFIG_EXT_TX EXT_TX_TYPE ext_txfrm_backup = mbmi->ext_txfrm; -#endif +#endif // CONFIG_EXT_TX inter_block = 1; *mbmi = *inter_ref_list[mbmi->copy_mode - REF0]; +#if CONFIG_SUPERTX + mbmi->tx_size = tx_size_backup; +#endif // CONFIG_SUPERTX +#if CONFIG_EXT_TX + mbmi->ext_txfrm = ext_txfrm_backup; +#endif // CONFIG_EXT_TX #if CONFIG_INTERINTRA if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE; #endif // CONFIG_INTERINTRA -#if CONFIG_SUPERTX - mbmi->tx_size = tx_size_backup; -#endif -#if CONFIG_EXT_TX - mbmi->ext_txfrm = ext_txfrm_backup; -#endif +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; +#endif // CONFIG_WEDGE_PARTITION mbmi->sb_type = bsize_backup; mbmi->mode = NEARESTMV; mbmi->skip = skip_backup; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index e34f86037..2ccb208de 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -567,6 +567,17 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, allow_hp); } } +#if CONFIG_WEDGE_PARTITION + if (cm->reference_mode != SINGLE_REFERENCE && + is_inter_mode(mode) && + get_wedge_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) { + vp9_write(w, mbmi->use_wedge_interinter, + cm->fc.wedge_interinter_prob[bsize]); + if (mbmi->use_wedge_interinter) + vp9_write_literal(w, mbmi->wedge_index, get_wedge_bits(bsize)); + } +#endif // CONFIG_WEDGE_PARTITION } } @@ -1648,6 +1659,15 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { } } #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 0; i < BLOCK_SIZES; i++) + if (get_wedge_bits(i)) + vp9_cond_prob_diff_update(&header_bc, + &fc->wedge_interinter_prob[i], + cm->counts.wedge_interinter[i]); + } +#endif // CONFIG_WEDGE_PARTITION } vp9_stop_encode(&header_bc); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 255ac2191..27531f790 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -62,7 +62,11 @@ static int check_intra_b(PICK_MODE_CONTEXT *ctx); static int check_intra_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree); -static void predict_superblock(VP9_COMP 
*cpi, int mi_row_ori, int mi_col_ori, +static void predict_superblock(VP9_COMP *cpi, +#if CONFIG_WEDGE_PARTITION + int mi_row, int mi_col, +#endif // CONFIG_WEDGE_PARTITION + int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize); static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, PC_TREE *pc_tree); @@ -871,7 +875,13 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, ++cm->counts.interintra[bsize][0]; } } -#endif +#endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + if (cm->reference_mode != SINGLE_REFERENCE && + get_wedge_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) + ++cm->counts.wedge_interinter[bsize][mbmi->use_wedge_interinter]; +#endif // CONFIG_WEDGE_PARTITION } rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; @@ -971,6 +981,12 @@ static void update_state_supertx(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, const int ctx = vp9_get_pred_context_switchable_interp(xd); ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; } +#if CONFIG_WEDGE_PARTITION + if (cm->reference_mode != SINGLE_REFERENCE && + get_wedge_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) + ++cm->counts.wedge_interinter[bsize][mbmi->use_wedge_interinter]; +#endif // CONFIG_WEDGE_PARTITION } rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; @@ -4914,6 +4930,9 @@ static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, } static void predict_superblock(VP9_COMP *cpi, +#if CONFIG_WEDGE_PARTITION + int mi_row, int mi_col, +#endif // CONFIG_WEDGE_PARTITION int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; @@ -4937,7 +4956,12 @@ static void predict_superblock(VP9_COMP *cpi, vp9_setup_pre_planes(xd, ref, cfg, mi_row_ori, mi_col_ori, &xd->block_refs[ref]->sf); } +#if CONFIG_WEDGE_PARTITION + vp9_build_inter_predictors_sb_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, bsize); +#else vp9_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, bsize); +#endif // CONFIG_WEDGE_PARTITION } static void predict_superblock_sub8x8_extend(VP9_COMP *cpi, @@ -4970,6 +4994,9 @@ static void predict_superblock_sub8x8_extend(VP9_COMP *cpi, mi_row_ori, mi_col_ori, top_bsize, partition); vp9_build_inter_predictors_sbuv_sub8x8_extend(xd, +#if CONFIG_WEDGE_PARTITION + mi_row, mi_col, +#endif mi_row_ori, mi_col_ori, top_bsize); } @@ -4996,7 +5023,11 @@ static void predict_b_extend(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize) { set_offsets_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, bsize, top_bsize); - predict_superblock(cpi, mi_row_ori, mi_col_ori, top_bsize); + predict_superblock(cpi, +#if CONFIG_WEDGE_PARTITION + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, top_bsize); if (output_enabled) update_stats(&cpi->common, &cpi->mb); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index efc37bf18..a8680a160 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1655,6 +1655,40 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { vp9_sub_pixel_avg_variance4x4, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) +#if CONFIG_WEDGE_PARTITION +#define MBFP(BT, MSDF, MVF, MSVF) \ + cpi->fn_ptr[BT].msdf = MSDF; \ + cpi->fn_ptr[BT].mvf = MVF; \ + cpi->fn_ptr[BT].msvf = MSVF; + + MBFP(BLOCK_64X64, vp9_masked_sad64x64, vp9_masked_variance64x64, + vp9_masked_sub_pixel_variance64x64) + MBFP(BLOCK_64X32, vp9_masked_sad64x32, vp9_masked_variance64x32, + vp9_masked_sub_pixel_variance64x32) + MBFP(BLOCK_32X64, vp9_masked_sad32x64, 
vp9_masked_variance32x64, + vp9_masked_sub_pixel_variance32x64) + MBFP(BLOCK_32X32, vp9_masked_sad32x32, vp9_masked_variance32x32, + vp9_masked_sub_pixel_variance32x32) + MBFP(BLOCK_32X16, vp9_masked_sad32x16, vp9_masked_variance32x16, + vp9_masked_sub_pixel_variance32x16) + MBFP(BLOCK_16X32, vp9_masked_sad16x32, vp9_masked_variance16x32, + vp9_masked_sub_pixel_variance16x32) + MBFP(BLOCK_16X16, vp9_masked_sad16x16, vp9_masked_variance16x16, + vp9_masked_sub_pixel_variance16x16) + MBFP(BLOCK_16X8, vp9_masked_sad16x8, vp9_masked_variance16x8, + vp9_masked_sub_pixel_variance16x8) + MBFP(BLOCK_8X16, vp9_masked_sad8x16, vp9_masked_variance8x16, + vp9_masked_sub_pixel_variance8x16) + MBFP(BLOCK_8X8, vp9_masked_sad8x8, vp9_masked_variance8x8, + vp9_masked_sub_pixel_variance8x8) + MBFP(BLOCK_4X8, vp9_masked_sad4x8, vp9_masked_variance4x8, + vp9_masked_sub_pixel_variance4x8) + MBFP(BLOCK_8X4, vp9_masked_sad8x4, vp9_masked_variance8x4, + vp9_masked_sub_pixel_variance8x4) + MBFP(BLOCK_4X4, vp9_masked_sad4x4, vp9_masked_variance4x4, + vp9_masked_sub_pixel_variance4x4) +#endif // CONFIG_WEDGE_PARTITION + #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); #endif diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 69b419384..b2cdc76a2 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -2037,3 +2037,354 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, return var; } + +#if CONFIG_WEDGE_PARTITION +/* returns subpixel variance error function */ +#define DIST(r, c) \ + vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, mask, mask_stride, &sse) + +/* checks if (r, c) has better score than previous best */ + +#define MVC(r, c) \ + (mvcost ? \ + ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ + mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ + error_per_bit + 4096) >> 13 : 0) + +#define CHECK_BETTER(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + thismse = (DIST(r, c)); \ + if ((v = MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } + +int vp9_find_best_masked_sub_pixel_tree(const MACROBLOCK *x, + uint8_t *mask, int mask_stride, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, int is_second) { + const uint8_t *const z = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + const MACROBLOCKD *xd = &x->e_mbd; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int whichdir; + int thismse; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; + + const int y_stride = xd->plane[0].pre[is_second].stride; + const int offset = bestmv->row * y_stride + bestmv->col; + const uint8_t *const y = xd->plane[0].pre[is_second].buf; + + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; + int hstep = 4; + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); + + int tr = br; + int tc = bc; + + // central mv + bestmv->row *= 8; + bestmv->col *= 8; + + // calculate central point error + besterr = 
vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, + sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + + // 1/2 pel + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + // These lines insure static analysis doesn't warn that + // tr and tc aren't used after the above point. + (void) tr; + (void) tc; + + bestmv->row = br; + bestmv->col = bc; + + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + +#undef DIST +#undef MVC +#undef CHECK_BETTER + +int vp9_get_masked_mvpred_var(const MACROBLOCK *x, + uint8_t *mask, int mask_stride, + const MV *best_mv, const MV *center_mv, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, int is_second) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->mvf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, + mask, mask_stride, &unused) + + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); +} + +int vp9_masked_refining_search_sad_c(const MACROBLOCK *x, + uint8_t *mask, int mask_stride, + MV *ref_mv, int error_per_bit, + int search_range, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int is_second) { + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, mask, mask_stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); + int i, j; + + for (i = 0; i < search_range; i++) { + int best_site = -1; + + for (j = 0; j < 4; j++) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_site = j; + } + } + } + } + + if (best_site == -1) { + break; + } else { + ref_mv->row += neighbors[best_site].row; + ref_mv->col += neighbors[best_site].col; + } + } + return best_sad; +} + +int vp9_masked_diamond_search_sad_c(const MACROBLOCK *x, + const search_site_config *cfg, + uint8_t *mask, int mask_stride, + MV *ref_mv, MV *best_mv, + int search_param, + int sad_per_bit, int *num00, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int is_second) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = 
&xd->plane[0].pre[is_second]; + // search_param determines the length of the initial step and hence the number + // of iterations + // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = + // (MAX_FIRST_STEP/4) pel... etc. + const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + const uint8_t *best_address, *in_what_ref; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; + + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + in_what_ref = get_buf_from_mv(in_what, ref_mv); + best_address = in_what_ref; + *num00 = 0; + *best_mv = *ref_mv; + + // Check the starting position + best_sad = fn_ptr->msdf(what->buf, what->stride, + best_address, in_what->stride, + mask, mask_stride) + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); + + i = 1; + + for (step = 0; step < tot_steps; step++) { + for (j = 0; j < cfg->searches_per_step; j++) { + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->msdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_site = i; + } + } + } + + i++; + } + + if (best_site != last_site) { + best_mv->row += ss[best_site].mv.row; + best_mv->col += ss[best_site].mv.col; + best_address += ss[best_site].offset; + last_site = best_site; +#if defined(NEW_DIAMOND_SEARCH) + while (1) { + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + int sad = fn_ptr->msdf(what->buf, what->stride, + best_address + ss[best_site].offset, + in_what->stride, mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_mv->row += ss[best_site].mv.row; + best_mv->col += ss[best_site].mv.col; + best_address += ss[best_site].offset; + continue; + } + } + } + break; + } +#endif + } else if (best_address == in_what_ref) { + (*num00)++; + } + } + return best_sad; +} + +int vp9_masked_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *mask, int mask_stride, + MV *mvp_full, int step_param, + int sadpb, int further_steps, int do_refine, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv, + int is_second) { + MV temp_mv; + int thissme, n, num00 = 0; + int bestsme = vp9_masked_diamond_search_sad_c(x, &cpi->ss_cfg, + mask, mask_stride, + mvp_full, &temp_mv, + step_param, sadpb, &n, + fn_ptr, ref_mv, is_second); + if (bestsme < INT_MAX) + bestsme = vp9_get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv, + fn_ptr, 1, is_second); + *dst_mv = temp_mv; + + // If there won't be more n-step search, check to see if refining search is + // needed. 
+ if (n > further_steps) + do_refine = 0; + + while (n < further_steps) { + ++n; + + if (num00) { + num00--; + } else { + thissme = vp9_masked_diamond_search_sad_c(x, &cpi->ss_cfg, + mask, mask_stride, + mvp_full, &temp_mv, + step_param + n, sadpb, &num00, + fn_ptr, ref_mv, is_second); + if (thissme < INT_MAX) + thissme = vp9_get_masked_mvpred_var(x, mask, mask_stride, + &temp_mv, ref_mv, fn_ptr, 1, + is_second); + + // check to see if refining search is needed. + if (num00 > further_steps - n) + do_refine = 0; + + if (thissme < bestsme) { + bestsme = thissme; + *dst_mv = temp_mv; + } + } + } + + // final 1-away diamond refining search + if (do_refine) { + const int search_range = 8; + MV best_mv = *dst_mv; + thissme = vp9_masked_refining_search_sad_c(x, mask, mask_stride, + &best_mv, sadpb, search_range, + fn_ptr, ref_mv, is_second); + if (thissme < INT_MAX) + thissme = vp9_get_masked_mvpred_var(x, mask, mask_stride, + &best_mv, ref_mv, fn_ptr, 1, + is_second); + if (thissme < bestsme) { + bestsme = thissme; + *dst_mv = best_mv; + } + } + return bestsme; +} +#endif // CONFIG_WEDGE_PARTITION diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 9ddca250c..4afd407f1 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -146,6 +146,26 @@ int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); +#if CONFIG_WEDGE_PARTITION +int vp9_find_best_masked_sub_pixel_tree(const MACROBLOCK *x, + uint8_t *mask, int mask_stride, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, int is_second); +int vp9_masked_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *mask, int mask_stride, + MV *mvp_full, int step_param, + int sadpb, int further_steps, int do_refine, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv, + int is_second); +#endif // CONFIG_WEDGE_PARTITION #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 74eb46262..2ec021dd5 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -305,8 +305,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, } } - *skip_txfm_sb = skip_flag; - *skip_sse_sb = total_sse << 4; + if (skip_txfm_sb) *skip_txfm_sb = skip_flag; + if (skip_sse_sb) *skip_sse_sb = total_sse << 4; *out_rate_sum = (int)rate_sum; *out_dist_sum = dist_sum << 4; } @@ -2063,7 +2063,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, // max mv magnitude and the best ref mvs of the current block for // the given reference. step_param = (vp9_init_search_range(max_mv) + - cpi->mv_step_param) / 2; + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } @@ -2484,7 +2484,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // max mv magnitude and that based on the best ref mvs of the current // block for the given reference. 
step_param = (vp9_init_search_range(x->max_mv_context[ref]) + - cpi->mv_step_param) / 2; + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } @@ -2753,6 +2753,169 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, } } +#if CONFIG_WEDGE_PARTITION +static void do_masked_motion_search(VP9_COMP *cpi, MACROBLOCK *x, + uint8_t *mask, int mask_stride, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int_mv *tmp_mv, int *rate_mv, + int is_second) { + MACROBLOCKD *xd = &x->e_mbd; + const VP9_COMMON *cm = &cpi->common; + MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; + int bestsme = INT_MAX; + int step_param; + int sadpb = x->sadperbit16; + MV mvp_full; + int ref = mbmi->ref_frame[is_second]; + MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + + const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, + ref); + + MV pred_mv[3]; + pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv; + pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv; + pred_mv[2] = x->pred_mv[ref]; + + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[is_second]; + + vp9_setup_pre_planes(xd, is_second, scaled_ref_frame, mi_row, mi_col, NULL); + } + + vp9_set_mv_search_range(x, &ref_mv); + + // Work out the size of the first step in the mv step search. + // 0 here is maximum length first step. 1 is MAX >> 1 etc. + if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { + // Take wtd average of the step_params based on the last frame's + // max mv magnitude and that based on the best ref mvs of the current + // block for the given reference. + step_param = (vp9_init_search_range(x->max_mv_context[ref]) + + cpi->mv_step_param) / 2; + } else { + step_param = cpi->mv_step_param; + } + + // TODO(debargha): is show_frame needed here? + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && + cm->show_frame) { + int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - + MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); + step_param = MAX(step_param, boffset); + } + + if (cpi->sf.adaptive_motion_search) { + int bwl = b_width_log2_lookup[bsize]; + int bhl = b_height_log2_lookup[bsize]; + int i; + int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); + + if (tlevel < 5) + step_param += 2; + + for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { + if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { + x->pred_mv[ref].row = 0; + x->pred_mv[ref].col = 0; + tmp_mv->as_int = INVALID_MV; + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[is_second] = backup_yv12[i]; + } + return; + } + } + } + + mvp_full = pred_mv[x->mv_best_ref_index[ref]]; + + mvp_full.col >>= 3; + mvp_full.row >>= 3; + + bestsme = vp9_masked_full_pixel_diamond(cpi, x, mask, mask_stride, + &mvp_full, step_param, sadpb, + MAX_MVSEARCH_STEPS - 1 - step_param, + 1, &cpi->fn_ptr[bsize], + &ref_mv, &tmp_mv->as_mv, is_second); + + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. 
*/ + vp9_find_best_masked_sub_pixel_tree(x, mask, mask_stride, + &tmp_mv->as_mv, &ref_mv, + cm->allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + x->nmvjointcost, x->mvcost, + &dis, &x->pred_sse[ref], is_second); + } + *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + + if (cpi->sf.adaptive_motion_search && cm->show_frame) + x->pred_mv[ref] = tmp_mv->as_mv; + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[is_second] = backup_yv12[i]; + } +} + +static void do_masked_motion_search_indexed(VP9_COMP *cpi, MACROBLOCK *x, + int wedge_index, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int_mv *tmp_mv, int *rate_mv) { + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; + BLOCK_SIZE sb_type = mbmi->sb_type; + int w = (4 << b_width_log2_lookup[sb_type]); + int h = (4 << b_height_log2_lookup[sb_type]); + int i, j; + uint8_t mask[4096]; + int mask_stride = 64; + + vp9_generate_masked_weight(wedge_index, sb_type, h, w, + mask, mask_stride); + /* + vp9_generate_hard_mask(wedge_index, sb_type, h, w, + mask, mask_stride); + */ + + do_masked_motion_search(cpi, x, mask, mask_stride, bsize, + mi_row, mi_col, &tmp_mv[0], &rate_mv[0], 0); + + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) + mask[i * mask_stride + j] = 64 - mask[i * mask_stride + j]; + + do_masked_motion_search(cpi, x, mask, mask_stride, bsize, + mi_row, mi_col, &tmp_mv[1], &rate_mv[1], 1); +} +#endif // CONFIG_WEDGE_PARTITION + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int64_t txfm_cache[], @@ -2768,6 +2931,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_INTERINTRA int *compmode_interintra_cost, int single_newmv_rate[MAX_REF_FRAMES], +#endif +#if CONFIG_WEDGE_PARTITION + int *compmode_wedge_cost, #endif int64_t *psse, const int64_t ref_best_rd) { @@ -2796,8 +2962,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *orig_dst[MAX_MB_PLANE]; int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; -#if CONFIG_INTERINTRA +#if CONFIG_INTERINTRA || CONFIG_WEDGE_PARTITION int rate_mv_tmp = 0; +#endif +#if CONFIG_INTERINTRA const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); #endif INTERP_FILTER best_filter = SWITCHABLE; @@ -2813,6 +2981,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t skip_sse_sb = INT64_MAX; int64_t distortion_y = 0, distortion_uv = 0; +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; + *compmode_wedge_cost = 0; +#endif // CONFIG_WEDGE_PARTITION + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); @@ -2862,7 +3035,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &mbmi->ref_mvs[refs[1]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } -#if !CONFIG_INTERINTRA +#if !(CONFIG_INTERINTRA || CONFIG_WEDGE_PARTITION) *rate2 += rate_mv; #endif } else { @@ -2886,13 +3059,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &tmp_mv, &rate_mv); if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; +#if !CONFIG_WEDGE_PARTITION *rate2 += rate_mv; +#endif frame_mv[refs[0]].as_int = xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; #endif // CONFIG_INTERINTRA } -#if CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION || CONFIG_INTERINTRA 
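+    // The motion-vector rate is stashed in rate_mv_tmp rather than added to
+    // rate2 immediately, so that the wedge (or inter-intra) search can
+    // replace it with the rate of its re-estimated motion vectors before the
+    // total is finally added to rate2.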
rate_mv_tmp = rate_mv; #endif } @@ -3045,6 +3220,98 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cm->interp_filter : best_filter; rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0; +#if CONFIG_WEDGE_PARTITION + if (is_comp_pred && get_wedge_bits(bsize)) { + int wedge_index, best_wedge_index = WEDGE_NONE, rs; + int rate_sum; + int64_t dist_sum; + int64_t best_rd_nowedge = INT64_MAX; + int64_t best_rd_wedge = INT64_MAX; + int wedge_types; + mbmi->use_wedge_interinter = 0; + rs = vp9_cost_bit(cm->fc.wedge_interinter_prob[bsize], 0); + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv_tmp + rate_sum, dist_sum); + best_rd_nowedge = rd; + mbmi->use_wedge_interinter = 1; + rs = get_wedge_bits(bsize) * 256 + + vp9_cost_bit(cm->fc.wedge_interinter_prob[bsize], 1); + wedge_types = (1 << get_wedge_bits(bsize)); + if (this_mode == NEWMV) { + int_mv tmp_mv[2]; + int rate_mvs[2], tmp_rate_mv; + for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + mbmi->wedge_index = wedge_index; + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv_tmp + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_wedge_index = wedge_index; + best_rd_wedge = rd; + } + } + mbmi->wedge_index = best_wedge_index; + do_masked_motion_search_indexed(cpi, x, mbmi->wedge_index, bsize, + mi_row, mi_col, + tmp_mv, rate_mvs); + tmp_rate_mv = rate_mvs[0] + rate_mvs[1]; + mbmi->mv[0].as_int = tmp_mv[0].as_int; + mbmi->mv[1].as_int = tmp_mv[1].as_int; + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_rd_wedge = rd; + } else { + mbmi->mv[0].as_int = cur_mv[0].as_int; + mbmi->mv[1].as_int = cur_mv[1].as_int; + tmp_rate_mv = rate_mv_tmp; + } + if (best_rd_wedge < best_rd_nowedge) { + mbmi->use_wedge_interinter = 1; + mbmi->wedge_index = best_wedge_index; + xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0].src_mi->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int; + rate_mv_tmp = tmp_rate_mv; + } else { + mbmi->use_wedge_interinter = 0; + mbmi->mv[0].as_int = cur_mv[0].as_int; + mbmi->mv[1].as_int = cur_mv[1].as_int; + } + } else { + for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + mbmi->wedge_index = wedge_index; + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv_tmp + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_wedge_index = wedge_index; + best_rd_wedge = rd; + } + } + if (best_rd_wedge < best_rd_nowedge) { + mbmi->use_wedge_interinter = 1; + mbmi->wedge_index = best_wedge_index; + } else { + mbmi->use_wedge_interinter = 0; + } + } + + if (ref_best_rd < INT64_MAX && + MIN(best_rd_wedge, best_rd_nowedge) / 2 > ref_best_rd) + return INT64_MAX; + + pred_exists = 0; + if (mbmi->use_wedge_interinter) + *compmode_wedge_cost = get_wedge_bits(bsize) * 256 + + vp9_cost_bit(cm->fc.wedge_interinter_prob[bsize], 1); + else + *compmode_wedge_cost = + vp9_cost_bit(cm->fc.wedge_interinter_prob[bsize], 0); + } +#endif // CONFIG_WEDGE_PARTITION + #if CONFIG_INTERINTRA if ((!is_comp_pred) 
&& is_comp_interintra_pred && is_interintra_allowed(mbmi->sb_type)) { @@ -3096,7 +3363,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #endif // CONFIG_INTERINTRA -#if CONFIG_INTERINTRA +#if CONFIG_INTERINTRA || CONFIG_WEDGE_PARTITION *rate2 += rate_mv_tmp; #endif @@ -3588,6 +3855,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int compmode_cost = 0; #if CONFIG_INTERINTRA int compmode_interintra_cost = 0; +#endif +#if CONFIG_WEDGE_PARTITION + int compmode_wedge_cost = 0; #endif int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; @@ -3783,6 +4053,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interintra_mode = (PREDICTION_MODE)(DC_PRED - 1); mbmi->interintra_uv_mode = (PREDICTION_MODE)(DC_PRED - 1); #endif +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; +#endif if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; @@ -3921,6 +4194,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_INTERINTRA &compmode_interintra_cost, single_newmv_rate, +#endif +#if CONFIG_WEDGE_PARTITION + &compmode_wedge_cost, #endif &total_sse, best_rd); if (this_rd == INT64_MAX) @@ -3935,6 +4211,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_INTERINTRA rate2 += compmode_interintra_cost; #endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + if ((cm->reference_mode == REFERENCE_MODE_SELECT || + cm->reference_mode == COMPOUND_REFERENCE) && comp_pred) + rate2 += compmode_wedge_cost; +#endif // Estimate the reference frame signaling cost and add it // to the rolling cost variable. @@ -4259,7 +4540,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_INTERINTRA if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE; -#endif +#endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; +#endif // CONFIG_WEDGE_PARTITION mbmi->sb_type = bsize; mbmi->inter_ref_count = inter_ref_count; mbmi->copy_mode = copy_mode; @@ -4576,6 +4860,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, b_mode_info best_bmodes[4]; int best_skip2 = 0; int ref_frame_skip_mask[2] = { 0 }; + #if CONFIG_EXT_TX mbmi->ext_txfrm = NORM; #endif @@ -4587,6 +4872,9 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_COPY_MODE mbmi->copy_mode = NOREF; #endif +#if CONFIG_WEDGE_PARTITION + mbmi->use_wedge_interinter = 0; +#endif x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4); diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 73134f2f2..e7eda9781 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -274,3 +274,47 @@ highbd_sadMxNxK(4, 4, 8) highbd_sadMxNx4D(4, 4) #endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_WEDGE_PARTITION +// TODO(debargha): Need highbd versions of these +static INLINE unsigned int masked_sad(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + const uint8_t *m, int m_stride, + int width, int height) { + int y, x; + unsigned int sad = 0; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + sad += m[x] * abs(a[x] - b[x]); + + a += a_stride; + b += b_stride; + m += m_stride; + } + sad = (sad + 31) >> 6; + + return sad; +} + +#define MASKSADMxN(m, n) \ +unsigned int vp9_masked_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + const uint8_t *msk, int msk_stride) { \ + return masked_sad(src, 
src_stride, ref, ref_stride, msk, msk_stride, m, n); \ +} + +MASKSADMxN(64, 64) +MASKSADMxN(64, 32) +MASKSADMxN(32, 64) +MASKSADMxN(32, 32) +MASKSADMxN(32, 16) +MASKSADMxN(16, 32) +MASKSADMxN(16, 16) +MASKSADMxN(16, 8) +MASKSADMxN(8, 16) +MASKSADMxN(8, 8) +MASKSADMxN(8, 4) +MASKSADMxN(4, 8) +MASKSADMxN(4, 4) +#endif // CONFIG_WEDGE_PARTITION diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 4555bde1e..7225e92c8 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -649,3 +649,98 @@ void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, } } #endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_WEDGE_PARTITION +// TODO(debargha): Need highbd versions of these +void masked_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + const uint8_t *m, int m_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = (a[j] - b[j]) * (m[j]); + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + m += m_stride; + } + *sum = (*sum >= 0) ? ((*sum + 31) >> 6) : -((-*sum + 31) >> 6); + *sse = (*sse + 2047) >> 12; +} + +#define MASK_VAR(W, H) \ +unsigned int vp9_masked_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + const uint8_t *m, int m_stride, \ + unsigned int *sse) { \ + int sum; \ + masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} + +#define MASK_SUBPIX_VAR(W, H) \ +unsigned int vp9_masked_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, \ + int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, \ + const uint8_t *msk, int msk_stride, \ + unsigned int *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ +\ + var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ + BILINEAR_FILTERS_2TAP(xoffset)); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + BILINEAR_FILTERS_2TAP(yoffset)); \ +\ + return vp9_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, \ + msk, msk_stride, sse); \ +} + +MASK_VAR(4, 4) +MASK_SUBPIX_VAR(4, 4) + +MASK_VAR(4, 8) +MASK_SUBPIX_VAR(4, 8) + +MASK_VAR(8, 4) +MASK_SUBPIX_VAR(8, 4) + +MASK_VAR(8, 8) +MASK_SUBPIX_VAR(8, 8) + +MASK_VAR(8, 16) +MASK_SUBPIX_VAR(8, 16) + +MASK_VAR(16, 8) +MASK_SUBPIX_VAR(16, 8) + +MASK_VAR(16, 16) +MASK_SUBPIX_VAR(16, 16) + +MASK_VAR(16, 32) +MASK_SUBPIX_VAR(16, 32) + +MASK_VAR(32, 16) +MASK_SUBPIX_VAR(32, 16) + +MASK_VAR(32, 32) +MASK_SUBPIX_VAR(32, 32) + +MASK_VAR(32, 64) +MASK_SUBPIX_VAR(32, 64) + +MASK_VAR(64, 32) +MASK_SUBPIX_VAR(64, 32) + +MASK_VAR(64, 64) +MASK_SUBPIX_VAR(64, 64) +#endif // CONFIG_WEDGE_PARTITION diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 53148f23c..668bb4141 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -84,6 +84,31 @@ typedef unsigned int (*vp9_subp_avg_variance_fn_t)(const uint8_t *src_ptr, unsigned int *sse, const uint8_t *second_pred); +#if CONFIG_WEDGE_PARTITION +typedef unsigned int(*vp9_masked_sad_fn_t)(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int ref_stride, + const uint8_t *msk_ptr, + int msk_stride); +typedef unsigned int (*vp9_masked_variance_fn_t)(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int ref_stride, + const uint8_t *msk_ptr, + int msk_stride, + unsigned int 
*sse); +typedef unsigned int (*vp9_masked_subpixvariance_fn_t)(const uint8_t *src_ptr, + int source_stride, + int xoffset, + int yoffset, + const uint8_t *ref_ptr, + int Refstride, + const uint8_t *msk_ptr, + int msk_stride, + unsigned int *sse); +#endif // CONFIG_WEDGE_PARTITION + typedef struct vp9_variance_vtable { vp9_sad_fn_t sdf; vp9_sad_avg_fn_t sdaf; @@ -93,6 +118,11 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_fn_t sdx3f; vp9_sad_multi_fn_t sdx8f; vp9_sad_multi_d_fn_t sdx4df; +#if CONFIG_WEDGE_PARTITION + vp9_masked_sad_fn_t msdf; + vp9_masked_variance_fn_t mvf; + vp9_masked_subpixvariance_fn_t msvf; +#endif // CONFIG_WEDGE_PARTITION } vp9_variance_fn_ptr_t; void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,