From a4dfcd9a2de002c77007bbb1c3a1c8a6f4e3f5c3 Mon Sep 17 00:00:00 2001 From: Yue Chen Date: Mon, 11 Aug 2014 16:39:23 -0700 Subject: [PATCH] Implementing transform overlapping multiple blocks We removed the restriction that transform blocks could not exceed the size of prediction blocks. Smoothing masks are applied to reduce discontinuity between prediction blocks in order to realize the efficiency of large transform. 0.997%/0.895% bit-rate reduction is achieved on derf/stdhd set. Change-Id: I8db241bab9fe74d864809e95f76b771ee59a2def --- configure | 1 + vp9/common/vp9_blockd.h | 15 + vp9/common/vp9_common_data.c | 9 + vp9/common/vp9_common_data.h | 3 + vp9/common/vp9_entropymode.c | 35 +- vp9/common/vp9_entropymode.h | 9 + vp9/common/vp9_loopfilter.c | 200 +++++ vp9/common/vp9_reconinter.c | 535 +++++++++++- vp9/common/vp9_reconinter.h | 47 ++ vp9/decoder/vp9_decodeframe.c | 460 +++++++++- vp9/decoder/vp9_decodemv.c | 40 +- vp9/decoder/vp9_decodemv.h | 8 + vp9/encoder/vp9_bitstream.c | 145 +++- vp9/encoder/vp9_encodeframe.c | 1485 ++++++++++++++++++++++++++++++++- vp9/encoder/vp9_encodemb.c | 20 + vp9/encoder/vp9_encodemb.h | 3 + vp9/encoder/vp9_rdopt.c | 83 ++ vp9/encoder/vp9_rdopt.h | 21 + vp9/encoder/vp9_tokenize.c | 38 + vp9/encoder/vp9_tokenize.h | 4 + 20 files changed, 3098 insertions(+), 63 deletions(-) diff --git a/configure b/configure index fe370a5c0..b9ce9aab3 100755 --- a/configure +++ b/configure @@ -278,6 +278,7 @@ EXPERIMENT_LIST=" masked_interintra filterintra ext_tx + supertx " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3636ac5c8..baa46f37b 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -334,6 +334,13 @@ typedef struct macroblockd { PARTITION_CONTEXT left_seg_context[8]; } MACROBLOCKD; +#if CONFIG_SUPERTX +static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) { + return mbmi->tx_size > + MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type)); +} +#endif + 
static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { const BLOCK_SIZE subsize = subsize_lookup[partition][bsize]; @@ -399,7 +406,15 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) { } static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) { +#if CONFIG_SUPERTX + if (!supertx_enabled(mbmi)) { +#endif return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type); +#if CONFIG_SUPERTX + } else { + return uvsupertx_size_lookup[mbmi->tx_size]; + } +#endif } static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c index a927823e0..8159dea1d 100644 --- a/vp9/common/vp9_common_data.c +++ b/vp9/common/vp9_common_data.c @@ -133,6 +133,15 @@ const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, }; +#if CONFIG_SUPERTX +const TX_SIZE uvsupertx_size_lookup[TX_SIZES] = { + TX_4X4, + TX_4X4, + TX_8X8, + TX_16X16 +}; +#endif + // Generates 4 bit field in which each bit set to 1 represents // a blocksize partition 1111 means we split 64x64, 32x32, 16x16 // and 8x8. 
1000 means we just split the 64x64 to 32x32 diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h index f41962747..6a163f6fb 100644 --- a/vp9/common/vp9_common_data.h +++ b/vp9/common/vp9_common_data.h @@ -31,6 +31,9 @@ extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#if CONFIG_SUPERTX +extern const TX_SIZE uvsupertx_size_lookup[TX_SIZES]; +#endif #ifdef __cplusplus } // extern "C" diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index b52be94ed..532a96e48 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -31,7 +31,7 @@ static const vp9_prob default_masked_interintra_prob[BLOCK_SIZES] = { #endif #if CONFIG_FILTERINTRA -const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = { +static const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = { // DC V H D45 D135 D117 D153 D207 D63 TM {153, 171, 147, 150, 129, 101, 100, 153, 132, 111}, {171, 173, 185, 131, 70, 53, 70, 148, 127, 114}, @@ -41,7 +41,17 @@ const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = { #endif #if CONFIG_EXT_TX -const vp9_prob default_ext_tx_prob = 178; // 0.6 = 153, 0.7 = 178, 0.8 = 204 +static const vp9_prob default_ext_tx_prob = 178; +#endif + +#if CONFIG_SUPERTX +static const vp9_prob default_supertx_prob[TX_SIZES] = { + 255, 160, 160, 160 +}; + +static const vp9_prob default_supertxsplit_prob[TX_SIZES] = { + 255, 200, 200, 200 +}; #endif const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { @@ -372,6 +382,10 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc) { #if CONFIG_EXT_TX fc->ext_tx_prob = default_ext_tx_prob; #endif +#if CONFIG_SUPERTX + vp9_copy(fc->supertx_prob, default_supertx_prob); + vp9_copy(fc->supertxsplit_prob, default_supertxsplit_prob); +#endif } const 
vp9_tree_index vp9_switchable_interp_tree @@ -504,6 +518,23 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { #if CONFIG_EXT_TX fc->ext_tx_prob = adapt_prob(pre_fc->ext_tx_prob, counts->ext_tx); #endif + +#if CONFIG_SUPERTX + for (i = 1; i < TX_SIZES; ++i) { + fc->supertx_prob[i] = adapt_prob(pre_fc->supertx_prob[i], + counts->supertx[i]); +/* fprintf(stderr, "%d(%d %d) ", fc->supertx_prob[i], + counts->supertx[i][0], counts->supertx[i][1]);*/ + } + + for (i = 1; i < TX_SIZES; ++i) { + fc->supertxsplit_prob[i] = adapt_prob(pre_fc->supertxsplit_prob[i], + counts->supertxsplit[i]); +/* fprintf(stderr, "%d(%d %d) ", fc->supertxsplit_prob[i], + counts->supertxsplit[i][0], counts->supertxsplit[i][1]);*/ + } +/* fprintf(stderr, "\n");*/ +#endif } static void set_default_lf_deltas(struct loopfilter *lf) { diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index b8b0d4b84..7d5209e5a 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -67,6 +67,10 @@ typedef struct frame_contexts { #if CONFIG_EXT_TX vp9_prob ext_tx_prob; #endif +#if CONFIG_SUPERTX + vp9_prob supertx_prob[TX_SIZES]; + vp9_prob supertxsplit_prob[TX_SIZES]; +#endif } FRAME_CONTEXT; typedef struct { @@ -101,6 +105,11 @@ typedef struct { #if CONFIG_EXT_TX unsigned int ext_tx[2]; #endif +#if CONFIG_SUPERTX + unsigned int supertx[TX_SIZES][2]; + unsigned int supertxsplit[TX_SIZES][2]; + unsigned int supertx_size[BLOCK_SIZES]; +#endif } FRAME_COUNTS; extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index efd0249f4..e88b759a3 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -206,6 +206,13 @@ static const int mode_lf_lut[MB_MODE_COUNT] = { 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) }; +#if CONFIG_SUPERTX +static int supertx_enabled_lpf(const MB_MODE_INFO *mbmi) { + return mbmi->tx_size > + MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type)); 
+} +#endif + static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { int lvl; @@ -572,6 +579,85 @@ static void build_masks(const loop_filter_info_n *const lfi_n, *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; } +#if CONFIG_SUPERTX +static void build_masks_supertx(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + const int shift_uv, + LOOP_FILTER_MASK *lfm) { + const MB_MODE_INFO *mbmi = &mi->mbmi; + const TX_SIZE tx_size_y = mbmi->tx_size; + const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi); + const BLOCK_SIZE block_size = 3 * (int)tx_size_y; + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + int i; + + // If filter level is 0 we don't loop filter. + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + vpx_memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set : + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and v set things on a 16 bit scale. 
+ // + *above_y |= above_prediction_mask[block_size] << shift_y; + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_y |= left_prediction_mask[block_size] << shift_y; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mbmi->skip && is_inter_block(mbmi)) + return; + + // Here we are adding a mask for the transform size. The transform + // size mask is set to be correct for a 64x64 prediction block size. We + // mask to match the size of the block we are working on and then shift it + // into place.. + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + *above_uv |= (size_mask_uv[block_size] & + above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + *left_uv |= (size_mask_uv[block_size] & + left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + // Here we are trying to determine what to do with the internal 4x4 block + // boundaries. These differ from the 4x4 boundaries on the outside edge of + // an 8x8 in that the internal ones can be skipped and don't depend on + // the prediction block size. + if (tx_size_y == TX_4X4) + *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; + + if (tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} +#endif + // This function does the same thing as the one above with the exception that // it only affects the y masks. It exists because for blocks < 16x16 in size, // we only update u and v masks on the first block. 
@@ -615,6 +701,48 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; } +#if CONFIG_SUPERTX +static void build_y_mask_supertx(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + LOOP_FILTER_MASK *lfm) { + const MB_MODE_INFO *mbmi = &mi->mbmi; + const TX_SIZE tx_size_y = mbmi->tx_size; + const BLOCK_SIZE block_size = 3 * (int)tx_size_y; + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + int i; + + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + vpx_memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (mbmi->skip && is_inter_block(mbmi)) + return; + + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (tx_size_y == TX_4X4) + *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; +} +#endif + // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. @@ -650,6 +778,9 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, cm->mi_rows - mi_row : MI_BLOCK_SIZE); const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 
cm->mi_cols - mi_col : MI_BLOCK_SIZE); +#if CONFIG_SUPERTX + int supertx; +#endif vp9_zero(*lfm); @@ -687,20 +818,43 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_32X16: +#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) { +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); +#if CONFIG_SUPERTX + } else { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } +#endif break; case BLOCK_16X32: +#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) { +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); +#if CONFIG_SUPERTX + } else { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } +#endif break; default: +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_32X32) { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } else { +#endif for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; @@ -717,24 +871,56 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_16X8: +#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) { +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); +#if CONFIG_SUPERTX + } else { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } +#endif break; case BLOCK_8X16: 
+#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) { +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_col_offset +1 >= max_cols) continue; mip2 = mip + 1; build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); +#if CONFIG_SUPERTX + } else { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } +#endif break; default: { +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_16X16) { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } else { +#endif const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; +#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) { +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); +#if CONFIG_SUPERTX + } else { + build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm); + } +#endif mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { const int shift_y = shift_32_y[idx_32] + @@ -748,12 +934,26 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) continue; +#if CONFIG_SUPERTX + supertx = supertx_enabled_lpf(&mip[0]->mbmi); + if (!supertx) +#endif build_y_mask(lfi_n, mip[0], shift_y, lfm); +#if CONFIG_SUPERTX + else + build_y_mask_supertx(lfi_n, mip[0], shift_y, lfm); +#endif } +#if CONFIG_SUPERTX + } +#endif break; } } } +#if CONFIG_SUPERTX + } +#endif break; } } diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index e2b8768c3..ee973d2b6 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -164,7 +164,7 @@ static int get_masked_weight(int m) { } static int get_hard_mask(int m) { - return m > 0; + return 1 << MASK_WEIGHT_BITS * (m > 0); } // Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0 @@ -426,18 +426,62 @@ static void build_masked_compound(uint8_t *dst, int dst_stride, for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { int m 
= mask[i * 64 + j]; - dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + - dst2[i * dst2_stride + j] * - ((1 << MASK_WEIGHT_BITS) - m) + - (1 << (MASK_WEIGHT_BITS - 1))) >> + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << MASK_WEIGHT_BITS) - m) + + (1 << (MASK_WEIGHT_BITS - 1))) >> MASK_WEIGHT_BITS; } } + +#if CONFIG_SUPERTX +void generate_masked_weight_extend(int mask_index, int plane, + BLOCK_SIZE sb_type, int h, int w, + int mask_offset_x, int mask_offset_y, + uint8_t *mask, int stride) { + int i, j; + int subh = (plane ? 2 : 4) << b_height_log2(sb_type); + int subw = (plane ? 2 : 4) << b_width_log2(sb_type); + const int *a = get_mask_params(mask_index, sb_type, subh, subw); + if (!a) return; + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (j - (a[2] * subw) / 4 - mask_offset_x); + int y = (i - (a[3] * subh) / 4 - mask_offset_y); + int m = a[0] * x + a[1] * y; + mask[i * stride + j] = get_masked_weight(m); + } +} + +static void build_masked_compound_extend(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int plane, + int mask_index, BLOCK_SIZE sb_type, + int mask_offset_x, int mask_offset_y, + int h, int w) { + int i, j; + uint8_t mask[4096]; + generate_masked_weight_extend(mask_index, plane, sb_type, h, w, + mask_offset_x, mask_offset_y, mask, 64); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * 64 + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << MASK_WEIGHT_BITS) - m) + + (1 << (MASK_WEIGHT_BITS - 1))) >> + MASK_WEIGHT_BITS; + } +} +#endif #endif static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + int mask_offset_x, int mask_offset_y, +#endif int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; @@ -495,8 +539,14 @@ static 
void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, uint8_t tmp_dst[4096]; inter_predictor(pre, pre_buf->stride, tmp_dst, 64, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); +#if !CONFIG_SUPERTX build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, mi->mbmi.mask_index, mi->mbmi.sb_type, h, w); +#else + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.mask_index, mi->mbmi.sb_type, + mask_offset_x, mask_offset_y, h, w); +#endif } else { #endif inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, @@ -527,10 +577,18 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); + 4 * x, 4 * y, 4, 4, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + 0, 0, +#endif + mi_x, mi_y); } else { build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + 0, 0, +#endif + mi_x, mi_y); } } } @@ -558,6 +616,7 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, xd->plane[2].dst.stride, bsize); #endif } + void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, @@ -573,11 +632,287 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, #endif } +#if CONFIG_SUPERTX +static int get_masked_weight_supertx(int m) { + #define SMOOTHER_LEN 32 + static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 2, 2, 3, 4, 5, 6, + 8, 9, 12, 14, 17, 21, 24, 28, + 32, + 36, 40, 43, 47, 50, 52, 55, 56, + 58, 59, 60, 61, 62, 62, 63, 63, + 63, 63, 63, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + }; + if (m < -SMOOTHER_LEN) + return 0; + else if (m > SMOOTHER_LEN) + return 64; + else + 
return smoothfn[m + SMOOTHER_LEN]; +} + +static const uint8_t mask_8[8] = { + 64, 64, 62, 52, 12, 2, 0, 0 +}; + +static const uint8_t mask_16[16] = { + 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1 +}; + +static const uint8_t mask_32[32] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36, + 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static void generate_1dmask(int length, uint8_t *mask) { + int i; + switch (length) { + case 8: + vpx_memcpy(mask, mask_8, length); + break; + case 16: + vpx_memcpy(mask, mask_16, length); + break; + case 32: + vpx_memcpy(mask, mask_32, length); + break; + default: + assert(0); + } + if (length > 16) { + for (i = 0; i < length; ++i) + mask[i] = get_masked_weight_supertx(-1 * (2 * i - length + 1)); + } +} + +void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int plane, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition) { + int i, j; + uint8_t mask[32]; + int top_w = 4 << b_width_log2(top_bsize), + top_h = 4 << b_height_log2(top_bsize); + int w = 4 << b_width_log2(bsize), h = 4 << b_height_log2(bsize); + int w_offset = (mi_col - mi_col_ori) << 3, + h_offset = (mi_row - mi_row_ori) << 3; + int m; + + if (plane > 0) { + top_w = top_w >> 1; top_h = top_h >> 1; + w = w >> 1; h = h >> 1; + w_offset = w_offset >> 1; h_offset = h_offset >> 1; + } + switch (partition) { + case PARTITION_HORZ: + generate_1dmask(h, mask + h_offset); + vpx_memset(mask, 64, h_offset); + vpx_memset(mask + h_offset + h, 0, top_h - h_offset - h); + break; + case PARTITION_VERT: + generate_1dmask(w, mask + w_offset); + vpx_memset(mask, 64, w_offset); + vpx_memset(mask + w_offset + w, 0, top_w - w_offset - w); + break; + default: + assert(0); + } + for (i = 0; i < top_h; ++i) + for (j = 0; j < top_w; ++j) { + m = partition == PARTITION_HORZ ? 
mask[i] : mask[j]; + if (m == 64) + continue; + if (m == 0) + dst[i * dst_stride + j] = dst2[i * dst2_stride + j]; + else + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + (64 - m) + 32) >> 6; + } +} + +#if CONFIG_MASKED_INTERINTER +void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, + mask_offset_x, mask_offset_y, mi_x, mi_y); + } else { + build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, + mask_offset_x, mask_offset_y, mi_x, mi_y); + } + } +} +#endif + +void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition) { + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_MASKED_INTERINTER + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif + uint8_t *orig_dst; + int orig_dst_stride; + int bw = 4 << b_width_log2(top_bsize); + int bh = 4 << b_height_log2(top_bsize); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32); + 
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32); + + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + + xd->plane[0].dst.buf = tmp_buf; + xd->plane[0].dst.stride = 32; + switch (partition) { + case PARTITION_HORZ: + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_VERT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_SPLIT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf1; + xd->plane[0].dst.stride = 32; + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf2; + xd->plane[0].dst.stride = 32; + build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; + default: + assert(0); + } + + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + } else { + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(tmp_buf1, 32, + tmp_buf2, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + 
BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf1, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + } +} + +void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_MASKED_INTERINTER + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + } +} +#endif + // TODO(jingning): This function serves as a placeholder for decoder prediction // using on demand border extension. It should be moved to /decoder/ directory. 
static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + int mask_offset_x, int mask_offset_y, +#endif int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; @@ -715,8 +1050,14 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, uint8_t tmp_dst[4096]; inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); +#if !CONFIG_SUPERTX build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, mi->mbmi.mask_index, mi->mbmi.sb_type, h, w); +#else + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.mask_index, mi->mbmi.sb_type, + mask_offset_x, mask_offset_y, h, w); +#endif } else { #endif inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, @@ -746,10 +1087,18 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) dec_build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); + 4 * x, 4 * y, 4, 4, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + 0, 0, +#endif + mi_x, mi_y); } else { dec_build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER + 0, 0, +#endif + mi_x, mi_y); } } #if CONFIG_INTERINTRA @@ -763,6 +1112,174 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, #endif } +#if CONFIG_SUPERTX +#if CONFIG_MASKED_INTERINTER +void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - 
mi_row_ori) * MI_SIZE; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, + mask_offset_x, mask_offset_y, mi_x, mi_y); + } else { + dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, + mask_offset_x, mask_offset_y, mi_x, mi_y); + } + } +} +#endif + +void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition) { + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * MI_SIZE; +#if CONFIG_MASKED_INTERINTER + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif + uint8_t *orig_dst; + int orig_dst_stride; + int bw = 4 << b_width_log2(top_bsize); + int bh = 4 << b_height_log2(top_bsize); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32); + + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + + xd->plane[0].dst.buf = tmp_buf; + xd->plane[0].dst.stride = 32; + switch (partition) { + case PARTITION_HORZ: + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; 
+ case PARTITION_VERT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_SPLIT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf1; + xd->plane[0].dst.stride = 32; + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf2; + xd->plane[0].dst.stride = 32; + dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + break; + default: + assert(0); + } + + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + } else { + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(tmp_buf1, 32, + tmp_buf2, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, + tmp_buf1, 32, + 0, mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + } +} + +void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize) { + int plane; + const int mi_x = mi_col_ori * MI_SIZE; + const int mi_y = mi_row_ori * 
MI_SIZE; +#if CONFIG_MASKED_INTERINTER + const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE; + const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE; +#endif + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_MASKED_INTERINTER + mask_offset_x, mask_offset_y, +#endif + mi_x, mi_y); + } +} +#endif + void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 34733c7ae..00edacaa9 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -72,6 +72,53 @@ void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE sb_type, int h, int w, uint8_t *mask, int stride); #endif +#if CONFIG_SUPERTX +void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition); +void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize); +void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int plane, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition); +void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE p); +void 
vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize); +#if CONFIG_MASKED_INTERINTER +void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize); +void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize); +#endif +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index fb14b6822..c9c9ce81f 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -335,6 +335,84 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, return &xd->mi[0]->mbmi; } +#if CONFIG_SUPERTX +static void set_offsets_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori) { + const int bw = num_8x8_blocks_wide_lookup[top_bsize]; + const int bh = num_8x8_blocks_high_lookup[top_bsize]; + const int offset = mi_row * cm->mi_stride + mi_col; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + + set_mi_row_col(xd, tile, mi_row_ori, bh, mi_col_ori, bw, + cm->mi_rows, cm->mi_cols); +} + +static void set_mb_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int x_mis = MIN(bw, cm->mi_cols - mi_col); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; + for (y = 0; y < y_mis; ++y) + for (x = !y; x 
< x_mis; ++x) + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); +} + +static void set_offsets_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int offset = mi_row * cm->mi_stride + mi_col; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); +} + +static void set_param_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, +#if CONFIG_EXT_TX + int txfm, +#endif + int skip) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int x_mis = MIN(bw, cm->mi_cols - mi_col); + const int y_mis = MIN(bh, cm->mi_rows - mi_row); + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + + for (y = 0; y < y_mis; ++y) + for (x = 0; x < x_mis; ++x) { + xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip; +#if CONFIG_EXT_TX + xd->mi[y * cm->mi_stride + x]->mbmi.ext_txfrm = txfm; +#endif + } +} +#endif + static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, int idx, int mi_row, int mi_col) { MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; @@ -348,14 +426,246 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, xd->corrupted |= ref_buffer->buf->corrupted; } +#if CONFIG_SUPERTX +static void dec_predict_b_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE top_bsize) { + set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col, + 
mi_row_ori, mi_col_ori); + + set_ref(cm, xd, 0, mi_row_ori, mi_col_ori); + if (has_second_ref(&xd->mi[0]->mbmi)) + set_ref(cm, xd, 1, mi_row_ori, mi_col_ori); + xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize); +#if !CONFIG_MASKED_INTERINTER + vp9_dec_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, top_bsize); +#else + vp9_dec_build_inter_predictors_sb_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, top_bsize); +#endif +} + +static void dec_predict_b_sub8x8_extend(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition) { + set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col, + mi_row_ori, mi_col_ori); + + set_ref(cm, xd, 0, mi_row_ori, mi_col_ori); + if (has_second_ref(&xd->mi[0]->mbmi)) + set_ref(cm, xd, 1, mi_row_ori, mi_col_ori); + xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize); + vp9_dec_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(xd, +#if CONFIG_MASKED_INTERINTER + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, + top_bsize); +} + +static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3]) { + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + MB_MODE_INFO *mbmi; + int i, offset = mi_row * cm->mi_stride + mi_col; + + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, MAX_MB_PLANE * 32 * 32); + uint8_t *dst_buf1[3] = {tmp_buf1, tmp_buf1 + 32 * 32, tmp_buf1 + 2 * 32 * 32}; + uint8_t *dst_buf2[3] = 
{tmp_buf2, tmp_buf2 + 32 * 32, tmp_buf2 + 2 * 32 * 32}; + uint8_t *dst_buf3[3] = {tmp_buf3, tmp_buf3 + 32 * 32, tmp_buf3 + 2 * 32 * 32}; + int dst_stride1[3] = {32, 32, 32}; + int dst_stride2[3] = {32, 32, 32}; + int dst_stride3[3] = {32, 32, 32}; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + mbmi = &xd->mi[0]->mbmi; + partition = partition_lookup[bsl][mbmi->sb_type]; + subsize = get_subsize(bsize, partition); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + top_bsize); + break; + case PARTITION_HORZ: + if (bsize > BLOCK_8X8) { + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, + mi_col_ori, top_bsize); + } else { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + dec_predict_b_extend(cm, xd, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, top_bsize); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + break; + case PARTITION_VERT: + if (bsize > BLOCK_8X8) { + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, + mi_col_ori, top_bsize); + } else { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } + if (mi_col + hbs < cm->mi_cols && bsize > 
BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col + hbs, mi_row_ori, + mi_col_ori, top_bsize); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } else { + dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, subsize, top_bsize, + dst_buf, dst_stride); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, subsize, top_bsize, + dst_buf1, dst_stride1); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, subsize, top_bsize, + dst_buf2, dst_stride2); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col + hbs, + mi_row_ori, mi_col_ori, subsize, top_bsize, + dst_buf3, dst_stride3); + for (i = 0; i < MAX_MB_PLANE; i++) { + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], + dst_stride1[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + if (mi_row + hbs < cm->mi_rows) { + vp9_build_masked_inter_predictor_complex(dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + 
vp9_build_masked_inter_predictor_complex(dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + } + break; + default: + assert(0); + } +} +#endif + static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; +#if !CONFIG_SUPERTX MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); - vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); +#else + MB_MODE_INFO *mbmi; + if (!supertx_enabled) { + mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + } else { + set_mb_offsets(cm, xd, tile, bsize, mi_row, mi_col); + } +#endif + vp9_read_mode_info(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r); +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif if (less8x8) bsize = BLOCK_8X8; @@ -389,6 +699,9 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, mbmi->skip = 1; // skip loopfilter } } +#if CONFIG_SUPERTX + } +#endif xd->corrupted |= vp9_reader_has_error(r); } @@ -419,45 +732,161 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, +#if CONFIG_SUPERTX + int read_token, int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize; +#if CONFIG_SUPERTX + int skip = 0; +#if CONFIG_EXT_TX + int txfm = 0; 
+#endif +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + partition != PARTITION_NONE && + bsize <= BLOCK_32X32 && + !supertx_enabled) { + TX_SIZE supertx_size = b_width_log2(bsize); + if (partition == PARTITION_SPLIT) { + supertx_enabled = vp9_read(r, cm->fc.supertxsplit_prob[supertx_size]); + cm->counts.supertxsplit[supertx_size][supertx_enabled]++; + } else { + supertx_enabled = vp9_read(r, cm->fc.supertx_prob[supertx_size]); + cm->counts.supertx[supertx_size][supertx_enabled]++; + } + } + if (supertx_enabled && read_token) { + int offset = mi_row * cm->mi_stride + mi_col; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + set_skip_context(xd, mi_row, mi_col); + // Here we assume mbmi->segment_id = 0 + skip = read_skip(cm, xd, 0, r); + if (skip) + reset_skip_context(xd, bsize); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16 && !skip) { + txfm = vp9_read(r, cm->fc.ext_tx_prob); + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.ext_tx[txfm]; + } +#endif + } +#endif if (subsize < BLOCK_8X8) { - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r, subsize); if (mi_row + hbs < cm->mi_rows) - decode_block(cm, xd, tile, 
mi_row + hbs, mi_col, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row, mi_col, r, subsize); + decode_partition(cm, xd, tile, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row + hbs, mi_col, r, subsize); + decode_partition(cm, xd, tile, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); } } +#if CONFIG_SUPERTX + if (supertx_enabled && read_token) { + uint8_t *dst_buf[3]; + int dst_stride[3], i; + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, mi_row, mi_col, + bsize, bsize, dst_buf, dst_stride); + + if (!skip) { + int eobtotal = 0; + struct inter_args arg = { cm, xd, r, &eobtotal }; + set_offsets_topblock(cm, xd, tile, bsize, 
mi_row, mi_col); +#if CONFIG_EXT_TX + xd->mi[0]->mbmi.ext_txfrm = txfm; +#endif + vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); + if (!(subsize < BLOCK_8X8) && eobtotal == 0) + skip = 1; + } + set_param_topblock(cm, xd, bsize, mi_row, mi_col, +#if CONFIG_EXT_TX + txfm, +#endif + skip); + } +#endif + // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) @@ -855,7 +1284,11 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col, + decode_partition(tile_data->cm, &tile_data->xd, &tile, +#if CONFIG_SUPERTX + 1, 0, +#endif + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } @@ -909,6 +1342,9 @@ static int tile_worker_hook(void *arg1, void *arg2) { for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data->cm, &tile_data->xd, tile, +#if CONFIG_SUPERTX + 1, 0, +#endif mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 15309d2a0..549d08450 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -144,7 +144,11 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, return segment_id; } +#if !CONFIG_SUPERTX static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, +#else +int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, +#endif int segment_id, vp9_reader *r) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; @@ -550,6 +554,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, MODE_INFO *const mi, +#if CONFIG_SUPERTX && CONFIG_EXT_TX + int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader *r) { MB_MODE_INFO 
*const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -564,6 +571,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, #if CONFIG_EXT_TX if (mbmi->tx_size <= TX_16X16 && bsize >= BLOCK_8X8 && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && !mbmi->skip) { mbmi->ext_txfrm = vp9_read(r, cm->fc.ext_tx_prob); @@ -700,6 +710,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, static void read_inter_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader *r) { MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -707,23 +720,46 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type, !mbmi->skip || !inter_block, r); +#if CONFIG_SUPERTX + } else { + const int ctx = vp9_get_intra_inter_context(xd); + mbmi->segment_id = 0; + inter_block = 1; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.intra_inter[ctx][1]; + } +#endif if (inter_block) - read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); + read_inter_block_mode_info(cm, xd, tile, mi, +#if CONFIG_SUPERTX && CONFIG_EXT_TX + supertx_enabled, +#endif + mi_row, mi_col, r); else read_intra_block_mode_info(cm, mi, r); } void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *const tile, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader *r) { if (frame_is_intra_only(cm)) read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); else - 
read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r); + read_inter_frame_mode_info(cm, xd, tile, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col, r); } diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h index 7394b62b4..baebb5dce 100644 --- a/vp9/decoder/vp9_decodemv.h +++ b/vp9/decoder/vp9_decodemv.h @@ -21,8 +21,16 @@ struct TileInfo; void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, const struct TileInfo *const tile, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vp9_reader *r); +#if CONFIG_SUPERTX +int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, + int segment_id, vp9_reader *r); +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index cbc12a584..8f3620136 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -39,6 +39,18 @@ static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS]; static struct vp9_token partition_encodings[PARTITION_TYPES]; static struct vp9_token inter_mode_encodings[INTER_MODES]; +#if CONFIG_SUPERTX +static int vp9_check_supertx(VP9_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + MODE_INFO **mi; + + mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + + return mi[0]->mbmi.tx_size == b_width_log2(bsize) && + mi[0]->mbmi.sb_type < bsize; +} +#endif + void vp9_entropy_mode_init() { vp9_tokens_from_tree(intra_mode_encodings, vp9_intra_mode_tree); vp9_tokens_from_tree(switchable_interp_encodings, vp9_switchable_interp_tree); @@ -225,6 +237,9 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { } static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif vp9_writer *w) { VP9_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc.nmvc; @@ -252,12 +267,28 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, } } +#if CONFIG_SUPERTX + 
if (!supertx_enabled) +#endif skip = write_skip(cpi, segment_id, mi, w); +#if CONFIG_SUPERTX + else + skip = mbmi->skip; +#endif +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); +#if CONFIG_SUPERTX + } +#endif if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif !(is_inter && (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { write_selected_tx_size(cpi, mbmi->tx_size, bsize, w); @@ -305,6 +336,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, #if CONFIG_EXT_TX if (mbmi->tx_size <= TX_16X16 && bsize >= BLOCK_8X8 && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif !mbmi->skip && !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { vp9_write(w, mbmi->ext_txfrm, cm->fc.ext_tx_prob); @@ -451,6 +485,9 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8, static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; @@ -466,11 +503,21 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, if (frame_is_intra_only(cm)) { write_mb_modes_kf(cpi, xd->mi, w); } else { +#if CONFIG_SUPERTX + pack_inter_mode_mvs(cpi, m, supertx_enabled, w); +#else pack_inter_mode_mvs(cpi, m, w); +#endif } +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif assert(*tok < tok_end); pack_mb_tokens(w, tok, tok_end); +#if CONFIG_SUPERTX + } +#endif } static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, @@ -497,6 +544,9 @@ static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, +#if 
CONFIG_SUPERTX + int pack_token, int supertx_enabled, +#endif int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; @@ -513,36 +563,105 @@ static void write_modes_sb(VP9_COMP *cpi, partition = partition_lookup[bsl][m->mbmi.sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); +#if CONFIG_SUPERTX + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + set_mi_row_col(xd, tile, + mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + if (!supertx_enabled && cm->frame_type != KEY_FRAME && + partition != PARTITION_NONE && bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + vp9_prob prob = partition == PARTITION_SPLIT ? + cm->fc.supertxsplit_prob[supertx_size] : + cm->fc.supertx_prob[supertx_size]; + supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size); + vp9_write(w, supertx_enabled, prob); + if (supertx_enabled) { + vp9_write(w, xd->mi[0]->mbmi.skip, vp9_get_skip_prob(cm, xd)); +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip) + vp9_write(w, xd->mi[0]->mbmi.ext_txfrm, cm->fc.ext_tx_prob); +#endif + } + } +#endif if (subsize < BLOCK_8X8) { - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col); } else { switch (partition) { case PARTITION_NONE: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col); break; case PARTITION_HORZ: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col); if (mi_row + bs < cm->mi_rows) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, 
mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row + bs, mi_col); break; case PARTITION_VERT: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col); if (mi_col + bs < cm->mi_cols) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + mi_row, mi_col + bs); break; case PARTITION_SPLIT: - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs, + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row, mi_col + bs, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col, + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row + bs, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs, + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + !supertx_enabled, supertx_enabled, +#endif + mi_row + bs, mi_col + bs, subsize); break; default: assert(0); } } +#if CONFIG_SUPERTX + if (partition != PARTITION_NONE && supertx_enabled && pack_token) { + assert(*tok < tok_end); + pack_mb_tokens(w, tok, tok_end); + } +#endif // update partition context if (bsize >= BLOCK_8X8 && @@ -560,7 +679,11 @@ static void write_modes(VP9_COMP *cpi, vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + 1, 0, +#endif + mi_row, 
mi_col, BLOCK_64X64); } } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index da07f9edb..cd6862f4c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -41,6 +41,9 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" +#if CONFIG_SUPERTX +#include "vp9/encoder/vp9_cost.h" +#endif #define GF_ZEROMV_ZBIN_BOOST 0 #define LF_ZEROMV_ZBIN_BOOST 0 @@ -52,6 +55,43 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +#if CONFIG_SUPERTX +static int check_intra_b(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx); + +static int check_intra_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree); +static void predict_superblock(VP9_COMP *cpi, int output_enabled, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree); +static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree); +static void update_state_sb_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree); +static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, + int *skippable, int64_t *sse, +#if CONFIG_EXT_TX + int *best_tx, +#endif + PC_TREE *pc_tree); +#endif + // Motion vector component magnitude threshold for defining fast motion. 
#define FAST_MOTION_MV_THRESH 24 @@ -193,6 +233,74 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } } +#if CONFIG_SUPERTX +static void set_offsets_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); +} + +static void set_offsets_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize) { + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[top_bsize]; + const int mi_height = num_8x8_blocks_high_lookup[top_bsize]; + const struct segmentation *const seg = &cm->seg; + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + mbmi = &xd->mi[0]->mbmi; + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row_ori + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col_ori + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row_ori) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col_ori) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. 
+ assert(!(mi_col_ori & (mi_width - 1)) && !(mi_row_ori & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row_ori, mi_height, mi_col_ori, mi_width, + cm->mi_rows, cm->mi_cols); + xd->up_available = (mi_row != 0); + xd->left_available = (mi_col > tile->mi_col_start); + + // R/D setup. + x->rddiv = cpi->rd.RDDIV; + x->rdmult = cpi->rd.RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + vp9_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} +#endif + static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, MACROBLOCKD *const xd, int mi_row, @@ -531,7 +639,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; +#if !CONFIG_SUPERTX assert(mi->mbmi.sb_type == bsize); +#endif *mi_addr = *mi; @@ -669,6 +779,288 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } } +#if CONFIG_SUPERTX +static void update_state_supertx(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { + int i, y, x_idx; + VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi = &ctx->mic; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; + const int mis = cm->mi_stride; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + *mi_addr = *mi; + assert(is_inter_block(mbmi)); + + // If segmentation in use + if (seg->enabled && output_enabled) { + // 
For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); + } + } + + // Restore the coding context of the MB to that that was in place + // when the mode was picked for it + for (y = 0; y < mi_height; y++) + for (x_idx = 0; x_idx < mi_width; x_idx++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { + xd->mi[x_idx + y * mis] = mi_addr; + } + + if (cpi->oxcf.aq_mode) + vp9_init_plane_quantizers(cpi, x); + + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { + mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + } + + x->skip = ctx->skip; + vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(uint8_t) * ctx->num_4x4_blk); + + if (!output_enabled) + return; + + if (!frame_is_intra_only(cm)) { + if (is_inter_block(mbmi)) { + vp9_update_mv_count(cm, xd); + + if (cm->interp_filter == SWITCHABLE) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } +#if CONFIG_MASKED_INTERINTER + if (cm->use_masked_interinter && + cm->reference_mode != SINGLE_REFERENCE && + get_mask_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) + ++cm->counts.masked_interinter[bsize][mbmi->use_masked_interinter]; +#endif + +#if CONFIG_INTERINTRA + if (cm->use_interintra && + is_interintra_allowed(bsize) && + is_inter_mode(mbmi->mode) && + 
(mbmi->ref_frame[1] <= INTRA_FRAME)) { + if (mbmi->ref_frame[1] == INTRA_FRAME) { + assert(0); + ++cm->counts.y_mode[size_group_lookup[bsize]][mbmi->interintra_mode]; + ++cm->counts.interintra[bsize][1]; +#if CONFIG_MASKED_INTERINTRA + if (cm->use_masked_interintra && get_mask_bits_interintra(bsize)) + ++cm->counts.masked_interintra[bsize][mbmi->use_masked_interintra]; +#endif + } else { + ++cm->counts.interintra[bsize][0]; + } + } +#endif + } + + rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rd_opt->filter_diff[i] += ctx->best_filter_diff[i]; + } +} + +static void update_state_sb_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + int i; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->none, mi_row, mi_col, + subsize, output_enabled); + break; + case PARTITION_VERT: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->vertical[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row, mi_col + hbs, subsize); + update_state_supertx(cpi, &pc_tree->vertical[1], mi_row, mi_col + hbs, + subsize, output_enabled); + } + 
break; + case PARTITION_HORZ: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->horizontal[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->horizontal[1], mi_row + hbs, mi_col, + subsize, output_enabled); + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, pc_tree->leaf_split[0], mi_row, mi_col, + subsize, output_enabled); + } else { + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_sb_supertx(cpi, tile, mi_row, mi_col, subsize, + output_enabled, pc_tree->split[0]); + set_offsets_supertx(cpi, tile, mi_row, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, tile, mi_row, mi_col + hbs, subsize, + output_enabled, pc_tree->split[1]); + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col, subsize); + update_state_sb_supertx(cpi, tile, mi_row + hbs, mi_col, subsize, + output_enabled, pc_tree->split[2]); + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + output_enabled, pc_tree->split[3]); + } + break; + default: + assert(0); + } + + for (i = 0; i < MAX_MB_PLANE; ++i) { + p[i].coeff = (&pc_tree->none)->coeff_pbuf[i][1]; + p[i].qcoeff = (&pc_tree->none)->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = (&pc_tree->none)->dqcoeff_pbuf[i][1]; + p[i].eobs = (&pc_tree->none)->eobs_pbuf[i][1]; + } +} + +static void update_supertx_param(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size) { + MACROBLOCK *const x = &cpi->mb; + + ctx->mic.mbmi.tx_size = supertx_size; + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[supertx_size], + sizeof(uint8_t) * ctx->num_4x4_blk); + ctx->skip = x->skip; +#if CONFIG_EXT_TX + 
ctx->mic.mbmi.ext_txfrm = best_tx; +#endif +} + +static void update_supertx_param_sb(VP9_COMP *cpi, int mi_row, int mi_col, + BLOCK_SIZE bsize, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size, PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + update_supertx_param(cpi, &pc_tree->none, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_VERT: + update_supertx_param(cpi, &pc_tree->vertical[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) + update_supertx_param(cpi, &pc_tree->vertical[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_HORZ: + update_supertx_param(cpi, &pc_tree->horizontal[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) + update_supertx_param(cpi, &pc_tree->horizontal[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + update_supertx_param(cpi, pc_tree->leaf_split[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + } else { + update_supertx_param_sb(cpi, mi_row, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[0]); + update_supertx_param_sb(cpi, mi_row, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[1]); + update_supertx_param_sb(cpi, mi_row + hbs, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[2]); + update_supertx_param_sb(cpi, mi_row + hbs, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[3]); + } + break; + default: + 
assert(0); + } +} +#endif + void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, @@ -719,7 +1111,11 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate, static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, - int *totalrate, int64_t *totaldist, + int *totalrate, +#if CONFIG_SUPERTX + int *totalrate_nocoef, +#endif + int64_t *totaldist, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd, int block) { VP9_COMMON *const cm = &cpi->common; @@ -746,6 +1142,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, // there is nothing to be done. if (block != 0) { *totalrate = 0; +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif *totaldist = 0; return; } @@ -809,17 +1208,35 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, if (frame_is_intra_only(cm)) { vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif } else { if (bsize >= BLOCK_8X8) { if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) +#if CONFIG_SUPERTX + { +#endif vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, tile, mi_row, mi_col, - totalrate, totaldist, bsize, ctx, + totalrate, + totaldist, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = *totalrate; + } +#endif else vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col, - totalrate, totaldist, bsize, ctx, best_rd); + totalrate, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif + totaldist, bsize, ctx, best_rd); } else { vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif totaldist, bsize, ctx, best_rd); } } @@ -829,6 +1246,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) { 
vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); +#if CONFIG_SUPERTX + *totalrate_nocoef = (int)round(*totalrate_nocoef * rdmult_ratio); +#endif } } @@ -981,6 +1401,80 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, if (output_enabled && bsize != BLOCK_4X4) cm->counts.partition[ctx][partition]++; +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + bsize <= BLOCK_32X32 && + partition != PARTITION_NONE) { + int supertx_enabled; + TX_SIZE supertx_size = b_width_log2(bsize); + supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree); + if (supertx_enabled) { + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int x_idx, y_idx, i; + uint8_t *dst_buf[3]; + int dst_stride[3]; + set_skip_context(xd, mi_row, mi_col); + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + update_state_sb_supertx(cpi, tile, mi_row, mi_col, bsize, + output_enabled, pc_tree); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, mi_row, mi_col, bsize); + if (!x->skip) { + xd->mi[0]->mbmi.skip = 1; + vp9_encode_sb_supertx(x, bsize); + vp9_tokenize_sb_supertx(cpi, tp, !output_enabled, bsize); + } else { + xd->mi[0]->mbmi.skip = 1; + if (output_enabled) + cm->counts.skip[vp9_get_skip_context(xd)][1]++; + reset_skip_context(xd, bsize); + } + if (output_enabled) { + for (y_idx = 0; y_idx < mi_height; y_idx++) + for (x_idx = 0; x_idx < mi_width; x_idx++) { + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height + > y_idx) { + xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip = + xd->mi[0]->mbmi.skip; + 
} + } + if (partition != PARTITION_SPLIT) + cm->counts.supertx[supertx_size][1]++; + else + cm->counts.supertxsplit[supertx_size][1]++; + cm->counts.supertx_size[supertx_size]++; +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip) + ++cm->counts.ext_tx[xd->mi[0]->mbmi.ext_txfrm]; +#endif + (*tp)->token = EOSB_TOKEN; + (*tp)++; + } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); + return; + } else { + if (output_enabled) { + if (partition != PARTITION_SPLIT) + cm->counts.supertx[supertx_size][0]++; + else + cm->counts.supertxsplit[supertx_size][0]++; + } + } + } +#endif switch (partition) { case PARTITION_NONE: encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, @@ -1458,7 +1952,11 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, + BLOCK_SIZE bsize, int *rate, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif + int64_t *dist, int do_recon, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; @@ -1486,6 +1984,11 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; +#if CONFIG_SUPERTX + int last_part_rate_nocoef = INT_MAX; + int none_rate_nocoef = INT_MAX; + int chosen_rate_nocoef = INT_MAX; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1526,13 +2029,20 @@ static void rd_use_partition(VP9_COMP *cpi, mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, +#if CONFIG_SUPERTX + &none_rate_nocoef, +#endif + &none_dist, bsize, ctx, INT64_MAX, 0); pl = 
partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist); } @@ -1545,82 +2055,136 @@ static void rd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, bsize, ctx, INT64_MAX, 0); break; case PARTITION_HORZ: rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, &pc_tree->horizontal[0], INT64_MAX, 0); if (last_part_rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, subsize, &pc_tree->horizontal[1], INT64_MAX, 1); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break; case PARTITION_VERT: rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, &pc_tree->vertical[0], INT64_MAX, 0); if (last_part_rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; 
PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, pc_tree->leaf_split[0], INT64_MAX, 0); break; } last_part_rate = 0; +#if CONFIG_SUPERTX + last_part_rate_nocoef = 0; +#endif last_part_dist = 0; for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; +#if CONFIG_SUPERTX + int rt_nocoef; +#endif int64_t dt; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, - mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt, + mi_row + y_idx, mi_col + x_idx, subsize, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, i != 3, pc_tree->split[i]); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break; @@ -1631,6 +2195,9 @@ static void rd_use_partition(VP9_COMP *cpi, pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) {
last_part_rate += cpi->partition_cost[pl][partition]; +#if CONFIG_SUPERTX + last_part_rate_nocoef += cpi->partition_cost[pl][partition]; +#endif last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } @@ -1644,6 +2211,9 @@ static void rd_use_partition(VP9_COMP *cpi, mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; +#if CONFIG_SUPERTX + chosen_rate_nocoef = 0; +#endif chosen_dist = 0; restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->partitioning = PARTITION_SPLIT; @@ -1653,6 +2223,9 @@ static void rd_use_partition(VP9_COMP *cpi, int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; @@ -1662,7 +2235,11 @@ static void rd_use_partition(VP9_COMP *cpi, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, split_subsize, &pc_tree->split[i]->none, INT64_MAX, i); @@ -1670,11 +2247,17 @@ static void rd_use_partition(VP9_COMP *cpi, if (rt == INT_MAX || dt == INT64_MAX) { chosen_rate = INT_MAX; +#if CONFIG_SUPERTX + chosen_rate_nocoef = INT_MAX; +#endif chosen_dist = INT64_MAX; break; } chosen_rate += rt; +#if CONFIG_SUPERTX + chosen_rate_nocoef += rt_nocoef; +#endif chosen_dist += dt; if (i != 3) @@ -1684,10 +2267,16 @@ static void rd_use_partition(VP9_COMP *cpi, pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } pl = partition_plane_context(xd, mi_row, mi_col, 
bsize); if (chosen_rate < INT_MAX) { chosen_rate += cpi->partition_cost[pl][PARTITION_SPLIT]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); } } @@ -1698,6 +2287,9 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rate = last_part_rate; +#if CONFIG_SUPERTX + chosen_rate_nocoef = last_part_rate_nocoef; +#endif chosen_dist = last_part_dist; chosen_rd = last_part_rd; } @@ -1706,6 +2298,9 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; chosen_rate = none_rate; +#if CONFIG_SUPERTX + chosen_rate_nocoef = none_rate_nocoef; +#endif chosen_dist = none_dist; } @@ -1735,6 +2330,9 @@ static void rd_use_partition(VP9_COMP *cpi, } *rate = chosen_rate; +#if CONFIG_SUPERTX + *rate_nocoef = chosen_rate_nocoef; +#endif *dist = chosen_dist; } @@ -1930,6 +2528,9 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int64_t *dist, int do_recon, int64_t best_rd, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; @@ -1943,6 +2544,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int i, pl; BLOCK_SIZE subsize; int this_rate, sum_rate = 0, best_rate = INT_MAX; +#if CONFIG_SUPERTX + int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; + int tmp_rate; + int abort_flag; + int64_t tmp_dist, tmp_rd; + PARTITION_TYPE best_partition; +#endif int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; int64_t sum_rd = 0; int do_split = bsize >= BLOCK_8X8; @@ -2000,12 +2608,19 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // PARTITION_NONE if (partition_none_allowed) { - 
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, +#if CONFIG_SUPERTX + &this_rate_nocoef, +#endif + &this_dist, bsize, ctx, best_rd, 0); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + this_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); if (sum_rd < best_rd) { @@ -2013,6 +2628,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t stop_thresh_rd; best_rate = this_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = this_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = this_dist; best_rd = sum_rd; if (bsize >= BLOCK_8X8) @@ -2049,21 +2668,69 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, +#if !CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, + &sum_dist, subsize, pc_tree->leaf_split[0], best_rd, 0); - if (sum_rate == INT_MAX) { +#else + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, + &sum_rate_nocoef, + &sum_dist, subsize, + pc_tree->leaf_split[0], INT64_MAX, 0); +#endif + if (sum_rate == INT_MAX) sum_rd = INT64_MAX; - } else { + else sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - update_state(cpi, pc_tree->leaf_split[0], mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, - pc_tree->leaf_split[0]); - update_partition_context(xd, mi_row, mi_col, subsize, bsize); +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + sum_rd < INT64_MAX) { + TX_SIZE supertx_size = 
b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } } + pc_tree->partitioning = best_partition; } +#endif } else { +#if !CONFIG_SUPERTX for (i = 0; i < 4 && sum_rd < best_rd; ++i) { +#else + for (i = 0; i < 4 && sum_rd < INT64_MAX; ++i) { +#endif const int x_idx = (i & 1) * mi_step; const int y_idx = (i >> 1) * mi_step; @@ -2072,27 +2739,88 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); - +#if !CONFIG_SUPERTX rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd, pc_tree->split[i]); +#else + rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, + &this_rate_nocoef, + &this_dist, i != 3, + INT64_MAX - sum_rd, pc_tree->split[i]); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } 
+#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + sum_rd < INT64_MAX && i == 4 && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif } if (sum_rd < best_rd && i == 4) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = sum_dist; best_rd = sum_rd; pc_tree->partitioning = PARTITION_SPLIT; @@ -2115,10 +2843,17 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + 
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif + &sum_dist, subsize, &pc_tree->horizontal[0], best_rd, 0); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); +#if CONFIG_SUPERTX + abort_flag = sum_rd >= best_rd; +#endif if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); @@ -2130,24 +2865,86 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if !CONFIG_SUPERTX rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, &pc_tree->horizontal[1], best_rd - sum_rd, 1); +#else + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, + &this_rate_nocoef, + &this_dist, subsize, &pc_tree->horizontal[1], + INT64_MAX, 1); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + !abort_flag && + sum_rd < INT64_MAX && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_HORZ; + + sum_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += 
vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif if (sum_rd < best_rd) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_HORZ]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_HORZ]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rd = sum_rd; best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = sum_dist; pc_tree->partitioning = PARTITION_HORZ; } @@ -2164,9 +2961,16 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif + &sum_dist, subsize, &pc_tree->vertical[0], best_rd, 0); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); +#if CONFIG_SUPERTX + abort_flag = sum_rd >= best_rd; +#endif if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, @@ -2178,24 +2982,87 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if !CONFIG_SUPERTX rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, &pc_tree->vertical[1], best_rd - sum_rd, 1); +#else + 
rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, + &this_rate_nocoef, + &this_dist, subsize, + &pc_tree->vertical[1], INT64_MAX, + 1); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + !abort_flag && + sum_rd < INT64_MAX && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_VERT; + + sum_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif if (sum_rd < best_rd) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_VERT]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_VERT]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); 
+#endif best_dist = sum_dist; best_rd = sum_rd; pc_tree->partitioning = PARTITION_VERT; @@ -2209,6 +3076,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // checks occur in some sub function and thus are used... (void) best_rd; *rate = best_rate; +#if CONFIG_SUPERTX + *rate_nocoef = best_rate_nocoef; +#endif *dist = best_dist; if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { @@ -2251,6 +3121,9 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { int dummy_rate; +#if CONFIG_SUPERTX + int dummy_rate_nocoef; +#endif int64_t dummy_dist; int i; @@ -2283,18 +3156,30 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, sf->always_this_block_size); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else { GF_GROUP * gf_grp = &cpi->twopass.gf_group; int last_was_mid_sequence_overlay = 0; @@ -2320,7 +3205,11 @@ 
static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, INT64_MAX, cpi->pc_root); } else { if (sf->constrain_copy_partition && @@ -2330,7 +3219,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, else copy_partitioning(cm, mi, prev_mi); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } } } else { @@ -2342,7 +3235,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, INT64_MAX, cpi->pc_root); } } } @@ -3244,17 +4141,37 @@ void vp9_encode_frame(VP9_COMP *cpi) { count32x32 += cm->counts.tx.p32x32[i][TX_32X32]; } +#if !CONFIG_SUPERTX if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && count32x32 == 0) { +#else + if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && + count32x32 == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ALLOW_8X8; reset_skip_tx_size(cm, TX_8X8); +#if !CONFIG_SUPERTX } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { +#else + } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && + count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0 && + cm->counts.supertx_size[TX_8X8] == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ONLY_4X4; 
reset_skip_tx_size(cm, TX_4X4); } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { cm->tx_mode = ALLOW_32X32; +#if !CONFIG_SUPERTX } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { +#else + } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ALLOW_16X16; reset_skip_tx_size(cm, TX_16X16); } @@ -3433,3 +4350,517 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, #endif } } + +#if CONFIG_SUPERTX +static int check_intra_b(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx) { +#if CONFIG_INTERINTRA + return !is_inter_mode((&ctx->mic)->mbmi.mode) || + (ctx->mic.mbmi.ref_frame[1] == INTRA_FRAME); +#else + return !is_inter_mode((&ctx->mic)->mbmi.mode); +#endif +} + +static int check_intra_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize = bsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return 1; + + if (bsize >= BLOCK_8X8) + subsize = get_subsize(bsize, pc_tree->partitioning); + else + subsize = BLOCK_4X4; + + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + return check_intra_b(cpi, tile, mi_row, mi_col, subsize, &pc_tree->none); + break; + case PARTITION_VERT: + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + &pc_tree->vertical[0])) + return 1; + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + if (check_intra_b(cpi, tile, mi_row, mi_col + hbs, subsize, + &pc_tree->vertical[1])) + return 1; + } + break; + case PARTITION_HORZ: + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + &pc_tree->horizontal[0])) + return 1; + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + 
if (check_intra_b(cpi, tile, mi_row + hbs, mi_col, subsize, + &pc_tree->horizontal[1])) + return 1; + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + pc_tree->leaf_split[0])) + return 1; + } else { + if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize, + pc_tree->split[0])) + return 1; + if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize, + pc_tree->split[1])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize, + pc_tree->split[2])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + pc_tree->split[3])) + return 1; + } + break; + default: + assert(0); + } + return 0; +} + +static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) { + return ctx->mic.mbmi.tx_size == supertx_size; +} + +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree) { + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + partition = pc_tree->partitioning; + subsize = get_subsize(bsize, partition); + switch (partition) { + case PARTITION_NONE: + return check_supertx_b(supertx_size, &pc_tree->none); + case PARTITION_VERT: + return check_supertx_b(supertx_size, &pc_tree->vertical[0]); + case PARTITION_HORZ: + return check_supertx_b(supertx_size, &pc_tree->horizontal[0]); + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) + return check_supertx_b(supertx_size, pc_tree->leaf_split[0]); + else + return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]); + default: + assert(0); + } +} + +static void predict_superblock(VP9_COMP *cpi, + int output_enabled, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO **mi_8x8 = xd->mi; + MODE_INFO *mi = mi_8x8[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + 
const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + cpi->zbin_mode_boost = get_zbin_mode_boost(mbmi, + cpi->zbin_mode_boost_enabled); + vp9_update_zbin_extra(cpi, x); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp9_setup_pre_planes(xd, ref, cfg, mi_row_ori, mi_col_ori, + &xd->block_refs[ref]->sf); + } +#if !CONFIG_MASKED_INTERINTER + vp9_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, bsize); +#else + vp9_build_inter_predictors_sb_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, bsize); +#endif +} + +static void predict_superblock_sub8x8_extend(VP9_COMP *cpi, + int output_enabled, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx, + PARTITION_TYPE partition) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO **mi_8x8 = xd->mi; + MODE_INFO *mi = mi_8x8[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + cpi->zbin_mode_boost = get_zbin_mode_boost(mbmi, + cpi->zbin_mode_boost_enabled); + vp9_update_zbin_extra(cpi, x); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp9_setup_pre_planes(xd, ref, cfg, mi_row_ori, mi_col_ori, + &xd->block_refs[ref]->sf); + } + vp9_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + vp9_build_inter_predictors_sbuv_sub8x8_extend(xd, +#if CONFIG_MASKED_INTERINTER + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, + top_bsize); +} + +static void predict_b_sub8x8_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, + 
BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx, + PARTITION_TYPE partition) { + set_offsets_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + bsize, top_bsize); + predict_superblock_sub8x8_extend(cpi, output_enabled, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, ctx, partition); + + if (output_enabled) + update_stats(cpi); +} + +static void predict_b_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx) { + set_offsets_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + bsize, top_bsize); + predict_superblock(cpi, output_enabled, +#if CONFIG_MASKED_INTERINTER + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, top_bsize, ctx); + + if (output_enabled) + update_stats(cpi); +} + +// This function generates prediction for multiple blocks, between which +// discontinuity around boundary is reduced by smoothing masks. The basic +// smoothing mask is a soft step function along horz/vert direction. In more +// complicated case when a block is split into 4 subblocks, the basic mask is +// first applied to neighboring subblocks (2 pairs) in horizontal direction and +// then applied to the 2 masked prediction mentioned above in vertical direction +// If the block is split into more than one level, at every stage, masked +// prediction is stored in dst_buf[] passed from higher level. 
+static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, MAX_MB_PLANE * 32 * 32); + uint8_t *dst_buf1[3] = {tmp_buf1, tmp_buf1 + 32 * 32, tmp_buf1 + 2 * 32 * 32}; + uint8_t *dst_buf2[3] = {tmp_buf2, tmp_buf2 + 32 * 32, tmp_buf2 + 2 * 32 * 32}; + uint8_t *dst_buf3[3] = {tmp_buf3, tmp_buf3 + 32 * 32, tmp_buf3 + 2 * 32 * 32}; + int dst_stride1[3] = {32, 32, 32}; + int dst_stride2[3] = {32, 32, 32}; + int dst_stride3[3] = {32, 32, 32}; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, bsize, top_bsize, + &pc_tree->none); + break; + case PARTITION_HORZ: + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize, + 
&pc_tree->horizontal[0]); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, &pc_tree->horizontal[0], + PARTITION_HORZ); + } + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + predict_b_extend(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, &pc_tree->horizontal[1]); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + break; + case PARTITION_VERT: + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize, + &pc_tree->vertical[0]); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, &pc_tree->vertical[0], + PARTITION_VERT); + } + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + predict_b_extend(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, &pc_tree->vertical[1]); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + 
bsize, top_bsize, pc_tree->leaf_split[0], + PARTITION_SPLIT); + } else { + predict_sb_complex(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row + hbs, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + for (i = 0; i < MAX_MB_PLANE; i++) { + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + if (mi_row + hbs < cm->mi_rows) { + vp9_build_masked_inter_predictor_complex(dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf2[i], dst_stride2[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + } + break; + default: + assert(0); + } + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == 
BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); +} + +static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, + int *skippable, int64_t *sse, +#if CONFIG_EXT_TX + int *best_tx, +#endif + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int plane, pnskip, this_rate, base_rate = *tmp_rate; + int64_t pnsse, this_dist; + uint8_t *dst_buf[3]; + int dst_stride[3]; + + update_state_sb_supertx(cpi, tile, mi_row, mi_col, bsize, 0, pc_tree); + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + dst_buf[plane] = xd->plane[plane].dst.buf; + dst_stride[plane] = xd->plane[plane].dst.stride; + } + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, mi_row, mi_col, bsize); +#if CONFIG_EXT_TX + *best_tx = 0; +#endif + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + TX_SIZE tx_size = plane ? 
(b_width_log2(bsize) - 1) : b_width_log2(bsize); + vp9_subtract_plane(x, bsize, plane); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16 && plane == 0) { + int txfm, this_rate_tx, pnskip_tx, + best_tx_nostx = xd->mi[0]->mbmi.ext_txfrm; + int64_t this_dist_tx, pnsse_tx, rd, bestrd_tx = INT64_MAX; + + for (txfm = 0; txfm < EXT_TX_TYPES; txfm++) { + xd->mi[0]->mbmi.ext_txfrm = txfm; + txfm_rd_in_plane_supertx(x, &this_rate_tx, &this_dist_tx, + &pnskip_tx, &pnsse_tx, + INT64_MAX, plane, bsize, tx_size, 0); + this_rate_tx += vp9_cost_bit(cm->fc.ext_tx_prob, txfm); + rd = RDCOST(x->rdmult, x->rddiv, this_rate_tx, this_dist_tx); + if (rd < bestrd_tx * 0.97 || bestrd_tx == INT64_MAX) { + *best_tx = txfm; + bestrd_tx = rd; + this_rate = this_rate_tx; + this_dist = this_dist_tx; + pnskip = pnskip_tx; + pnsse = pnsse_tx; + } + } + + xd->mi[0]->mbmi.ext_txfrm = best_tx_nostx; + } else { +#endif + txfm_rd_in_plane_supertx(x, &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, plane, bsize, tx_size, 0); +#if CONFIG_EXT_TX + } +#endif + *tmp_rate += this_rate; + *tmp_dist += this_dist; + *sse += pnsse; + *skippable &= pnskip; + } + x->skip = 0; + if (*skippable) { + *tmp_rate = base_rate; + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16) + *tmp_rate -= vp9_cost_bit(cm->fc.ext_tx_prob, *best_tx); + *best_tx = 0; +#endif + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate - base_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, *sse)) { + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + } else { + *tmp_rate = base_rate; + *tmp_dist = *sse; + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16) + *tmp_rate -= vp9_cost_bit(cm->fc.ext_tx_prob, *best_tx); + *best_tx = 0; +#endif + x->skip = 1; + } + } +} +#endif diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 243b06b55..e3e3af897 100644 --- a/vp9/encoder/vp9_encodemb.c +++ 
b/vp9/encoder/vp9_encodemb.c @@ -578,6 +578,26 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } +#if CONFIG_SUPERTX +void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct encode_b_args arg = {x, &ctx, &mbmi->skip}; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + BLOCK_SIZE plane_size = bsize - 3 * (plane > 0); + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; + vp9_subtract_plane(x, bsize, plane); + vp9_get_entropy_contexts(bsize, tx_size, pd, + ctx.ta[plane], ctx.tl[plane]); + encode_block(plane, 0, plane_size, b_width_log2(plane_size), &arg); + } +} +#endif + static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 3196c9920..d4db160cf 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -21,6 +21,9 @@ extern "C" { #endif void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize); +#endif void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 03b7a2758..4beddffa9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -611,8 +611,10 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int pt = combine_entropy_contexts(*A, *L); int c, cost; // Check for consistency of tx_size with mode info +#if !CONFIG_SUPERTX assert(type == PLANE_TYPE_Y ? 
mbmi->tx_size == tx_size : get_uv_tx_size(mbmi) == tx_size); +#endif if (eob == 0) { // single eob token @@ -777,7 +779,11 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, } } +#if !CONFIG_SUPERTX static void txfm_rd_in_plane(MACROBLOCK *x, +#else +void txfm_rd_in_plane(MACROBLOCK *x, +#endif int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, @@ -813,6 +819,41 @@ static void txfm_rd_in_plane(MACROBLOCK *x, } } +#if CONFIG_SUPERTX +void txfm_rd_in_plane_supertx(MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + struct rdcost_block_args args; + vp9_zero(args); + args.x = x; + args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; + + vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); + + args.so = get_scan(xd, tx_size, pd->plane_type, 0); + + block_rd_txfm(plane, 0, get_plane_block_size(bsize, pd), tx_size, &args); + + if (args.skip) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } else { + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = !x->plane[plane].eobs[0]; + } +} +#endif + static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, @@ -3687,6 +3728,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -3768,6 +3812,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_sse[i] = INT_MAX; *returnrate = INT_MAX; +#if CONFIG_SUPERTX + 
*returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; @@ -4042,6 +4089,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE uv_tx; #if CONFIG_FILTERINTRA mbmi->filterbit = 0; +#endif +#if CONFIG_EXT_TX + mbmi->ext_txfrm = 0; #endif intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, tx_cache, best_rd); @@ -4174,6 +4224,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best yrd calculation +#if CONFIG_SUPERTX + rate_y = 0; +#endif rate_uv = 0; // Cost the skip mb case @@ -4253,6 +4306,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } *returnrate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) { + *returnrate_nocoef -= vp9_cost_bit(vp9_get_skip_prob(cm, xd), + skippable || this_skip2); + } + *returnrate_nocoef -= vp9_cost_bit(vp9_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); +#endif *returndistortion = distortion2; best_rd = this_rd; best_mbmode = *mbmi; @@ -4536,6 +4598,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -4611,6 +4676,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv_intra = INT_MAX; *returnrate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { @@ -4750,6 +4818,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { int rate; +#if CONFIG_EXT_TX + mbmi->ext_txfrm = 0; +#endif if 
(rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; @@ -5004,6 +5075,15 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } *returnrate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) + *returnrate_nocoef -= vp9_cost_bit(vp9_get_skip_prob(cm, xd), + this_skip2); + *returnrate_nocoef -= vp9_cost_bit(vp9_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); + assert(*returnrate_nocoef > 0); +#endif *returndistortion = distortion2; best_rd = this_rd; best_yrd = best_rd - @@ -5109,6 +5189,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd == INT64_MAX) { *returnrate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif *returndistortion = INT64_MAX; return best_rd; } diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 4f60f1c71..668a7f31e 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -171,6 +171,9 @@ int64_t vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -191,6 +194,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -222,6 +228,21 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd, int mi_row, int mi_col, const struct scale_factors *scale, const struct scale_factors *scale_uv); + +#if CONFIG_SUPERTX +void txfm_rd_in_plane_supertx(MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +void 
txfm_rd_in_plane(MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +#endif #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index dcca92d21..e28ac3a09 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -334,3 +334,41 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } + +#if CONFIG_SUPERTX +void vp9_tokenize_sb_supertx(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + TOKENEXTRA *t_backup = *t; + const int ctx = vp9_get_skip_context(xd); + const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_SKIP); + struct tokenize_b_args arg = {cpi, xd, t}; + int plane; + if (mbmi->skip) { + if (!dry_run) + cm->counts.skip[ctx][1] += skip_inc; + reset_skip_context(xd, bsize); + if (dry_run) + *t = t_backup; + return; + } + + if (!dry_run) { + cm->counts.skip[ctx][0] += skip_inc; + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + BLOCK_SIZE plane_size = plane ? (bsize - 3) : bsize; + tokenize_b(plane, 0, plane_size, b_width_log2(plane_size), &arg); + } + } else { + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + BLOCK_SIZE plane_size = plane ? 
(bsize - 3) : bsize; + set_entropy_context_b(plane, 0, plane_size, b_width_log2(plane_size), + &arg); + } + *t = t_backup; + } +} +#endif diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 063c0bafe..5f4f7e5be 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -46,6 +46,10 @@ struct VP9_COMP; void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp9_tokenize_sb_supertx(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, + BLOCK_SIZE bsize); +#endif extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to