diff --git a/configure b/configure
index fe370a5c0..b9ce9aab3 100755
--- a/configure
+++ b/configure
@@ -278,6 +278,7 @@ EXPERIMENT_LIST="
     masked_interintra
     filterintra
     ext_tx
+    supertx
 "
 CONFIG_LIST="
     external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 3636ac5c8..baa46f37b 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -334,6 +334,13 @@ typedef struct macroblockd {
   PARTITION_CONTEXT left_seg_context[8];
 } MACROBLOCKD;
 
+#if CONFIG_SUPERTX
+static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
+  return mbmi->tx_size >
+         MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type));
+}
+#endif
+
 static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
                                      PARTITION_TYPE partition) {
   const BLOCK_SIZE subsize = subsize_lookup[partition][bsize];
@@ -399,7 +406,15 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
 }
 
 static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
+#if CONFIG_SUPERTX
+  if (!supertx_enabled(mbmi)) {
+#endif
   return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
+#if CONFIG_SUPERTX
+  } else {
+    return uvsupertx_size_lookup[mbmi->tx_size];
+  }
+#endif
 }
 
 static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index a927823e0..8159dea1d 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -133,6 +133,15 @@ const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
   {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
 };
 
+#if CONFIG_SUPERTX
+const TX_SIZE uvsupertx_size_lookup[TX_SIZES] = {
+  TX_4X4,
+  TX_4X4,
+  TX_8X8,
+  TX_16X16
+};
+#endif
+
 // Generates 4 bit field in which each bit set to 1 represents
 // a blocksize partition  1111 means we split 64x64, 32x32, 16x16
 // and 8x8.  1000 means we just split the 64x64 to 32x32
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index f41962747..6a163f6fb 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -31,6 +31,9 @@ extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
 extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
 extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
 extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
+#if CONFIG_SUPERTX
+extern const TX_SIZE uvsupertx_size_lookup[TX_SIZES];
+#endif
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index b52be94ed..532a96e48 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -31,7 +31,7 @@ static const vp9_prob default_masked_interintra_prob[BLOCK_SIZES] = {
 #endif
 
 #if CONFIG_FILTERINTRA
-const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = {
+static const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = {
   // DC    V    H  D45 D135 D117 D153 D207  D63   TM
   {153, 171, 147, 150, 129, 101, 100, 153, 132, 111},
   {171, 173, 185, 131,  70,  53,  70, 148, 127, 114},
@@ -41,7 +41,17 @@ const vp9_prob default_filterintra_prob[TX_SIZES][INTRA_MODES] = {
 #endif
 
 #if CONFIG_EXT_TX
-const vp9_prob default_ext_tx_prob = 178;  // 0.6 = 153, 0.7 = 178, 0.8 = 204
+static const vp9_prob default_ext_tx_prob = 178;
 #endif
+
+#if CONFIG_SUPERTX
+static const vp9_prob default_supertx_prob[TX_SIZES] = {
+  255, 160, 160, 160
+};
+
+static const vp9_prob default_supertxsplit_prob[TX_SIZES] = {
+  255, 200, 200, 200
+};
+#endif
 
 const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = {
@@ -372,6 +382,10 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc) {
 #if CONFIG_EXT_TX
   fc->ext_tx_prob = default_ext_tx_prob;
 #endif
+#if CONFIG_SUPERTX
+  vp9_copy(fc->supertx_prob, default_supertx_prob);
+  vp9_copy(fc->supertxsplit_prob, default_supertxsplit_prob);
+#endif
 }
 
 const vp9_tree_index vp9_switchable_interp_tree
@@ -504,6 +518,18 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
 #if CONFIG_EXT_TX
   fc->ext_tx_prob = adapt_prob(pre_fc->ext_tx_prob, counts->ext_tx);
 #endif
+
+#if CONFIG_SUPERTX
+  for (i = 1; i < TX_SIZES; ++i) {
+    fc->supertx_prob[i] = adapt_prob(pre_fc->supertx_prob[i],
+                                     counts->supertx[i]);
+  }
+
+  for (i = 1; i < TX_SIZES; ++i) {
+    fc->supertxsplit_prob[i] = adapt_prob(pre_fc->supertxsplit_prob[i],
+                                          counts->supertxsplit[i]);
+  }
+#endif
 }
 
 static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index b8b0d4b84..7d5209e5a 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -67,6 +67,10 @@ typedef struct frame_contexts {
 #if CONFIG_EXT_TX
   vp9_prob ext_tx_prob;
 #endif
+#if CONFIG_SUPERTX
+  vp9_prob supertx_prob[TX_SIZES];
+  vp9_prob supertxsplit_prob[TX_SIZES];
+#endif
 } FRAME_CONTEXT;
 
 typedef struct {
@@ -101,6 +105,11 @@ typedef struct {
 #if CONFIG_EXT_TX
   unsigned int ext_tx[2];
 #endif
+#if CONFIG_SUPERTX
+  unsigned int supertx[TX_SIZES][2];
+  unsigned int supertxsplit[TX_SIZES][2];
+  unsigned int supertx_size[BLOCK_SIZES];
+#endif
 } FRAME_COUNTS;
 
 extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
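The probability tables above feed vp9_adapt_mode_probs() once per frame. As a reading aid, here is a minimal, self-contained sketch of the kind of count-driven merge adapt_prob() applies to supertx_prob[]/supertxsplit_prob[]; the saturation constants and helper names are assumptions for illustration, not code from this patch. Note also that both adaptation loops start at i = 1: no supertx flag is ever coded at TX_4X4, so index 0 stays pinned at 255.

```c
#include <assert.h>
#include <stdint.h>

/* Sketch of the count-driven merge adapt_prob() performs for the
 * supertx probability tables. COUNT_SAT / MAX_UPDATE_FACTOR are
 * assumed values for illustration only. */
#define COUNT_SAT 20
#define MAX_UPDATE_FACTOR 128

static uint8_t weighted_prob(uint8_t pre, uint8_t cur, unsigned factor) {
  return (uint8_t)((pre * (256 - factor) + cur * factor + 128) >> 8);
}

static uint8_t sketch_adapt_prob(uint8_t pre_prob, const unsigned ct[2]) {
  const unsigned den = ct[0] + ct[1];
  unsigned count, cur_prob;
  if (den == 0) return pre_prob;             /* flag never coded: no update */
  cur_prob = (255 * ct[0] + den / 2) / den;  /* P(flag == 0) in [0, 255] */
  if (cur_prob == 0) cur_prob = 1;
  count = den < COUNT_SAT ? den : COUNT_SAT;
  return weighted_prob(pre_prob, (uint8_t)cur_prob,
                       MAX_UPDATE_FACTOR * count / COUNT_SAT);
}

int main(void) {
  const unsigned ct[2] = {30, 10};  /* this frame coded mostly "no supertx" */
  assert(sketch_adapt_prob(160, ct) > 160);  /* prob of the 0 branch rises */
  return 0;
}
```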
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index efd0249f4..e88b759a3 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -206,6 +206,13 @@ static const int mode_lf_lut[MB_MODE_COUNT] = {
   1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
 };
 
+#if CONFIG_SUPERTX
+static int supertx_enabled_lpf(const MB_MODE_INFO *mbmi) {
+  return mbmi->tx_size >
+         MIN(b_width_log2(mbmi->sb_type), b_height_log2(mbmi->sb_type));
+}
+#endif
+
 static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
   int lvl;
 
@@ -572,6 +579,85 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
   *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
 }
 
+#if CONFIG_SUPERTX
+static void build_masks_supertx(const loop_filter_info_n *const lfi_n,
+                                const MODE_INFO *mi, const int shift_y,
+                                const int shift_uv,
+                                LOOP_FILTER_MASK *lfm) {
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
+  const BLOCK_SIZE block_size = 3 * (int)tx_size_y;
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
+  int i;
+
+  // If filter level is 0 we don't loop filter.
+  if (!filter_level) {
+    return;
+  } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
+    int index = shift_y;
+    for (i = 0; i < h; i++) {
+      vpx_memset(&lfm->lfl_y[index], filter_level, w);
+      index += 8;
+    }
+  }
+
+  // These set 1 in the current block size for the block size edges.
+  // For instance if the block size is 32x16, we'll set:
+  //    above = 1111
+  //            0000
+  //    and
+  //    left  = 1000
+  //          = 1000
+  // NOTE : In this example the low bit is left most ( 1000 ) is stored as
+  //        1, not 8...
+  //
+  // U and v set things on a 16 bit scale.
+  //
+  *above_y |= above_prediction_mask[block_size] << shift_y;
+  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
+  *left_y |= left_prediction_mask[block_size] << shift_y;
+  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
+
+  // If the block has no coefficients and is not intra we skip applying
+  // the loop filter on block edges.
+  if (mbmi->skip && is_inter_block(mbmi))
+    return;
+
+  // Here we are adding a mask for the transform size. The transform
+  // size mask is set to be correct for a 64x64 prediction block size. We
+  // mask to match the size of the block we are working on and then shift it
+  // into place..
+  *above_y |= (size_mask[block_size] &
+               above_64x64_txform_mask[tx_size_y]) << shift_y;
+  *above_uv |= (size_mask_uv[block_size] &
+                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+
+  *left_y |= (size_mask[block_size] &
+              left_64x64_txform_mask[tx_size_y]) << shift_y;
+  *left_uv |= (size_mask_uv[block_size] &
+               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+
+  // Here we are trying to determine what to do with the internal 4x4 block
+  // boundaries. These differ from the 4x4 boundaries on the outside edge of
+  // an 8x8 in that the internal ones can be skipped and don't depend on
+  // the prediction block size.
+  if (tx_size_y == TX_4X4)
+    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
+
+  if (tx_size_uv == TX_4X4)
+    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
+}
+#endif
+
 // This function does the same thing as the one above with the exception that
 // it only affects the y masks. It exists because for blocks < 16x16 in size,
 // we only update u and v masks on the first block.
@@ -615,6 +701,48 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
   *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
 }
 
+#if CONFIG_SUPERTX
+static void build_y_mask_supertx(const loop_filter_info_n *const lfi_n,
+                                 const MODE_INFO *mi, const int shift_y,
+                                 LOOP_FILTER_MASK *lfm) {
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+  const TX_SIZE tx_size_y = mbmi->tx_size;
+  const BLOCK_SIZE block_size = 3 * (int)tx_size_y;
+  const int filter_level = get_filter_level(lfi_n, mbmi);
+  uint64_t *const left_y = &lfm->left_y[tx_size_y];
+  uint64_t *const above_y = &lfm->above_y[tx_size_y];
+  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+  int i;
+
+  if (!filter_level) {
+    return;
+  } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
+    int index = shift_y;
+    for (i = 0; i < h; i++) {
+      vpx_memset(&lfm->lfl_y[index], filter_level, w);
+      index += 8;
+    }
+  }
+
+  *above_y |= above_prediction_mask[block_size] << shift_y;
+  *left_y |= left_prediction_mask[block_size] << shift_y;
+
+  if (mbmi->skip && is_inter_block(mbmi))
+    return;
+
+  *above_y |= (size_mask[block_size] &
+               above_64x64_txform_mask[tx_size_y]) << shift_y;
+
+  *left_y |= (size_mask[block_size] &
+              left_64x64_txform_mask[tx_size_y]) << shift_y;
+
+  if (tx_size_y == TX_4X4)
+    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
+}
+#endif
+
 // This function sets up the bit masks for the entire 64x64 region represented
 // by mi_row, mi_col.
 // TODO(JBB): This function only works for yv12.
@@ -650,6 +778,9 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                         cm->mi_rows - mi_row : MI_BLOCK_SIZE);
   const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
                         cm->mi_cols - mi_col : MI_BLOCK_SIZE);
+#if CONFIG_SUPERTX
+  int supertx;
+#endif
 
   vp9_zero(*lfm);
 
@@ -687,20 +818,43 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
           build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
           break;
         case BLOCK_32X16:
+#if CONFIG_SUPERTX
+          supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+          if (!supertx) {
+#endif
           build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
           if (mi_32_row_offset + 2 >= max_rows)
             continue;
           mip2 = mip + mode_info_stride * 2;
           build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+#if CONFIG_SUPERTX
+          } else {
+            build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          }
+#endif
           break;
         case BLOCK_16X32:
+#if CONFIG_SUPERTX
+          supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+          if (!supertx) {
+#endif
          build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
          if (mi_32_col_offset + 2 >= max_cols)
            continue;
          mip2 = mip + 2;
          build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+#if CONFIG_SUPERTX
+          } else {
+            build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          }
+#endif
          break;
        default:
+#if CONFIG_SUPERTX
+          if (mip[0]->mbmi.tx_size == TX_32X32) {
+            build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          } else {
+#endif
          for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
            const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
            const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
@@ -717,24 +871,56 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                break;
              case BLOCK_16X8:
+#if CONFIG_SUPERTX
+                supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+                if (!supertx) {
+#endif
                build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                if (mi_16_row_offset + 1 >= max_rows)
                  continue;
                mip2 = mip + mode_info_stride;
                build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
+#if CONFIG_SUPERTX
+                } else {
+                  build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                }
+#endif
                break;
              case BLOCK_8X16:
+#if CONFIG_SUPERTX
+                supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+                if (!supertx) {
+#endif
                build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                if (mi_16_col_offset +1 >= max_cols)
                  continue;
                mip2 = mip + 1;
                build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);
+#if CONFIG_SUPERTX
+                } else {
+                  build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                }
+#endif
                break;
              default: {
+#if CONFIG_SUPERTX
+                if (mip[0]->mbmi.tx_size == TX_16X16) {
+                  build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                } else {
+#endif
                const int shift_y = shift_32_y[idx_32] +
                                    shift_16_y[idx_16] +
                                    shift_8_y[0];
+#if CONFIG_SUPERTX
+                supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+                if (!supertx) {
+#endif
                build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+                } else {
+                  build_masks_supertx(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                }
+#endif
                mip += offset[0];
                for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                  const int shift_y = shift_32_y[idx_32] +
@@ -748,12 +934,26 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                                      shift_16_y[idx_16] +
                                      shift_8_y[idx_8];
                  if (mi_8_col_offset >= max_cols ||
                      mi_8_row_offset >= max_rows)
                    continue;
+#if CONFIG_SUPERTX
+                  supertx = supertx_enabled_lpf(&mip[0]->mbmi);
+                  if (!supertx)
+#endif
                  build_y_mask(lfi_n, mip[0], shift_y, lfm);
+#if CONFIG_SUPERTX
+                  else
+                    build_y_mask_supertx(lfi_n, mip[0], shift_y, lfm);
+#endif
                }
+#if CONFIG_SUPERTX
+                }
+#endif
                break;
              }
            }
          }
+#if CONFIG_SUPERTX
+          }
+#endif
          break;
      }
    }
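The loop filter has no access to a coded supertx flag, so supertx_enabled_lpf() re-derives it: a mode info belongs to a supertx block exactly when its tx_size exceeds the largest square transform that fits in its own sb_type. A standalone sketch with hand-expanded lookup values (the log2 arguments are simplified stand-ins for b_width_log2()/b_height_log2(), not the library functions):

```c
#include <assert.h>

/* Enum ordering follows vp9_enums.h: TX_4X4 = 0 ... TX_32X32 = 3. */
enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 };

static int min_blk_log2(int bw4_log2, int bh4_log2) {
  return bw4_log2 < bh4_log2 ? bw4_log2 : bh4_log2;
}

/* Mirrors the supertx_enabled()/supertx_enabled_lpf() test above. */
static int is_supertx(int tx_size, int bw4_log2, int bh4_log2) {
  return tx_size > min_blk_log2(bw4_log2, bh4_log2);
}

int main(void) {
  /* BLOCK_16X8: b_width_log2 = 2, b_height_log2 = 1, so the largest
   * square transform that fits is TX_8X8 (== 1). */
  assert(!is_supertx(TX_8X8, 2, 1));   /* normal coding */
  assert(is_supertx(TX_16X16, 2, 1));  /* 16x16 transform spans the split */
  return 0;
}
```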
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index e2b8768c3..ee973d2b6 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -164,7 +164,7 @@ static int get_masked_weight(int m) {
 }
 
 static int get_hard_mask(int m) {
-  return m > 0;
+  return (1 << MASK_WEIGHT_BITS) * (m > 0);
 }
 
 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
@@ -426,18 +426,62 @@ static void build_masked_compound(uint8_t *dst, int dst_stride,
   for (i = 0; i < h; ++i)
     for (j = 0; j < w; ++j) {
       int m = mask[i * 64 + j];
-      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
-                                 dst2[i * dst2_stride + j] *
-                                 ((1 << MASK_WEIGHT_BITS) - m) +
-                                 (1 << (MASK_WEIGHT_BITS - 1))) >>
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << MASK_WEIGHT_BITS) - m) +
+                                 (1 << (MASK_WEIGHT_BITS - 1))) >>
                                 MASK_WEIGHT_BITS;
     }
 }
+
+#if CONFIG_SUPERTX
+void generate_masked_weight_extend(int mask_index, int plane,
+                                   BLOCK_SIZE sb_type, int h, int w,
+                                   int mask_offset_x, int mask_offset_y,
+                                   uint8_t *mask, int stride) {
+  int i, j;
+  int subh = (plane ? 2 : 4) << b_height_log2(sb_type);
+  int subw = (plane ? 2 : 4) << b_width_log2(sb_type);
+  const int *a = get_mask_params(mask_index, sb_type, subh, subw);
+  if (!a) return;
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int x = (j - (a[2] * subw) / 4 - mask_offset_x);
+      int y = (i - (a[3] * subh) / 4 - mask_offset_y);
+      int m = a[0] * x + a[1] * y;
+      mask[i * stride + j] = get_masked_weight(m);
+    }
+}
+
+static void build_masked_compound_extend(uint8_t *dst, int dst_stride,
+                                         uint8_t *dst2, int dst2_stride,
+                                         int plane,
+                                         int mask_index, BLOCK_SIZE sb_type,
+                                         int mask_offset_x, int mask_offset_y,
+                                         int h, int w) {
+  int i, j;
+  uint8_t mask[4096];
+  generate_masked_weight_extend(mask_index, plane, sb_type, h, w,
+                                mask_offset_x, mask_offset_y, mask, 64);
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) {
+      int m = mask[i * 64 + j];
+      dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                 dst2[i * dst2_stride + j] *
+                                 ((1 << MASK_WEIGHT_BITS) - m) +
+                                 (1 << (MASK_WEIGHT_BITS - 1))) >>
+                                MASK_WEIGHT_BITS;
+    }
+}
+#endif
 #endif
 
 static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                    int bw, int bh,
                                    int x, int y, int w, int h,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                                   int mask_offset_x, int mask_offset_y,
+#endif
                                    int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const MODE_INFO *mi = xd->mi[0];
@@ -495,8 +539,14 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
       uint8_t tmp_dst[4096];
       inter_predictor(pre, pre_buf->stride, tmp_dst, 64,
                       subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys);
+#if !CONFIG_SUPERTX
       build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
                             mi->mbmi.mask_index, mi->mbmi.sb_type, h, w);
+#else
+      build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane,
+                                   mi->mbmi.mask_index, mi->mbmi.sb_type,
+                                   mask_offset_x, mask_offset_y, h, w);
+#endif
     } else {
 #endif
       inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
@@ -527,10 +577,18 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
       for (y = 0; y < num_4x4_h; ++y)
         for (x = 0; x < num_4x4_w; ++x)
           build_inter_predictors(xd, plane, i++, bw, bh,
-                                 4 * x, 4 * y, 4, 4, mi_x, mi_y);
+                                 4 * x, 4 * y, 4, 4,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                                 0, 0,
+#endif
+                                 mi_x, mi_y);
     } else {
       build_inter_predictors(xd, plane, 0, bw, bh,
-                             0, 0, bw, bh, mi_x, mi_y);
+                             0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                             0, 0,
+#endif
+                             mi_x, mi_y);
     }
   }
 }
@@ -558,6 +616,7 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
                            xd->plane[2].dst.stride, bsize);
 #endif
 }
+
 void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize) {
   build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
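One behavioral fix is folded into the first hunk above: the originally posted change read `return 1 << MASK_WEIGHT_BITS * (m > 0);`, but `*` binds tighter than `<<` in C, so that evaluates as `1 << (MASK_WEIGHT_BITS * (m > 0))` and returns 1 instead of 0 on the zero side of the mask. The parenthesized form restores the intended all-or-nothing weight. A minimal demonstration, assuming MASK_WEIGHT_BITS is 6 as the 64-valued weights in this file suggest:

```c
#include <assert.h>

#define MASK_WEIGHT_BITS 6  /* assumed; matches the [0, 64] weights above */

int main(void) {
  int m = -3;  /* a point on the "other" side of the mask boundary */
  /* '*' binds tighter than '<<': the unparenthesized form yields
   * 1 << (6 * 0) == 1 instead of 0 whenever m <= 0. */
  assert((1 << MASK_WEIGHT_BITS * (m > 0)) == 1);
  /* The parenthesized form used in the patch: full weight or zero. */
  assert(((1 << MASK_WEIGHT_BITS) * (m > 0)) == 0);
  m = 5;
  assert(((1 << MASK_WEIGHT_BITS) * (m > 0)) == 64);
  return 0;
}
```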
@@ -573,11 +632,287 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
 #endif
 }
 
+#if CONFIG_SUPERTX
+static int get_masked_weight_supertx(int m) {
+  #define SMOOTHER_LEN  32
+  static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
+      0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  1,  1,  1,
+      1,  1,  2,  2,  3,  4,  5,  6,
+      8,  9, 12, 14, 17, 21, 24, 28,
+     32,
+     36, 40, 43, 47, 50, 52, 55, 56,
+     58, 59, 60, 61, 62, 62, 63, 63,
+     63, 63, 63, 64, 64, 64, 64, 64,
+     64, 64, 64, 64, 64, 64, 64, 64,
+  };
+  if (m < -SMOOTHER_LEN)
+    return 0;
+  else if (m > SMOOTHER_LEN)
+    return 64;
+  else
+    return smoothfn[m + SMOOTHER_LEN];
+}
+
+static const uint8_t mask_8[8] = {
+  64, 64, 62, 52, 12, 2, 0, 0
+};
+
+static const uint8_t mask_16[16] = {
+  63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1
+};
+
+static const uint8_t mask_32[32] = {
+  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36,
+  28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static void generate_1dmask(int length, uint8_t *mask) {
+  int i;
+  switch (length) {
+    case 8:
+      vpx_memcpy(mask, mask_8, length);
+      break;
+    case 16:
+      vpx_memcpy(mask, mask_16, length);
+      break;
+    case 32:
+      vpx_memcpy(mask, mask_32, length);
+      break;
+    default:
+      assert(0);
+  }
+  if (length > 16) {
+    for (i = 0; i < length; ++i)
+      mask[i] = get_masked_weight_supertx(-1 * (2 * i - length + 1));
+  }
+}
+
+void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride,
+                                              uint8_t *dst2, int dst2_stride,
+                                              int plane,
+                                              int mi_row, int mi_col,
+                                              int mi_row_ori, int mi_col_ori,
+                                              BLOCK_SIZE bsize,
+                                              BLOCK_SIZE top_bsize,
+                                              PARTITION_TYPE partition) {
+  int i, j;
+  uint8_t mask[32];
+  int top_w = 4 << b_width_log2(top_bsize),
+      top_h = 4 << b_height_log2(top_bsize);
+  int w = 4 << b_width_log2(bsize), h = 4 << b_height_log2(bsize);
+  int w_offset = (mi_col - mi_col_ori) << 3,
+      h_offset = (mi_row - mi_row_ori) << 3;
+  int m;
+
+  if (plane > 0) {
+    top_w = top_w >> 1; top_h = top_h >> 1;
+    w = w >> 1; h = h >> 1;
+    w_offset = w_offset >> 1; h_offset = h_offset >> 1;
+  }
+  switch (partition) {
+    case PARTITION_HORZ:
+      generate_1dmask(h, mask + h_offset);
+      vpx_memset(mask, 64, h_offset);
+      vpx_memset(mask + h_offset + h, 0, top_h - h_offset - h);
+      break;
+    case PARTITION_VERT:
+      generate_1dmask(w, mask + w_offset);
+      vpx_memset(mask, 64, w_offset);
+      vpx_memset(mask + w_offset + w, 0, top_w - w_offset - w);
+      break;
+    default:
+      assert(0);
+  }
+  for (i = 0; i < top_h; ++i)
+    for (j = 0; j < top_w; ++j) {
+      m = partition == PARTITION_HORZ ? mask[i] : mask[j];
+      if (m == 64)
+        continue;
+      if (m == 0)
+        dst[i * dst_stride + j] = dst2[i * dst2_stride + j];
+      else
+        dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m +
+                                   dst2[i * dst2_stride + j] *
+                                   (64 - m) + 32) >> 6;
+    }
+}
+
+#if CONFIG_MASKED_INTERINTER
+void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          int mi_row_ori, int mi_col_ori,
+                                          BLOCK_SIZE bsize) {
+  int plane;
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4,
+                                 mask_offset_x, mask_offset_y, mi_x, mi_y);
+    } else {
+      build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+                             mask_offset_x, mask_offset_y, mi_x, mi_y);
+    }
+  }
+}
+#endif
+
+void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
+                                                  int mi_row, int mi_col,
+                                                  int mi_row_ori,
+                                                  int mi_col_ori,
+                                                  BLOCK_SIZE top_bsize,
+                                                  PARTITION_TYPE partition) {
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+#if CONFIG_MASKED_INTERINTER
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+#endif
+  uint8_t *orig_dst;
+  int orig_dst_stride;
+  int bw = 4 << b_width_log2(top_bsize);
+  int bh = 4 << b_height_log2(top_bsize);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32);
+
+  orig_dst = xd->plane[0].dst.buf;
+  orig_dst_stride = xd->plane[0].dst.stride;
+  build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                         mask_offset_x, mask_offset_y,
+#endif
+                         mi_x, mi_y);
+
+  xd->plane[0].dst.buf = tmp_buf;
+  xd->plane[0].dst.stride = 32;
+  switch (partition) {
+    case PARTITION_HORZ:
+      build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+      break;
+    case PARTITION_VERT:
+      build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+      break;
+    case PARTITION_SPLIT:
+      build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+      xd->plane[0].dst.buf = tmp_buf1;
+      xd->plane[0].dst.stride = 32;
+      build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+      xd->plane[0].dst.buf = tmp_buf2;
+      xd->plane[0].dst.stride = 32;
+      build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+      break;
+    default:
+      assert(0);
+  }
+
+  if (partition != PARTITION_SPLIT) {
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             partition);
+    xd->plane[0].dst.buf = orig_dst;
+    xd->plane[0].dst.stride = orig_dst_stride;
+  } else {
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_VERT);
+    vp9_build_masked_inter_predictor_complex(tmp_buf1, 32,
+                                             tmp_buf2, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_VERT);
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf1, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_HORZ);
+    xd->plane[0].dst.buf = orig_dst;
+    xd->plane[0].dst.stride = orig_dst_stride;
+  }
+}
+
+void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_MASKED_INTERINTER
+                                                   int mi_row, int mi_col,
+#endif
+                                                   int mi_row_ori,
+                                                   int mi_col_ori,
+                                                   BLOCK_SIZE top_bsize) {
+  int plane;
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+#if CONFIG_MASKED_INTERINTER
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+#endif
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                           mask_offset_x, mask_offset_y,
+#endif
+                           mi_x, mi_y);
+  }
+}
+#endif
+
 // TODO(jingning): This function serves as a placeholder for decoder prediction
 // using on demand border extension. It should be moved to /decoder/ directory.
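All of the supertx stitching funnels through vp9_build_masked_inter_predictor_complex(), whose inner loop is a fixed-point cross-fade: weight m lies in [0, 64], 64 keeps the first predictor, 0 takes the second, and `(p0 * m + p1 * (64 - m) + 32) >> 6` rounds to nearest. A toy, self-contained sketch of one row (mask values borrowed from mask_8 above; buffers are illustrative):

```c
#include <stdint.h>
#include <stdio.h>

/* Toy model of the inner blend in vp9_build_masked_inter_predictor_complex().
 * The +32 makes the >> 6 a round-to-nearest division by 64. */
static void blend_row(uint8_t *dst, const uint8_t *dst2,
                      const uint8_t *mask, int w) {
  int j;
  for (j = 0; j < w; ++j) {
    const int m = mask[j];
    if (m == 64) continue;                      /* dst already correct */
    if (m == 0) { dst[j] = dst2[j]; continue; } /* take dst2 verbatim  */
    dst[j] = (uint8_t)((dst[j] * m + dst2[j] * (64 - m) + 32) >> 6);
  }
}

int main(void) {
  uint8_t a[8] = {100, 100, 100, 100, 100, 100, 100, 100};
  const uint8_t b[8] = {20, 20, 20, 20, 20, 20, 20, 20};
  const uint8_t m[8] = {64, 64, 62, 52, 12, 2, 0, 0};  /* mask_8 above */
  int j;
  blend_row(a, b, m, 8);
  for (j = 0; j < 8; ++j)
    printf("%d ", a[j]);  /* 100 100 98 85 35 23 20 20: smooth ramp */
  printf("\n");
  return 0;
}
```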
 static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
                                        int bw, int bh,
                                        int x, int y, int w, int h,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                                       int mask_offset_x, int mask_offset_y,
+#endif
                                        int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const MODE_INFO *mi = xd->mi[0];
@@ -715,8 +1050,14 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
       uint8_t tmp_dst[4096];
       inter_predictor(buf_ptr, buf_stride, tmp_dst, 64,
                       subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys);
+#if !CONFIG_SUPERTX
       build_masked_compound(dst, dst_buf->stride, tmp_dst, 64,
                             mi->mbmi.mask_index, mi->mbmi.sb_type, h, w);
+#else
+      build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane,
+                                   mi->mbmi.mask_index, mi->mbmi.sb_type,
+                                   mask_offset_x, mask_offset_y, h, w);
+#endif
     } else {
 #endif
       inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
@@ -746,10 +1087,18 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
       for (y = 0; y < num_4x4_h; ++y)
         for (x = 0; x < num_4x4_w; ++x)
           dec_build_inter_predictors(xd, plane, i++, bw, bh,
-                                     4 * x, 4 * y, 4, 4, mi_x, mi_y);
+                                     4 * x, 4 * y, 4, 4,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                                     0, 0,
+#endif
+                                     mi_x, mi_y);
     } else {
       dec_build_inter_predictors(xd, plane, 0, bw, bh,
-                                 0, 0, bw, bh, mi_x, mi_y);
+                                 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_MASKED_INTERINTER
+                                 0, 0,
+#endif
+                                 mi_x, mi_y);
     }
   }
 #if CONFIG_INTERINTRA
@@ -763,6 +1112,174 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
 #endif
 }
 
+#if CONFIG_SUPERTX
+#if CONFIG_MASKED_INTERINTER
+void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+                                              int mi_row, int mi_col,
+                                              int mi_row_ori, int mi_col_ori,
+                                              BLOCK_SIZE bsize) {
+  int plane;
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+      int i = 0, x, y;
+      assert(bsize == BLOCK_8X8);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4,
+                                     mask_offset_x, mask_offset_y, mi_x, mi_y);
+    } else {
+      dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+                                 mask_offset_x, mask_offset_y, mi_x, mi_y);
+    }
+  }
+}
+#endif
+
+void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
+                                                      int mi_row, int mi_col,
+                                                      int mi_row_ori,
+                                                      int mi_col_ori,
+                                                      BLOCK_SIZE top_bsize,
+                                                      PARTITION_TYPE partition) {
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+#if CONFIG_MASKED_INTERINTER
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+#endif
+  uint8_t *orig_dst;
+  int orig_dst_stride;
+  int bw = 4 << b_width_log2(top_bsize);
+  int bh = 4 << b_height_log2(top_bsize);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 32 * 32);
+
+  orig_dst = xd->plane[0].dst.buf;
+  orig_dst_stride = xd->plane[0].dst.stride;
+  dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                             mask_offset_x, mask_offset_y,
+#endif
+                             mi_x, mi_y);
+
+  xd->plane[0].dst.buf = tmp_buf;
+  xd->plane[0].dst.stride = 32;
+  switch (partition) {
+    case PARTITION_HORZ:
+      dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                                 mask_offset_x, mask_offset_y,
+#endif
+                                 mi_x, mi_y);
+      break;
+    case PARTITION_VERT:
+      dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                                 mask_offset_x, mask_offset_y,
+#endif
+                                 mi_x, mi_y);
+      break;
+    case PARTITION_SPLIT:
+      dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                                 mask_offset_x, mask_offset_y,
+#endif
+                                 mi_x, mi_y);
+      xd->plane[0].dst.buf = tmp_buf1;
+      xd->plane[0].dst.stride = 32;
+      dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                                 mask_offset_x, mask_offset_y,
+#endif
+                                 mi_x, mi_y);
+      xd->plane[0].dst.buf = tmp_buf2;
+      xd->plane[0].dst.stride = 32;
+      dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                                 mask_offset_x, mask_offset_y,
+#endif
+                                 mi_x, mi_y);
+      break;
+    default:
+      assert(0);
+  }
+
+  if (partition != PARTITION_SPLIT) {
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             partition);
+    xd->plane[0].dst.buf = orig_dst;
+    xd->plane[0].dst.stride = orig_dst_stride;
+  } else {
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_VERT);
+    vp9_build_masked_inter_predictor_complex(tmp_buf1, 32,
+                                             tmp_buf2, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_VERT);
+    vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride,
+                                             tmp_buf1, 32,
+                                             0, mi_row, mi_col,
+                                             mi_row_ori, mi_col_ori,
+                                             BLOCK_8X8, top_bsize,
+                                             PARTITION_HORZ);
+    xd->plane[0].dst.buf = orig_dst;
+    xd->plane[0].dst.stride = orig_dst_stride;
+  }
+}
+
+void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_MASKED_INTERINTER
+                                                       int mi_row, int mi_col,
+#endif
+                                                       int mi_row_ori,
+                                                       int mi_col_ori,
+                                                       BLOCK_SIZE top_bsize) {
+  int plane;
+  const int mi_x = mi_col_ori * MI_SIZE;
+  const int mi_y = mi_row_ori * MI_SIZE;
+#if CONFIG_MASKED_INTERINTER
+  const int mask_offset_x = (mi_col - mi_col_ori) * MI_SIZE;
+  const int mask_offset_y = (mi_row - mi_row_ori) * MI_SIZE;
+#endif
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(top_bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
+
+    dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_MASKED_INTERINTER
+                               mask_offset_x, mask_offset_y,
+#endif
+                               mi_x, mi_y);
+  }
+}
+#endif
+
 void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col) {
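Before the header changes, it is worth spelling out the coordinate convention the *_extend() entry points introduce: (mi_row, mi_col) addresses the sub-block actually being predicted, while (mi_row_ori, mi_col_ori) is the top-left of the enclosing supertx block; their difference, scaled by MI_SIZE (8 pels), becomes mask_offset_x/y so that every sub-block samples one mask anchored at the supertx block. A small sketch of that arithmetic (illustrative only, not a libvpx function):

```c
#include <assert.h>

#define MI_SIZE 8  /* one mode-info unit is 8 pels */

/* Mirrors the mask_offset_x/y computation in the *_extend() functions. */
static void mask_offsets(int mi_row, int mi_col,
                         int mi_row_ori, int mi_col_ori,
                         int *off_x, int *off_y) {
  *off_x = (mi_col - mi_col_ori) * MI_SIZE;
  *off_y = (mi_row - mi_row_ori) * MI_SIZE;
}

int main(void) {
  int off_x, off_y;
  /* Second 8x8 of a 16x16 supertx block, one MI to the right of its
   * top-left corner: the shared mask is sampled 8 pels further in x. */
  mask_offsets(4, 5, 4, 4, &off_x, &off_y);
  assert(off_x == 8 && off_y == 0);
  return 0;
}
```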
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 34733c7ae..00edacaa9 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -72,6 +72,53 @@ void vp9_generate_hard_mask(int mask_index, BLOCK_SIZE sb_type, int h, int w,
                             uint8_t *mask, int stride);
 #endif
 
+#if CONFIG_SUPERTX
+void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
+                                                  int mi_row, int mi_col,
+                                                  int mi_row_ori,
+                                                  int mi_col_ori,
+                                                  BLOCK_SIZE top_bsize,
+                                                  PARTITION_TYPE partition);
+void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_MASKED_INTERINTER
+                                                   int mi_row, int mi_col,
+#endif
+                                                   int mi_row_ori,
+                                                   int mi_col_ori,
+                                                   BLOCK_SIZE top_bsize);
+void vp9_build_masked_inter_predictor_complex(uint8_t *dst, int dst_stride,
+                                              uint8_t *dst2, int dst2_stride,
+                                              int plane,
+                                              int mi_row, int mi_col,
+                                              int mi_row_ori, int mi_col_ori,
+                                              BLOCK_SIZE bsize,
+                                              BLOCK_SIZE top_bsize,
+                                              PARTITION_TYPE partition);
+void vp9_dec_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd,
+                                                      int mi_row, int mi_col,
+                                                      int mi_row_ori,
+                                                      int mi_col_ori,
+                                                      BLOCK_SIZE top_bsize,
+                                                      PARTITION_TYPE p);
+void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_MASKED_INTERINTER
+                                                       int mi_row, int mi_col,
+#endif
+                                                       int mi_row_ori,
+                                                       int mi_col_ori,
+                                                       BLOCK_SIZE top_bsize);
+#if CONFIG_MASKED_INTERINTER
+void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          int mi_row_ori, int mi_col_ori,
+                                          BLOCK_SIZE bsize);
+void vp9_dec_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+                                              int mi_row, int mi_col,
+                                              int mi_row_ori, int mi_col_ori,
+                                              BLOCK_SIZE bsize);
+#endif
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index fb14b6822..c9c9ce81f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -335,6 +335,84 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   return &xd->mi[0]->mbmi;
 }
 
+#if CONFIG_SUPERTX
+static void set_offsets_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                               const TileInfo *const tile,
+                               BLOCK_SIZE top_bsize,
+                               int mi_row, int mi_col,
+                               int mi_row_ori, int mi_col_ori) {
+  const int bw = num_8x8_blocks_wide_lookup[top_bsize];
+  const int bh = num_8x8_blocks_high_lookup[top_bsize];
+  const int offset = mi_row * cm->mi_stride + mi_col;
+
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+
+  set_mi_row_col(xd, tile, mi_row_ori, bh, mi_col_ori, bw,
+                 cm->mi_rows, cm->mi_cols);
+}
+
+static void set_mb_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                           const TileInfo *const tile,
+                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+  const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+  const int offset = mi_row * cm->mi_stride + mi_col;
+  int x, y;
+
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+  xd->mi[0]->mbmi.sb_type = bsize;
+  for (y = 0; y < y_mis; ++y)
+    for (x = !y; x < x_mis; ++x)
+      xd->mi[y * cm->mi_stride + x] = xd->mi[0];
+
+  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+}
+
+static void set_offsets_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                 const TileInfo *const tile,
+                                 BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int offset = mi_row * cm->mi_stride + mi_col;
+
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+
+  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+}
+
+static void set_param_topblock(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                               BLOCK_SIZE bsize, int mi_row, int mi_col,
+#if CONFIG_EXT_TX
+                               int txfm,
+#endif
+                               int skip) {
+  const int bw = num_8x8_blocks_wide_lookup[bsize];
+  const int bh = num_8x8_blocks_high_lookup[bsize];
+  const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+  const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+  const int offset = mi_row * cm->mi_stride + mi_col;
+  int x, y;
+
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+
+  for (y = 0; y < y_mis; ++y)
+    for (x = 0; x < x_mis; ++x) {
+      xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip;
+#if CONFIG_EXT_TX
+      xd->mi[y * cm->mi_stride + x]->mbmi.ext_txfrm = txfm;
+#endif
+    }
+}
+#endif
+
 static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                     int idx, int mi_row, int mi_col) {
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -348,14 +426,246 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   xd->corrupted |= ref_buffer->buf->corrupted;
 }
 
+#if CONFIG_SUPERTX
+static void dec_predict_b_extend(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                 const TileInfo *const tile,
+                                 int mi_row, int mi_col,
+                                 int mi_row_ori, int mi_col_ori,
+                                 BLOCK_SIZE top_bsize) {
+  set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col,
+                     mi_row_ori, mi_col_ori);
+
+  set_ref(cm, xd, 0, mi_row_ori, mi_col_ori);
+  if (has_second_ref(&xd->mi[0]->mbmi))
+    set_ref(cm, xd, 1, mi_row_ori, mi_col_ori);
+  xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize);
+#if !CONFIG_MASKED_INTERINTER
+  vp9_dec_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, top_bsize);
+#else
+  vp9_dec_build_inter_predictors_sb_extend(xd, mi_row, mi_col,
+                                           mi_row_ori, mi_col_ori, top_bsize);
+#endif
+}
+
+static void dec_predict_b_sub8x8_extend(VP9_COMMON *const cm,
+                                        MACROBLOCKD *const xd,
+                                        const TileInfo *const tile,
+                                        int mi_row, int mi_col,
+                                        int mi_row_ori, int mi_col_ori,
+                                        BLOCK_SIZE top_bsize,
+                                        PARTITION_TYPE partition) {
+  set_offsets_extend(cm, xd, tile, top_bsize, mi_row, mi_col,
+                     mi_row_ori, mi_col_ori);
+
+  set_ref(cm, xd, 0, mi_row_ori, mi_col_ori);
+  if (has_second_ref(&xd->mi[0]->mbmi))
+    set_ref(cm, xd, 1, mi_row_ori, mi_col_ori);
+  xd->mi[0]->mbmi.tx_size = b_width_log2(top_bsize);
+  vp9_dec_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col,
+                                                   mi_row_ori, mi_col_ori,
+                                                   top_bsize, partition);
+  vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(xd,
+#if CONFIG_MASKED_INTERINTER
+                                                    mi_row, mi_col,
+#endif
+                                                    mi_row_ori, mi_col_ori,
+                                                    top_bsize);
+}
+
+static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                   const TileInfo *const tile,
+                                   int mi_row, int mi_col,
+                                   int mi_row_ori, int mi_col_ori,
+                                   BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+                                   uint8_t *dst_buf[3], int dst_stride[3]) {
+  const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+  MB_MODE_INFO *mbmi;
+  int i, offset = mi_row * cm->mi_stride + mi_col;
+
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAX_MB_PLANE * 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAX_MB_PLANE * 32 * 32);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, MAX_MB_PLANE * 32 * 32);
+  uint8_t *dst_buf1[3] = {tmp_buf1, tmp_buf1 + 32 * 32, tmp_buf1 + 2 * 32 * 32};
+  uint8_t *dst_buf2[3] = {tmp_buf2, tmp_buf2 + 32 * 32, tmp_buf2 + 2 * 32 * 32};
+  uint8_t *dst_buf3[3] = {tmp_buf3, tmp_buf3 + 32 * 32, tmp_buf3 + 2 * 32 * 32};
+  int dst_stride1[3] = {32, 32, 32};
+  int dst_stride2[3] = {32, 32, 32};
+  int dst_stride3[3] = {32, 32, 32};
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+  mbmi = &xd->mi[0]->mbmi;
+  partition = partition_lookup[bsl][mbmi->sb_type];
+  subsize = get_subsize(bsize, partition);
+
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    xd->plane[i].dst.buf = dst_buf[i];
+    xd->plane[i].dst.stride = dst_stride[i];
+  }
+
+  switch (partition) {
+    case PARTITION_NONE:
+      assert(bsize < top_bsize);
+      dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori,
+                           mi_col_ori, top_bsize);
+      break;
+    case PARTITION_HORZ:
+      if (bsize > BLOCK_8X8) {
+        dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori,
+                             mi_col_ori, top_bsize);
+      } else {
+        dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
+                                    mi_row_ori, mi_col_ori,
+                                    top_bsize, partition);
+      }
+      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+        for (i = 0; i < MAX_MB_PLANE; i++) {
+          xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32;
+          xd->plane[i].dst.stride = 32;
+        }
+        dec_predict_b_extend(cm, xd, tile, mi_row + hbs, mi_col,
+                             mi_row_ori, mi_col_ori, top_bsize);
+        for (i = 0; i < MAX_MB_PLANE; i++) {
+          xd->plane[i].dst.buf = dst_buf[i];
+          xd->plane[i].dst.stride = dst_stride[i];
+          vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
+                                                   dst_buf1[i], dst_stride1[i],
+                                                   i,
+                                                   mi_row, mi_col,
+                                                   mi_row_ori, mi_col_ori,
+                                                   bsize, top_bsize,
+                                                   PARTITION_HORZ);
+        }
+      }
+      break;
+    case PARTITION_VERT:
+      if (bsize > BLOCK_8X8) {
+        dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori,
+                             mi_col_ori, top_bsize);
+      } else {
+        dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
+                                    mi_row_ori, mi_col_ori,
+                                    top_bsize, partition);
+      }
+      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+        for (i = 0; i < MAX_MB_PLANE; i++) {
+          xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32;
+          xd->plane[i].dst.stride = 32;
+        }
+        dec_predict_b_extend(cm, xd, tile, mi_row, mi_col + hbs, mi_row_ori,
+                             mi_col_ori, top_bsize);
+        for (i = 0; i < MAX_MB_PLANE; i++) {
+          xd->plane[i].dst.buf = dst_buf[i];
+          xd->plane[i].dst.stride = dst_stride[i];
+          vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
+                                                   dst_buf1[i], dst_stride1[i],
+                                                   i,
+                                                   mi_row, mi_col,
+                                                   mi_row_ori, mi_col_ori,
+                                                   bsize, top_bsize,
+                                                   PARTITION_VERT);
+        }
+      }
+      break;
+    case PARTITION_SPLIT:
+      if (bsize == BLOCK_8X8) {
+        dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col,
+                                    mi_row_ori, mi_col_ori,
+                                    top_bsize, partition);
+      } else {
+        dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col,
+                               mi_row_ori, mi_col_ori, subsize, top_bsize,
+                               dst_buf, dst_stride);
+        if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+          dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col + hbs,
+                                 mi_row_ori, mi_col_ori, subsize, top_bsize,
+                                 dst_buf1, dst_stride1);
+        if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+          dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col,
+                                 mi_row_ori, mi_col_ori, subsize, top_bsize,
+                                 dst_buf2, dst_stride2);
+        if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+          dec_predict_sb_complex(cm, xd, tile, mi_row + hbs, mi_col + hbs,
+                                 mi_row_ori, mi_col_ori, subsize, top_bsize,
+                                 dst_buf3, dst_stride3);
+        for (i = 0; i < MAX_MB_PLANE; i++) {
+          if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+            vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i],
+                                                     dst_buf1[i],
+                                                     dst_stride1[i],
+                                                     i, mi_row, mi_col,
+                                                     mi_row_ori, mi_col_ori,
+                                                     bsize, top_bsize,
+                                                     PARTITION_VERT);
+            if (mi_row + hbs < cm->mi_rows) {
+              vp9_build_masked_inter_predictor_complex(dst_buf2[i],
+                                                       dst_stride2[i],
+                                                       dst_buf3[i],
+                                                       dst_stride3[i],
+                                                       i, mi_row, mi_col,
+                                                       mi_row_ori, mi_col_ori,
+                                                       bsize, top_bsize,
+                                                       PARTITION_VERT);
+              vp9_build_masked_inter_predictor_complex(dst_buf[i],
+                                                       dst_stride[i],
+                                                       dst_buf2[i],
+                                                       dst_stride2[i],
+                                                       i, mi_row, mi_col,
+                                                       mi_row_ori, mi_col_ori,
+                                                       bsize, top_bsize,
+                                                       PARTITION_HORZ);
+            }
+          } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
+            vp9_build_masked_inter_predictor_complex(dst_buf[i],
+                                                     dst_stride[i],
+                                                     dst_buf2[i],
+                                                     dst_stride2[i],
+                                                     i, mi_row, mi_col,
+                                                     mi_row_ori, mi_col_ori,
+                                                     bsize, top_bsize,
+                                                     PARTITION_HORZ);
+          }
+        }
+      }
+      break;
+    default:
+      assert(0);
+  }
+}
+#endif
+
 static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                          const TileInfo *const tile,
+#if CONFIG_SUPERTX
+                         int supertx_enabled,
+#endif
                          int mi_row, int mi_col,
                          vp9_reader *r, BLOCK_SIZE bsize) {
   const int less8x8 = bsize < BLOCK_8X8;
+#if !CONFIG_SUPERTX
   MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
-  vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);
+#else
+  MB_MODE_INFO *mbmi;
+  if (!supertx_enabled) {
+    mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
+  } else {
+    set_mb_offsets(cm, xd, tile, bsize, mi_row, mi_col);
+  }
+#endif
+  vp9_read_mode_info(cm, xd, tile,
+#if CONFIG_SUPERTX
+                     supertx_enabled,
+#endif
+                     mi_row, mi_col, r);
+#if CONFIG_SUPERTX
+  if (!supertx_enabled) {
+#endif
 
   if (less8x8)
     bsize = BLOCK_8X8;
@@ -389,6 +699,9 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
       mbmi->skip = 1;  // skip loopfilter
     }
   }
+#if CONFIG_SUPERTX
+  }
+#endif
 
   xd->corrupted |= vp9_reader_has_error(r);
 }
@@ -419,45 +732,161 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
 
 static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                              const TileInfo *const tile,
+#if CONFIG_SUPERTX
+                             int read_token, int supertx_enabled,
+#endif
                              int mi_row, int mi_col,
                              vp9_reader* r, BLOCK_SIZE bsize) {
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
+#if CONFIG_SUPERTX
+  int skip = 0;
+#if CONFIG_EXT_TX
+  int txfm = 0;
+#endif
+#endif
 
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
   partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
   subsize = get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+  if (cm->frame_type != KEY_FRAME &&
+      partition != PARTITION_NONE &&
+      bsize <= BLOCK_32X32 &&
+      !supertx_enabled) {
+    TX_SIZE supertx_size = b_width_log2(bsize);
+    if (partition == PARTITION_SPLIT) {
+      supertx_enabled = vp9_read(r, cm->fc.supertxsplit_prob[supertx_size]);
+      cm->counts.supertxsplit[supertx_size][supertx_enabled]++;
+    } else {
+      supertx_enabled = vp9_read(r, cm->fc.supertx_prob[supertx_size]);
+      cm->counts.supertx[supertx_size][supertx_enabled]++;
+    }
+  }
+  if (supertx_enabled && read_token) {
+    int offset = mi_row * cm->mi_stride + mi_col;
+    xd->mi = cm->mi_grid_visible + offset;
+    xd->mi[0] = &cm->mi[offset];
+    set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize],
+                   mi_col, num_8x8_blocks_wide_lookup[bsize],
+                   cm->mi_rows, cm->mi_cols);
+    set_skip_context(xd, mi_row, mi_col);
+    // Here we assume mbmi->segment_id = 0
+    skip = read_skip(cm, xd, 0, r);
+    if (skip)
+      reset_skip_context(xd, bsize);
+#if CONFIG_EXT_TX
+    if (bsize <= BLOCK_16X16 && !skip) {
+      txfm = vp9_read(r, cm->fc.ext_tx_prob);
+      if (!cm->frame_parallel_decoding_mode)
+        ++cm->counts.ext_tx[txfm];
+    }
+#endif
+  }
+#endif
   if (subsize < BLOCK_8X8) {
-    decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
+    decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                 supertx_enabled,
+#endif
+                 mi_row, mi_col, r, subsize);
   } else {
     switch (partition) {
      case PARTITION_NONE:
-        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                     supertx_enabled,
+#endif
+                     mi_row, mi_col, r, subsize);
        break;
      case PARTITION_HORZ:
-        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                     supertx_enabled,
+#endif
+                     mi_row, mi_col, r, subsize);
        if (mi_row + hbs < cm->mi_rows)
-          decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
+          decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                       supertx_enabled,
+#endif
+                       mi_row + hbs, mi_col, r, subsize);
        break;
      case PARTITION_VERT:
-        decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                     supertx_enabled,
+#endif
+                     mi_row, mi_col, r, subsize);
        if (mi_col + hbs < cm->mi_cols)
-          decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
+          decode_block(cm, xd, tile,
+#if CONFIG_SUPERTX
+                       supertx_enabled,
+#endif
+                       mi_row, mi_col + hbs, r, subsize);
        break;
      case PARTITION_SPLIT:
-        decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize);
-        decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
-        decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
-        decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
+        decode_partition(cm, xd, tile,
+#if CONFIG_SUPERTX
+                         !supertx_enabled, supertx_enabled,
+#endif
+                         mi_row, mi_col, r, subsize);
+        decode_partition(cm, xd, tile,
+#if CONFIG_SUPERTX
+                         !supertx_enabled, supertx_enabled,
+#endif
+                         mi_row, mi_col + hbs, r, subsize);
+        decode_partition(cm, xd, tile,
+#if CONFIG_SUPERTX
+                         !supertx_enabled, supertx_enabled,
+#endif
+                         mi_row + hbs, mi_col, r, subsize);
+        decode_partition(cm, xd, tile,
+#if CONFIG_SUPERTX
+                         !supertx_enabled, supertx_enabled,
+#endif
+                         mi_row + hbs, mi_col + hbs, r, subsize);
        break;
      default:
        assert(0 && "Invalid partition type");
    }
  }
 
+#if CONFIG_SUPERTX
+  if (supertx_enabled && read_token) {
+    uint8_t *dst_buf[3];
+    int dst_stride[3], i;
+
+    vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+    for (i = 0; i < MAX_MB_PLANE; i++) {
+      dst_buf[i] = xd->plane[i].dst.buf;
+      dst_stride[i] = xd->plane[i].dst.stride;
+    }
+    dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, mi_row, mi_col,
+                           bsize, bsize, dst_buf, dst_stride);
+
+    if (!skip) {
+      int eobtotal = 0;
+      struct inter_args arg = { cm, xd, r, &eobtotal };
+      set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col);
+#if CONFIG_EXT_TX
+      xd->mi[0]->mbmi.ext_txfrm = txfm;
+#endif
+      vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
+      if (!(subsize < BLOCK_8X8) && eobtotal == 0)
+        skip = 1;
+    }
+    set_param_topblock(cm, xd, bsize, mi_row, mi_col,
+#if CONFIG_EXT_TX
+                       txfm,
+#endif
+                       skip);
+  }
+#endif
+
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
@@ -855,7 +1284,11 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
       vp9_zero(tile_data->xd.left_seg_context);
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += MI_BLOCK_SIZE) {
-        decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col,
+        decode_partition(tile_data->cm, &tile_data->xd, &tile,
+#if CONFIG_SUPERTX
+                         1, 0,
+#endif
+                         mi_row, mi_col,
                          &tile_data->bit_reader, BLOCK_64X64);
       }
     }
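To summarize the decoder-side signaling just added: decode_partition() reads at most one supertx flag per partition node, and only on inter frames, for (square) nodes no larger than 32x32, when the node is actually split and no ancestor already enabled supertx; PARTITION_SPLIT nodes draw from supertxsplit_prob[], the others from supertx_prob[]. A compact, self-contained restatement of that gate (enum values mirror vp9_enums.h; this is a sketch, not decoder code):

```c
#include <assert.h>

enum { PARTITION_NONE, PARTITION_HORZ, PARTITION_VERT, PARTITION_SPLIT };
enum { KEY_FRAME, INTER_FRAME };

/* b_width_log2(bsize) doubles as the supertx TX_SIZE for square partition
 * nodes (1 = TX_8X8 ... 3 = TX_32X32), so "node no larger than 32x32"
 * becomes b_width_log2_bsize <= 3. */
static int supertx_flag_coded(int frame_type, int partition,
                              int b_width_log2_bsize, int supertx_already) {
  return frame_type != KEY_FRAME &&
         partition != PARTITION_NONE &&
         b_width_log2_bsize <= 3 &&
         !supertx_already;
}

int main(void) {
  /* 16x16 node split horizontally on an inter frame:
   * one flag is read, from supertx_prob[TX_16X16]. */
  assert(supertx_flag_coded(INTER_FRAME, PARTITION_HORZ, 2, 0));
  /* A PARTITION_SPLIT node would instead use supertxsplit_prob[]. */
  assert(supertx_flag_coded(INTER_FRAME, PARTITION_SPLIT, 2, 0));
  /* Inside an already-enabled supertx block, nothing further is coded. */
  assert(!supertx_flag_coded(INTER_FRAME, PARTITION_SPLIT, 2, 1));
  return 0;
}
```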
@@ -909,6 +1342,9 @@ static int tile_worker_hook(void *arg1, void *arg2) {
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
          mi_col += MI_BLOCK_SIZE) {
       decode_partition(tile_data->cm, &tile_data->xd, tile,
+#if CONFIG_SUPERTX
+                       1, 0,
+#endif
                        mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
     }
   }
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 15309d2a0..549d08450 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -144,7 +144,11 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   return segment_id;
 }
 
+#if !CONFIG_SUPERTX
 static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
+#else
+int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
+#endif
                      int segment_id, vp9_reader *r) {
   if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
@@ -550,6 +554,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
                                        MACROBLOCKD *const xd,
                                        const TileInfo *const tile,
                                        MODE_INFO *const mi,
+#if CONFIG_SUPERTX && CONFIG_EXT_TX
+                                       int supertx_enabled,
+#endif
                                        int mi_row, int mi_col, vp9_reader *r) {
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -564,6 +571,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
 
 #if CONFIG_EXT_TX
   if (mbmi->tx_size <= TX_16X16 && bsize >= BLOCK_8X8 &&
+#if CONFIG_SUPERTX
+      !supertx_enabled &&
+#endif
       !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
       !mbmi->skip) {
     mbmi->ext_txfrm = vp9_read(r, cm->fc.ext_tx_prob);
@@ -700,6 +710,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm,
 static void read_inter_frame_mode_info(VP9_COMMON *const cm,
                                        MACROBLOCKD *const xd,
                                        const TileInfo *const tile,
+#if CONFIG_SUPERTX
+                                       int supertx_enabled,
+#endif
                                        int mi_row, int mi_col, vp9_reader *r) {
   MODE_INFO *const mi = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -707,23 +720,46 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm,
   mbmi->mv[0].as_int = 0;
   mbmi->mv[1].as_int = 0;
 
+#if CONFIG_SUPERTX
+  if (!supertx_enabled) {
+#endif
   mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
   mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
   mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type,
                                !mbmi->skip || !inter_block, r);
+#if CONFIG_SUPERTX
+  } else {
+    const int ctx = vp9_get_intra_inter_context(xd);
+    mbmi->segment_id = 0;
+    inter_block = 1;
+    if (!cm->frame_parallel_decoding_mode)
+      ++cm->counts.intra_inter[ctx][1];
+  }
+#endif
 
   if (inter_block)
-    read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r);
+    read_inter_block_mode_info(cm, xd, tile, mi,
+#if CONFIG_SUPERTX && CONFIG_EXT_TX
+                               supertx_enabled,
+#endif
+                               mi_row, mi_col, r);
   else
     read_intra_block_mode_info(cm, mi, r);
 }
 
 void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         const TileInfo *const tile,
+#if CONFIG_SUPERTX
+                        int supertx_enabled,
+#endif
                         int mi_row, int mi_col, vp9_reader *r) {
   if (frame_is_intra_only(cm))
     read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
   else
-    read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r);
+    read_inter_frame_mode_info(cm, xd, tile,
+#if CONFIG_SUPERTX
+                               supertx_enabled,
+#endif
+                               mi_row, mi_col, r);
 }
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
index 7394b62b4..baebb5dce 100644
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -21,8 +21,16 @@ struct TileInfo;
 
 void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         const struct TileInfo *const tile,
+#if CONFIG_SUPERTX
+                        int supertx_enabled,
+#endif
                         int mi_row, int mi_col, vp9_reader *r);
 
+#if CONFIG_SUPERTX
+int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
+              int segment_id, vp9_reader *r);
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index cbc12a584..8f3620136 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -39,6 +39,18 @@ static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS];
 static struct vp9_token partition_encodings[PARTITION_TYPES];
 static struct vp9_token inter_mode_encodings[INTER_MODES];
 
+#if CONFIG_SUPERTX
+static int vp9_check_supertx(VP9_COMMON *cm, int mi_row, int mi_col,
+                             BLOCK_SIZE bsize) {
+  MODE_INFO **mi;
+
+  mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+
+  return mi[0]->mbmi.tx_size == b_width_log2(bsize) &&
+         mi[0]->mbmi.sb_type < bsize;
+}
+#endif
+
 void vp9_entropy_mode_init() {
   vp9_tokens_from_tree(intra_mode_encodings, vp9_intra_mode_tree);
   vp9_tokens_from_tree(switchable_interp_encodings, vp9_switchable_interp_tree);
@@ -225,6 +237,9 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) {
 }
 
 static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
+#if CONFIG_SUPERTX
+                                int supertx_enabled,
+#endif
                                 vp9_writer *w) {
   VP9_COMMON *const cm = &cpi->common;
   const nmv_context *nmvc = &cm->fc.nmvc;
@@ -252,12 +267,28 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
     }
   }
 
+#if CONFIG_SUPERTX
+  if (!supertx_enabled)
+#endif
   skip = write_skip(cpi, segment_id, mi, w);
+#if CONFIG_SUPERTX
+  else
+    skip = mbmi->skip;
+#endif
 
+#if CONFIG_SUPERTX
+  if (!supertx_enabled) {
+#endif
   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
     vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
+#if CONFIG_SUPERTX
+  }
+#endif
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_SUPERTX
+      !supertx_enabled &&
+#endif
       !(is_inter &&
         (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) {
     write_selected_tx_size(cpi, mbmi->tx_size, bsize, w);
@@ -305,6 +336,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
 #if CONFIG_EXT_TX
     if (mbmi->tx_size <= TX_16X16 && bsize >= BLOCK_8X8 &&
+#if CONFIG_SUPERTX
+        !supertx_enabled &&
+#endif
        !mbmi->skip &&
        !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      vp9_write(w, mbmi->ext_txfrm, cm->fc.ext_tx_prob);
@@ -451,6 +485,9 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8,
 
 static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
                           vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+#if CONFIG_SUPERTX
+                          int supertx_enabled,
+#endif
                           int mi_row, int mi_col) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -466,11 +503,21 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
   if (frame_is_intra_only(cm)) {
     write_mb_modes_kf(cpi, xd->mi, w);
   } else {
+#if CONFIG_SUPERTX
+    pack_inter_mode_mvs(cpi, m, supertx_enabled, w);
+#else
     pack_inter_mode_mvs(cpi, m, w);
+#endif
   }
 
+#if CONFIG_SUPERTX
+  if (!supertx_enabled) {
+#endif
   assert(*tok < tok_end);
   pack_mb_tokens(w, tok, tok_end);
+#if CONFIG_SUPERTX
+  }
+#endif
 }
 
 static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
@@ -497,6 +544,9 @@ static void write_modes_sb(VP9_COMP *cpi,
                            const TileInfo *const tile,
                            vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+#if CONFIG_SUPERTX
+                           int pack_token, int supertx_enabled,
+#endif
                            int mi_row, int mi_col, BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -513,36 +563,105 @@ static void write_modes_sb(VP9_COMP *cpi,
   partition = partition_lookup[bsl][m->mbmi.sb_type];
   write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
   subsize = get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+  set_mi_row_col(xd, tile,
+                 mi_row, num_8x8_blocks_high_lookup[bsize],
+                 mi_col, num_8x8_blocks_wide_lookup[bsize],
+                 cm->mi_rows, cm->mi_cols);
+  if (!supertx_enabled && cm->frame_type != KEY_FRAME &&
+      partition != PARTITION_NONE && bsize <= BLOCK_32X32) {
+    TX_SIZE supertx_size = b_width_log2(bsize);
+    vp9_prob prob = partition == PARTITION_SPLIT ?
+                    cm->fc.supertxsplit_prob[supertx_size] :
+                    cm->fc.supertx_prob[supertx_size];
+    supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size);
+    vp9_write(w, supertx_enabled, prob);
+    if (supertx_enabled) {
+      vp9_write(w, xd->mi[0]->mbmi.skip, vp9_get_skip_prob(cm, xd));
+#if CONFIG_EXT_TX
+      if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip)
+        vp9_write(w, xd->mi[0]->mbmi.ext_txfrm, cm->fc.ext_tx_prob);
+#endif
+    }
+  }
+#endif
   if (subsize < BLOCK_8X8) {
-    write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+    write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                  supertx_enabled,
+#endif
+                  mi_row, mi_col);
   } else {
     switch (partition) {
      case PARTITION_NONE:
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+        write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                      supertx_enabled,
+#endif
+                      mi_row, mi_col);
        break;
      case PARTITION_HORZ:
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+        write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                      supertx_enabled,
+#endif
+                      mi_row, mi_col);
        if (mi_row + bs < cm->mi_rows)
-          write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col);
+          write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                        supertx_enabled,
+#endif
+                        mi_row + bs, mi_col);
        break;
      case PARTITION_VERT:
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+        write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                      supertx_enabled,
+#endif
+                      mi_row, mi_col);
        if (mi_col + bs < cm->mi_cols)
-          write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs);
+          write_modes_b(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                        supertx_enabled,
+#endif
+                        mi_row, mi_col + bs);
        break;
      case PARTITION_SPLIT:
-        write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
-        write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs,
+        write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                       !supertx_enabled, supertx_enabled,
+#endif
+                       mi_row, mi_col, subsize);
+        write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                       !supertx_enabled, supertx_enabled,
+#endif
+                       mi_row, mi_col + bs,
                        subsize);
-        write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col,
+        write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                       !supertx_enabled, supertx_enabled,
+#endif
+                       mi_row + bs, mi_col,
                        subsize);
-        write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
+        write_modes_sb(cpi, tile, w, tok, tok_end,
+#if CONFIG_SUPERTX
+                       !supertx_enabled, supertx_enabled,
+#endif
+                       mi_row + bs, mi_col + bs,
                        subsize);
        break;
      default:
        assert(0);
    }
  }
 
+#if CONFIG_SUPERTX
+  if (partition != PARTITION_NONE && supertx_enabled && pack_token) {
+    assert(*tok < tok_end);
+    pack_mb_tokens(w, tok, tok_end);
+  }
+#endif
+
   // update partition context
   if (bsize >= BLOCK_8X8 &&
@@ -560,7 +679,11 @
static void write_modes(VP9_COMP *cpi, vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + 1, 0, +#endif + mi_row, mi_col, BLOCK_64X64); } } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index da07f9edb..cd6862f4c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -41,6 +41,9 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" +#if CONFIG_SUPERTX +#include "vp9/encoder/vp9_cost.h" +#endif #define GF_ZEROMV_ZBIN_BOOST 0 #define LF_ZEROMV_ZBIN_BOOST 0 @@ -52,6 +55,43 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +#if CONFIG_SUPERTX +static int check_intra_b(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx); + +static int check_intra_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree); +static void predict_superblock(VP9_COMP *cpi, int output_enabled, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree); +static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree); +static void update_state_sb_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree); +static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, + int *skippable, int64_t *sse, +#if CONFIG_EXT_TX + int *best_tx, +#endif + PC_TREE *pc_tree); +#endif + // Motion vector component magnitude threshold for defining fast motion. #define FAST_MOTION_MV_THRESH 24 @@ -193,6 +233,74 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } } +#if CONFIG_SUPERTX +static void set_offsets_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + // Set up distance of MB to edge of frame in 1/8th pel units. 
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); +} + +static void set_offsets_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize) { + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[top_bsize]; + const int mi_height = num_8x8_blocks_high_lookup[top_bsize]; + const struct segmentation *const seg = &cm->seg; + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + mbmi = &xd->mi[0]->mbmi; + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row_ori + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col_ori + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row_ori) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col_ori) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col_ori & (mi_width - 1)) && !(mi_row_ori & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row_ori, mi_height, mi_col_ori, mi_width, + cm->mi_rows, cm->mi_cols); + xd->up_available = (mi_row != 0); + xd->left_available = (mi_col > tile->mi_col_start); + + // R/D setup. + x->rddiv = cpi->rd.RDDIV; + x->rdmult = cpi->rd.RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + vp9_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} +#endif + static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, MACROBLOCKD *const xd, int mi_row, @@ -531,7 +639,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; +#if !CONFIG_SUPERTX assert(mi->mbmi.sb_type == bsize); +#endif *mi_addr = *mi; @@ -669,6 +779,288 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } } +#if CONFIG_SUPERTX +static void update_state_supertx(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { + int i, y, x_idx; + VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi = &ctx->mic; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; + const int mis = cm->mi_stride; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + *mi_addr = *mi; + assert(is_inter_block(mbmi)); + + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); + } + } + + // Restore the coding context of the MB to that that was in place + // when the mode was picked for it + for (y = 0; y < mi_height; y++) + for (x_idx = 0; x_idx < mi_width; x_idx++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { + xd->mi[x_idx + y * mis] = mi_addr; + } + + if (cpi->oxcf.aq_mode) + vp9_init_plane_quantizers(cpi, x); + + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { + mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + } + + x->skip = ctx->skip; + vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(uint8_t) * ctx->num_4x4_blk); + + if (!output_enabled) + return; + + if (!frame_is_intra_only(cm)) { + if (is_inter_block(mbmi)) { + vp9_update_mv_count(cm, xd); + + if (cm->interp_filter == SWITCHABLE) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } +#if CONFIG_MASKED_INTERINTER + if (cm->use_masked_interinter && + cm->reference_mode != SINGLE_REFERENCE && + get_mask_bits(bsize) && + mbmi->ref_frame[1] > INTRA_FRAME) + ++cm->counts.masked_interinter[bsize][mbmi->use_masked_interinter]; +#endif + +#if CONFIG_INTERINTRA + if (cm->use_interintra && + is_interintra_allowed(bsize) && + is_inter_mode(mbmi->mode) && + (mbmi->ref_frame[1] <= INTRA_FRAME)) { + if (mbmi->ref_frame[1] == INTRA_FRAME) { + assert(0); + ++cm->counts.y_mode[size_group_lookup[bsize]][mbmi->interintra_mode]; + ++cm->counts.interintra[bsize][1]; +#if CONFIG_MASKED_INTERINTRA + if (cm->use_masked_interintra && get_mask_bits_interintra(bsize)) + ++cm->counts.masked_interintra[bsize][mbmi->use_masked_interintra]; +#endif + } else { + ++cm->counts.interintra[bsize][0]; + } + } +#endif + } + + rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rd_opt->filter_diff[i] += ctx->best_filter_diff[i]; + } +} + +static void update_state_sb_supertx(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + int i; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->none, mi_row, mi_col, + subsize, output_enabled); + break; + case PARTITION_VERT: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->vertical[0], 
mi_row, mi_col, + subsize, output_enabled); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row, mi_col + hbs, subsize); + update_state_supertx(cpi, &pc_tree->vertical[1], mi_row, mi_col + hbs, + subsize, output_enabled); + } + break; + case PARTITION_HORZ: + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->horizontal[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col, subsize); + update_state_supertx(cpi, &pc_tree->horizontal[1], mi_row + hbs, mi_col, + subsize, output_enabled); + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, pc_tree->leaf_split[0], mi_row, mi_col, + subsize, output_enabled); + } else { + set_offsets_supertx(cpi, tile, mi_row, mi_col, subsize); + update_state_sb_supertx(cpi, tile, mi_row, mi_col, subsize, + output_enabled, pc_tree->split[0]); + set_offsets_supertx(cpi, tile, mi_row, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, tile, mi_row, mi_col + hbs, subsize, + output_enabled, pc_tree->split[1]); + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col, subsize); + update_state_sb_supertx(cpi, tile, mi_row + hbs, mi_col, subsize, + output_enabled, pc_tree->split[2]); + set_offsets_supertx(cpi, tile, mi_row + hbs, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + output_enabled, pc_tree->split[3]); + } + break; + default: + assert(0); + } + + for (i = 0; i < MAX_MB_PLANE; ++i) { + p[i].coeff = (&pc_tree->none)->coeff_pbuf[i][1]; + p[i].qcoeff = (&pc_tree->none)->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = (&pc_tree->none)->dqcoeff_pbuf[i][1]; + p[i].eobs = (&pc_tree->none)->eobs_pbuf[i][1]; + } +} + +static void update_supertx_param(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size) { + MACROBLOCK *const x = &cpi->mb; + + ctx->mic.mbmi.tx_size = supertx_size; + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[supertx_size], + sizeof(uint8_t) * ctx->num_4x4_blk); + ctx->skip = x->skip; +#if CONFIG_EXT_TX + ctx->mic.mbmi.ext_txfrm = best_tx; +#endif +} + +static void update_supertx_param_sb(VP9_COMP *cpi, int mi_row, int mi_col, + BLOCK_SIZE bsize, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size, PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + update_supertx_param(cpi, &pc_tree->none, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_VERT: + update_supertx_param(cpi, &pc_tree->vertical[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) + update_supertx_param(cpi, &pc_tree->vertical[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_HORZ: + update_supertx_param(cpi, &pc_tree->horizontal[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) + update_supertx_param(cpi, &pc_tree->horizontal[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_SPLIT: + if (bsize == 
BLOCK_8X8) { + update_supertx_param(cpi, pc_tree->leaf_split[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + } else { + update_supertx_param_sb(cpi, mi_row, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[0]); + update_supertx_param_sb(cpi, mi_row, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[1]); + update_supertx_param_sb(cpi, mi_row + hbs, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[2]); + update_supertx_param_sb(cpi, mi_row + hbs, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[3]); + } + break; + default: + assert(0); + } +} +#endif + void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, @@ -719,7 +1111,11 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate, static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, - int *totalrate, int64_t *totaldist, + int *totalrate, +#if CONFIG_SUPERTX + int *totalrate_nocoef, +#endif + int64_t *totaldist, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd, int block) { VP9_COMMON *const cm = &cpi->common; @@ -746,6 +1142,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, // there is nothing to be done. if (block != 0) { *totalrate = 0; +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif *totaldist = 0; return; } @@ -809,17 +1208,35 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, if (frame_is_intra_only(cm)) { vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif } else { if (bsize >= BLOCK_8X8) { if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) +#if CONFIG_SUPERTX + { +#endif vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, tile, mi_row, mi_col, - totalrate, totaldist, bsize, ctx, + totalrate, + totaldist, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = *totalrate; + } +#endif else vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col, - totalrate, totaldist, bsize, ctx, best_rd); + totalrate, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif + totaldist, bsize, ctx, best_rd); } else { vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif totaldist, bsize, ctx, best_rd); } } @@ -829,6 +1246,9 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) { vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); +#if CONFIG_SUPERTX + *totalrate_nocoef = (int)round(*totalrate_nocoef * rdmult_ratio); +#endif } } @@ -981,6 +1401,80 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, if (output_enabled && bsize != BLOCK_4X4) cm->counts.partition[ctx][partition]++; +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + bsize <= BLOCK_32X32 && + partition != PARTITION_NONE) { + int supertx_enabled; + TX_SIZE supertx_size = b_width_log2(bsize); + supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree); + if (supertx_enabled) { + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int x_idx, y_idx, i; + uint8_t *dst_buf[3]; + int dst_stride[3]; + set_skip_context(xd, mi_row, mi_col); + 
set_modeinfo_offsets(cm, xd, mi_row, mi_col); + update_state_sb_supertx(cpi, tile, mi_row, mi_col, bsize, + output_enabled, pc_tree); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, mi_row, mi_col, bsize); + if (!x->skip) { + xd->mi[0]->mbmi.skip = 1; + vp9_encode_sb_supertx(x, bsize); + vp9_tokenize_sb_supertx(cpi, tp, !output_enabled, bsize); + } else { + xd->mi[0]->mbmi.skip = 1; + if (output_enabled) + cm->counts.skip[vp9_get_skip_context(xd)][1]++; + reset_skip_context(xd, bsize); + } + if (output_enabled) { + for (y_idx = 0; y_idx < mi_height; y_idx++) + for (x_idx = 0; x_idx < mi_width; x_idx++) { + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height + > y_idx) { + xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip = + xd->mi[0]->mbmi.skip; + } + } + if (partition != PARTITION_SPLIT) + cm->counts.supertx[supertx_size][1]++; + else + cm->counts.supertxsplit[supertx_size][1]++; + cm->counts.supertx_size[supertx_size]++; +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip) + ++cm->counts.ext_tx[xd->mi[0]->mbmi.ext_txfrm]; +#endif + (*tp)->token = EOSB_TOKEN; + (*tp)++; + } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); + return; + } else { + if (output_enabled) { + if (partition != PARTITION_SPLIT) + cm->counts.supertx[supertx_size][0]++; + else + cm->counts.supertxsplit[supertx_size][0]++; + } + } + } +#endif switch (partition) { case PARTITION_NONE: encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, @@ -1458,7 +1952,11 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, + BLOCK_SIZE bsize, int *rate, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif + int64_t *dist, int do_recon, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; @@ -1486,6 +1984,11 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; +#if CONFIG_SUPERTX + int last_part_rate_nocoef = INT_MAX; + int none_rate_nocoef = INT_MAX; + int chosen_rate_nocoef = INT_MAX; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1526,13 +2029,20 @@ static void rd_use_partition(VP9_COMP *cpi, mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, +#if CONFIG_SUPERTX + &none_rate_nocoef, +#endif + &none_dist, bsize, ctx, INT64_MAX, 0); pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist); } @@ -1545,82 +2055,136 @@ static void rd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, 
tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, bsize, ctx, INT64_MAX, 0); break; case PARTITION_HORZ: rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, &pc_tree->horizontal[0], INT64_MAX, 0); if (last_part_rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, subsize, &pc_tree->horizontal[1], INT64_MAX, 1); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break; case PARTITION_VERT: rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, &pc_tree->vertical[0], INT64_MAX, 0); if (last_part_rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif &last_part_dist, subsize, pc_tree->leaf_split[0], INT64_MAX, 0); break; } last_part_rate = 0; +#if CONFIG_SUPERTX + last_part_rate_nocoef = 0; +#endif last_part_dist = 0; for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; +#if CONFIG_SUPERTX + int rt_nocoef; +#endif int64_t dt; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp, - mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt, + mi_row + y_idx, mi_col + x_idx, subsize, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, i != 3, pc_tree->split[i]); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif last_part_dist = INT64_MAX; break; } last_part_rate += rt; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif last_part_dist += dt; } break;
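Every last_part_rd/chosen_rd comparison in this hunk, and the skip decision in rd_supertx_sb further down, reduces to comparing RDCOST(x->rdmult, x->rddiv, rate, dist) values. A self-contained sketch of that trade-off; the rdcost() form and the lambda values below are assumptions for illustration only (the real RDCOST macro lives in the encoder's RD headers and may scale differently):

#include <stdio.h>
#include <stdint.h>

/* Assumed stand-in for RDCOST: the rate term is scaled by rdmult (lambda)
 * and the distortion term is shifted by rddiv. */
static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
  return (((int64_t)rate * rdmult + 128) >> 8) + (dist << rddiv);
}

int main(void) {
  const int rdmult = 70, rddiv = 0;  /* hypothetical lambda setting */
  /* Mirrors rd_supertx_sb's choice: code the residual (rate bits, low
   * distortion) vs. skip it (zero residual rate, distortion == sse). */
  int rate_coef = 900;
  int64_t dist_coef = 1500, sse = 4000;
  int64_t rd_code = rdcost(rdmult, rddiv, rate_coef, dist_coef);
  int64_t rd_skip = rdcost(rdmult, rddiv, 0, sse);
  printf("code=%lld skip=%lld -> %s\n", (long long)rd_code,
         (long long)rd_skip,
         rd_code < rd_skip ? "code coefficients" : "skip");
  return 0;
}
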
@@ -1631,6 +2195,9 @@ static void rd_use_partition(VP9_COMP *cpi, pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += cpi->partition_cost[pl][partition]; +#if CONFIG_SUPERTX + last_part_rate_nocoef += cpi->partition_cost[pl][partition]; +#endif last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } @@ -1644,6 +2211,9 @@ static void rd_use_partition(VP9_COMP *cpi, mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; +#if CONFIG_SUPERTX + chosen_rate_nocoef = 0; +#endif chosen_dist = 0; restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->partitioning = PARTITION_SPLIT; @@ -1653,6 +2223,9 @@ static void rd_use_partition(VP9_COMP *cpi, int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; @@ -1662,7 +2235,11 @@ static void rd_use_partition(VP9_COMP *cpi, save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif + &dt, split_subsize, &pc_tree->split[i]->none, INT64_MAX, i); @@ -1670,11 +2247,17 @@ static void rd_use_partition(VP9_COMP *cpi, if (rt == INT_MAX || dt == INT64_MAX) { chosen_rate = INT_MAX; +#if CONFIG_SUPERTX + chosen_rate_nocoef = INT_MAX; +#endif chosen_dist = INT64_MAX; break; } chosen_rate += rt; +#if CONFIG_SUPERTX + chosen_rate_nocoef += rt_nocoef; +#endif chosen_dist += dt; if (i != 3) @@ -1684,10 +2267,16 @@ static void rd_use_partition(VP9_COMP *cpi, pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += cpi->partition_cost[pl][PARTITION_SPLIT]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); } } @@ -1698,6 +2287,9 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rate = last_part_rate; +#if CONFIG_SUPERTX + chosen_rate_nocoef = last_part_rate_nocoef; +#endif chosen_dist = last_part_dist; chosen_rd = last_part_rd; } @@ -1706,6 +2298,9 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; chosen_rate = none_rate; +#if CONFIG_SUPERTX + chosen_rate_nocoef = none_rate_nocoef; +#endif chosen_dist = none_dist; } @@ -1735,6 +2330,9 @@ static void rd_use_partition(VP9_COMP *cpi, } *rate = chosen_rate; +#if CONFIG_SUPERTX + *rate_nocoef = chosen_rate_nocoef; +#endif *dist = chosen_dist; } @@ -1930,6 +2528,9 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int64_t *dist, int do_recon, int64_t best_rd, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; @@ -1943,6 +2544,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int i, pl; BLOCK_SIZE subsize; int this_rate, sum_rate = 0,
best_rate = INT_MAX; +#if CONFIG_SUPERTX + int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; + int tmp_rate; + int abort_flag; + int64_t tmp_dist, tmp_rd; + PARTITION_TYPE best_partition; +#endif int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; int64_t sum_rd = 0; int do_split = bsize >= BLOCK_8X8; @@ -2000,12 +2608,19 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // PARTITION_NONE if (partition_none_allowed) { - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, +#if CONFIG_SUPERTX + &this_rate_nocoef, +#endif + &this_dist, bsize, ctx, best_rd, 0); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + this_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); if (sum_rd < best_rd) { @@ -2013,6 +2628,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t stop_thresh_rd; best_rate = this_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = this_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = this_dist; best_rd = sum_rd; if (bsize >= BLOCK_8X8) @@ -2049,21 +2668,69 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, +#if !CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, + &sum_dist, subsize, pc_tree->leaf_split[0], best_rd, 0); - if (sum_rate == INT_MAX) { +#else + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, + &sum_rate_nocoef, + &sum_dist, subsize, + pc_tree->leaf_split[0], INT64_MAX, 0); +#endif + if (sum_rate == INT_MAX) sum_rd = INT64_MAX; - } else { + else sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd) { - update_state(cpi, pc_tree->leaf_split[0], mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, - pc_tree->leaf_split[0]); - update_partition_context(xd, mi_row, mi_col, subsize, bsize); +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + sum_rd < INT64_MAX) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } } + pc_tree->partitioning = best_partition; } +#endif } else { +#if !CONFIG_SUPERTX for (i = 0; i < 4 && 
sum_rd < best_rd; ++i) { +#else + for (i = 0; i < 4 && sum_rd < INT64_MAX; ++i) { +#endif const int x_idx = (i & 1) * mi_step; const int y_idx = (i >> 1) * mi_step; @@ -2072,27 +2739,88 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); - +#if !CONFIG_SUPERTX rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd, pc_tree->split[i]); +#else + rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, + &this_rate_nocoef, + &this_dist, i != 3, + INT64_MAX - sum_rd, pc_tree->split[i]); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + sum_rd < INT64_MAX && i == 4 && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertxsplit_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif } if (sum_rd < best_rd && i == 4) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = sum_dist; best_rd = sum_rd; pc_tree->partitioning = PARTITION_SPLIT; @@ -2115,10 +2843,17 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif + &sum_dist, subsize, &pc_tree->horizontal[0], best_rd, 0); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); +#if CONFIG_SUPERTX + abort_flag = sum_rd >= best_rd; +#endif if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, ctx, mi_row, mi_col, subsize, 0); @@ -2130,24 +2865,86 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if !CONFIG_SUPERTX 
rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, &pc_tree->horizontal[1], best_rd - sum_rd, 1); +#else + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, + &this_rate_nocoef, + &this_dist, subsize, &pc_tree->horizontal[1], + INT64_MAX, 1); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + !abort_flag && + sum_rd < INT64_MAX && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_HORZ; + + sum_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif if (sum_rd < best_rd) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_HORZ]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_HORZ]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rd = sum_rd; best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = sum_dist; pc_tree->partitioning = PARTITION_HORZ; } @@ -2164,9 +2961,16 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif + &sum_dist, subsize, &pc_tree->vertical[0], best_rd, 0); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); +#if CONFIG_SUPERTX + abort_flag = sum_rd >= best_rd; +#endif if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, @@ -2178,24 +2982,87 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if !CONFIG_SUPERTX rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, &pc_tree->vertical[1], best_rd - sum_rd, 1); +#else + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, + &this_rate_nocoef, + &this_dist, subsize, + &pc_tree->vertical[1], INT64_MAX, + 1); +#endif if (this_rate == INT_MAX) { sum_rd = INT64_MAX; } else { sum_rate += 
this_rate; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif sum_dist += this_dist; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); } } +#if CONFIG_SUPERTX + if (cm->frame_type != KEY_FRAME && + !abort_flag && + sum_rd < INT64_MAX && + bsize <= BLOCK_32X32) { + TX_SIZE supertx_size = b_width_log2(bsize); + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_VERT; + + sum_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 0); + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (!check_intra_sb(cpi, tile, mi_row, mi_col, bsize, pc_tree)) { + int skippable = 1; + int64_t sse = 0; +#if CONFIG_EXT_TX + int best_tx = 0; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; + restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + rd_supertx_sb(cpi, tile, mi_row, mi_col, bsize, &tmp_rate, &tmp_dist, + &skippable, &sse, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp9_cost_bit(cm->fc.supertx_prob[supertx_size], 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rd) { + sum_rd = tmp_rd; + sum_rate = tmp_rate; + sum_dist = tmp_dist; + update_supertx_param_sb(cpi, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif if (sum_rd < best_rd) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += cpi->partition_cost[pl][PARTITION_VERT]; +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_VERT]; +#endif sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { best_rate = sum_rate; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif best_dist = sum_dist; best_rd = sum_rd; pc_tree->partitioning = PARTITION_VERT; @@ -2209,6 +3076,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // checks occur in some sub function and thus are used... 
(void) best_rd; *rate = best_rate; +#if CONFIG_SUPERTX + *rate_nocoef = best_rate_nocoef; +#endif *dist = best_dist; if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { @@ -2251,6 +3121,9 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { int dummy_rate; +#if CONFIG_SUPERTX + int dummy_rate_nocoef; +#endif int64_t dummy_dist; int i; @@ -2283,18 +3156,30 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, sf->always_this_block_size); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } else { GF_GROUP * gf_grp = &cpi->twopass.gf_group; int last_was_mid_sequence_overlay = 0; @@ -2320,7 +3205,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, INT64_MAX, cpi->pc_root); } else { if (sf->constrain_copy_partition && @@ -2330,7 +3219,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, else copy_partitioning(cm, mi, prev_mi); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, cpi->pc_root); } } } else { @@ -2342,7 +3235,11 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, INT64_MAX, cpi->pc_root); + &dummy_rate, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif + &dummy_dist, 1, INT64_MAX, cpi->pc_root); } } } @@ -3244,17 +4141,37 @@ void vp9_encode_frame(VP9_COMP *cpi) { count32x32 += cm->counts.tx.p32x32[i][TX_32X32]; } +#if !CONFIG_SUPERTX if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && count32x32 == 0) { +#else + if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && + count32x32 == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ALLOW_8X8; reset_skip_tx_size(cm, TX_8X8); +#if !CONFIG_SUPERTX } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { +#else + } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && + count8x8_lp == 0 && count16x16_lp == 0 
&& count32x32 == 0 && + cm->counts.supertx_size[TX_8X8] == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ONLY_4X4; reset_skip_tx_size(cm, TX_4X4); } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { cm->tx_mode = ALLOW_32X32; +#if !CONFIG_SUPERTX } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { +#else + } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0 && + cm->counts.supertx_size[TX_32X32] == 0) { +#endif cm->tx_mode = ALLOW_16X16; reset_skip_tx_size(cm, TX_16X16); } @@ -3433,3 +4350,517 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, #endif } } + +#if CONFIG_SUPERTX +static int check_intra_b(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx) { +#if CONFIG_INTERINTRA + return !is_inter_mode((&ctx->mic)->mbmi.mode) || + (ctx->mic.mbmi.ref_frame[1] == INTRA_FRAME); +#else + return !is_inter_mode((&ctx->mic)->mbmi.mode); +#endif +} + +static int check_intra_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize = bsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return 1; + + if (bsize >= BLOCK_8X8) + subsize = get_subsize(bsize, pc_tree->partitioning); + else + subsize = BLOCK_4X4; + + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + return check_intra_b(cpi, tile, mi_row, mi_col, subsize, &pc_tree->none); + break; + case PARTITION_VERT: + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + &pc_tree->vertical[0])) + return 1; + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + if (check_intra_b(cpi, tile, mi_row, mi_col + hbs, subsize, + &pc_tree->vertical[1])) + return 1; + } + break; + case PARTITION_HORZ: + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + &pc_tree->horizontal[0])) + return 1; + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + if (check_intra_b(cpi, tile, mi_row + hbs, mi_col, subsize, + &pc_tree->horizontal[1])) + return 1; + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + if (check_intra_b(cpi, tile, mi_row, mi_col, subsize, + pc_tree->leaf_split[0])) + return 1; + } else { + if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize, + pc_tree->split[0])) + return 1; + if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize, + pc_tree->split[1])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize, + pc_tree->split[2])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + pc_tree->split[3])) + return 1; + } + break; + default: + assert(0); + } + return 0; +} + +static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) { + return ctx->mic.mbmi.tx_size == supertx_size; +} + +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree) { + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + partition = pc_tree->partitioning; + subsize = get_subsize(bsize, partition); + switch (partition) { + case PARTITION_NONE: + return check_supertx_b(supertx_size, &pc_tree->none); + case PARTITION_VERT: + return check_supertx_b(supertx_size, &pc_tree->vertical[0]); + case PARTITION_HORZ: + return check_supertx_b(supertx_size, &pc_tree->horizontal[0]); + case PARTITION_SPLIT: + 
if (bsize == BLOCK_8X8) + return check_supertx_b(supertx_size, pc_tree->leaf_split[0]); + else + return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]); + default: + assert(0); + return 0; + } +} + +static void predict_superblock(VP9_COMP *cpi, + int output_enabled, +#if CONFIG_MASKED_INTERINTER + int mi_row, int mi_col, +#endif + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO **mi_8x8 = xd->mi; + MODE_INFO *mi = mi_8x8[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + cpi->zbin_mode_boost = get_zbin_mode_boost(mbmi, + cpi->zbin_mode_boost_enabled); + vp9_update_zbin_extra(cpi, x); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp9_setup_pre_planes(xd, ref, cfg, mi_row_ori, mi_col_ori, + &xd->block_refs[ref]->sf); + } +#if !CONFIG_MASKED_INTERINTER + vp9_build_inter_predictors_sb(xd, mi_row_ori, mi_col_ori, bsize); +#else + vp9_build_inter_predictors_sb_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, bsize); +#endif +} + +static void predict_superblock_sub8x8_extend(VP9_COMP *cpi, + int output_enabled, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx, + PARTITION_TYPE partition) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO **mi_8x8 = xd->mi; + MODE_INFO *mi = mi_8x8[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + cpi->zbin_mode_boost = get_zbin_mode_boost(mbmi, + cpi->zbin_mode_boost_enabled); + vp9_update_zbin_extra(cpi, x); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp9_setup_pre_planes(xd, ref, cfg, mi_row_ori, mi_col_ori, + &xd->block_refs[ref]->sf); + } + vp9_build_inter_predictors_sby_sub8x8_extend(xd, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + vp9_build_inter_predictors_sbuv_sub8x8_extend(xd, +#if CONFIG_MASKED_INTERINTER + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, + top_bsize); +} + +static void predict_b_sub8x8_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx, + PARTITION_TYPE partition) { + set_offsets_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + bsize, top_bsize); + predict_superblock_sub8x8_extend(cpi, output_enabled, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, ctx, partition); + + if (output_enabled) + update_stats(cpi); +} + +static void predict_b_extend(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PICK_MODE_CONTEXT *ctx) { + set_offsets_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + bsize, top_bsize); + predict_superblock(cpi, output_enabled, +#if CONFIG_MASKED_INTERINTER + mi_row, mi_col, +#endif + mi_row_ori, mi_col_ori, top_bsize, ctx); + + if (output_enabled) + update_stats(cpi); +}
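Before the recursive driver below, a standalone illustration of the boundary smoothing it depends on. The real mask shape lives in vp9_build_masked_inter_predictor_complex (not part of this patch's hunks), so the ramp weights here are assumed values, chosen only to show a "soft step" blend of two predictors across a horizontal split:

#include <stdio.h>
#include <stdint.h>

#define W 8
#define H 8

/* Blend pred p0 into pred p1 across a horizontal boundary at row H/2,
 * using an assumed soft step: p0-weight (out of 64) ramps 64 -> 48 -> 16 -> 0
 * around the boundary instead of switching abruptly. */
static void soft_step_blend_horz(const uint8_t *p0, const uint8_t *p1,
                                 uint8_t *dst, int stride) {
  int r, c;
  for (r = 0; r < H; ++r) {
    int w0 = r < H / 2 - 1 ? 64 : r == H / 2 - 1 ? 48 : r == H / 2 ? 16 : 0;
    for (c = 0; c < W; ++c)
      dst[r * stride + c] =
          (uint8_t)((w0 * p0[r * stride + c] +
                     (64 - w0) * p1[r * stride + c] + 32) >> 6);
  }
}

int main(void) {
  uint8_t p0[W * H], p1[W * H], dst[W * H];
  int i;
  for (i = 0; i < W * H; ++i) { p0[i] = 100; p1[i] = 200; }
  soft_step_blend_horz(p0, p1, dst, W);
  for (i = 0; i < H; ++i)
    printf("row %d: %d\n", i, dst[i * W]); /* ramps 100 -> 125 -> 175 -> 200 */
  return 0;
}
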
+ +// This function generates the prediction for multiple blocks, between which +// the discontinuity around block boundaries is reduced by smoothing masks. +// The basic smoothing mask is a soft step function along the horizontal or +// vertical direction. In the more complicated case where a block is split +// into 4 subblocks, the basic mask is first applied to the neighboring +// subblocks (2 pairs) in the horizontal direction, and then applied to the +// 2 masked predictions obtained above in the vertical direction. If the +// block is split over more than one level, the masked prediction produced +// at each stage is stored in dst_buf[], which is passed down from the +// higher level. +static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAX_MB_PLANE * 32 * 32); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, MAX_MB_PLANE * 32 * 32); + uint8_t *dst_buf1[3] = {tmp_buf1, tmp_buf1 + 32 * 32, tmp_buf1 + 2 * 32 * 32}; + uint8_t *dst_buf2[3] = {tmp_buf2, tmp_buf2 + 32 * 32, tmp_buf2 + 2 * 32 * 32}; + uint8_t *dst_buf3[3] = {tmp_buf3, tmp_buf3 + 32 * 32, tmp_buf3 + 2 * 32 * 32}; + int dst_stride1[3] = {32, 32, 32}; + int dst_stride2[3] = {32, 32, 32}; + int dst_stride3[3] = {32, 32, 32}; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, bsize, top_bsize, + &pc_tree->none); + break; + case PARTITION_HORZ: + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize, + &pc_tree->horizontal[0]); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, &pc_tree->horizontal[0], + PARTITION_HORZ); + } + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + predict_b_extend(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, &pc_tree->horizontal[1]); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + break; + case PARTITION_VERT: + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize, +
&pc_tree->vertical[0]); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, &pc_tree->vertical[0], + PARTITION_VERT); + } + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = tmp_buf1 + i * 32 * 32; + xd->plane[i].dst.stride = 32; + } + predict_b_extend(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, &pc_tree->vertical[1]); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, pc_tree->leaf_split[0], + PARTITION_SPLIT); + } else { + predict_sb_complex(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, tile, mi_row + hbs, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, subsize, + top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + for (i = 0; i < MAX_MB_PLANE; i++) { + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + if (mi_row + hbs < cm->mi_rows) { + vp9_build_masked_inter_predictor_complex(dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + i, mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + dst_buf2[i], dst_stride2[i], + i, + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + } + break; + default: + assert(0); + } + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); +} + +static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, + int *skippable, int64_t *sse, +#if CONFIG_EXT_TX + int *best_tx, +#endif + PC_TREE *pc_tree) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int plane, pnskip, this_rate, base_rate = *tmp_rate; + int64_t pnsse, this_dist; + uint8_t *dst_buf[3]; + int dst_stride[3]; + + 
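+  // Estimate the RD cost of coding this superblock with a single transform + // per plane: rebuild the boundary-smoothed inter prediction for the whole + // block under the chosen partitioning, measure the rate and distortion of + // the residual at the full-block transform size (searching the extended + // transform types for luma blocks up to 16x16 when CONFIG_EXT_TX is on), + // and finally decide whether signalling skip is cheaper than the coefficients.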
update_state_sb_supertx(cpi, tile, mi_row, mi_col, bsize, 0, pc_tree); + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + dst_buf[plane] = xd->plane[plane].dst.buf; + dst_stride[plane] = xd->plane[plane].dst.stride; + } + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, mi_row, mi_col, bsize); +#if CONFIG_EXT_TX + *best_tx = 0; +#endif + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + TX_SIZE tx_size = plane ? (b_width_log2(bsize) - 1) : b_width_log2(bsize); + vp9_subtract_plane(x, bsize, plane); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16 && plane == 0) { + int txfm, this_rate_tx, pnskip_tx, + best_tx_nostx = xd->mi[0]->mbmi.ext_txfrm; + int64_t this_dist_tx, pnsse_tx, rd, bestrd_tx = INT64_MAX; + + for (txfm = 0; txfm < EXT_TX_TYPES; txfm++) { + xd->mi[0]->mbmi.ext_txfrm = txfm; + txfm_rd_in_plane_supertx(x, &this_rate_tx, &this_dist_tx, + &pnskip_tx, &pnsse_tx, + INT64_MAX, plane, bsize, tx_size, 0); + this_rate_tx += vp9_cost_bit(cm->fc.ext_tx_prob, txfm); + rd = RDCOST(x->rdmult, x->rddiv, this_rate_tx, this_dist_tx); + if (rd < bestrd_tx * 0.97 || bestrd_tx == INT64_MAX) { + *best_tx = txfm; + bestrd_tx = rd; + this_rate = this_rate_tx; + this_dist = this_dist_tx; + pnskip = pnskip_tx; + pnsse = pnsse_tx; + } + } + + xd->mi[0]->mbmi.ext_txfrm = best_tx_nostx; + } else { +#endif + txfm_rd_in_plane_supertx(x, &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, plane, bsize, tx_size, 0); +#if CONFIG_EXT_TX + } +#endif + *tmp_rate += this_rate; + *tmp_dist += this_dist; + *sse += pnsse; + *skippable &= pnskip; + } + x->skip = 0; + if (*skippable) { + *tmp_rate = base_rate; + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16) + *tmp_rate -= vp9_cost_bit(cm->fc.ext_tx_prob, *best_tx); + *best_tx = 0; +#endif + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate - base_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, *sse)) { + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); + } else { + *tmp_rate = base_rate; + *tmp_dist = *sse; + *tmp_rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); +#if CONFIG_EXT_TX + if (bsize <= BLOCK_16X16) + *tmp_rate -= vp9_cost_bit(cm->fc.ext_tx_prob, *best_tx); + *best_tx = 0; +#endif + x->skip = 1; + } + } +} +#endif diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 243b06b55..e3e3af897 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -578,6 +578,26 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } +#if CONFIG_SUPERTX +void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct encode_b_args arg = {x, &ctx, &mbmi->skip}; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + BLOCK_SIZE plane_size = bsize - 3 * (plane > 0); + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mbmi) : mbmi->tx_size; + vp9_subtract_plane(x, bsize, plane); + vp9_get_entropy_contexts(bsize, tx_size, pd, + ctx.ta[plane], ctx.tl[plane]); + encode_block(plane, 0, plane_size, b_width_log2(plane_size), &arg); + } +} +#endif + static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 3196c9920..d4db160cf 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -21,6 +21,9 @@ extern "C" { #endif void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp9_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize); +#endif void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 03b7a2758..4beddffa9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -611,8 +611,10 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int pt = combine_entropy_contexts(*A, *L); int c, cost; // Check for consistency of tx_size with mode info +#if !CONFIG_SUPERTX assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : get_uv_tx_size(mbmi) == tx_size); +#endif if (eob == 0) { // single eob token @@ -777,7 +779,11 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, } } +#if !CONFIG_SUPERTX static void txfm_rd_in_plane(MACROBLOCK *x, +#else +void txfm_rd_in_plane(MACROBLOCK *x, +#endif int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, @@ -813,6 +819,41 @@ static void txfm_rd_in_plane(MACROBLOCK *x, } } +#if CONFIG_SUPERTX +void txfm_rd_in_plane_supertx(MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + struct rdcost_block_args args; + vp9_zero(args); + args.x = x; + args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; + + vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); + + args.so = get_scan(xd, tx_size, pd->plane_type, 0); + + block_rd_txfm(plane, 0, get_plane_block_size(bsize, pd), tx_size, &args); + + if (args.skip) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } else { + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = !x->plane[plane].eobs[0]; + } +} +#endif + static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, @@ -3687,6 +3728,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -3768,6 +3812,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_sse[i] = INT_MAX; *returnrate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; @@ -4042,6 +4089,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE uv_tx; #if CONFIG_FILTERINTRA mbmi->filterbit 
= 0; +#endif +#if CONFIG_EXT_TX + mbmi->ext_txfrm = 0; #endif intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, tx_cache, best_rd); @@ -4174,6 +4224,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best yrd calculation +#if CONFIG_SUPERTX + rate_y = 0; +#endif rate_uv = 0; // Cost the skip mb case @@ -4253,6 +4306,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } *returnrate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) { + *returnrate_nocoef -= vp9_cost_bit(vp9_get_skip_prob(cm, xd), + skippable || this_skip2); + } + *returnrate_nocoef -= vp9_cost_bit(vp9_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); +#endif *returndistortion = distortion2; best_rd = this_rd; best_mbmode = *mbmi; @@ -4536,6 +4598,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -4611,6 +4676,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv_intra = INT_MAX; *returnrate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { @@ -4750,6 +4818,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { int rate; +#if CONFIG_EXT_TX + mbmi->ext_txfrm = 0; +#endif if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; @@ -5004,6 +5075,15 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } *returnrate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) + *returnrate_nocoef -= vp9_cost_bit(vp9_get_skip_prob(cm, xd), + this_skip2); + *returnrate_nocoef -= vp9_cost_bit(vp9_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); + assert(*returnrate_nocoef > 0); +#endif *returndistortion = distortion2; best_rd = this_rd; best_yrd = best_rd - @@ -5109,6 +5189,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd == INT64_MAX) { *returnrate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif *returndistortion = INT64_MAX; return best_rd; } diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 4f60f1c71..668a7f31e 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -171,6 +171,9 @@ int64_t vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -191,6 +194,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif int64_t *returndistortion, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -222,6 +228,21 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd, int mi_row, int mi_col, const struct scale_factors *scale, const struct scale_factors *scale_uv); + +#if CONFIG_SUPERTX +void txfm_rd_in_plane_supertx(MACROBLOCK *x, + int *rate, int64_t 
*distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +void txfm_rd_in_plane(MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +#endif #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index dcca92d21..e28ac3a09 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -334,3 +334,41 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } + +#if CONFIG_SUPERTX +void vp9_tokenize_sb_supertx(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + TOKENEXTRA *t_backup = *t; + const int ctx = vp9_get_skip_context(xd); + const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_SKIP); + struct tokenize_b_args arg = {cpi, xd, t}; + int plane; + if (mbmi->skip) { + if (!dry_run) + cm->counts.skip[ctx][1] += skip_inc; + reset_skip_context(xd, bsize); + if (dry_run) + *t = t_backup; + return; + } + + if (!dry_run) { + cm->counts.skip[ctx][0] += skip_inc; + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + BLOCK_SIZE plane_size = plane ? (bsize - 3) : bsize; + tokenize_b(plane, 0, plane_size, b_width_log2(plane_size), &arg); + } + } else { + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + BLOCK_SIZE plane_size = plane ? (bsize - 3) : bsize; + set_entropy_context_b(plane, 0, plane_size, b_width_log2(plane_size), + &arg); + } + *t = t_backup; + } +} +#endif diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 063c0bafe..5f4f7e5be 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -46,6 +46,10 @@ struct VP9_COMP; void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp9_tokenize_sb_supertx(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, + BLOCK_SIZE bsize); +#endif extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to
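A note on the boundary smoothing used by predict_sb_complex() above: vp9_build_masked_inter_predictor_complex(), whose implementation is not part of this patch's hunks, blends two overlapping predictors with a soft-step mask across the partition boundary. The sketch below is a minimal standalone illustration of that idea for a horizontal split only; the function name, the linear ramp profile, and the 6-bit weight precision are assumptions for illustration, not the mask actually used by the patch.

#include <stdint.h>

/* Illustrative soft-step blend across a horizontal partition boundary.
 * 'top' and 'bottom' are the two overlapping predictors; weights use
 * 6-bit precision (0..64) and ramp linearly over 'ramp_rows' rows
 * centered on the boundary. Names and the linear profile are
 * assumptions, not the patch's actual mask. */
static void blend_horz_soft_step(uint8_t *dst, int dst_stride,
                                 const uint8_t *top, int top_stride,
                                 const uint8_t *bottom, int bottom_stride,
                                 int width, int height, int ramp_rows) {
  const int bits = 6;            /* weight precision: 0..(1 << bits) */
  const int half = height / 2;   /* nominal boundary between predictors */
  int r, c, w;
  for (r = 0; r < height; ++r) {
    /* Weight of the top predictor: full above the ramp, zero below it,
     * decreasing linearly across the ramp_rows rows around the boundary. */
    if (r < half - ramp_rows / 2)
      w = 1 << bits;
    else if (r >= half + ramp_rows / 2)
      w = 0;
    else
      w = ((half + ramp_rows / 2 - r) << bits) / ramp_rows;
    for (c = 0; c < width; ++c)
      dst[r * dst_stride + c] =
          (uint8_t)((w * top[r * top_stride + c] +
                     ((1 << bits) - w) * bottom[r * bottom_stride + c] +
                     (1 << (bits - 1))) >> bits);
  }
}

In predict_sb_complex(), the PARTITION_SPLIT case chains this kind of operation: the two left/right subblock pairs are blended first, the two intermediate results are then blended across the horizontal boundary, and dst_buf[] carries the intermediate masked predictions between recursion levels.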