From 35d38646ec9e38e7e422f3f6d8cbe8a2b13561fa Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 10 Mar 2015 12:46:52 -0700 Subject: [PATCH] Misc changes to support high-bitdepth with supertx Change-Id: I0331646d1c55deb6e4631e64bd6b092fb892a43e --- vp9/common/vp9_reconinter.c | 624 +++++++++++++++++++++++++--------- vp9/common/vp9_reconinter.h | 1 + vp9/decoder/vp9_decodeframe.c | 234 ++++++++++++- vp9/decoder/vp9_decodemv.c | 6 + vp9/encoder/vp9_dct.c | 5 +- vp9/encoder/vp9_encodeframe.c | 281 ++++++++++++++- 6 files changed, 951 insertions(+), 200 deletions(-) diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index ff1034294..39b80a714 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -145,14 +145,14 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, } #if CONFIG_VP9_HIGHBITDEPTH -static void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd) { +static void highbd_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys, int bd) { sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); @@ -175,8 +175,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd); + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, + bd); } #endif // 
CONFIG_VP9_HIGHBITDEPTH @@ -258,7 +259,6 @@ static MV average_split_mvs(const struct macroblockd_plane *pd, } #if CONFIG_WEDGE_PARTITION - static int get_masked_weight(int m) { #define SMOOTHER_LEN 32 static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = { @@ -590,6 +590,31 @@ static void build_masked_compound_extend(uint8_t *dst, int dst_stride, WEDGE_WEIGHT_BITS; } } + +#if CONFIG_VP9_HIGHBITDEPTH +static void build_masked_compound_extend_highbd( + uint8_t *dst_8, int dst_stride, + uint8_t *dst2_8, int dst2_stride, int plane, + int wedge_index, BLOCK_SIZE sb_type, + int wedge_offset_x, int wedge_offset_y, + int h, int w) { + int i, j; + uint8_t mask[4096]; + uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); + uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); + generate_masked_weight_extend(wedge_index, plane, sb_type, h, w, + wedge_offset_x, wedge_offset_y, mask, 64); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * 64 + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_SUPERTX #endif // CONFIG_WEDGE_PARTITION @@ -651,24 +676,39 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, uint8_t tmp_dst[4096]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(pre, pre_buf->stride, tmp_dst, 64, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); + highbd_inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys, + xd->bd); } else { inter_predictor(pre, pre_buf->stride, tmp_dst, 64, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); } #else inter_predictor(pre, pre_buf->stride, tmp_dst, 64, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH #if 
CONFIG_SUPERTX - // TODO(debargha): Need high bitdepth versions +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_masked_compound_extend_highbd( + dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); + } else { + build_masked_compound_extend( + dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); + } +#else build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); -#else +#endif // CONFIG_VP9_HIGHBITDEPTH +#else // CONFIG_SUPERTX build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); @@ -677,13 +717,14 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } + #else // CONFIG_WEDGE_PARTITION #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); } else { inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -817,6 +858,7 @@ static void generate_1dmask(int length, uint8_t *mask) { } void vp9_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, const struct macroblockd_plane *pd, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, @@ -850,6 +892,27 @@ void vp9_build_masked_inter_predictor_complex( default: assert(0); } 
+#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); + uint16_t *dst216 = CONVERT_TO_SHORTPTR(dst2); + for (i = 0; i < top_h; ++i) { + for (j = 0; j < top_w; ++j) { + const int m = (partition == PARTITION_HORZ ? mask[i] : mask[j]); + if (m == 64) + continue; + else if (m == 0) + dst16[i * dst_stride + j] = dst216[i * dst2_stride + j]; + else + dst16[i * dst_stride + j] = (dst16[i * dst_stride + j] * m + + dst216[i * dst2_stride + j] * (64 - m) + + 32) >> 6; + } + } + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + (void) xd; for (i = 0; i < top_h; ++i) { for (j = 0; j < top_w; ++j) { const int m = (partition == PARTITION_HORZ ? mask[i] : mask[j]); @@ -898,12 +961,13 @@ void vp9_build_inter_predictors_sb_extend(MACROBLOCKD *xd, } #endif // CONFIG_WEDGE_PARTITION -void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, - int mi_row, int mi_col, - int mi_row_ori, - int mi_col_ori, - BLOCK_SIZE top_bsize, - PARTITION_TYPE partition) { +void vp9_build_inter_predictors_sby_sub8x8_extend( + MACROBLOCKD *xd, + int mi_row, int mi_col, + int mi_row_ori, + int mi_col_ori, + BLOCK_SIZE top_bsize, + PARTITION_TYPE partition) { const int mi_x = mi_col_ori * MI_SIZE; const int mi_y = mi_row_ori * MI_SIZE; #if CONFIG_WEDGE_PARTITION @@ -914,89 +978,191 @@ void vp9_build_inter_predictors_sby_sub8x8_extend(MACROBLOCKD *xd, int orig_dst_stride; int bw = 4 << b_width_log2_lookup[top_bsize]; int bh = 4 << b_height_log2_lookup[top_bsize]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAXTXLEN * MAXTXLEN); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAXTXLEN * MAXTXLEN); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAXTXLEN * MAXTXLEN); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 2 * MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 2 * MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, 
tmp_buf2, 2 * MAXTXLEN * MAXTXLEN); - orig_dst = xd->plane[0].dst.buf; - orig_dst_stride = xd->plane[0].dst.stride; - build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); + mi_x, mi_y); - xd->plane[0].dst.buf = tmp_buf; - xd->plane[0].dst.stride = MAXTXLEN; - switch (partition) { - case PARTITION_HORZ: - build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf); + xd->plane[0].dst.stride = MAXTXLEN; + switch (partition) { + case PARTITION_HORZ: + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - case PARTITION_VERT: - build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + break; + case PARTITION_VERT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - case PARTITION_SPLIT: - build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + break; + case PARTITION_SPLIT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - xd->plane[0].dst.buf = tmp_buf1; - xd->plane[0].dst.stride = MAXTXLEN; - build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf1); + xd->plane[0].dst.stride = MAXTXLEN; + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - xd->plane[0].dst.buf = tmp_buf2; - 
xd->plane[0].dst.stride = MAXTXLEN; - build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf2); + xd->plane[0].dst.stride = MAXTXLEN; + build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - default: - assert(0); + mi_x, mi_y); + break; + default: + assert(0); + } + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + } else { + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex( + xd, + CONVERT_TO_BYTEPTR(tmp_buf1), MAXTXLEN, + CONVERT_TO_BYTEPTR(tmp_buf2), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf1), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + } + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + return; } +#endif // CONFIG_VP9_HIGHBITDEPTH - if (partition != PARTITION_SPLIT) { - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - partition); - } else { - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_VERT); - 
vp9_build_masked_inter_predictor_complex(tmp_buf1, MAXTXLEN, - tmp_buf2, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_VERT); - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf1, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_HORZ); + { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAXTXLEN * MAXTXLEN); + + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + + xd->plane[0].dst.buf = tmp_buf; + xd->plane[0].dst.stride = MAXTXLEN; + switch (partition) { + case PARTITION_HORZ: + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_VERT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_SPLIT: + build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf1; + xd->plane[0].dst.stride = MAXTXLEN; + build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf2; + xd->plane[0].dst.stride = MAXTXLEN; + build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + default: + assert(0); + } + + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, 
orig_dst_stride, + tmp_buf, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + } else { + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, orig_dst_stride, + tmp_buf, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(xd, + tmp_buf1, MAXTXLEN, + tmp_buf2, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, orig_dst_stride, + tmp_buf1, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + } + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; } - xd->plane[0].dst.buf = orig_dst; - xd->plane[0].dst.stride = orig_dst_stride; } void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, @@ -1051,8 +1217,8 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; const MV mv = mi->mbmi.sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + ? 
average_split_mvs(pd, mi, ref, block) + : mi->mbmi.mv[ref].as_mv; const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x, @@ -1204,24 +1370,38 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, uint8_t tmp_dst[4096]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, - subpel_x, subpel_y, sf, w, h, ref, kernel, + highbd_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys, xd->bd); } else { inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); } #else inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_SUPERTX - // TODO(debargha): highbitdepth versions +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_masked_compound_extend_highbd( + dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); + } else { + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_x, wedge_offset_y, h, w); + } +#else build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); -#else +#endif // CONFIG_VP9_HIGHBITDEPTH +#else // CONFIG_SUPERTX build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); @@ -1229,9 +1409,9 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } else { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, - subpel_x, 
subpel_y, sf, w, h, ref, kernel, - xs, ys, xd->bd); + highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, + xs, ys, xd->bd); } else { inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -1246,8 +1426,9 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); } else { inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -1357,89 +1538,192 @@ void vp9_dec_build_inter_predictors_sby_sub8x8_extend( int orig_dst_stride; int bw = 4 << b_width_log2_lookup[top_bsize]; int bh = 4 << b_height_log2_lookup[top_bsize]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAXTXLEN * MAXTXLEN); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAXTXLEN * MAXTXLEN); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAXTXLEN * MAXTXLEN); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, 2 * MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, 2 * MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, 2 * MAXTXLEN * MAXTXLEN); - orig_dst = xd->plane[0].dst.buf; - orig_dst_stride = xd->plane[0].dst.stride; - dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); + mi_x, mi_y); - xd->plane[0].dst.buf 
= tmp_buf; - xd->plane[0].dst.stride = MAXTXLEN; - switch (partition) { - case PARTITION_HORZ: - dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf); + xd->plane[0].dst.stride = MAXTXLEN; + switch (partition) { + case PARTITION_HORZ: + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - case PARTITION_VERT: - dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + break; + case PARTITION_VERT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - case PARTITION_SPLIT: - dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + break; + case PARTITION_SPLIT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - xd->plane[0].dst.buf = tmp_buf1; - xd->plane[0].dst.stride = MAXTXLEN; - dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf1); + xd->plane[0].dst.stride = MAXTXLEN; + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - xd->plane[0].dst.buf = tmp_buf2; - xd->plane[0].dst.stride = MAXTXLEN; - dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, + mi_x, mi_y); + xd->plane[0].dst.buf = CONVERT_TO_BYTEPTR(tmp_buf2); + xd->plane[0].dst.stride = MAXTXLEN; + dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, #if CONFIG_WEDGE_PARTITION - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif - mi_x, mi_y); - break; - default: - assert(0); + mi_x, mi_y); + 
break; + default: + assert(0); + } + + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + } else { + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex( + xd, + CONVERT_TO_BYTEPTR(tmp_buf1), MAXTXLEN, + CONVERT_TO_BYTEPTR(tmp_buf2), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex( + xd, + orig_dst, orig_dst_stride, + CONVERT_TO_BYTEPTR(tmp_buf1), MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + } + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; + return; } +#endif // CONFIG_VP9_HIGHBITDEPTH - if (partition != PARTITION_SPLIT) { - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - partition); - } else { - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_VERT); - vp9_build_masked_inter_predictor_complex(tmp_buf1, MAXTXLEN, - tmp_buf2, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_VERT); - vp9_build_masked_inter_predictor_complex(orig_dst, orig_dst_stride, - tmp_buf1, MAXTXLEN, - &xd->plane[0], mi_row, mi_col, - mi_row_ori, mi_col_ori, - BLOCK_8X8, top_bsize, - PARTITION_HORZ); + { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAXTXLEN * MAXTXLEN); + 
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, MAXTXLEN * MAXTXLEN); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, MAXTXLEN * MAXTXLEN); + + orig_dst = xd->plane[0].dst.buf; + orig_dst_stride = xd->plane[0].dst.stride; + dec_build_inter_predictors(xd, 0, 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + + xd->plane[0].dst.buf = tmp_buf; + xd->plane[0].dst.stride = MAXTXLEN; + switch (partition) { + case PARTITION_HORZ: + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_VERT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + case PARTITION_SPLIT: + dec_build_inter_predictors(xd, 0, 1, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf1; + xd->plane[0].dst.stride = MAXTXLEN; + dec_build_inter_predictors(xd, 0, 2, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + xd->plane[0].dst.buf = tmp_buf2; + xd->plane[0].dst.stride = MAXTXLEN; + dec_build_inter_predictors(xd, 0, 3, bw, bh, 0, 0, bw, bh, +#if CONFIG_WEDGE_PARTITION + wedge_offset_x, wedge_offset_y, +#endif + mi_x, mi_y); + break; + default: + assert(0); + } + + if (partition != PARTITION_SPLIT) { + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, orig_dst_stride, + tmp_buf, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + partition); + } else { + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, orig_dst_stride, + tmp_buf, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(xd, + tmp_buf1, MAXTXLEN, + tmp_buf2, MAXTXLEN, + 
&xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(xd, + orig_dst, orig_dst_stride, + tmp_buf1, MAXTXLEN, + &xd->plane[0], mi_row, mi_col, + mi_row_ori, mi_col_ori, + BLOCK_8X8, top_bsize, + PARTITION_HORZ); + } + xd->plane[0].dst.buf = orig_dst; + xd->plane[0].dst.stride = orig_dst_stride; } - xd->plane[0].dst.buf = orig_dst; - xd->plane[0].dst.stride = orig_dst_stride; } void vp9_dec_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index d3faa26e0..b7f5e0be9 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -99,6 +99,7 @@ void vp9_build_inter_predictors_sbuv_sub8x8_extend(MACROBLOCKD *xd, int mi_col_ori, BLOCK_SIZE top_bsize); void vp9_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, const struct macroblockd_plane *pd, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index fc07ce434..1b9edc603 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -884,15 +884,16 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, } if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = tmp_buf1 + i * MAXTXLEN * MAXTXLEN; - xd->plane[i].dst.stride = MAXTXLEN; + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; } dec_predict_b_extend(cm, xd, tile, mi_row + hbs, mi_col, mi_row_ori, mi_col_ori, top_bsize); for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], 
dst_stride[i], dst_buf1[i], dst_stride1[i], &xd->plane[i], mi_row, mi_col, @@ -913,15 +914,16 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, } if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = tmp_buf1 + i * MAXTXLEN * MAXTXLEN; - xd->plane[i].dst.stride = MAXTXLEN; + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; } dec_predict_b_extend(cm, xd, tile, mi_row, mi_col + hbs, mi_row_ori, mi_col_ori, top_bsize); for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], &xd->plane[i], mi_row, mi_col, @@ -954,7 +956,8 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, dst_buf3, dst_stride3); for (i = 0; i < MAX_MB_PLANE; i++) { if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { - vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], &xd->plane[i], @@ -963,7 +966,8 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, bsize, top_bsize, PARTITION_VERT); if (mi_row + hbs < cm->mi_rows) { - vp9_build_masked_inter_predictor_complex(dst_buf2[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf2[i], dst_stride2[i], dst_buf3[i], dst_stride3[i], @@ -972,7 +976,8 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, mi_row_ori, mi_col_ori, bsize, top_bsize, PARTITION_VERT); - vp9_build_masked_inter_predictor_complex(dst_buf[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], @@ -983,7 +988,8 @@ static void dec_predict_sb_complex(VP9_COMMON *const 
cm, MACROBLOCKD *const xd, PARTITION_HORZ); } } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { - vp9_build_masked_inter_predictor_complex(dst_buf[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], @@ -1000,6 +1006,204 @@ static void dec_predict_sb_complex(VP9_COMMON *const cm, MACROBLOCKD *const xd, assert(0); } } + +#if CONFIG_VP9_HIGHBITDEPTH +static void dec_predict_sb_complex_highbd( + VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3]) { + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + MB_MODE_INFO *mbmi; + int i, offset = mi_row * cm->mi_stride + mi_col; + + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); + uint8_t *dst_buf1[3] = { + CONVERT_TO_BYTEPTR(tmp_buf1), + CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + uint8_t *dst_buf2[3] = { + CONVERT_TO_BYTEPTR(tmp_buf2), + CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + uint8_t *dst_buf3[3] = { + CONVERT_TO_BYTEPTR(tmp_buf3), + CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + + if 
(mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + xd->mi = cm->mi + offset; + xd->mi[0].src_mi = &xd->mi[0]; + mbmi = &xd->mi[0].mbmi; + partition = partition_lookup[bsl][mbmi->sb_type]; + subsize = get_subsize(bsize, partition); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + top_bsize); + break; + case PARTITION_HORZ: + if (bsize > BLOCK_8X8) { + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, + mi_col_ori, top_bsize); + } else { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; + } + dec_predict_b_extend(cm, xd, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, top_bsize); + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + break; + case PARTITION_VERT: + if (bsize > BLOCK_8X8) { + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col, mi_row_ori, + mi_col_ori, top_bsize); + } else { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; + } + dec_predict_b_extend(cm, xd, tile, mi_row, mi_col + hbs, mi_row_ori, + mi_col_ori, top_bsize); + for (i = 0; i 
< MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + dec_predict_b_sub8x8_extend(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, + top_bsize, partition); + } else { + dec_predict_sb_complex_highbd(cm, xd, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, subsize, + top_bsize, dst_buf, dst_stride); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex_highbd(cm, xd, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, subsize, + top_bsize, dst_buf1, dst_stride1); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + dec_predict_sb_complex_highbd(cm, xd, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, subsize, + top_bsize, dst_buf2, dst_stride2); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex_highbd(cm, xd, tile, + mi_row + hbs, mi_col + hbs, + mi_row_ori, mi_col_ori, subsize, + top_bsize, dst_buf3, dst_stride3); + for (i = 0; i < MAX_MB_PLANE; i++) { + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + if (mi_row + hbs < cm->mi_rows) { + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, 
top_bsize, + PARTITION_HORZ); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + } + break; + default: + assert(0); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_SUPERTX static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, @@ -1239,8 +1443,14 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, dst_buf[i] = xd->plane[i].dst.buf; dst_stride[i] = xd->plane[i].dst.stride; } - dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, mi_row, mi_col, - bsize, bsize, dst_buf, dst_stride); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + dec_predict_sb_complex_highbd(cm, xd, tile, mi_row, mi_col, mi_row, + mi_col, bsize, bsize, dst_buf, dst_stride); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + dec_predict_sb_complex(cm, xd, tile, mi_row, mi_col, mi_row, mi_col, + bsize, bsize, dst_buf, dst_stride); if (!skip) { int eobtotal = 0; diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 1d93c1886..ebf822b66 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -875,6 +875,9 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, int_mv nearestmv[2], nearmv[2]; int inter_mode_ctx, ref, is_compound; +#if CONFIG_SUPERTX + (void) supertx_enabled; +#endif #if CONFIG_COPY_MODE if (mbmi->copy_mode == NOREF) @@ -1112,6 +1115,9 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, int num_candidate = 0; MB_MODE_INFO *inter_ref_list[18] = {NULL}; #endif +#if CONFIG_SUPERTX + (void) supertx_enabled; +#endif mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 706afb26e..84a5dab28 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ 
-1404,7 +1404,8 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { temp_in[j] = output[j + i * 32]; fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) - out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; + out[j + i * 32] = (tran_low_t) + ((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); } } @@ -1435,7 +1436,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { temp_in[j] = output[j + i * 32]; fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) - out[j + i * 32] = temp_out[j]; + out[j + i * 32] = (tran_low_t)temp_out[j]; } } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 2991d5333..3fb49a3ad 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -80,6 +80,15 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE top_bsize, uint8_t *dst_buf[3], int dst_stride[3], PC_TREE *pc_tree); +#if CONFIG_VP9_HIGHBITDEPTH +static void predict_sb_complex_highbd(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree); +#endif // CONFIG_VP9_HIGHBITDEPTH static void update_state_sb_supertx(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -291,7 +300,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, #if CONFIG_SUPERTX static void set_offsets_supertx(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, int mi_col, BLOCK_SIZE bsize) { + int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; @@ -1620,9 +1629,16 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, dst_buf[i] = xd->plane[i].dst.buf; dst_stride[i] = xd->plane[i].dst.stride; } - predict_sb_complex(cpi, tile, mi_row, mi_col, 
mi_row, mi_col, - output_enabled, bsize, bsize, - dst_buf, dst_stride, pc_tree); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + predict_sb_complex_highbd(cpi, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); set_offsets(cpi, tile, mi_row, mi_col, bsize); if (!x->skip) { @@ -5340,8 +5356,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, } if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = tmp_buf1 + i * MAXTXLEN * MAXTXLEN; - xd->plane[i].dst.stride = MAXTXLEN; + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; } predict_b_extend(cpi, tile, mi_row + hbs, mi_col, mi_row_ori, mi_col_ori, output_enabled, @@ -5349,7 +5365,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], &xd->plane[i], mi_row, mi_col, @@ -5370,8 +5387,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, } if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = tmp_buf1 + i * MAXTXLEN * MAXTXLEN; - xd->plane[i].dst.stride = MAXTXLEN; + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; } predict_b_extend(cpi, tile, mi_row, mi_col + hbs, mi_row_ori, mi_col_ori, output_enabled, @@ -5379,7 +5396,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, for (i = 0; i < MAX_MB_PLANE; 
i++) { xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - vp9_build_masked_inter_predictor_complex(dst_buf[i], dst_stride[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], &xd->plane[i], mi_row, mi_col, @@ -5416,7 +5434,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, pc_tree->split[3]); for (i = 0; i < MAX_MB_PLANE; i++) { if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { - vp9_build_masked_inter_predictor_complex(dst_buf[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], @@ -5426,7 +5445,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, bsize, top_bsize, PARTITION_VERT); if (mi_row + hbs < cm->mi_rows) { - vp9_build_masked_inter_predictor_complex(dst_buf2[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf2[i], dst_stride2[i], dst_buf3[i], dst_stride3[i], @@ -5435,7 +5455,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, mi_row_ori, mi_col_ori, bsize, top_bsize, PARTITION_VERT); - vp9_build_masked_inter_predictor_complex(dst_buf[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], @@ -5446,7 +5467,8 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, PARTITION_HORZ); } } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { - vp9_build_masked_inter_predictor_complex(dst_buf[i], + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], @@ -5467,6 +5489,223 @@ static void predict_sb_complex(VP9_COMP *cpi, const TileInfo *const tile, update_partition_context(xd, mi_row, mi_col, subsize, bsize); } +#if CONFIG_VP9_HIGHBITDEPTH /* predict_sb_complex_highbd(): variant of predict_sb_complex() that callers select when xd->cur_buf has YV12_FLAG_HIGHBITDEPTH set; it walks the partition tree recursively and its scratch buffers carry an extra sizeof(uint16_t) factor. */ +static void predict_sb_complex_highbd(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, /* block origin; compared against cm->mi_rows and cm->mi_cols */ + int mi_row_ori, int mi_col_ori, /* passed unchanged to every callee; top-level callers pass mi_row, mi_col */ + int output_enabled, /* gates the cm->counts.partition update and is forwarded to the predict helpers */
BLOCK_SIZE bsize, /* size of the block handled by this call */ + BLOCK_SIZE top_bsize, /* passed unchanged to every callee; top-level callers pass bsize */ + uint8_t *dst_buf[3], int dst_stride[3], /* per-plane destination buffers and strides */ + PC_TREE *pc_tree) { /* pc_tree supplies partitioning and the split[] children used for recursion */ + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; /* hbs is the offset added to mi_row or mi_col to address the second half of the block */ + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf1, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); /* scratch 1: room for MAX_MB_PLANE planes, byte count scaled by sizeof(uint16_t) */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf2, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); /* scratch 2, same layout */ + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf3, + MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * sizeof(uint16_t)); /* scratch 3, same layout */ + uint8_t *dst_buf1[3] = { /* per-plane pointers into tmp_buf1, each wrapped by CONVERT_TO_BYTEPTR */ + CONVERT_TO_BYTEPTR(tmp_buf1), + CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + uint8_t *dst_buf2[3] = { /* per-plane pointers into tmp_buf2 */ + CONVERT_TO_BYTEPTR(tmp_buf2), + CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + uint8_t *dst_buf3[3] = { /* per-plane pointers into tmp_buf3 */ + CONVERT_TO_BYTEPTR(tmp_buf3), + CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * sizeof(uint16_t)), + CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * sizeof(uint16_t))}; + + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; /* NOTE(review): presumably counted in samples rather than bytes; confirm against the consumers */ + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; /* block origin is outside the frame: nothing to do */ + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; /* not counted for 4x4 blocks or when bsize is not below top_bsize */ + + for (i = 0; i < MAX_MB_PLANE; i++) { /* start with the caller's destination on every plane */ + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i];
+ } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); /* a NONE leaf is expected only below top_bsize */ + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, bsize, top_bsize); + break; + case PARTITION_HORZ: /* first half goes to the current destination, second half (row offset by hbs) to scratch 1, then both are handed to vp9_build_masked_inter_predictor_complex */ + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, PARTITION_HORZ); + } + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { /* second half only when it starts inside the frame; 8x8 blocks take only the sub8x8 path above */ + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; + } + predict_b_extend(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize); + for (i = 0; i < MAX_MB_PLANE; i++) { /* restore the caller's destination, then combine it with scratch 1 */ + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + break; + case PARTITION_VERT: /* same scheme as PARTITION_HORZ, with the second half offset by hbs in mi_col */ + if (bsize > BLOCK_8X8) { + predict_b_extend(cpi, tile, mi_row, mi_col, mi_row_ori, mi_col_ori, + output_enabled, subsize, top_bsize); + } else { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, PARTITION_VERT); + } + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { /* second half only when it starts inside the frame */ + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf1[i]; + xd->plane[i].dst.stride = dst_stride1[i]; + } + predict_b_extend(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize); + for (i = 0; i < MAX_MB_PLANE; i++) { /* restore the caller's destination, then combine it with scratch 1 */ + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp9_build_masked_inter_predictor_complex( + xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], dst_stride1[i], + &xd->plane[i],
+ mi_row, mi_col, + mi_row_ori, mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_sub8x8_extend(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + bsize, top_bsize, PARTITION_SPLIT); + } else { /* recurse: child 0 writes dst_buf; children 1 to 3 write scratch 1 to 3 when their origin is inside the frame */ + predict_sb_complex_highbd(cpi, tile, mi_row, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex_highbd(cpi, tile, mi_row, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex_highbd(cpi, tile, mi_row + hbs, mi_col, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex_highbd(cpi, tile, mi_row + hbs, mi_col + hbs, + mi_row_ori, mi_col_ori, output_enabled, + subsize, top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + for (i = 0; i < MAX_MB_PLANE; i++) { /* per plane, combine calls run in this order: (dst, scratch 1) as VERT, (scratch 2, scratch 3) as VERT, then (dst, scratch 2) as HORZ */ + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, + mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + if (mi_row + hbs < cm->mi_rows) { /* lower row starts inside the frame */ + vp9_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, + mi_col_ori, + bsize, top_bsize, + PARTITION_VERT); + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, + mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
{ /* right-hand children start outside the frame: only (dst, scratch 2) is combined, as HORZ */ + vp9_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_ori, + mi_col_ori, + bsize, top_bsize, + PARTITION_HORZ); + } + } + } + break; + default: /* no other partition value is expected */ + assert(0); + } + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); /* runs for non-split partitions and for the 8x8 split, never when bsize is not below top_bsize */ +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist, @@ -5497,8 +5736,18 @@ static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, dst_buf[plane] = xd->plane[plane].dst.buf; dst_stride[plane] = xd->plane[plane].dst.stride; } - predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, - 0, bsize, bsize, dst_buf, dst_stride, pc_tree); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + predict_sb_complex_highbd(cpi, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + } else { + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + } +#else + predict_sb_complex(cpi, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); +#endif // CONFIG_VP9_HIGHBITDEPTH set_offsets(cpi, tile, mi_row, mi_col, bsize); #if CONFIG_EXT_TX