diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index dc6625760..2736417b8 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -432,8 +432,10 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_MB_PLANE][CODING_UNIT_SIZE * CODING_UNIT_SIZE]); #if CONFIG_PALETTE - DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); - DECLARE_ALIGNED(16, uint8_t, palette_map_buffer[64 * 64]); + DECLARE_ALIGNED(16, uint8_t, color_index_map[2][CODING_UNIT_SIZE * + CODING_UNIT_SIZE]); + DECLARE_ALIGNED(16, uint8_t, palette_map_buffer[CODING_UNIT_SIZE * + CODING_UNIT_SIZE]); #endif // CONFIG_PALETTE ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index df15fbcda..e19b55413 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -16,7 +16,10 @@ #if CONFIG_WEDGE_PARTITION static const vp9_prob default_wedge_interinter_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_CODING_UNIT_SIZE + 192, 192, 192 +#endif }; #endif // CONFIG_WEDGE_PARTITION @@ -42,11 +45,17 @@ const vp9_tree_index vp9_sr_usfilter_tree[TREE_SIZE(SR_USFILTER_NUM)] = { #if CONFIG_INTERINTRA static const vp9_prob default_interintra_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_CODING_UNIT_SIZE + 192, 192, 192 +#endif }; #if CONFIG_WEDGE_PARTITION static const vp9_prob default_wedge_interintra_prob[BLOCK_SIZES] = { - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +#if CONFIG_EXT_CODING_UNIT_SIZE + 192, 192, 192 +#endif }; #endif // CONFIG_WEDGE_PARTITION #endif // CONFIG_INTERINTRA diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 347ea8cf0..4a81a405a 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -579,11 +579,12 @@ static void build_masked_compound(uint8_t *dst, int dst_stride, int wedge_index, BLOCK_SIZE sb_type, int h, int w) { int i, j; - uint8_t mask[4096]; - vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, 64); + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; + vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, + CODING_UNIT_SIZE); for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { - int m = mask[i * 64 + j]; + int m = mask[i * CODING_UNIT_SIZE + j]; dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + dst2[i * dst2_stride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + @@ -598,13 +599,14 @@ static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride, int wedge_index, BLOCK_SIZE sb_type, int h, int w) { int i, j; - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); - vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, 64); + vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, + CODING_UNIT_SIZE); for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { - int m = mask[i * 64 + j]; + int m = mask[i * CODING_UNIT_SIZE + j]; dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + dst2[i * dst2_stride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + @@ -640,12 +642,13 @@ static void build_masked_compound_extend(uint8_t *dst, int dst_stride, int wedge_offset_x, int 
wedge_offset_y, int h, int w) { int i, j; - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; generate_masked_weight_extend(wedge_index, plane, sb_type, h, w, - wedge_offset_x, wedge_offset_y, mask, 64); + wedge_offset_x, wedge_offset_y, mask, + CODING_UNIT_SIZE); for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { - int m = mask[i * 64 + j]; + int m = mask[i * CODING_UNIT_SIZE + j]; dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + dst2[i * dst2_stride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + @@ -662,14 +665,15 @@ static void build_masked_compound_extend_highbd( int wedge_offset_x, int wedge_offset_y, int h, int w) { int i, j; - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); generate_masked_weight_extend(wedge_index, plane, sb_type, h, w, - wedge_offset_x, wedge_offset_y, mask, 64); + wedge_offset_x, wedge_offset_y, mask, + CODING_UNIT_SIZE); for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { - int m = mask[i * 64 + j]; + int m = mask[i * CODING_UNIT_SIZE + j]; dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + dst2[i * dst2_stride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + @@ -765,33 +769,33 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { #if CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst_[8192]; + uint8_t tmp_dst_[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE]; uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; #else - uint8_t tmp_dst[4096]; + uint8_t tmp_dst[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; #endif #if CONFIG_GLOBAL_MOTION if (is_global) { vp9_warp_plane(gm[ref], pre_buf->buf0, pre_buf->width, pre_buf->height, pre_buf->stride, tmp_dst, (mi_x >> pd->subsampling_x) + x, - (mi_y >> pd->subsampling_y) + y, w, h, 64, + (mi_y >> pd->subsampling_y) + y, w, h, CODING_UNIT_SIZE, pd->subsampling_x, pd->subsampling_y, xs, ys); } else { #endif // CONFIG_GLOBAL_MOTION #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(pre, pre_buf->stride, tmp_dst, 64, - subpel_x, subpel_y, sf, w, h, 0, kernel, - xs, ys, xd->bd); + highbd_inter_predictor(pre, pre_buf->stride, tmp_dst, + CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, + 0, kernel, xs, ys, xd->bd); } else { - inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + inter_predictor(pre, pre_buf->stride, tmp_dst, CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); } #else - inter_predictor(pre, pre_buf->stride, tmp_dst, 64, + inter_predictor(pre, pre_buf->stride, tmp_dst, CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_GLOBAL_MOTION @@ -801,19 +805,20 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_masked_compound_extend_highbd( - dst, dst_buf->stride, tmp_dst, 64, plane, + dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } else { build_masked_compound_extend( - dst, dst_buf->stride, tmp_dst, 64, plane, + dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } #else - build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 
64, plane, + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); @@ -821,12 +826,13 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #else // CONFIG_SUPERTX #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); else #endif // CONFIG_VP9_HIGHBITDEPTH - build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound(dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX @@ -1514,33 +1520,33 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { #if CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst_[8192]; + uint8_t tmp_dst_[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE]; uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; #else - uint8_t tmp_dst[4096]; + uint8_t tmp_dst[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; #endif #if CONFIG_GLOBAL_MOTION if (is_global) { vp9_warp_plane(gm[ref], pre_buf->buf0, pre_buf->width, pre_buf->height, pre_buf->stride, tmp_dst, (mi_x >> pd->subsampling_x) + x, - (mi_y >> pd->subsampling_y) + y, w, h, 64, + (mi_y >> pd->subsampling_y) + y, w, h, CODING_UNIT_SIZE, pd->subsampling_x, pd->subsampling_y, xs, ys); } else { #endif // CONFIG_GLOBAL_MOTION #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + highbd_inter_predictor(buf_ptr, buf_stride, tmp_dst, CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys, xd->bd); } else { - inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + inter_predictor(buf_ptr, buf_stride, tmp_dst, CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); } #else - inter_predictor(buf_ptr, buf_stride, tmp_dst, 64, + inter_predictor(buf_ptr, buf_stride, tmp_dst, CODING_UNIT_SIZE, subpel_x, subpel_y, sf, w, h, 0, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_GLOBAL_MOTION @@ -1550,18 +1556,20 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_masked_compound_extend_highbd( - dst, dst_buf->stride, tmp_dst, 64, plane, + dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } else { - build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } #else - build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); @@ -1569,16 +1577,17 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #else // CONFIG_SUPERTX #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - 
build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); } else { - build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound(dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); } #else - build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound(dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); #endif // CONFIG_VP9_HIGHBITDEPTH @@ -2060,19 +2069,19 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, if (ref && get_wedge_bits(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) { #if CONFIG_VP9_HIGHBITDEPTH - uint8_t tmp_dst_[8192]; + uint8_t tmp_dst_[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE]; uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; #else - uint8_t tmp_dst[4096]; + uint8_t tmp_dst[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_GLOBAL_MOTION if (is_global) { vp9_warp_plane(gm[ref], pre_buf->buf0, pre_buf->width, pre_buf->height, pre_buf->stride, tmp_dst, (mi_x >> pd->subsampling_x) + x, - (mi_y >> pd->subsampling_y) + y, w, h, 64, + (mi_y >> pd->subsampling_y) + y, w, h, CODING_UNIT_SIZE, pd->subsampling_x, pd->subsampling_y, xs, ys); } else { #endif // CONFIG_GLOBAL_MOTION @@ -2080,18 +2089,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int k; for (k = 0; k < h; ++k) - vpx_memcpy(tmp_dst_ + 128 * k, ext_dst1 + ext_dst_stride1 * 2 * k, - w * 2); + vpx_memcpy(tmp_dst_ + 2 * CODING_UNIT_SIZE * k, ext_dst1 + + ext_dst_stride1 * 2 * k, w * 2); } else { int k; for (k = 0; k < h; ++k) - vpx_memcpy(tmp_dst_ + 64 * k, ext_dst1 + ext_dst_stride1 * k, w); + vpx_memcpy(tmp_dst_ + CODING_UNIT_SIZE * k, ext_dst1 + + ext_dst_stride1 * k, w); } #else { int k; for (k = 0; k < h; ++k) - vpx_memcpy(tmp_dst + 64 * k, ext_dst1 + ext_dst_stride1 * k, w); + vpx_memcpy(tmp_dst + CODING_UNIT_SIZE * k, ext_dst1 + + ext_dst_stride1 * k, w); } #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_GLOBAL_MOTION @@ -2101,19 +2112,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_masked_compound_extend_highbd( - dst, dst_buf->stride, tmp_dst, 64, plane, + dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } else { build_masked_compound_extend( - dst, dst_buf->stride, tmp_dst, 64, plane, + dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); } #else - build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, 64, plane, + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, + CODING_UNIT_SIZE, plane, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); @@ -2121,12 +2133,13 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, #else // CONFIG_SUPERTX #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound_highbd(dst, dst_buf->stride, 
tmp_dst, + CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); else #endif // CONFIG_VP9_HIGHBITDEPTH - build_masked_compound(dst, dst_buf->stride, tmp_dst, 64, + build_masked_compound(dst, dst_buf->stride, tmp_dst, CODING_UNIT_SIZE, mi->mbmi.interinter_wedge_index, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 638ff9664..d299c17d3 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -1443,6 +1443,7 @@ static INLINE TX_SIZE blocklen_to_txsize(int bs) { return TX_32X32; break; case 64: + case 128: default: #if CONFIG_TX64X64 return TX_64X64; @@ -1768,7 +1769,7 @@ static void combine_interintra(PREDICTION_MODE mode, #if CONFIG_WEDGE_PARTITION if (use_wedge_interintra && get_wedge_bits(bsize)) { - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; vp9_generate_masked_weight_interintra(wedge_index, bsize, bh, bw, mask, bw); for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { @@ -1917,7 +1918,7 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, #if CONFIG_WEDGE_PARTITION if (use_wedge_interintra && get_wedge_bits(bsize)) { - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; vp9_generate_masked_weight_interintra(wedge_index, bsize, bh, bw, mask, bw); for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { @@ -2418,7 +2419,8 @@ void vp9_build_interintra_predictors_sby(MACROBLOCKD *xd, int bh = 4 << b_height_log2_lookup[bsize]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, intrapredictor, 4096); + DECLARE_ALIGNED_ARRAY(16, uint16_t, intrapredictor, CODING_UNIT_SIZE * + CODING_UNIT_SIZE); build_intra_predictors_for_interintra_highbd( xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, CONVERT_TO_BYTEPTR(intrapredictor), bw, @@ -2466,8 +2468,10 @@ void vp9_build_interintra_predictors_sbuv(MACROBLOCKD *xd, int bh = 4 << b_height_log2_lookup[uvbsize]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED_ARRAY(16, uint16_t, uintrapredictor, 4096); - DECLARE_ALIGNED_ARRAY(16, uint16_t, vintrapredictor, 4096); + DECLARE_ALIGNED_ARRAY(16, uint16_t, uintrapredictor, CODING_UNIT_SIZE * + CODING_UNIT_SIZE); + DECLARE_ALIGNED_ARRAY(16, uint16_t, vintrapredictor, CODING_UNIT_SIZE * + CODING_UNIT_SIZE); build_intra_predictors_for_interintra_highbd( xd, xd->plane[1].dst.buf, xd->plane[1].dst.stride, CONVERT_TO_BYTEPTR(uintrapredictor), bw, diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index cdb24ed44..7caa9295f 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1563,6 +1563,35 @@ if (vpx_config("CONFIG_WEDGE_PARTITION") eq "yes") { add_proto qw/unsigned int vp9_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; specialize qw/vp9_masked_sad4x4/; + + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance128x128/; + + add_proto qw/unsigned int vp9_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize 
qw/vp9_masked_variance128x64/; + + add_proto qw/unsigned int vp9_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_variance64x128/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_masked_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad128x128/; + + add_proto qw/unsigned int vp9_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad128x64/; + + add_proto qw/unsigned int vp9_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_masked_sad64x128/; + } } # ENCODEMB INVOKE @@ -2797,6 +2826,71 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vp9_highbd_masked_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; specialize qw/vp9_highbd_masked_sad4x4/; + + if (vpx_config("CONFIG_EXT_CODING_UNIT_SIZE") eq "yes") { + add_proto qw/unsigned int vp9_highbd_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_variance64x128/, 
"const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_masked_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_10_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_10_masked_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance128x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance128x128/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance128x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance128x64/; + + add_proto qw/unsigned int vp9_highbd_12_masked_sub_pixel_variance64x128/, "const uint8_t *src_ptr, int source_stride, int xoffset, int 
yoffset, const uint8_t *ref_ptr, int ref_stride, const uint8_t *m, int m_stride, unsigned int *sse"; + specialize qw/vp9_highbd_12_masked_sub_pixel_variance64x128/; + + add_proto qw/unsigned int vp9_highbd_masked_sad128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad128x128/; + + add_proto qw/unsigned int vp9_highbd_masked_sad128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad128x64/; + + add_proto qw/unsigned int vp9_highbd_masked_sad64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride"; + specialize qw/vp9_highbd_masked_sad64x128/; + } } # ENCODEMB INVOKE diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index d982c41c1..b5d215f2b 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -2770,7 +2770,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, 0, #endif mi_row, mi_col, - &tile_data->bit_reader, BLOCK_64X64); + &tile_data->bit_reader, BLOCK_LARGEST); } pbi->mb.corrupted |= tile_data->xd.corrupted; } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index db10fb6d4..fdea439a6 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -626,7 +626,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, if (!is_inter && bsize >= BLOCK_8X8 && cm->allow_palette_mode) { int n, i, j, k, rows, cols, palette_ctx, color_ctx; int color_new_idx = -1, color_order[PALETTE_MAX_SIZE]; - uint8_t buffer[4096]; + uint8_t buffer[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; const MODE_INFO *above_mi = xd->up_available ? xd->mi[-xd->mi_stride].src_mi : NULL; const MODE_INFO *left_mi = xd->left_available ? 
@@ -1039,7 +1039,7 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, ) { int n, m1, m2, i, j, k, rows, cols, palette_ctx, color_ctx; int color_new_idx = -1, color_order[PALETTE_MAX_SIZE]; - uint8_t buffer[4096]; + uint8_t buffer[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; palette_ctx = 0; if (above_mi) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 6f3e9ed31..3456112cf 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -157,8 +157,10 @@ struct macroblock { int eob, int bd); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_PALETTE - DECLARE_ALIGNED(16, double, kmeans_data_buffer[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, int, kmeans_indices_buffer[64 * 64]); + DECLARE_ALIGNED(16, double, kmeans_data_buffer[MAX_MB_PLANE * + CODING_UNIT_SIZE * CODING_UNIT_SIZE]); + DECLARE_ALIGNED(16, int, kmeans_indices_buffer[CODING_UNIT_SIZE * + CODING_UNIT_SIZE]); #endif // CONFIG_PALETTE }; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9a22501f2..90891ffaa 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2995,7 +2995,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, set_offsets(cpi, tile, mi_row, mi_col, bsize); #if CONFIG_PALETTE - if (bsize == BLOCK_64X64) { + if (bsize == BLOCK_LARGEST) { c = &pc_tree->current; c->palette_buf_size = cm->current_palette_size; vpx_memcpy(c->palette_colors_buf, cm->current_palette_colors, diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 3210b3213..4370b72e0 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -876,6 +876,11 @@ static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ m, m_stride) >> 4; \ } +#if CONFIG_EXT_CODING_UNIT_SIZE +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad128x128) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad128x64) +MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x128) +#endif MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x64) MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad64x32) MAKE_MBFP_SAD_WRAPPER(vp9_highbd_masked_sad32x64) @@ -1059,6 +1064,20 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { #endif #if CONFIG_WEDGE_PARTITION +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_MBFP(BLOCK_128X128, + vp9_highbd_masked_sad128x128_bits8, + vp9_highbd_masked_variance128x128, + vp9_highbd_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vp9_highbd_masked_sad128x64_bits8, + vp9_highbd_masked_variance128x64, + vp9_highbd_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vp9_highbd_masked_sad64x128_bits8, + vp9_highbd_masked_variance64x128, + vp9_highbd_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_CODING_UNIT_SIZE HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits8, vp9_highbd_masked_variance64x64, @@ -1278,6 +1297,20 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { #endif #if CONFIG_WEDGE_PARTITION +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_MBFP(BLOCK_128X128, + vp9_highbd_masked_sad128x128_bits10, + vp9_highbd_10_masked_variance128x128, + vp9_highbd_10_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vp9_highbd_masked_sad128x64_bits10, + vp9_highbd_10_masked_variance128x64, + vp9_highbd_10_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vp9_highbd_masked_sad64x128_bits10, + vp9_highbd_10_masked_variance64x128, + vp9_highbd_10_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_CODING_UNIT_SIZE HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits10, vp9_highbd_10_masked_variance64x64, @@ 
-1497,6 +1530,20 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { #endif #if CONFIG_WEDGE_PARTITION +#if CONFIG_EXT_CODING_UNIT_SIZE + HIGHBD_MBFP(BLOCK_128X128, + vp9_highbd_masked_sad128x128_bits12, + vp9_highbd_12_masked_variance128x128, + vp9_highbd_12_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vp9_highbd_masked_sad128x64_bits12, + vp9_highbd_12_masked_variance128x64, + vp9_highbd_12_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vp9_highbd_masked_sad64x128_bits12, + vp9_highbd_12_masked_variance64x128, + vp9_highbd_12_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_CODING_UNIT_SIZE HIGHBD_MBFP(BLOCK_64X64, vp9_highbd_masked_sad64x64_bits12, vp9_highbd_12_masked_variance64x64, @@ -1995,6 +2042,14 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->fn_ptr[BT].mvf = MVF; \ cpi->fn_ptr[BT].msvf = MSVF; +#if CONFIG_EXT_CODING_UNIT_SIZE + MBFP(BLOCK_128X128, vp9_masked_sad128x128, vp9_masked_variance128x128, + vp9_masked_sub_pixel_variance128x128) + MBFP(BLOCK_128X64, vp9_masked_sad128x64, vp9_masked_variance128x64, + vp9_masked_sub_pixel_variance128x64) + MBFP(BLOCK_64X128, vp9_masked_sad64x128, vp9_masked_variance64x128, + vp9_masked_sub_pixel_variance64x128) +#endif MBFP(BLOCK_64X64, vp9_masked_sad64x64, vp9_masked_variance64x64, vp9_masked_sub_pixel_variance64x64) MBFP(BLOCK_64X32, vp9_masked_sad64x32, vp9_masked_variance64x32, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c4cd87756..46e5865b7 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2481,9 +2481,11 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif // CONFIG_VP9_HIGHBITDEPTH vpx_memset(x->kmeans_data_buffer, 0, - sizeof(x->kmeans_data_buffer[0] * 4096)); + sizeof(x->kmeans_data_buffer[0]) * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); vpx_memset(x->kmeans_indices_buffer, 0, - sizeof(x->kmeans_indices_buffer[0] * 4096)); + sizeof(x->kmeans_indices_buffer[0]) * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); mic->mbmi.palette_enabled[0] = 1; vp9_cost_tokens(palette_size_cost, cpi->common.fc.palette_size_prob[bsize - BLOCK_8X8], @@ -5114,8 +5116,8 @@ static void do_masked_motion_search_indexed(VP9_COMP *cpi, MACROBLOCK *x, int w = (4 << b_width_log2_lookup[sb_type]); int h = (4 << b_height_log2_lookup[sb_type]); int i, j; - uint8_t mask[4096]; - int mask_stride = 64; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; + int mask_stride = CODING_UNIT_SIZE; vp9_generate_masked_weight(wedge_index, sb_type, h, w, mask, mask_stride); // vp9_generate_hard_mask(wedge_index, sb_type, h, w, mask, mask_stride); @@ -5628,10 +5630,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (have_newmv_in_inter_mode(this_mode)) { int_mv tmp_mv[2]; int rate_mvs[2], tmp_rate_mv = 0; - uint8_t pred0[8192 * 3], pred1[8192 * 3]; - uint8_t *preds0[3] = {pred0, pred0 + 8192, pred0 + 16384}; - uint8_t *preds1[3] = {pred1, pred1 + 8192, pred1 + 16384}; - int strides[3] = {64, 64, 64}; + uint8_t pred0[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE * 3]; + uint8_t pred1[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE * 3]; + uint8_t *preds0[3] = {pred0, + pred0 + 2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE, + pred0 + 4 * CODING_UNIT_SIZE * CODING_UNIT_SIZE}; + uint8_t *preds1[3] = {pred1, + pred1 + 2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE, + pred1 + 4 * CODING_UNIT_SIZE * CODING_UNIT_SIZE}; + int strides[3] = {CODING_UNIT_SIZE, CODING_UNIT_SIZE, CODING_UNIT_SIZE}; vp9_build_inter_predictors_for_planes_single_buf( xd, bsize, mi_row, 
mi_col, 0, preds0, strides); vp9_build_inter_predictors_for_planes_single_buf( @@ -5702,10 +5709,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = cur_mv[1].as_int; } } else { - uint8_t pred0[8192 * 3], pred1[8192 * 3]; - uint8_t *preds0[3] = {pred0, pred0 + 8192, pred0 + 16384}; - uint8_t *preds1[3] = {pred1, pred1 + 8192, pred1 + 16384}; - int strides[3] = {64, 64, 64}; + uint8_t pred0[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE * 3]; + uint8_t pred1[2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE * 3]; + uint8_t *preds0[3] = {pred0, + pred0 + 2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE, + pred0 + 4 * CODING_UNIT_SIZE * CODING_UNIT_SIZE}; + uint8_t *preds1[3] = {pred1, + pred1 + 2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE, + pred1 + 4 * CODING_UNIT_SIZE * CODING_UNIT_SIZE}; + int strides[3] = {CODING_UNIT_SIZE, CODING_UNIT_SIZE, CODING_UNIT_SIZE}; vp9_build_inter_predictors_for_planes_single_buf( xd, bsize, mi_row, mi_col, 0, preds0, strides); vp9_build_inter_predictors_for_planes_single_buf( @@ -5761,7 +5773,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #ifdef WEDGE_INTERINTRA_REFINE_SEARCH int bw = 4 << b_width_log2_lookup[mbmi->sb_type], bh = 4 << b_height_log2_lookup[mbmi->sb_type]; - uint8_t mask[4096]; + uint8_t mask[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; int_mv tmp_mv; int tmp_rate_mv = 0; #endif // WEDGE_INTERINTRA_REFINE_SEARCH @@ -5769,7 +5781,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[1] = NONE; for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; - xd->plane[j].dst.stride = 64; + xd->plane[j].dst.stride = CODING_UNIT_SIZE; } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -5781,8 +5793,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interintra_uv_mode = interintra_mode; rmode = cpi->mbmode_cost[mbmi->interintra_mode]; vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, 64, 64, 64, - bsize); + tmp_buf + 2 * tmp_buf_sz, + CODING_UNIT_SIZE, CODING_UNIT_SIZE, + CODING_UNIT_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_sum, dist_sum); @@ -5799,8 +5812,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (wedge_bits) { mbmi->use_wedge_interintra = 0; vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, 64, 64, 64, - bsize); + tmp_buf + 2 * tmp_buf_sz, + CODING_UNIT_SIZE, CODING_UNIT_SIZE, + CODING_UNIT_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); rwedge = vp9_cost_bit(cm->fc.wedge_interintra_prob[bsize], 0); rd = RDCOST(x->rdmult, x->rddiv, @@ -5815,8 +5829,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interintra_wedge_index = wedge_index; mbmi->interintra_uv_wedge_index = wedge_index; vp9_build_interintra_predictors(xd, tmp_buf, tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, 64, 64, 64, - bsize); + tmp_buf + 2 * tmp_buf_sz, + CODING_UNIT_SIZE, CODING_UNIT_SIZE, + CODING_UNIT_SIZE, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, NULL, NULL); rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv_tmp + rwedge + rate_sum, dist_sum); @@ -6186,7 +6201,8 @@ static void rd_pick_palette_444(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, const uint8_t *src_y = x->plane[0].src.buf; const uint8_t *src_u = 
x->plane[1].src.buf; const uint8_t *src_v = x->plane[2].src.buf; - uint8_t palette_color_map_copy[4096], best_palette_color_map[4096]; + uint8_t palette_color_map_copy[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; + uint8_t best_palette_color_map[CODING_UNIT_SIZE * CODING_UNIT_SIZE]; int rows = 4 * num_4x4_blocks_high_lookup[bsize]; int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; int src_stride_y = x->plane[0].src.stride; @@ -6255,9 +6271,11 @@ static void rd_pick_palette_444(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, vpx_memcpy(palette_color_map_copy, xd->plane[0].color_index_map, rows * cols * sizeof(xd->plane[0].color_index_map[0])); vpx_memset(x->kmeans_data_buffer, 0, - sizeof(x->kmeans_data_buffer[0]) * 3 * 4096); + sizeof(x->kmeans_data_buffer[0]) * 3 * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); vpx_memset(xd->palette_map_buffer, 0, - sizeof(xd->palette_map_buffer[0]) * 4096); + sizeof(xd->palette_map_buffer[0]) * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); vpx_memset(centroids, 0, sizeof(centroids[0]) * 3 * PALETTE_MAX_SIZE); vp9_cost_tokens(palette_size_cost, cpi->common.fc.palette_size_prob[bsize - BLOCK_8X8], @@ -6738,11 +6756,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH uint16_t best_palette[PALETTE_MAX_SIZE]; uint16_t palette_colors_uv[TX_SIZES][2 * PALETTE_MAX_SIZE]; - uint16_t palette_color_map_uv[TX_SIZES][4096]; + uint16_t palette_color_map_uv[TX_SIZES][CODING_UNIT_SIZE * CODING_UNIT_SIZE]; #else uint8_t best_palette[PALETTE_MAX_SIZE]; uint8_t palette_colors_uv[TX_SIZES][2 * PALETTE_MAX_SIZE]; - uint8_t palette_color_map_uv[TX_SIZES][4096]; + uint8_t palette_color_map_uv[TX_SIZES][CODING_UNIT_SIZE * CODING_UNIT_SIZE]; #endif // CONFIG_VP9_HIGHBITDEPTH const MODE_INFO *above_mi = xd->up_available ? 
xd->mi[-xd->mi_stride].src_mi : NULL; @@ -8056,9 +8074,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } #endif vpx_memset(x->kmeans_data_buffer, 0, - sizeof(x->kmeans_data_buffer[0] * 4096)); + sizeof(x->kmeans_data_buffer[0]) * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); vpx_memset(x->kmeans_indices_buffer, 0, - sizeof(x->kmeans_indices_buffer[0] * 4096)); + sizeof(x->kmeans_indices_buffer[0]) * CODING_UNIT_SIZE * + CODING_UNIT_SIZE); mbmi->palette_enabled[0] = 1; vp9_cost_tokens(palette_size_cost, cpi->common.fc.palette_size_prob[bsize - BLOCK_8X8], diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 9081dd765..12c93d7bf 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -336,6 +336,11 @@ unsigned int vp9_masked_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, m, n); \ } +#if CONFIG_EXT_CODING_UNIT_SIZE +MASKSADMxN(128, 128) +MASKSADMxN(128, 64) +MASKSADMxN(64, 128) +#endif MASKSADMxN(64, 64) MASKSADMxN(64, 32) MASKSADMxN(32, 64) @@ -373,7 +378,7 @@ static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride, return sad; } -#define highbd_MASKSADMxN(m, n) \ +#define HIGHBD_MASKSADMXN(m, n) \ unsigned int vp9_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \ int src_stride, \ const uint8_t *ref, \ @@ -384,18 +389,23 @@ unsigned int vp9_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \ msk, msk_stride, m, n); \ } -highbd_MASKSADMxN(64, 64) -highbd_MASKSADMxN(64, 32) -highbd_MASKSADMxN(32, 64) -highbd_MASKSADMxN(32, 32) -highbd_MASKSADMxN(32, 16) -highbd_MASKSADMxN(16, 32) -highbd_MASKSADMxN(16, 16) -highbd_MASKSADMxN(16, 8) -highbd_MASKSADMxN(8, 16) -highbd_MASKSADMxN(8, 8) -highbd_MASKSADMxN(8, 4) -highbd_MASKSADMxN(4, 8) -highbd_MASKSADMxN(4, 4) +#if CONFIG_EXT_CODING_UNIT_SIZE +HIGHBD_MASKSADMXN(128, 128) +HIGHBD_MASKSADMXN(128, 64) +HIGHBD_MASKSADMXN(64, 128) +#endif +HIGHBD_MASKSADMXN(64, 64) +HIGHBD_MASKSADMXN(64, 32) +HIGHBD_MASKSADMXN(32, 64) +HIGHBD_MASKSADMXN(32, 32) +HIGHBD_MASKSADMXN(32, 16) +HIGHBD_MASKSADMXN(16, 32) +HIGHBD_MASKSADMXN(16, 16) +HIGHBD_MASKSADMXN(16, 8) +HIGHBD_MASKSADMXN(8, 16) +HIGHBD_MASKSADMXN(8, 8) +HIGHBD_MASKSADMXN(8, 4) +HIGHBD_MASKSADMXN(4, 8) +HIGHBD_MASKSADMXN(4, 4) #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_WEDGE_PARTITION diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index ad0cc36ee..a89e02859 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -772,6 +772,17 @@ MASK_SUBPIX_VAR(64, 32) MASK_VAR(64, 64) MASK_SUBPIX_VAR(64, 64) +#if CONFIG_EXT_CODING_UNIT_SIZE +MASK_VAR(64, 128) +MASK_SUBPIX_VAR(64, 128) + +MASK_VAR(128, 64) +MASK_SUBPIX_VAR(128, 64) + +MASK_VAR(128, 128) +MASK_SUBPIX_VAR(128, 128) +#endif // CONFIG_EXT_CODING_UNIT_SIZE + #if CONFIG_VP9_HIGHBITDEPTH void highbd_masked_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, @@ -971,5 +982,15 @@ HIGHBD_MASK_SUBPIX_VAR(64, 32) HIGHBD_MASK_VAR(64, 64) HIGHBD_MASK_SUBPIX_VAR(64, 64) +#if CONFIG_EXT_CODING_UNIT_SIZE +HIGHBD_MASK_VAR(64, 128) +HIGHBD_MASK_SUBPIX_VAR(64, 128) + +HIGHBD_MASK_VAR(128, 64) +HIGHBD_MASK_SUBPIX_VAR(128, 64) + +HIGHBD_MASK_VAR(128, 128) +HIGHBD_MASK_SUBPIX_VAR(128, 128) +#endif // CONFIG_EXT_CODING_UNIT_SIZE #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_WEDGE_PARTITION
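
Reviewer note on the two vpx_memset calls in vp9_rdopt.c (rd_pick_intra_sby_mode and vp9_rd_pick_inter_mode_sb): the pre-existing code computed sizeof(x->kmeans_data_buffer[0] * 4096), which is the size of the multiplication expression, i.e. sizeof(double), so only 8 bytes were ever cleared. The byte count the call clearly intends is sizeof(x->kmeans_data_buffer[0]) * N, the form rd_pick_palette_444 already uses, and the hunks above are written that way. A minimal standalone sketch of the pitfall, using a hypothetical buffer rather than libvpx code:

    #include <assert.h>
    #include <string.h>

    #define N 4096  /* stands in for CODING_UNIT_SIZE * CODING_UNIT_SIZE */

    int main(void) {
      double buf[N];
      /* sizeof applied to an expression yields the size of its type:
       * buf[0] * N is a double, so this is 8 bytes -- the bug. */
      assert(sizeof(buf[0] * N) == sizeof(double));
      /* Parenthesizing only the element gives the intended byte count. */
      assert(sizeof(buf[0]) * N == N * sizeof(double));
      memset(buf, 0, sizeof(buf[0]) * N);  /* zeroes the full buffer */
      return 0;
    }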
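
Reviewer note on the constant mapping: the patch reads naturally if CODING_UNIT_SIZE is the largest coding-unit dimension, presumably 128 when CONFIG_EXT_CODING_UNIT_SIZE is set and 64 otherwise. That assumption explains every substitution: 4096 = 64 * 64 becomes CODING_UNIT_SIZE * CODING_UNIT_SIZE, 8192 = 2 * 64 * 64 (two bytes per sample in the high-bit-depth paths) becomes 2 * CODING_UNIT_SIZE * CODING_UNIT_SIZE, the uint16_t row offset 128 = 2 * 64 becomes 2 * CODING_UNIT_SIZE, and the three new block sizes (BLOCK_64X128, BLOCK_128X64, BLOCK_128X128) account for the three extra 192 entries appended to the BLOCK_SIZES-indexed tables in vp9_entropymode.c. A compile-time guard could pin the assumption down; this is a hedged sketch (C89-style, since the tree predates static_assert) and cu_size_check is a hypothetical name:

    /* Fails to compile (negative array size) if the assumed mapping between
     * CONFIG_EXT_CODING_UNIT_SIZE and CODING_UNIT_SIZE ever changes. */
    #if CONFIG_EXT_CODING_UNIT_SIZE
    typedef char cu_size_check[CODING_UNIT_SIZE == 128 ? 1 : -1];
    #else
    typedef char cu_size_check[CODING_UNIT_SIZE == 64 ? 1 : -1];
    #endif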