From 14011f037d0353d1bef1b21dd65a348d9792bfd3 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Fri, 1 Jul 2016 12:20:45 -0700 Subject: [PATCH] Remove txfrm_block_to_raster_xy() from vp9 encoder The transform block row and column positions are always available outside the callees, so there is no need to recompute them. The decoder already uses this approach. This commit removes the txfrm_block_to_raster_xy() function. Change-Id: I5b90f91a0d8b7c35cfa7d171da9edf8202630108 --- vp9/common/vp9_blockd.c | 2 +- vp9/common/vp9_blockd.h | 12 +------ vp9/encoder/vp9_encodemb.c | 65 ++++++++++++++++---------------------- vp9/encoder/vp9_encodemb.h | 9 +++--- vp9/encoder/vp9_pickmode.c | 12 +++---- vp9/encoder/vp9_rdopt.c | 22 ++++++------- vp9/encoder/vp9_tokenize.c | 29 +++++++++-------- 7 files changed, 65 insertions(+), 86 deletions(-) diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 7bab27d4f..88320584c 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -66,7 +66,7 @@ void vp9_foreach_transformed_block_in_plane( for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { // Skip visiting the sub blocks that are wholly within the UMV. 
for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { - visit(plane, i, plane_bsize, tx_size, arg); + visit(plane, i, r, c, plane_bsize, tx_size, arg); i += step; } i += extra_step; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 3d26fb2b5..85b99c4bc 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -270,6 +270,7 @@ static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, } typedef void (*foreach_transformed_block_visitor)(int plane, int block, + int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); @@ -283,17 +284,6 @@ void vp9_foreach_transformed_block( const MACROBLOCKD* const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg); -static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, int block, - int *x, int *y) { - const int bwl = b_width_log2_lookup[plane_bsize]; - const int tx_cols_log2 = bwl - tx_size; - const int tx_cols = 1 << tx_cols_log2; - const int raster_mb = block >> (tx_size << 1); - *x = (raster_mb & (tx_cols - 1)) << tx_size; - *y = (raster_mb >> tx_cols_log2) << tx_size; -} - void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 169943c10..fec86a788 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -335,7 +335,7 @@ static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -346,10 +346,8 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, 
block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -425,7 +423,7 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, } } -void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -435,12 +433,8 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { @@ -506,7 +500,7 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, } } -void vp9_xform_quant(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; @@ -517,10 +511,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - 
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -595,7 +587,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, } } -static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; MACROBLOCK *const x = args->x; @@ -604,13 +597,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; ENTROPY_CONTEXT *a, *l; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; - a = &ctx->ta[plane][i]; - l = &ctx->tl[plane][j]; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + a = &ctx->ta[plane][col]; + l = &ctx->tl[plane][row]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. 
@@ -629,17 +620,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, *a = *l = 0; return; } else { - vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization - vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); } else { // skip forward transform p->eobs[block] = 0; @@ -647,7 +638,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, return; } } else { - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } } } @@ -715,19 +706,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, } } -static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block_pass1(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, 
plane_bsize, tx_size); if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH @@ -774,7 +764,8 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } -void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +void vp9_encode_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; @@ -795,18 +786,16 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - int i, j; struct optimize_ctx *const ctx = args->ctx; ENTROPY_CONTEXT *a = NULL; ENTROPY_CONTEXT *l = NULL; int entropy_ctx = 0; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * (j * dst_stride + i)]; - src = &p->src.buf[4 * (j * src_stride + i)]; - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + dst = &pd->dst.buf[4 * (row * dst_stride + col)]; + src = &p->src.buf[4 * (row * src_stride + col)]; + src_diff = &p->src_diff[4 * (row * diff_stride + col)]; if (args->ctx != NULL) { - a = &ctx->ta[plane][i]; - l = &ctx->tl[plane][j]; + a = &ctx->ta[plane][col]; + l = &ctx->tl[plane][row]; entropy_ctx = combine_entropy_contexts(*a, *l); } @@ -826,7 +815,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, - dst, dst_stride, i, j, plane); + dst, dst_stride, col, row, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 25b0b23e0..62d75a129 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -25,16 +25,17 @@ struct encode_b_args { }; void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); -void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); -void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); -void vp9_xform_quant(MACROBLOCK *x, int plane, int block, +void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); -void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +void vp9_encode_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 7db6ef2b0..7bde50f6f 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -932,7 +932,8 @@ struct estimate_block_intra_args { RD_COST *rdc; }; -static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, +static void estimate_block_intra(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct estimate_block_intra_args* const args = arg; VP9_COMP *const cpi = args->cpi; @@ -945,20 +946,19 @@ static void estimate_block_intra(int plane, int 
block, BLOCK_SIZE plane_bsize, uint8_t *const dst_buf_base = pd->dst.buf; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - int i, j; RD_COST this_rdc; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + (void)block; - p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; - pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; + p->src.buf = &src_buf_base[4 * (row * src_stride + col)]; + pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)]; // Use source buffer as an approximation for the fully reconstructed buffer. vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size, args->mode, x->skip_encode ? p->src.buf : pd->dst.buf, x->skip_encode ? src_stride : dst_stride, pd->dst.buf, dst_stride, - i, j, plane); + col, row, plane); if (plane == 0) { int64_t this_sse = INT64_MAX; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 47bcd8665..28530386c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -498,18 +498,16 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, } } -static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize, +static int rate_block(int plane, int block, int row, int col, TX_SIZE tx_size, struct rdcost_block_args* args) { - int x_idx, y_idx; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); - - return cost_coeffs(args->x, plane, block, args->t_above + x_idx, - args->t_left + y_idx, tx_size, + return cost_coeffs(args->x, plane, block, args->t_above + col, + args->t_left + row, tx_size, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); } -static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, +static void block_rd_txfm(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; @@ -525,20 +523,20 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE 
plane_bsize, if (!is_inter_block(mi)) { struct encode_b_args arg = {x, NULL, &mi->skip}; - vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg); + vp9_encode_block_intra(plane, block, row, col, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (max_txsize_lookup[plane_bsize] == tx_size) { if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); - vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (x->plane[plane].eobs[block]) { @@ -562,7 +560,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } } else { // full forward transform and quantization - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } @@ -572,7 +570,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, return; } - rate = rate_block(plane, block, plane_bsize, tx_size, args); + rate = rate_block(plane, block, row, col, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index edec755dd..4400da42d 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -319,7 +319,8 @@ struct tokenize_b_args { 
TOKENEXTRA **tp; }; -static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void set_entropy_context_b(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; ThreadData *const td = args->td; @@ -327,10 +328,8 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, - aoff, loff); + col, row); } static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree, @@ -353,7 +352,8 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t, ++counts[token]; } -static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void tokenize_b(int plane, int block, int row, int col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; VP9_COMP *cpi = args->cpi; @@ -384,11 +384,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const int tx_eob = 16 << (tx_size << 1); int16_t token; EXTRABIT extra; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); - - pt = get_entropy_context(tx_size, pd->above_context + aoff, - pd->left_context + loff); + pt = get_entropy_context(tx_size, pd->above_context + col, + pd->left_context + row); so = get_scan(xd, tx_size, type, block); scan = so->scan; nb = so->neighbors; @@ -426,20 +423,23 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, *tp = t; - vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff); + vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, col, row); } struct is_skippable_args { uint16_t *eobs; int *skippable; }; -static void is_skippable(int plane, 
int block, + +static void is_skippable(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; (void)plane; (void)plane_bsize; (void)tx_size; + (void)row; + (void)col; args->skippable[0] &= (!args->eobs[block]); } @@ -453,14 +453,15 @@ int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { return result; } -static void has_high_freq_coeff(int plane, int block, +static void has_high_freq_coeff(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; int eobs = (tx_size == TX_4X4) ? 3 : 10; (void) plane; (void) plane_bsize; - + (void) row; + (void) col; *(args->skippable) |= (args->eobs[block] > eobs); }