Complete high bitdepth VAR_TX implementation.
VAR_TX now works in the high bitdepth configuration.

Change-Id: I4114d7d9ed59c598f1e4d35b8e75876c07074ba7
This commit is contained in:
@@ -652,13 +652,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
if (!is_inter_block(mbmi)) {
|
if (!is_inter_block(mbmi)) {
|
||||||
#if CONFIG_VAR_TX
|
|
||||||
struct encode_b_args arg = {x, NULL, &mbmi->skip};
|
struct encode_b_args arg = {x, NULL, &mbmi->skip};
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VAR_TX
|
||||||
vp10_encode_block_intra(plane, block, blk_row, blk_col,
|
|
||||||
plane_bsize, tx_size, &arg);
|
|
||||||
dist_block(x, plane, block, tx_size, &dist, &sse);
|
|
||||||
#else
|
|
||||||
uint8_t *dst, *src;
|
uint8_t *dst, *src;
|
||||||
int src_stride = x->plane[plane].src.stride;
|
int src_stride = x->plane[plane].src.stride;
|
||||||
int dst_stride = xd->plane[plane].dst.stride;
|
int dst_stride = xd->plane[plane].dst.stride;
|
||||||
@@ -680,9 +675,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
|
|||||||
args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
|
args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
|
||||||
dst, dst_stride, &tmp_sse);
|
dst, dst_stride, &tmp_sse);
|
||||||
dist = (int64_t)tmp_sse * 16;
|
dist = (int64_t)tmp_sse * 16;
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
#else
|
#else
|
||||||
struct encode_b_args arg = {x, NULL, &mbmi->skip};
|
|
||||||
vp10_encode_block_intra(plane, block, blk_row, blk_col,
|
vp10_encode_block_intra(plane, block, blk_row, blk_col,
|
||||||
plane_bsize, tx_size, &arg);
|
plane_bsize, tx_size, &arg);
|
||||||
dist_block(x, plane, block, tx_size, &dist, &sse);
|
dist_block(x, plane, block, tx_size, &dist, &sse);
|
||||||
@@ -1487,18 +1480,20 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
|
|||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
||||||
*(templ + idy));
|
*(templ + idy));
|
||||||
#endif
|
#endif // CONFIG_VAR_TX
|
||||||
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
|
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
|
||||||
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||||
ratey += cost_coeffs(x, 0, block,
|
|
||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
coeff_ctx,
|
ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
|
||||||
|
so->neighbors, cpi->sf.use_fast_coef_costing);
|
||||||
|
*(tempa + idx) = !(p->eobs[block] == 0);
|
||||||
|
*(templ + idy) = !(p->eobs[block] == 0);
|
||||||
#else
|
#else
|
||||||
tempa + idx, templ + idy,
|
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
|
||||||
#endif
|
|
||||||
TX_4X4,
|
TX_4X4,
|
||||||
so->scan, so->neighbors,
|
so->scan, so->neighbors,
|
||||||
cpi->sf.use_fast_coef_costing);
|
cpi->sf.use_fast_coef_costing);
|
||||||
|
#endif // CONFIG_VAR_TX
|
||||||
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
||||||
goto next_highbd;
|
goto next_highbd;
|
||||||
vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
|
vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
|
||||||
@@ -1511,18 +1506,19 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
|
|||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
||||||
*(templ + idy));
|
*(templ + idy));
|
||||||
#endif
|
#endif // CONFIG_VAR_TX
|
||||||
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
|
vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
|
||||||
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||||
ratey += cost_coeffs(x, 0, block,
|
|
||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
coeff_ctx,
|
ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
|
||||||
|
so->neighbors, cpi->sf.use_fast_coef_costing);
|
||||||
|
*(tempa + idx) = !(p->eobs[block] == 0);
|
||||||
|
*(templ + idy) = !(p->eobs[block] == 0);
|
||||||
#else
|
#else
|
||||||
tempa + idx, templ + idy,
|
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
|
||||||
#endif
|
TX_4X4, so->scan, so->neighbors,
|
||||||
TX_4X4,
|
|
||||||
so->scan, so->neighbors,
|
|
||||||
cpi->sf.use_fast_coef_costing);
|
cpi->sf.use_fast_coef_costing);
|
||||||
|
#endif // CONFIG_VAR_TX
|
||||||
distortion += vp10_highbd_block_error(
|
distortion += vp10_highbd_block_error(
|
||||||
coeff, BLOCK_OFFSET(pd->dqcoeff, block),
|
coeff, BLOCK_OFFSET(pd->dqcoeff, block),
|
||||||
16, &unused, xd->bd) >> 2;
|
16, &unused, xd->bd) >> 2;
|
||||||
@@ -1555,6 +1551,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
|
|||||||
next_highbd:
|
next_highbd:
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (best_rd >= rd_thresh)
|
if (best_rd >= rd_thresh)
|
||||||
return best_rd;
|
return best_rd;
|
||||||
|
|
||||||
@@ -1604,8 +1601,8 @@ next_highbd:
|
|||||||
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
|
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
|
||||||
const scan_order *so = get_scan(TX_4X4, tx_type, 0);
|
const scan_order *so = get_scan(TX_4X4, tx_type, 0);
|
||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
||||||
*(templ + idy));
|
*(templ + idy));
|
||||||
#endif
|
#endif
|
||||||
vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
|
vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
|
||||||
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||||
@@ -1629,8 +1626,8 @@ next_highbd:
|
|||||||
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
|
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
|
||||||
const scan_order *so = get_scan(TX_4X4, tx_type, 0);
|
const scan_order *so = get_scan(TX_4X4, tx_type, 0);
|
||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
|
||||||
*(templ + idy));
|
*(templ + idy));
|
||||||
#endif
|
#endif
|
||||||
vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
|
vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
|
||||||
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
||||||
@@ -2321,12 +2318,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
|
|||||||
MACROBLOCKD *xd = &x->e_mbd;
|
MACROBLOCKD *xd = &x->e_mbd;
|
||||||
const struct macroblock_plane *const p = &x->plane[plane];
|
const struct macroblock_plane *const p = &x->plane[plane];
|
||||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
const int ss_txfrm_size = tx_size << 1;
|
|
||||||
int64_t this_sse;
|
|
||||||
int shift = tx_size == TX_32X32 ? 0 : 2;
|
|
||||||
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
|
||||||
#endif
|
|
||||||
unsigned int tmp_sse = 0;
|
unsigned int tmp_sse = 0;
|
||||||
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||||
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
|
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
|
||||||
@@ -2391,35 +2382,59 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
|
|||||||
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
|
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
*dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
|
||||||
&this_sse, xd->bd) >> shift;
|
|
||||||
*bsse += this_sse >> shift;
|
|
||||||
#else
|
|
||||||
*bsse += (int64_t)tmp_sse * 16;
|
*bsse += (int64_t)tmp_sse * 16;
|
||||||
|
|
||||||
if (p->eobs[block] > 0) {
|
if (p->eobs[block] > 0) {
|
||||||
switch (tx_size) {
|
const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
|
||||||
case TX_32X32:
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
|
const int bd = xd->bd;
|
||||||
|
switch (tx_size) {
|
||||||
|
case TX_32X32:
|
||||||
|
vp10_highbd_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32,
|
||||||
|
p->eobs[block], bd, tx_type);
|
||||||
|
break;
|
||||||
|
case TX_16X16:
|
||||||
|
vp10_highbd_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32,
|
||||||
|
p->eobs[block], bd, tx_type);
|
||||||
|
break;
|
||||||
|
case TX_8X8:
|
||||||
|
vp10_highbd_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32,
|
||||||
|
p->eobs[block], bd, tx_type);
|
||||||
|
break;
|
||||||
|
case TX_4X4:
|
||||||
|
vp10_highbd_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32,
|
||||||
|
p->eobs[block], bd, tx_type, lossless);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0 && "Invalid transform size");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
switch (tx_size) {
|
||||||
|
case TX_32X32:
|
||||||
|
vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
|
||||||
|
tx_type);
|
||||||
|
break;
|
||||||
|
case TX_16X16:
|
||||||
|
vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
|
||||||
|
tx_type);
|
||||||
|
break;
|
||||||
|
case TX_8X8:
|
||||||
|
vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
|
||||||
tx_type);
|
tx_type);
|
||||||
break;
|
break;
|
||||||
case TX_16X16:
|
case TX_4X4:
|
||||||
vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
|
vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
|
||||||
tx_type);
|
tx_type, lossless);
|
||||||
break;
|
break;
|
||||||
case TX_8X8:
|
default:
|
||||||
vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
|
assert(0 && "Invalid transform size");
|
||||||
tx_type);
|
break;
|
||||||
break;
|
}
|
||||||
case TX_4X4:
|
|
||||||
vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
|
|
||||||
tx_type,
|
|
||||||
xd->lossless[xd->mi[0]->mbmi.segment_id]);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0 && "Invalid transform size");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((bh >> 2) + blk_col > max_blocks_wide ||
|
if ((bh >> 2) + blk_col > max_blocks_wide ||
|
||||||
@@ -2444,7 +2459,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*dist += (int64_t)tmp_sse * 16;
|
*dist += (int64_t)tmp_sse * 16;
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
|
|
||||||
*rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
|
*rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
|
||||||
scan_order->scan, scan_order->neighbors, 0);
|
scan_order->scan, scan_order->neighbors, 0);
|
||||||
|
|||||||
Reference in New Issue
Block a user