From 0c0f3efdeb1cc61b878b1fde8be5b4a6787b253c Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 18 Feb 2016 15:42:19 -0800 Subject: [PATCH] Properly normalize HBD sse computation This fixes a bug in HBD sum of squared error computation introduced in #abd00505d1c658cc106bad51369197270a299f92. Change-Id: I9d4e8627eb8ea491bac44794c40c7f1e6ba135dc --- vp10/encoder/rdopt.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 736adbbab..bfc0983a8 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -658,6 +658,10 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, plane_bsize, tx_size, &arg); { +#if CONFIG_VP9_HIGHBITDEPTH + const VP10_COMP *cpi = args->cpi; + const uint32_t hbd_shift = (cpi->common.bit_depth - 8) * 2; +#endif const int bs = 4 << tx_size; const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; const vpx_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf; @@ -674,8 +678,12 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; unsigned int tmp; - +#if CONFIG_VP9_HIGHBITDEPTH + sse = (int64_t)ROUND_POWER_OF_TWO( + vpx_sum_squares_2d_i16(diff, diff_stride, bs), hbd_shift) * 16; +#else sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, bs) * 16; +#endif variance(src, src_stride, dst, dst_stride, &tmp); dist = (int64_t)tmp * 16; } @@ -2332,6 +2340,7 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); uint8_t *rec_buffer; + const uint32_t hbd_shift = (cpi->common.bit_depth - 8) * 2; #else DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); #endif @@ -2372,11 +2381,21 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, for (idy = 0; idy < blocks_height; idy += 2) { for (idx = 0; idx < blocks_width; idx += 2) { const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx; +#if CONFIG_VP9_HIGHBITDEPTH + tmp_sse += ROUND_POWER_OF_TWO( + vpx_sum_squares_2d_i16(d, diff_stride, 8), hbd_shift); +#else tmp_sse += vpx_sum_squares_2d_i16(d, diff_stride, 8); +#endif } } } else { +#if CONFIG_VP9_HIGHBITDEPTH + tmp_sse = ROUND_POWER_OF_TWO( + vpx_sum_squares_2d_i16(diff, diff_stride, bh), hbd_shift); +#else tmp_sse = vpx_sum_squares_2d_i16(diff, diff_stride, bh); +#endif } *bsse += (int64_t)tmp_sse * 16;