Properly normalize HBD SSE computation

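This fixes a bug in the high-bit-depth (HBD) sum of squared error (SSE)
computation introduced in commit abd00505d1c658cc106bad51369197270a299f92.
In high-bit-depth builds, the sum of squares is now rounded and shifted
right by 2 * (bit_depth - 8).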

Change-Id: I9d4e8627eb8ea491bac44794c40c7f1e6ba135dc
This commit is contained in:
Yaowu Xu
2016-02-18 15:42:19 -08:00
parent 6ed7f7a516
commit 0c0f3efdeb

View File

@@ -658,6 +658,10 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
plane_bsize, tx_size, &arg); plane_bsize, tx_size, &arg);
{ {
#if CONFIG_VP9_HIGHBITDEPTH
const VP10_COMP *cpi = args->cpi;
const uint32_t hbd_shift = (cpi->common.bit_depth - 8) * 2;
#endif
const int bs = 4 << tx_size; const int bs = 4 << tx_size;
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const vpx_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf; const vpx_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf;
@@ -674,8 +678,12 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
unsigned int tmp; unsigned int tmp;
#if CONFIG_VP9_HIGHBITDEPTH
sse = (int64_t)ROUND_POWER_OF_TWO(
vpx_sum_squares_2d_i16(diff, diff_stride, bs), hbd_shift) * 16;
#else
sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, bs) * 16; sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, bs) * 16;
#endif
variance(src, src_stride, dst, dst_stride, &tmp); variance(src, src_stride, dst, dst_stride, &tmp);
dist = (int64_t)tmp * 16; dist = (int64_t)tmp * 16;
} }
@@ -2332,6 +2340,7 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]);
uint8_t *rec_buffer; uint8_t *rec_buffer;
const uint32_t hbd_shift = (cpi->common.bit_depth - 8) * 2;
#else #else
DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);
#endif #endif
@@ -2372,11 +2381,21 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
for (idy = 0; idy < blocks_height; idy += 2) { for (idy = 0; idy < blocks_height; idy += 2) {
for (idx = 0; idx < blocks_width; idx += 2) { for (idx = 0; idx < blocks_width; idx += 2) {
const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx; const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx;
#if CONFIG_VP9_HIGHBITDEPTH
tmp_sse += ROUND_POWER_OF_TWO(
vpx_sum_squares_2d_i16(d, diff_stride, 8), hbd_shift);
#else
tmp_sse += vpx_sum_squares_2d_i16(d, diff_stride, 8); tmp_sse += vpx_sum_squares_2d_i16(d, diff_stride, 8);
#endif
} }
} }
} else { } else {
#if CONFIG_VP9_HIGHBITDEPTH
tmp_sse = ROUND_POWER_OF_TWO(
vpx_sum_squares_2d_i16(diff, diff_stride, bh), hbd_shift);
#else
tmp_sse = vpx_sum_squares_2d_i16(diff, diff_stride, bh); tmp_sse = vpx_sum_squares_2d_i16(diff, diff_stride, bh);
#endif
} }
*bsse += (int64_t)tmp_sse * 16; *bsse += (int64_t)tmp_sse * 16;