diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5f1b0a515..391c0186f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -171,19 +171,43 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int64_t dist_sum = 0; const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; + unsigned int var = 0; const int shift = 8; + int rate; + int64_t dist; + + x->pred_sse[ref] = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; + const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; + int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int idx, idy; + int lw = b_width_log2_lookup[unit_size] + 2; + int lh = b_height_log2_lookup[unit_size] + 2; - const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, - &sse); + x->bsse[i] = 0; + + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); + uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); + + var += cpi->fn_ptr[unit_size].vf(src , p->src.stride, + dst, pd->dst.stride, &sse); + + x->bsse[i] += sse; + if (i == 0) + x->pred_sse[ref] += sse; + } + } if (!x->select_tx_size) { - if (sse < p->quant_thred[0] >> shift) + if (x->bsse[i] < p->quant_thred[0] >> shift) x->skip_txfm[i] = 1; else if (var < p->quant_thred[1] >> shift) x->skip_txfm[i] = 2; @@ -191,10 +215,6 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, x->skip_txfm[i] = 0; } - x->bsse[i] = sse; - if (i == 0) - x->pred_sse[ref] = sse; - // Fast approximate the modelling function. if (cpi->oxcf.speed > 4) { int64_t rate; @@ -210,9 +230,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, rate_sum += rate; dist_sum += dist; } else { - int rate; - int64_t dist; - vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + vp9_model_rd_from_var_lapndz(x->bsse[i], 1 << num_pels_log2_lookup[bs], pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist;