Improved high-bit-depth RDO (rate-distortion optimization)

Change-Id: I5bc6e94d9f3f64b2467f357da0d097347ad5f0c6
This commit is contained in:
Peter de Rivaz
2014-06-10 12:55:53 +01:00
committed by Deb Mukherjee
parent f08489e609
commit 321bd42060
3 changed files with 98 additions and 3 deletions

View File

@@ -210,6 +210,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
best = rd_cost1 < rd_cost0; best = rd_cost1 < rd_cost0;
base_bits = vp9_dct_value_cost_ptr[x]; base_bits = vp9_dct_value_cost_ptr[x];
dx = mul * (dqcoeff[rc] - coeff[rc]); dx = mul * (dqcoeff[rc] - coeff[rc]);
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
dx >>= xd->bps - 8;
}
#endif
d2 = dx * dx; d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0); tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
tokens[i][0].error = d2 + (best ? error1 : error0); tokens[i][0].error = d2 + (best ? error1 : error0);
@@ -264,7 +269,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
base_bits = vp9_dct_value_cost_ptr[x]; base_bits = vp9_dct_value_cost_ptr[x];
if (shortcut) { if (shortcut) {
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
dx -= ((dequant_ptr[rc != 0] >> (xd->bps - 8)) + sz) ^ sz;
} else {
dx -= (dequant_ptr[rc != 0] + sz) ^ sz; dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
}
#else
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
#endif
d2 = dx * dx; d2 = dx * dx;
} }
tokens[i][1].rate = base_bits + (best ? rate1 : rate0); tokens[i][1].rate = base_bits + (best ? rate1 : rate0);

View File

@@ -172,8 +172,20 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
if ((sse >> 3) > var) if ((sse >> 3) > var)
sse = var; sse = var;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
vp9_model_rd_from_var_lapndz(var + sse,
1 << num_pels_log2_lookup[bsize],
pd->dequant[1] >> (xd->bps - 5),
&rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize], vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize],
pd->dequant[1] >> 3, &rate, &dist); pd->dequant[1] >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize],
pd->dequant[1] >> 3, &rate, &dist);
#endif
*out_rate_sum = rate; *out_rate_sum = rate;
*out_dist_sum = dist << 3; *out_dist_sum = dist << 3;
} }
@@ -380,16 +392,30 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// The encode_breakout input // The encode_breakout input
const unsigned int min_thresh = const unsigned int min_thresh =
MIN(((unsigned int)x->encode_breakout << 4), max_thresh); MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
const int shift = 2 * xd->bps - 16;
#endif
// Calculate threshold according to dequant value. // Calculate threshold according to dequant value.
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
if (shift > 0)
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
}
#endif
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
// Adjust ac threshold according to partition size. // Adjust ac threshold according to partition size.
thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
b_height_log2_lookup[bsize]); b_height_log2_lookup[bsize]);
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
if (shift > 0)
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
}
#endif
// Y skipping condition checking for ac and dc. // Y skipping condition checking for ac and dc.
if (var <= thresh_ac && (sse - var) <= thresh_dc) { if (var <= thresh_ac && (sse - var) <= thresh_dc) {

View File

@@ -479,6 +479,11 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
int64_t dist; int64_t dist;
int64_t square_error = sse; int64_t square_error = sse;
int quantizer = (pd->dequant[1] >> 3); int quantizer = (pd->dequant[1] >> 3);
#if CONFIG_VP9_HIGH && CONFIG_HIGH_TRANSFORMS
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
quantizer >>= (xd->bps - 8);
}
#endif
if (quantizer < 120) if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8; rate = (square_error * (280 - quantizer)) >> 8;
@@ -490,8 +495,19 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
} else { } else {
int rate; int rate;
int64_t dist; int64_t dist;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> (xd->bps - 5),
&rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist); pd->dequant[1] >> 3, &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
#endif
rate_sum += rate; rate_sum += rate;
dist_sum += dist; dist_sum += dist;
} }
@@ -538,8 +554,19 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
&pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
&sse); &sse);
// sse works better than var, since there is no dc prediction used // sse works better than var, since there is no dc prediction used
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
vp9_model_rd_from_var_lapndz(sse, t * t,
pd->dequant[1] >> (xd->bps - 5),
&rate, &dist);
} else {
vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
&rate, &dist); &rate, &dist);
}
#else
vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
&rate, &dist);
#endif
rate_sum += rate; rate_sum += rate;
dist_sum += dist; dist_sum += dist;
*out_skip &= (rate < 1024); *out_skip &= (rate < 1024);
@@ -703,6 +730,11 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
// TODO(jingning): tune the model to better capture the distortion. // TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] * int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> (shift + 2); (1 << ss_txfrm_size)) >> (shift + 2);
#if CONFIG_VP9_HIGH && CONFIG_HIGH_TRANSFORMS
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
p >>= ((xd->bps - 8) * 2);
}
#endif
args->dist += (p >> 4); args->dist += (p >> 4);
args->sse += p; args->sse += p;
} }
@@ -3077,6 +3109,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Calculate threshold according to dequant value. // Calculate threshold according to dequant value.
thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
const int shift = 2 * xd->bps - 16;
if (shift > 0)
thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
}
#endif
thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
@@ -3093,6 +3132,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int thresh_dc; unsigned int thresh_dc;
thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
const int shift = 2 * xd->bps - 16;
if (shift > 0)
thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
}
#endif
// dc skipping checking // dc skipping checking
if ((sse - var) < thresh_dc || sse == var) { if ((sse - var) < thresh_dc || sse == var) {
@@ -3705,9 +3751,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// based on qp, activity mask and history // based on qp, activity mask and history
if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(mode_index > MIN_EARLY_TERM_INDEX)) { (mode_index > MIN_EARLY_TERM_INDEX)) {
const int qstep = xd->plane[0].dequant[1]; int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index // TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4; int scale = 4;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
qstep >>= (xd->bps - 8);
}
#endif
if (x->source_variance < UINT_MAX) { if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16); const int var_adjust = (x->source_variance < 16);
scale -= var_adjust; scale -= var_adjust;
@@ -4364,9 +4415,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// based on qp, activity mask and history // based on qp, activity mask and history
if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) && if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(ref_index > MIN_EARLY_TERM_INDEX)) { (ref_index > MIN_EARLY_TERM_INDEX)) {
const int qstep = xd->plane[0].dequant[1]; int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index // TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4; int scale = 4;
#if CONFIG_HIGH_TRANSFORMS && CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
qstep >>= (xd->bps - 8);
}
#endif
if (x->source_variance < UINT_MAX) { if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16); const int var_adjust = (x->source_variance < 16);
scale -= var_adjust; scale -= var_adjust;