Merge "Add missing calls to emms in the adaptive quantization code"

Paul Wilkins 2013-10-18 01:59:03 -07:00 committed by Gerrit Code Review
commit eaf2d4cbfc
3 changed files with 58 additions and 13 deletions
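
On x86, the MMX registers alias the x87 floating-point register stack, so once any MMX instruction has run, every x87 register is flagged as in use and subsequent double arithmetic can return garbage until an EMMS instruction resets the tag word. vp9_clear_system_state() is libvpx's portable wrapper around that reset. A minimal sketch of the idea (the names and plumbing here are illustrative, not the project's actual definitions):

    /* Illustrative only: libvpx's real wrapper lives in its ports layer. */
    #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
    #include <mmintrin.h>
    static void clear_system_state(void) {
      _mm_empty();  /* emits EMMS: marks the x87 register stack empty again */
    }
    #else
    static void clear_system_state(void) {}  /* nothing to reset off x86 */
    #endif

The calls below matter because the adaptive-quantization code mixes double math (rdmult_ratio, error_weight, the Q_RATIO/RDMULT_RATIO tables) with SIMD-heavy encoder routines.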

vp9/encoder/vp9_encodeframe.c

@@ -563,7 +563,10 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   int orig_rdmult = x->rdmult;
-  double rdmult_ratio = 1.0;
+  double rdmult_ratio;
+
+  vp9_clear_system_state();  // __asm emms;
+  rdmult_ratio = 1.0;  // avoid uninitialized warnings
 
   // Use the lower precision, but faster, 32x32 fdct for mode selection.
   x->use_lp32x32fdct = 1;
@@ -602,7 +605,10 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     vp9_activity_masking(cpi, x);
 
-  x->rdmult = round(x->rdmult * rdmult_ratio);
+  if (cpi->sf.variance_adaptive_quantization) {
+    vp9_clear_system_state();  // __asm emms;
+    x->rdmult = round(x->rdmult * rdmult_ratio);
+  }
 
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
@@ -618,10 +624,14 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
                               totaldist, bsize, ctx, best_rd);
   }
 
-  x->rdmult = orig_rdmult;
-  if (*totalrate != INT_MAX)
-    *totalrate = round(*totalrate * rdmult_ratio);
+  if (cpi->sf.variance_adaptive_quantization) {
+    x->rdmult = orig_rdmult;
+    if (*totalrate != INT_MAX) {
+      vp9_clear_system_state();  // __asm emms;
+      *totalrate = round(*totalrate * rdmult_ratio);
+    }
+  }
 }
 
 static void update_stats(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
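
The pattern above (reset the FPU state, then do the double multiply and round()) guards against exactly this failure mode. A hypothetical standalone repro, only meaningful on builds where doubles use the x87 unit, e.g. 32-bit x86, with names made up for illustration:

    #include <mmintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      /* Any MMX instruction marks the whole x87 register stack as in use. */
      __m64 v = _mm_add_pi16(_mm_setzero_si64(), _mm_setzero_si64());
      (void)v;
      /* Uncommenting the next line is the fix this commit applies: */
      /* _mm_empty(); */
      double rdmult_ratio = 1.25;  /* stands in for RDMULT_RATIO(energy) */
      printf("%f\n", round(100 * rdmult_ratio));  /* can print nan if EMMS was skipped */
      return 0;
    }

Note also that both sites are now gated on cpi->sf.variance_adaptive_quantization, so encoders that never enable VAQ skip the floating-point work and the EMMS along with it.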

vp9/encoder/vp9_firstpass.c

@@ -554,7 +554,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
       int this_error;
       int gf_motion_error = INT_MAX;
       int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
-      double error_weight = 1.0;
+      double error_weight;
+
+      vp9_clear_system_state();  // __asm emms;
+      error_weight = 1.0;  // avoid uninitialized warnings
 
       xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
       xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
@@ -587,7 +590,11 @@ void vp9_first_pass(VP9_COMP *cpi) {
       }
 
       // do intra 16x16 prediction
-      this_error = error_weight * vp9_encode_intra(x, use_dc_pred);
+      this_error = vp9_encode_intra(x, use_dc_pred);
+      if (cpi->sf.variance_adaptive_quantization) {
+        vp9_clear_system_state();  // __asm emms;
+        this_error *= error_weight;
+      }
 
       // intrapenalty below deals with situations where the intra and inter
       // error scores are very low (eg a plain black frame).
@@ -622,7 +629,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
         first_pass_motion_search(cpi, x, &best_ref_mv,
                                  &mv.as_mv, lst_yv12,
                                  &motion_error, recon_yoffset);
-        motion_error *= error_weight;
+        if (cpi->sf.variance_adaptive_quantization) {
+          vp9_clear_system_state();  // __asm emms;
+          motion_error *= error_weight;
+        }
 
         // If the current best reference mv is not centered on 0,0 then do a 0,0
         // based search as well.
@@ -630,7 +640,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
           tmp_err = INT_MAX;
           first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv.as_mv,
                                    lst_yv12, &tmp_err, recon_yoffset);
-          tmp_err *= error_weight;
+          if (cpi->sf.variance_adaptive_quantization) {
+            vp9_clear_system_state();  // __asm emms;
+            tmp_err *= error_weight;
+          }
 
           if (tmp_err < motion_error) {
             motion_error = tmp_err;
@@ -647,7 +660,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
           first_pass_motion_search(cpi, x, &zero_ref_mv,
                                    &tmp_mv.as_mv, gld_yv12,
                                    &gf_motion_error, recon_yoffset);
-          gf_motion_error *= error_weight;
+          if (cpi->sf.variance_adaptive_quantization) {
+            vp9_clear_system_state();  // __asm emms;
+            gf_motion_error *= error_weight;
+          }
 
           if ((gf_motion_error < motion_error) &&
               (gf_motion_error < this_error)) {
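
All four first-pass sites follow the same recipe: leave the integer error scores from the SIMD paths untouched, and only when variance-adaptive quantization is enabled reset the FPU state and apply the double error_weight. A condensed sketch of the recurring guard (vaq_scale_error is a hypothetical helper, not part of the patch):

    /* Hypothetical consolidation of the guard repeated in vp9_first_pass(). */
    static int vaq_scale_error(const VP9_COMP *cpi, double error_weight, int err) {
      if (cpi->sf.variance_adaptive_quantization) {
        vp9_clear_system_state();  // __asm emms;
        err *= error_weight;       /* int times double, truncated back to int */
      }
      return err;
    }

    /* Usage: motion_error = vaq_scale_error(cpi, error_weight, motion_error); */

The patch repeats the guard inline instead, which keeps the diff mechanical and leaves function signatures untouched.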

vp9/encoder/vp9_vaq.c

@@ -37,25 +37,36 @@ DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0};
 unsigned int vp9_vaq_segment_id(int energy) {
   ENERGY_IN_BOUNDS(energy);
 
   return SEGMENT_ID(energy);
 }
 
 double vp9_vaq_rdmult_ratio(int energy) {
   ENERGY_IN_BOUNDS(energy);
+
+  vp9_clear_system_state();  // __asm emms;
+
   return RDMULT_RATIO(energy);
 }
 
 double vp9_vaq_inv_q_ratio(int energy) {
   ENERGY_IN_BOUNDS(energy);
+
+  vp9_clear_system_state();  // __asm emms;
+
   return Q_RATIO(-energy);
 }
 
 void vp9_vaq_init() {
   int i;
-  double base_ratio = 1.8;
+  double base_ratio;
 
   assert(ENERGY_SPAN <= MAX_SEGMENTS);
 
+  vp9_clear_system_state();  // __asm emms;
+
+  base_ratio = 1.8;
+
   for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
     Q_RATIO(i) = pow(base_ratio, i/3.0);
   }
@@ -74,6 +85,8 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
   seg->abs_delta = SEGMENT_DELTADATA;
 
+  vp9_clear_system_state();  // __asm emms;
+
   for (i = ENERGY_MIN; i <= ENERGY_MAX; i++) {
     int qindex_delta, segment_rdmult;
@@ -89,6 +102,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
     segment_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + qindex_delta +
                                          cm->y_dc_delta_q);
+
     RDMULT_RATIO(i) = (double) segment_rdmult / base_rdmult;
   }
 }
@@ -120,9 +134,14 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+  double energy;
+  unsigned int var = block_variance(cpi, x, bs);
+
+  vp9_clear_system_state();  // __asm emms;
+
   // if (var <= 1000)
   //   return 0;
-  unsigned int var = block_variance(cpi, x, bs);
-  double energy = 0.9*(logf(var + 1) - 10.0);
+
+  energy = 0.9*(logf(var + 1) - 10.0);
   return clamp(round(energy), ENERGY_MIN, ENERGY_MAX);
 }
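
For a sense of the numbers in vp9_block_energy(): logf is the natural log, so energy crosses zero where logf(var + 1) equals 10, i.e. at a block variance near e^10, about 22026; quieter blocks score negative and busier ones positive before the clamp to [ENERGY_MIN, ENERGY_MAX]. A small self-contained check (the 0.9 and 10.0 constants mirror the code above; the bounds are assumed for illustration, the real ones are defined alongside this code):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const int kEnergyMin = -4, kEnergyMax = 1;  /* assumed, not libvpx's */
      const unsigned int vars[] = {100, 1000, 22026, 500000};
      int i;
      for (i = 0; i < 4; i++) {
        double energy = 0.9 * (logf(vars[i] + 1) - 10.0);
        double clamped = fmin(fmax(round(energy), kEnergyMin), kEnergyMax);
        printf("var=%7u  energy=%6.2f  clamped=%+.0f\n", vars[i], energy, clamped);
      }
      return 0;
    }

Every value these functions hand back (Q_RATIO, RDMULT_RATIO, the block energy) is computed in double precision from tables filled by pow(), which is why these entry points now execute EMMS before touching floating point.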