Use the fast quantizer for inter mode selection
Use the fast quantizer for inter mode selection and the regular quantizer for the rest of the encode when running in good-quality mode at speed 1. Both performance and quality are improved. The quality gains make up for the quality loss mentioned in I9dc089007ca08129fb6c11fe7692777ebb8647b0. Change-Id: Ia90bc9cf326a7c65d60d31fa32f6465ab6984d21
This commit is contained in:
parent
e463b95b4e
commit
516ea8460b
@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
|
|||||||
|
|
||||||
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
|
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
|
||||||
{
|
{
|
||||||
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
|
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -33,6 +33,7 @@ typedef struct
|
|||||||
|
|
||||||
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
|
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
|
||||||
short *quant;
|
short *quant;
|
||||||
|
short *quant_fast;
|
||||||
short *quant_shift;
|
short *quant_shift;
|
||||||
short *zbin;
|
short *zbin;
|
||||||
short *zrun_zbin_boost;
|
short *zrun_zbin_boost;
|
||||||
|
@ -179,6 +179,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
{
|
{
|
||||||
// dc values
|
// dc values
|
||||||
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
|
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
|
||||||
|
cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
|
||||||
cpi->Y1quant_shift[Q] + 0, quant_val);
|
cpi->Y1quant_shift[Q] + 0, quant_val);
|
||||||
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||||
@ -187,6 +188,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||||
|
|
||||||
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
|
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
|
||||||
|
cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
|
||||||
cpi->Y2quant_shift[Q] + 0, quant_val);
|
cpi->Y2quant_shift[Q] + 0, quant_val);
|
||||||
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||||
@ -195,6 +197,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||||
|
|
||||||
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
|
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
|
||||||
|
cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
|
||||||
cpi->UVquant_shift[Q] + 0, quant_val);
|
cpi->UVquant_shift[Q] + 0, quant_val);
|
||||||
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
|
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
|
||||||
@ -208,6 +211,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
int rc = vp8_default_zig_zag1d[i];
|
int rc = vp8_default_zig_zag1d[i];
|
||||||
|
|
||||||
quant_val = vp8_ac_yquant(Q);
|
quant_val = vp8_ac_yquant(Q);
|
||||||
|
cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
|
||||||
cpi->Y1quant_shift[Q] + rc, quant_val);
|
cpi->Y1quant_shift[Q] + rc, quant_val);
|
||||||
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||||
@ -216,6 +220,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||||
|
|
||||||
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
|
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
|
||||||
|
cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
|
||||||
cpi->Y2quant_shift[Q] + rc, quant_val);
|
cpi->Y2quant_shift[Q] + rc, quant_val);
|
||||||
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||||
@ -224,6 +229,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
|||||||
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||||
|
|
||||||
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
|
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
|
||||||
|
cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
|
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
|
||||||
cpi->UVquant_shift[Q] + rc, quant_val);
|
cpi->UVquant_shift[Q] + rc, quant_val);
|
||||||
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||||
@ -325,6 +331,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
|||||||
for (i = 0; i < 16; i++)
|
for (i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
x->block[i].quant = cpi->Y1quant[QIndex];
|
x->block[i].quant = cpi->Y1quant[QIndex];
|
||||||
|
x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
|
||||||
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
|
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
|
||||||
x->block[i].zbin = cpi->Y1zbin[QIndex];
|
x->block[i].zbin = cpi->Y1zbin[QIndex];
|
||||||
x->block[i].round = cpi->Y1round[QIndex];
|
x->block[i].round = cpi->Y1round[QIndex];
|
||||||
@ -339,6 +346,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
|||||||
for (i = 16; i < 24; i++)
|
for (i = 16; i < 24; i++)
|
||||||
{
|
{
|
||||||
x->block[i].quant = cpi->UVquant[QIndex];
|
x->block[i].quant = cpi->UVquant[QIndex];
|
||||||
|
x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
|
||||||
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
|
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
|
||||||
x->block[i].zbin = cpi->UVzbin[QIndex];
|
x->block[i].zbin = cpi->UVzbin[QIndex];
|
||||||
x->block[i].round = cpi->UVround[QIndex];
|
x->block[i].round = cpi->UVround[QIndex];
|
||||||
@ -349,6 +357,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
|||||||
|
|
||||||
// Y2
|
// Y2
|
||||||
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
|
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
|
||||||
|
x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
|
||||||
x->block[24].quant = cpi->Y2quant[QIndex];
|
x->block[24].quant = cpi->Y2quant[QIndex];
|
||||||
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
|
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
|
||||||
x->block[24].zbin = cpi->Y2zbin[QIndex];
|
x->block[24].zbin = cpi->Y2zbin[QIndex];
|
||||||
@ -1270,7 +1279,18 @@ int vp8cx_encode_inter_macroblock
|
|||||||
|
|
||||||
if (cpi->sf.RD)
|
if (cpi->sf.RD)
|
||||||
{
|
{
|
||||||
|
/* Are we using the fast quantizer for the mode selection? */
|
||||||
|
if(cpi->sf.use_fastquant_for_pick)
|
||||||
|
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
|
||||||
|
|
||||||
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
|
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
|
||||||
|
|
||||||
|
/* switch back to the regular quantizer for the encode */
|
||||||
|
if (cpi->sf.improved_quant)
|
||||||
|
{
|
||||||
|
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -591,6 +591,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
sf->max_fs_radius = 32;
|
sf->max_fs_radius = 32;
|
||||||
sf->iterative_sub_pixel = 1;
|
sf->iterative_sub_pixel = 1;
|
||||||
sf->optimize_coefficients = 1;
|
sf->optimize_coefficients = 1;
|
||||||
|
sf->use_fastquant_for_pick = 0;
|
||||||
|
|
||||||
sf->first_step = 0;
|
sf->first_step = 0;
|
||||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||||
@ -758,7 +759,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
|
|
||||||
cpi->mode_check_freq[THR_SPLITG] = 4;
|
cpi->mode_check_freq[THR_SPLITG] = 4;
|
||||||
cpi->mode_check_freq[THR_SPLITA] = 4;
|
cpi->mode_check_freq[THR_SPLITA] = 4;
|
||||||
cpi->mode_check_freq[THR_SPLITMV] = 0;
|
cpi->mode_check_freq[THR_SPLITMV] = 2;
|
||||||
|
|
||||||
sf->thresh_mult[THR_TM ] = 1500;
|
sf->thresh_mult[THR_TM ] = 1500;
|
||||||
sf->thresh_mult[THR_V_PRED ] = 1500;
|
sf->thresh_mult[THR_V_PRED ] = 1500;
|
||||||
@ -789,8 +790,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
sf->thresh_mult[THR_SPLITA ] = 20000;
|
sf->thresh_mult[THR_SPLITA ] = 20000;
|
||||||
}
|
}
|
||||||
|
|
||||||
sf->improved_quant = 0;
|
sf->use_fastquant_for_pick = 1;
|
||||||
sf->improved_dct = 0;
|
|
||||||
|
|
||||||
sf->first_step = 1;
|
sf->first_step = 1;
|
||||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||||
@ -798,6 +798,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
|
|
||||||
if (Speed > 1)
|
if (Speed > 1)
|
||||||
{
|
{
|
||||||
|
sf->use_fastquant_for_pick = 0;
|
||||||
|
|
||||||
cpi->mode_check_freq[THR_SPLITG] = 15;
|
cpi->mode_check_freq[THR_SPLITG] = 15;
|
||||||
cpi->mode_check_freq[THR_SPLITA] = 15;
|
cpi->mode_check_freq[THR_SPLITA] = 15;
|
||||||
cpi->mode_check_freq[THR_SPLITMV] = 7;
|
cpi->mode_check_freq[THR_SPLITMV] = 7;
|
||||||
@ -831,6 +833,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
|||||||
sf->thresh_mult[THR_SPLITA ] = 50000;
|
sf->thresh_mult[THR_SPLITA ] = 50000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sf->first_step = 1;
|
||||||
|
|
||||||
|
sf->improved_quant = 0;
|
||||||
|
sf->improved_dct = 0;
|
||||||
|
|
||||||
// Only do recode loop on key frames, golden frames and
|
// Only do recode loop on key frames, golden frames and
|
||||||
// alt ref frames
|
// alt ref frames
|
||||||
sf->recode_loop = 2;
|
sf->recode_loop = 2;
|
||||||
|
@ -182,6 +182,8 @@ typedef struct
|
|||||||
int first_step;
|
int first_step;
|
||||||
int optimize_coefficients;
|
int optimize_coefficients;
|
||||||
|
|
||||||
|
int use_fastquant_for_pick;
|
||||||
|
|
||||||
} SPEED_FEATURES;
|
} SPEED_FEATURES;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
@ -269,6 +271,9 @@ typedef struct
|
|||||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
|
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
|
||||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
|
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
|
||||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
|
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
|
||||||
|
DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
|
||||||
|
DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
|
||||||
|
DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
|
||||||
|
|
||||||
|
|
||||||
MACROBLOCK mb;
|
MACROBLOCK mb;
|
||||||
|
@ -27,7 +27,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
|||||||
short *coeff_ptr = b->coeff;
|
short *coeff_ptr = b->coeff;
|
||||||
short *zbin_ptr = b->zbin;
|
short *zbin_ptr = b->zbin;
|
||||||
short *round_ptr = b->round;
|
short *round_ptr = b->round;
|
||||||
short *quant_ptr = b->quant;
|
short *quant_ptr = b->quant_fast;
|
||||||
short *quant_shift_ptr = b->quant_shift;
|
short *quant_shift_ptr = b->quant_shift;
|
||||||
short *qcoeff_ptr = d->qcoeff;
|
short *qcoeff_ptr = d->qcoeff;
|
||||||
short *dqcoeff_ptr = d->dqcoeff;
|
short *dqcoeff_ptr = d->dqcoeff;
|
||||||
@ -74,7 +74,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
|||||||
int x, y, z, sz;
|
int x, y, z, sz;
|
||||||
short *coeff_ptr = b->coeff;
|
short *coeff_ptr = b->coeff;
|
||||||
short *round_ptr = b->round;
|
short *round_ptr = b->round;
|
||||||
short *quant_ptr = b->quant;
|
short *quant_ptr = b->quant_fast;
|
||||||
short *qcoeff_ptr = d->qcoeff;
|
short *qcoeff_ptr = d->qcoeff;
|
||||||
short *dqcoeff_ptr = d->dqcoeff;
|
short *dqcoeff_ptr = d->dqcoeff;
|
||||||
short *dequant_ptr = d->dequant;
|
short *dequant_ptr = d->dequant;
|
||||||
|
@ -32,7 +32,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
|||||||
short *coeff_ptr = b->coeff;
|
short *coeff_ptr = b->coeff;
|
||||||
short *zbin_ptr = b->zbin;
|
short *zbin_ptr = b->zbin;
|
||||||
short *round_ptr = b->round;
|
short *round_ptr = b->round;
|
||||||
short *quant_ptr = b->quant;
|
short *quant_ptr = b->quant_fast;
|
||||||
short *qcoeff_ptr = d->qcoeff;
|
short *qcoeff_ptr = d->qcoeff;
|
||||||
short *dqcoeff_ptr = d->dqcoeff;
|
short *dqcoeff_ptr = d->dqcoeff;
|
||||||
short *dequant_ptr = d->dequant;
|
short *dequant_ptr = d->dequant;
|
||||||
@ -90,7 +90,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
|||||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||||
short *coeff_ptr = b->coeff;
|
short *coeff_ptr = b->coeff;
|
||||||
short *round_ptr = b->round;
|
short *round_ptr = b->round;
|
||||||
short *quant_ptr = b->quant;
|
short *quant_ptr = b->quant_fast;
|
||||||
short *qcoeff_ptr = d->qcoeff;
|
short *qcoeff_ptr = d->qcoeff;
|
||||||
short *dqcoeff_ptr = d->dqcoeff;
|
short *dqcoeff_ptr = d->dqcoeff;
|
||||||
short *dequant_ptr = d->dequant;
|
short *dequant_ptr = d->dequant;
|
||||||
@ -183,7 +183,7 @@ void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
|||||||
d->qcoeff,
|
d->qcoeff,
|
||||||
d->dequant,
|
d->dequant,
|
||||||
b->round,
|
b->round,
|
||||||
b->quant,
|
b->quant_fast,
|
||||||
d->dqcoeff
|
d->dqcoeff
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user