Make the quantizer exact.
This replaces the approximate division-by-multiplication in the quantizer with an exact one that costs just one add and one shift extra. The asm versions have not been updated in this patch, and have thus been disabled, since the new method requires different multipliers, which are not compatible with the old method.

Change-Id: I53ac887af0f969d906e464c88b1f4be69c6b1206
parent 08eed049d4
commit e04e293522
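For reference, the following standalone check is my own sketch of the idea, not part of the patch: vp8cx_invert_quant() builds a 16-bit multiplier plus a shift so that the two-step multiply-add-shift in quantize.c reproduces truncating division by the dequantizer value exactly, where the old single multiply by (1 << 16) / d could come out one step low. The dequantizer range (4..158) and coefficient range (below 1 << 12) used here are illustrative assumptions, and the arithmetic relies on arithmetic right shift of negative ints, as the VP8 code itself does.

/* Standalone verification sketch (not part of the patch). */
#include <stdio.h>

/* Same construction as the patch's vp8cx_invert_quant(). */
static void invert_quant(short *quant, short *shift, short d)
{
    unsigned t = d;
    int l;
    for (l = 0; t > 1; l++)
        t >>= 1;                      /* l = floor(log2(d)) */
    t = 1 + (1 << (16 + l)) / d;      /* rounded-up reciprocal in Q(16+l) */
    *quant = (short)(t - (1 << 16));  /* keep the low 16 bits; 2^16 is re-added later */
    *shift = l;
}

int main(void)
{
    int d, x, new_err = 0, old_err = 0;

    for (d = 4; d <= 158; d++)        /* assumed dequantizer range */
    {
        short q, l;
        short old_q = (short)((1 << 16) / d);    /* old approximate reciprocal */

        invert_quant(&q, &l, (short)d);

        for (x = 0; x < (1 << 12); x++)          /* assumed coefficient range */
        {
            int y_new = (((x * q) >> 16) + x) >> l;  /* new: one extra add and shift */
            int y_old = (x * old_q) >> 16;           /* old: can be one too small */

            new_err += (y_new != x / d);
            old_err += (y_old != x / d);
        }
    }
    printf("new method mismatches: %d, old method mismatches: %d\n",
           new_err, old_err);                    /* expect 0 for the new method */
    return 0;
}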
@@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon;

     cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;
+    /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
     cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
     cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
@@ -33,6 +33,7 @@ typedef struct

     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     short(*quant)[4];
+    short(*quant_shift)[4];
     short(*zbin)[4];
     short(*zrun_zbin_boost);
     short(*round)[4];
@@ -103,6 +103,18 @@ static const int qzbin_factors[129] =
     80,
 };

+static void vp8cx_invert_quant(short *quant, short *shift, short d)
+{
+    unsigned t;
+    int l;
+    t = d;
+    for(l = 0; t > 1; l++)
+        t>>=1;
+    t = 1 + (1<<(16+l))/d;
+    *quant = (short)(t - (1<<16));
+    *shift = l;
+}
+
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
     int r, c;
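To make the inversion above concrete (example values mine, not from the patch): for d = 5 the loop leaves l = 2 and t = 1 + (1 << 18) / 5 = 52429, so the stored quant is 52429 - 65536 = -13107 and the shift is 2. Quantizing x = 9 then computes ((9 * -13107) >> 16) + 9 = -2 + 9 = 7, and 7 >> 2 = 1, which matches 9 / 5 exactly. The old reciprocal 65536 / 5 = 13107 already fails at x = 5, where (5 * 13107) >> 16 = 0 instead of 1.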
@@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
         cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y1dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
         cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
         cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.UVdequant[Q][0][0] = quant_val;
@@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             c = (rc & 3);

             quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
             cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y1dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
             cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
             cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.UVdequant[Q][r][c] = quant_val;
@@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 0; i < 16; i++)
     {
         x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
         x->block[i].zbin = cpi->Y1zbin[QIndex];
         x->block[i].round = cpi->Y1round[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 16; i < 24; i++)
     {
         x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
         x->block[i].zbin = cpi->UVzbin[QIndex];
         x->block[i].round = cpi->UVround[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     // Y2
     zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
     x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
     x->block[24].zbin = cpi->Y2zbin[QIndex];
     x->block[24].round = cpi->Y2round[QIndex];
     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
@@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     for (i = 0; i < 25; i++)
     {
         z->block[i].quant = x->block[i].quant;
+        z->block[i].quant_shift = x->block[i].quant_shift;
         z->block[i].zbin = x->block[i].zbin;
         z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
         z->block[i].round = x->block[i].round;
@@ -234,14 +234,17 @@ typedef struct
 {

     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);

     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);

@@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     short *zbin_ptr = &b->zbin[0][0];
     short *round_ptr = &b->round[0][0];
     short *quant_ptr = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)

         if (x >= zbin)
         {
-            y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y = (((x * quant_ptr[rc]) >> 16) + x)
+                >> quant_shift_ptr[rc];                      // quantize (x)
             x = (y ^ sz) - sz;                               // get the sign back
             qcoeff_ptr[rc] = x;                              // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
@@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     short *zbin_ptr = &b->zbin[0][0];
     short *round_ptr = &b->round[0][0];
     short *quant_ptr = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)

         if (x >= zbin)
         {
-            y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y = (((x * quant_ptr[rc]) >> 16) + x)
+                >> quant_shift_ptr[rc];                      // quantize (x)
             x = (y ^ sz) - sz;                               // get the sign back
             qcoeff_ptr[rc] = x;                              // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
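As a quick sanity check on the bound (my reasoning, not part of the patch): with l = floor(log2(d)) and t = 1 + (1 << (16 + l)) / d, the sequence above evaluates, for the non-negative x it is fed, to (x * t) >> (16 + l). Since t * d exceeds 1 << (16 + l) by at most d, this equals x / d for every x below 1 << 15, which is what makes the quantizer exact over the ranges it operates on, rather than occasionally one step low as with the plain (x * quant) >> 16.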
@@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;

-        cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
     }

 #endif
@@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */

-        cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;
+        /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
     }

 #endif