Merge "Make the quantizer exact."

2010-07-23 09:26:26 -07:00 · 2010-07-23 09:26:26 -07:00 · f5cf8553a2
commit f5cf8553a2
parent d576690ba1 e04e293522
7 changed files with 44 additions and 12 deletions
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;

    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@ -33,6 +33,7 @@ typedef struct

    // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
    short(*quant)[4];
+    short(*quant_shift)[4];
    short(*zbin)[4];
    short(*zrun_zbin_boost);
    short(*round)[4];
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@ -103,6 +103,18 @@ static const int qzbin_factors[129] =
    80,
 };

+static void vp8cx_invert_quant(short *quant, short *shift, short d) // derive a fixed-point reciprocal (multiplier + shift) for divisor d
+{                                       // quant and shift receive the two results; d is the divisor being inverted
+    unsigned t;
+    int l;
+    t = d;                              // unsigned working copy of the divisor
+    for(l = 0; t > 1; l++)              // count halvings until t <= 1, so l = floor(log2(d)) when d >= 1
+        t>>=1;
+    t = 1 + (1<<(16+l))/d;              // 1 + floor(2^(16+l) / d); NOTE(review): d == 0 divides by zero, callers presumably pass d > 0 -- confirm
+    *quant = (short)(t - (1<<16));      // multiplier stored with 2^16 subtracted and narrowed to short; the quantizers add x back after the >>16
+    *shift = l;                         // right-shift amount to use together with *quant
+}
+
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
    int r, c;
@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
        cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0][0] = quant_val;
@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            c = (rc & 3);

            quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][r][c] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
            cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][r][c] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][r][c] = quant_val;
@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
    for (i = 0; i < 25; i++)
    {
        z->block[i].quant           = x->block[i].quant;
+        z->block[i].quant_shift     = x->block[i].quant_shift;
        z->block[i].zbin            = x->block[i].zbin;
        z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;
        z->block[i].round           = x->block[i].round;
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@ -234,14 +234,17 @@ typedef struct
 {

    DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);

    DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);

    DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
    DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);

--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
    short *zbin_ptr   = &b->zbin[0][0];
    short *round_ptr  = &b->round[0][0];
    short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
    short *qcoeff_ptr = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = &d->dequant[0][0];
@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)

        if (x >= zbin)
        {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
            x  = (y ^ sz) - sz;                         // get the sign back
            qcoeff_ptr[rc] = x;                          // write to destination
            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
    short *zbin_ptr   = &b->zbin[0][0];
    short *round_ptr  = &b->round[0][0];
    short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
    short *qcoeff_ptr = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = &d->dequant[0][0];
@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)

        if (x >= zbin)
        {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
            x  = (y ^ sz) - sz;                         // get the sign back
            qcoeff_ptr[rc]  = x;                         // write to destination
            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;

-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
    }

 #endif
@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
        /* cpi->rtcd.encodemb.sub* not implemented for wmt */

-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
    }

 #endif