From 0a49747b01401900272ccf0dfbb6481429707ad5 Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Thu, 11 Nov 2010 12:41:07 -0500
Subject: [PATCH 1/2] quantizer: fix assertion in fast quantizer path

The fast quantizer assembly code has not been updated to match the new
exact quantizer, which was made the default in commit 6adbe09.
Specifically, they are not aware of the potential for the coefficient
to be scaled, which results in the quantized result exceeding the range
of the DCT. This patch restores the previous behavior of using the
non-shifted coefficients when in the fast quantizer code path, but
unfortunately requires rebuilding the tables when switching between
the two.

Change-Id: I0a33f5b3850335011a06906f49fafed54dda9546
---
 vp8/encoder/encodeframe.c | 47 ++++++++++++++----------
 vp8/encoder/onyx_if.c     |  3 ++
 vp8/encoder/quantize.c    | 75 +++++++++++++++++++++------------------
 3 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b67edd39f..2aac20b31 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -146,16 +146,25 @@ static const int qzbin_factors_y2[129] =
 
 #define EXACT_QUANT
 #ifdef EXACT_QUANT
-static void vp8cx_invert_quant(short *quant, short *shift, short d)
+static void vp8cx_invert_quant(int improved_quant, short *quant,
+                               short *shift, short d)
 {
-    unsigned t;
-    int l;
-    t = d;
-    for(l = 0; t > 1; l++)
-        t>>=1;
-    t = 1 + (1<<(16+l))/d;
-    *quant = (short)(t - (1<<16));
-    *shift = l;
+    if(improved_quant)
+    {
+        unsigned t;
+        int l;
+        t = d;
+        for(l = 0; t > 1; l++)
+            t>>=1;
+        t = 1 + (1<<(16+l))/d;
+        *quant = (short)(t - (1<<16));
+        *shift = l;
+    }
+    else
+    {
+        *quant = (1 << 16) / d;
+        *shift = 0;
+    }
 }
 
 void vp8cx_init_quantizer(VP8_COMP *cpi)
@@ -170,7 +179,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                            cpi->Y1quant_shift[Q] + 0, quant_val);
         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -178,7 +187,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                            cpi->Y2quant_shift[Q] + 0, quant_val);
         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -186,7 +195,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                            cpi->UVquant_shift[Q] + 0, quant_val);
         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -199,7 +208,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            int rc = vp8_default_zig_zag1d[i];
 
            quant_val = vp8_ac_yquant(Q);
-           vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -207,7 +216,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-           vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -215,7 +224,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-           vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -405,14 +414,14 @@ void encode_mb_row(VP8_COMP *cpi,
     // Set up limit values for vertical motion vector components
     // to prevent them extending beyond the UMV borders
     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) 
+    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                     + (VP8BORDERINPIXELS - 16);
 
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
     {
-        // Distance of Mb to the left & right edges, specified in 
-        // 1/8th pel units as they are always compared to values 
+        // Distance of Mb to the left & right edges, specified in
+        // 1/8th pel units as they are always compared to values
         // that are in 1/8th pel units
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
@@ -420,7 +429,7 @@ void encode_mb_row(VP8_COMP *cpi,
         // Set up limit values for horizontal motion vector components
         // to prevent them extending beyond the UMV borders
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
-        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) 
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                         + (VP8BORDERINPIXELS - 16);
 
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ba7bb104b..00ecf97a6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -563,6 +563,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     int Speed = cpi->Speed;
     int i;
     VP8_COMMON *cm = &cpi->common;
+    int last_improved_quant = sf->improved_quant;
 
     // Initialise default mode frequency sampling variables
     for (i = 0; i < MAX_MODES; i ++)
@@ -1262,6 +1263,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     {
         cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
     }
+    if (cpi->sf.improved_quant != last_improved_quant)
+        vp8cx_init_quantizer(cpi);
 
 #if CONFIG_RUNTIME_CPU_DETECT
     cpi->mb.e_mbd.rtcd = &cpi->common.rtcd;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index c2c0351c0..a1be6614b 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -17,7 +17,8 @@
 #include "predictdc.h"
 
 #define EXACT_QUANT
-#ifdef EXACT_QUANT
+
+#ifdef EXACT_FASTQUANT
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -64,6 +65,45 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 
     d->eob = eob + 1;
 }
+#else
+
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr = b->coeff;
+    short *round_ptr = b->round;
+    short *quant_ptr = b->quant;
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+
+    eob = -1;
+    for (i = 0; i < 16; i++)
+    {
+        rc = vp8_default_zig_zag1d[i];
+        z = coeff_ptr[rc];
+
+        sz = (z >> 31);                                  // sign of z
+        x = (z ^ sz) - sz;                               // x = abs(z)
+
+        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+        x = (y ^ sz) - sz;                               // get the sign back
+        qcoeff_ptr[rc] = x;                              // write to destination
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
+
+        if (y)
+        {
+            eob = i;                                     // last nonzero coeffs
+        }
+    }
+    d->eob = eob + 1;
+}
+
+#endif
+
+#ifdef EXACT_QUANT
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -178,39 +218,6 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
 }
 
 #else
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *coeff_ptr = b->coeff;
-    short *round_ptr = b->round;
-    short *quant_ptr = b->quant;
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = d->dequant;
-
-    eob = -1;
-    for (i = 0; i < 16; i++)
-    {
-        rc = vp8_default_zig_zag1d[i];
-        z = coeff_ptr[rc];
-
-        sz = (z >> 31);                                  // sign of z
-        x = (z ^ sz) - sz;                               // x = abs(z)
-
-        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-        x = (y ^ sz) - sz;                               // get the sign back
-        qcoeff_ptr[rc] = x;                              // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
-
-        if (y)
-        {
-            eob = i;                                     // last nonzero coeffs
-        }
-    }
-    d->eob = eob + 1;
-}
 
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {

From 8c2dfde3ed0c6e99ec20b9a4f85e2e4772a956ba Mon Sep 17 00:00:00 2001
From: Frank Galligan
Date: Wed, 3 Nov 2010 23:33:00 -0400
Subject: [PATCH 2/2] Fixed bug first cluster timecode of webm file is wrong.

When the first pts equaled 0 ivfenc was incorrectly increasing the pts
by 1. I changed the pts and last pts to be signed. I also set the
default value of last pts to -1.

Change-Id: I30bcec5af9b16d93fa9e3abbea7764b133e9cd73
---
 vpxenc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index b139c6829..af9839ce5 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -435,7 +435,7 @@ struct EbmlGlobal
     int            debug;
 
     FILE           *stream;
-    uint64_t        last_pts_ms;
+    int64_t         last_pts_ms;
     vpx_rational_t  framerate;
 
     /* These pointers are to the start of an element */
@@ -648,7 +648,7 @@ write_webm_block(EbmlGlobal *glob,
     unsigned char  track_number;
     unsigned short block_timecode = 0;
     unsigned char  flags;
-    uint64_t       pts_ms;
+    int64_t        pts_ms;
     int            start_cluster = 0, is_keyframe;
 
     /* Calculate the PTS of this frame in milliseconds */
@@ -1074,6 +1074,7 @@ int main(int argc, const char **argv_)
     int psnr_count = 0;
 
     exec_name = argv_[0];
+    ebml.last_pts_ms = -1;
 
     if (argc < 3)
         usage_exit();