From 0a49747b01401900272ccf0dfbb6481429707ad5 Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Thu, 11 Nov 2010 12:41:07 -0500
Subject: [PATCH 1/2] quantizer: fix assertion in fast quantizer path

The fast quantizer assembly code has not been updated to match the new
exact quantizer, which was made the default in commit 6adbe09.
Specifically, they are not aware of the potential for the coefficient
to be scaled, which results in the quantized result exceeding the range
of the DCT. This patch restores the previous behavior of using the
non-shifted coefficients when in the fast quantizer code path, but
unfortunately requires rebuilding the tables when switching between
the two.

Change-Id: I0a33f5b3850335011a06906f49fafed54dda9546
---
 vp8/encoder/encodeframe.c | 47 ++++++++++++++----------
 vp8/encoder/onyx_if.c     |  3 ++
 vp8/encoder/quantize.c    | 75 +++++++++++++++++++++------------------
 3 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b67edd39f..2aac20b31 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -146,16 +146,25 @@ static const int qzbin_factors_y2[129] =
 
 #define EXACT_QUANT
 #ifdef EXACT_QUANT
-static void vp8cx_invert_quant(short *quant, short *shift, short d)
+static void vp8cx_invert_quant(int improved_quant, short *quant,
+                               short *shift, short d)
 {
-    unsigned t;
-    int l;
-    t = d;
-    for(l = 0; t > 1; l++)
-        t>>=1;
-    t = 1 + (1<<(16+l))/d;
-    *quant = (short)(t - (1<<16));
-    *shift = l;
+    if(improved_quant)
+    {
+        unsigned t;
+        int l;
+        t = d;
+        for(l = 0; t > 1; l++)
+            t>>=1;
+        t = 1 + (1<<(16+l))/d;
+        *quant = (short)(t - (1<<16));
+        *shift = l;
+    }
+    else
+    {
+        *quant = (1 << 16) / d;
+        *shift = 0;
+    }
 }
 
 void vp8cx_init_quantizer(VP8_COMP *cpi)
@@ -170,7 +179,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                            cpi->Y1quant_shift[Q] + 0, quant_val);
         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -178,7 +187,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                            cpi->Y2quant_shift[Q] + 0, quant_val);
         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -186,7 +195,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                            cpi->UVquant_shift[Q] + 0, quant_val);
         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -199,7 +208,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            int rc = vp8_default_zig_zag1d[i];
 
            quant_val = vp8_ac_yquant(Q);
-           vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -207,7 +216,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-           vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -215,7 +224,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-           vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+           vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -405,14 +414,14 @@ void encode_mb_row(VP8_COMP *cpi,
     // Set up limit values for vertical motion vector components
     // to prevent them extending beyond the UMV borders
     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) 
+    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                     + (VP8BORDERINPIXELS - 16);
 
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
     {
-        // Distance of Mb to the left & right edges, specified in 
-        // 1/8th pel units as they are always compared to values 
+        // Distance of Mb to the left & right edges, specified in
+        // 1/8th pel units as they are always compared to values
         // that are in 1/8th pel units
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
@@ -420,7 +429,7 @@ void encode_mb_row(VP8_COMP *cpi,
         // Set up limit values for horizontal motion vector components
         // to prevent them extending beyond the UMV borders
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
-        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) 
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                         + (VP8BORDERINPIXELS - 16);
 
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ba7bb104b..00ecf97a6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -563,6 +563,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     int Speed = cpi->Speed;
     int i;
     VP8_COMMON *cm = &cpi->common;
+    int last_improved_quant = sf->improved_quant;
 
     // Initialise default mode frequency sampling variables
     for (i = 0; i < MAX_MODES; i ++)
@@ -1262,6 +1263,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     {
         cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
     }
+    if (cpi->sf.improved_quant != last_improved_quant)
+        vp8cx_init_quantizer(cpi);
 
 #if CONFIG_RUNTIME_CPU_DETECT
     cpi->mb.e_mbd.rtcd = &cpi->common.rtcd;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index c2c0351c0..a1be6614b 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -17,7 +17,8 @@
 #include "predictdc.h"
 
 #define EXACT_QUANT
-#ifdef EXACT_QUANT
+
+#ifdef EXACT_FASTQUANT
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -64,6 +65,45 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 
     d->eob = eob + 1;
 }
+#else
+
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr = b->coeff;
+    short *round_ptr = b->round;
+    short *quant_ptr = b->quant;
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+
+    eob = -1;
+    for (i = 0; i < 16; i++)
+    {
+        rc = vp8_default_zig_zag1d[i];
+        z = coeff_ptr[rc];
+
+        sz = (z >> 31);                                  // sign of z
+        x = (z ^ sz) - sz;                               // x = abs(z)
+
+        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+        x = (y ^ sz) - sz;                               // get the sign back
+        qcoeff_ptr[rc] = x;                              // write to destination
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
+
+        if (y)
+        {
+            eob = i;                                     // last nonzero coeffs
+        }
+    }
+    d->eob = eob + 1;
+}
+
+#endif
+
+#ifdef EXACT_QUANT
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -178,39 +218,6 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
 }
 
 #else
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *coeff_ptr = b->coeff;
-    short *round_ptr = b->round;
-    short *quant_ptr = b->quant;
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = d->dequant;
-
-    eob = -1;
-    for (i = 0; i < 16; i++)
-    {
-        rc = vp8_default_zig_zag1d[i];
-        z = coeff_ptr[rc];
-
-        sz = (z >> 31);                                  // sign of z
-        x = (z ^ sz) - sz;                               // x = abs(z)
-
-        y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-        x = (y ^ sz) - sz;                               // get the sign back
-        qcoeff_ptr[rc] = x;                              // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];           // dequantized value
-
-        if (y)
-        {
-            eob = i;                                     // last nonzero coeffs
-        }
-    }
-    d->eob = eob + 1;
-}
 
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {

From 8c2dfde3ed0c6e99ec20b9a4f85e2e4772a956ba Mon Sep 17 00:00:00 2001
From: Frank Galligan
Date: Wed, 3 Nov 2010 23:33:00 -0400
Subject: [PATCH 2/2] Fixed bug first cluster timecode of webm file is wrong.

When the first pts equaled 0 ivfenc was incorrectly increasing the pts
by 1. I changed the pts and last pts to be signed. I also set the
default value of last pts to -1.

Change-Id: I30bcec5af9b16d93fa9e3abbea7764b133e9cd73
---
 vpxenc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index b139c6829..af9839ce5 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -435,7 +435,7 @@ struct EbmlGlobal
     int            debug;
 
     FILE           *stream;
-    uint64_t        last_pts_ms;
+    int64_t         last_pts_ms;
     vpx_rational_t  framerate;
 
     /* These pointers are to the start of an element */
@@ -648,7 +648,7 @@ write_webm_block(EbmlGlobal *glob,
     unsigned char  track_number;
     unsigned short block_timecode = 0;
     unsigned char  flags;
-    uint64_t       pts_ms;
+    int64_t        pts_ms;
     int            start_cluster = 0, is_keyframe;
 
     /* Calculate the PTS of this frame in milliseconds */
@@ -1074,6 +1074,7 @@ int main(int argc, const char **argv_)
     int psnr_count = 0;
 
     exec_name = argv_[0];
+    ebml.last_pts_ms = -1;
 
     if (argc < 3)
         usage_exit();