Re-design quantization process

This commit re-designs the quantization process for transform
coefficient blocks of size 4x4 to 16x16. It improves compression
performance for speed 7 by 3.85%. The SSSE3 version for the
new quantization process is included.

The average runtime of the 8x8 block quantization is reduced
from 285 cycles -> 255 cycles, i.e., over 10% faster.

Change-Id: I61278aa02efc70599b962d3314671db5b0446a50
This commit is contained in:
Jingning Han
2014-07-01 16:10:44 -07:00
parent 6643b8868d
commit 9ac2f66320
10 changed files with 310 additions and 6 deletions

View File

@@ -42,9 +42,9 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
}
void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
const int16_t *round_ptr, const int16_t quant,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
int eob = -1;
if (!skip_block) {
@@ -63,6 +63,47 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
(void)quant_shift_ptr;
(void)zbin_oq_value;
(void)iscan;
vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
if (!skip_block) {
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
for (i = 0; i < count; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (tmp)
eob = i;
}
}
*eob_ptr = eob + 1;
}
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -207,11 +248,16 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
const int qrounding_factor = q == 0 ? 64 : 48;
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = i == 0 ? 48 : 42;
if (q == 0)
qrounding_factor_fp = 64;
// y
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q)
: vp9_ac_quant(q, 0);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
quants->y_quant_fp[q][i] = (1 << 16) / quant;
quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
cm->y_dequant[q][i] = quant;
@@ -222,6 +268,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
invert_quant(&quants->uv_quant[q][i],
&quants->uv_quant_shift[q][i], quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
cm->uv_dequant[q][i] = quant;
@@ -240,6 +287,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
for (i = 2; i < 8; i++) {
quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
quants->y_zbin[q][i] = quants->y_zbin[q][1];
quants->y_round[q][i] = quants->y_round[q][1];
@@ -247,6 +295,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
quants->uv_quant[q][i] = quants->uv_quant[q][1];
quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1];
quants->uv_round_fp[q][i] = quants->uv_round_fp[q][1];
quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1];
quants->uv_zbin[q][i] = quants->uv_zbin[q][1];
quants->uv_round[q][i] = quants->uv_round[q][1];
@@ -276,6 +325,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
// Y
x->plane[0].quant = quants->y_quant[qindex];
x->plane[0].quant_fp = quants->y_quant_fp[qindex];
x->plane[0].round_fp = quants->y_round_fp[qindex];
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
@@ -286,6 +336,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
for (i = 1; i < 3; i++) {
x->plane[i].quant = quants->uv_quant[qindex];
x->plane[i].quant_fp = quants->uv_quant_fp[qindex];
x->plane[i].round_fp = quants->uv_round_fp[qindex];
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];