2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <math.h>
|
|
|
|
#include "vpx_mem/vpx_mem.h"
|
|
|
|
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/encoder/vp9_onyx_int.h"
|
|
|
|
#include "vp9/encoder/vp9_quantize.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_quant_common.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_seg_common.h"
|
2011-10-05 12:26:00 +02:00
|
|
|
|
2011-02-14 23:18:18 +01:00
|
|
|
#ifdef ENC_DEBUG
|
|
|
|
extern int enc_debug;
|
|
|
|
#endif
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes one 4x4 block of hybrid-transform coefficients.
 *
 * The coefficient visiting order depends on tx_type: the row scan for
 * ADST_DCT, the column scan for DCT_ADST, and the default 4x4 zig-zag for
 * anything else.  Each coefficient is compared against a zero-bin threshold
 * of zbin[rc] + boost[run] + b->zbin_extra, where "run" is the number of
 * coefficients visited since the last nonzero quantized value.
 *
 * Outputs: d->qcoeff (quantized), d->dqcoeff (dequantized) and d->eob, which
 * is one past the scan position of the last nonzero quantized coefficient
 * (0 when there is none, or when b->skip_block is set).
 */
void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
  int16_t *boost_tab       = b->zrun_zbin_boost;
  int16_t *coeff_ptr       = b->coeff;
  int16_t *zbin_ptr        = b->zbin;
  int16_t *round_ptr       = b->round;
  int16_t *quant_ptr       = b->quant;
  uint8_t *quant_shift_ptr = b->quant_shift;
  int16_t *qcoeff_ptr      = d->qcoeff;
  int16_t *dqcoeff_ptr     = d->dqcoeff;
  int16_t *dequant_ptr     = d->dequant;
  const int zbin_oq_value  = b->zbin_extra;
  const int *scan_order    = vp9_default_zig_zag1d_4x4;
  int last_nonzero = -1;
  int zero_run = 0;
  int pos;

  // Only the two mixed transform types use a dedicated scan.
  if (tx_type == ADST_DCT)
    scan_order = vp9_row_scan_4x4;
  else if (tx_type == DCT_ADST)
    scan_order = vp9_col_scan_4x4;

  // Outputs are always cleared, even for skipped blocks.
  vpx_memset(qcoeff_ptr, 0, 16 * sizeof(int16_t));
  vpx_memset(dqcoeff_ptr, 0, 16 * sizeof(int16_t));

  if (b->skip_block) {
    d->eob = 0;
    return;
  }

  for (pos = 0; pos < 16; ++pos) {
    const int rc = scan_order[pos];
    const int coeff = coeff_ptr[rc];
    const int sign = coeff >> 31;                // 0 or -1 (sign mask)
    const int zbin = zbin_ptr[rc] + boost_tab[zero_run] + zbin_oq_value;
    int mag = (coeff ^ sign) - sign;             // abs(coeff)
    int q, signed_q;

    ++zero_run;
    if (mag < zbin)
      continue;                                  // inside the dead zone

    mag += round_ptr[rc];
    q = (((mag * quant_ptr[rc]) >> 16) + mag) >> quant_shift_ptr[rc];
    signed_q = (q ^ sign) - sign;                // restore the sign

    qcoeff_ptr[rc] = signed_q;
    dqcoeff_ptr[rc] = signed_q * dequant_ptr[rc];

    if (q != 0) {
      last_nonzero = pos;
      zero_run = 0;                              // restart the zero run
    }
  }

  d->eob = last_nonzero + 1;
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes one 4x4 block, visiting coefficients in the default zig-zag
 * order.
 *
 * A coefficient is kept only if its magnitude reaches the zero-bin threshold
 * zbin[rc] + boost[run] + b->zbin_extra, with "run" counting the coefficients
 * visited since the last nonzero quantized value.
 *
 * Writes d->qcoeff, d->dqcoeff and d->eob (one past the scan position of the
 * last nonzero quantized coefficient; 0 if none or if b->skip_block is set).
 */
void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
  int16_t *boost_tab       = b->zrun_zbin_boost;
  int16_t *coeff_ptr       = b->coeff;
  int16_t *zbin_ptr        = b->zbin;
  int16_t *round_ptr       = b->round;
  int16_t *quant_ptr       = b->quant;
  uint8_t *quant_shift_ptr = b->quant_shift;
  int16_t *qcoeff_ptr      = d->qcoeff;
  int16_t *dqcoeff_ptr     = d->dqcoeff;
  int16_t *dequant_ptr     = d->dequant;
  const int zbin_oq_value  = b->zbin_extra;
  int last_nonzero = -1;
  int zero_run = 0;
  int pos;

  // Outputs are always cleared, even for skipped blocks.
  vpx_memset(qcoeff_ptr, 0, 16 * sizeof(int16_t));
  vpx_memset(dqcoeff_ptr, 0, 16 * sizeof(int16_t));

  if (b->skip_block) {
    d->eob = 0;
    return;
  }

  for (pos = 0; pos < 16; ++pos) {
    const int rc = vp9_default_zig_zag1d_4x4[pos];
    const int coeff = coeff_ptr[rc];
    const int sign = coeff >> 31;                // 0 or -1 (sign mask)
    const int zbin = zbin_ptr[rc] + boost_tab[zero_run] + zbin_oq_value;
    int mag = (coeff ^ sign) - sign;             // abs(coeff)
    int q, signed_q;

    ++zero_run;
    if (mag < zbin)
      continue;                                  // inside the dead zone

    mag += round_ptr[rc];
    q = (((mag * quant_ptr[rc]) >> 16) + mag) >> quant_shift_ptr[rc];
    signed_q = (q ^ sign) - sign;                // restore the sign

    qcoeff_ptr[rc] = signed_q;
    dqcoeff_ptr[rc] = signed_q * dequant_ptr[rc];

    if (q != 0) {
      last_nonzero = pos;
      zero_run = 0;                              // restart the zero run
    }
  }

  d->eob = last_nonzero + 1;
}
|
2010-10-11 22:49:52 +02:00
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes blocks 0..15 of the macroblock with 4x4 quantizers.
 *
 * Blocks whose transform type is DCT_DCT go through the x->quantize_b_4x4
 * function pointer; every other transform type is routed to
 * vp9_ht_quantize_b_4x4 so that the matching scan order is used.
 */
void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *const src = &x->block[blk];
    BLOCKD *const dst = &x->e_mbd.block[blk];
    const TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, dst);

    if (tx_type == DCT_DCT)
      x->quantize_b_4x4(src, dst);
    else
      vp9_ht_quantize_b_4x4(src, dst, tx_type);
  }
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes blocks 16..23 of the macroblock (the chroma blocks, going by the
 * function name) through the x->quantize_b_4x4 function pointer.
 */
void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
  int blk = 16;

  while (blk < 24) {
    x->quantize_b_4x4(&x->block[blk], &x->e_mbd.block[blk]);
    ++blk;
  }
}
|
2011-05-19 17:04:03 +02:00
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes a whole macroblock with 4x4 quantizers: first the blocks handled
 * by vp9_quantize_mby_4x4_c, then those handled by vp9_quantize_mbuv_4x4_c.
 */
void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
  vp9_quantize_mby_4x4_c(x);   // blocks 0..15
  vp9_quantize_mbuv_4x4_c(x);  // blocks 16..23
}
|
2012-02-29 02:11:12 +01:00
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes one 8x8 block (64 coefficients) in default zig-zag order.
 *
 * The quantizer tables are two-entry: index 0 is used for the first (DC)
 * coefficient and index 1 for all later scan positions.  The zero-bin
 * threshold is zbin[k] + boost[run] + b->zbin_extra, where "run" is the
 * number of coefficients visited since the last nonzero quantized value,
 * capped at 15.
 *
 * Writes d->qcoeff, d->dqcoeff and d->eob (one past the scan position of the
 * last nonzero quantized coefficient; 0 if none or if b->skip_block is set).
 */
void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
  int16_t *qcoeff_ptr = d->qcoeff;
  int16_t *dqcoeff_ptr = d->dqcoeff;

  // Outputs are always cleared, even for skipped blocks.
  vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
  vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));

  if (b->skip_block) {
    d->eob = 0;
    return;
  }

  {
    int16_t *boost_tab       = b->zrun_zbin_boost;
    int16_t *coeff_ptr       = b->coeff;
    int16_t *zbin_ptr        = b->zbin;
    int16_t *round_ptr       = b->round;
    int16_t *quant_ptr       = b->quant;
    uint8_t *quant_shift_ptr = b->quant_shift;
    int16_t *dequant_ptr     = d->dequant;
    const int zbin_oq_value  = b->zbin_extra;
    int last_nonzero = -1;
    int zero_run = 1;  // value used by the first AC coefficient if DC is zero
    int pos;

    // DC is handled outside the loop: it is the only coefficient that reads
    // entry 0 of the zbin/quant/quant_shift/dequant tables.
    {
      const int coeff = coeff_ptr[0];
      const int sign = coeff >> 31;              // 0 or -1 (sign mask)
      const int zbin = zbin_ptr[0] + boost_tab[0] + zbin_oq_value;
      int mag = (coeff ^ sign) - sign;           // abs(coeff)

      if (mag >= zbin) {
        int q, signed_q;

        mag += round_ptr[0];
        q = (((mag * quant_ptr[0]) >> 16) + mag) >> quant_shift_ptr[0];
        signed_q = (q ^ sign) - sign;            // restore the sign

        qcoeff_ptr[0] = signed_q;
        dqcoeff_ptr[0] = signed_q * dequant_ptr[0];

        if (q != 0) {
          last_nonzero = 0;
          zero_run = 0;
        }
      }
    }

    for (pos = 1; pos < 64; ++pos) {
      const int rc = vp9_default_zig_zag1d_8x8[pos];
      const int coeff = coeff_ptr[rc];
      const int sign = coeff >> 31;              // 0 or -1 (sign mask)
      const int zbin = zbin_ptr[1] + boost_tab[zero_run] + zbin_oq_value;
      int mag = (coeff ^ sign) - sign;           // abs(coeff)
      int q, signed_q;

      // Advance the zero run, saturating at 15.
      zero_run += (zero_run < 15);

      if (mag < zbin)
        continue;                                // inside the dead zone

      mag += round_ptr[rc != 0];
      q = (((mag * quant_ptr[1]) >> 16) + mag) >> quant_shift_ptr[1];
      signed_q = (q ^ sign) - sign;              // restore the sign

      qcoeff_ptr[rc] = signed_q;
      dqcoeff_ptr[rc] = signed_q * dequant_ptr[1];

      if (q != 0) {
        last_nonzero = pos;
        zero_run = 0;
      }
    }

    d->eob = last_nonzero + 1;
  }
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes blocks 0..15 of the macroblock as four 8x8 transforms.
 *
 * All sixteen eob fields are reset first; the 8x8 quantizer is then invoked
 * through x->quantize_b_8x8 on every fourth block (0, 4, 8, 12).
 */
void vp9_quantize_mby_8x8(MACROBLOCK *x) {
  int blk;

  // Reset pass: must complete before any quantizer call.
  for (blk = 0; blk < 16; ++blk)
    x->e_mbd.block[blk].eob = 0;

  for (blk = 0; blk < 16; blk += 4) {
    BLOCK *const src = &x->block[blk];
    BLOCKD *const dst = &x->e_mbd.block[blk];

    x->quantize_b_8x8(src, dst);
  }
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes blocks 16..23 of the macroblock as two 8x8 transforms.
 *
 * All eight eob fields are reset first; the 8x8 quantizer is then invoked
 * through x->quantize_b_8x8 on blocks 16 and 20.
 */
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
  int blk;

  // Reset pass: must complete before any quantizer call.
  for (blk = 16; blk < 24; ++blk) {
    x->e_mbd.block[blk].eob = 0;
  }

  for (blk = 16; blk < 24; blk += 4) {
    x->quantize_b_8x8(&x->block[blk], &x->e_mbd.block[blk]);
  }
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes a whole macroblock with 8x8 quantizers: vp9_quantize_mby_8x8
 * followed by vp9_quantize_mbuv_8x8.
 */
void vp9_quantize_mb_8x8(MACROBLOCK *x) {
  vp9_quantize_mby_8x8(x);   // blocks 0..15
  vp9_quantize_mbuv_8x8(x);  // blocks 16..23
}
|
2012-08-03 02:03:14 +02:00
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes blocks 0..15 of the macroblock as a single 16x16 transform.
 *
 * The eob field of each of the sixteen blocks is reset, then the 16x16
 * quantizer is invoked once through x->quantize_b_16x16 on block 0.
 */
void vp9_quantize_mby_16x16(MACROBLOCK *x) {
  int blk = 0;

  while (blk < 16) {
    x->e_mbd.block[blk].eob = 0;
    ++blk;
  }

  x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]);
}
|
|
|
|
|
2012-10-30 20:58:42 +01:00
|
|
|
/* Quantizes a whole macroblock using the 16x16 path for blocks 0..15
 * (vp9_quantize_mby_16x16) and the 8x8 path for blocks 16..23
 * (vp9_quantize_mbuv_8x8).
 */
void vp9_quantize_mb_16x16(MACROBLOCK *x) {
  vp9_quantize_mby_16x16(x);
  vp9_quantize_mbuv_8x8(x);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/* Generic dead-zone quantizer shared by the larger transform sizes.
 *
 * zbin_boost_orig_ptr  zero-run boost table, indexed by the current zero-run
 *                      length (capped at 15)
 * coeff_ptr            input transform coefficients
 * n_coeffs             number of coefficients to process / clear
 * skip_block           when nonzero, outputs are cleared and *eob_ptr is 0
 * zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, dequant_ptr
 *                      two-entry tables: [0] is used when the raster index
 *                      is 0, [1] for every other coefficient
 * qcoeff_ptr           output: quantized coefficients
 * dqcoeff_ptr          output: dequantized coefficients (divided by mul)
 * zbin_oq_value        extra amount added to every zero-bin threshold
 * eob_ptr              output: one past the scan position of the last
 *                      nonzero quantized coefficient, 0 if there is none
 * scan                 maps scan position to raster index
 * mul                  scale applied to each input coefficient before
 *                      quantization and divided back out of dqcoeff
 */
static void quantize(int16_t *zbin_boost_orig_ptr,
                     int16_t *coeff_ptr, int n_coeffs, int skip_block,
                     int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
                     uint8_t *quant_shift_ptr,
                     int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                     int16_t *dequant_ptr, int zbin_oq_value,
                     int *eob_ptr, const int *scan, int mul) {
  int last_nonzero = -1;
  int zero_run = 0;
  int pos;

  // Outputs are always cleared, even for skipped blocks.
  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));

  if (skip_block) {
    *eob_ptr = 0;
    return;
  }

  for (pos = 0; pos < n_coeffs; ++pos) {
    const int rc = scan[pos];
    const int tab = (rc != 0);                   // 0 for DC, 1 otherwise
    const int coeff = coeff_ptr[rc] * mul;
    const int sign = coeff >> 31;                // 0 or -1 (sign mask)
    const int zbin =
        zbin_ptr[tab] + zbin_boost_orig_ptr[zero_run] + zbin_oq_value;
    int mag = (coeff ^ sign) - sign;             // abs(coeff)
    int q, signed_q;

    // Advance the zero run, saturating at 15.
    if (zero_run < 15)
      ++zero_run;

    if (mag < zbin)
      continue;                                  // inside the dead zone

    mag += round_ptr[tab];
    q = (((mag * quant_ptr[tab]) >> 16) + mag) >> quant_shift_ptr[tab];
    signed_q = (q ^ sign) - sign;                // restore the sign

    qcoeff_ptr[rc] = signed_q;
    dqcoeff_ptr[rc] = signed_q * dequant_ptr[tab] / mul;

    if (q != 0) {
      last_nonzero = pos;
      zero_run = 0;
    }
  }

  *eob_ptr = last_nonzero + 1;
}
|
|
|
|
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
/* Quantizes one 16x16 block (256 coefficients) by forwarding the block's
 * tables to quantize() with the default 16x16 zig-zag scan and no coefficient
 * scaling (mul = 1).  The end-of-block count is written to d->eob.
 */
void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
  quantize(b->zrun_zbin_boost, b->coeff, 256, b->skip_block,
           b->zbin, b->round, b->quant, b->quant_shift,
           d->qcoeff, d->dqcoeff, d->dequant, b->zbin_extra,
           &d->eob, vp9_default_zig_zag1d_16x16, 1);
}
|
|
|
|
|
|
|
|
/* Quantizes the 32x32 transform (1024 coefficients) of a superblock.
 *
 * Input comes from x->sb_coeff_data.coeff and the results go to
 * x->e_mbd.sb_coeff_data.{qcoeff,dqcoeff}.  The quantizer tables, skip flag
 * and eob field are those of block 0.  Coefficients are scaled by 2 inside
 * quantize() (mul = 2) and visited in the default 32x32 zig-zag order.
 */
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  BLOCK *const blk0 = &x->block[0];
  BLOCKD *const blkd0 = &xd->block[0];

  blkd0->eob = 0;

  quantize(blk0->zrun_zbin_boost,
           x->sb_coeff_data.coeff, 1024, blk0->skip_block,
           blk0->zbin, blk0->round, blk0->quant, blk0->quant_shift,
           xd->sb_coeff_data.qcoeff, xd->sb_coeff_data.dqcoeff,
           blkd0->dequant, blk0->zbin_extra,
           &blkd0->eob, vp9_default_zig_zag1d_32x32, 2);
}
|
|
|
|
|
|
|
|
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
|
|
|
|
int i;
|
2013-01-29 14:33:17 +01:00
|
|
|
MACROBLOCKD *xd = &x->e_mbd;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
|
2013-01-29 14:33:17 +01:00
|
|
|
xd->block[16].eob = 0;
|
|
|
|
xd->block[20].eob = 0;
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
for (i = 16; i < 24; i += 4)
|
2013-01-24 19:28:33 +01:00
|
|
|
quantize(x->block[i].zrun_zbin_boost,
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
|
2013-01-29 14:33:17 +01:00
|
|
|
256, x->block[i].skip_block,
|
2013-01-24 19:28:33 +01:00
|
|
|
x->block[i].zbin,
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
x->block[i].round, x->block[0].quant, x->block[i].quant_shift,
|
2013-01-29 14:33:17 +01:00
|
|
|
xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
|
|
|
|
xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
|
|
|
|
xd->block[i].dequant,
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
x->block[i].zbin_extra,
|
2013-01-29 14:33:17 +01:00
|
|
|
&xd->block[i].eob,
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
vp9_default_zig_zag1d_16x16, 1);
|
|
|
|
}
|
|
|
|
|
2011-05-09 09:09:41 +02:00
|
|
|
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
|
|
|
|
* these two C functions if corresponding optimized routine is not available.
|
|
|
|
* NEON optimized version implements currently the fast quantization for pair
|
|
|
|
* of blocks. */
|
2012-10-30 22:25:33 +01:00
|
|
|
void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2,
|
|
|
|
BLOCKD *d1, BLOCKD *d2) {
|
2012-10-30 20:58:42 +01:00
|
|
|
vp9_regular_quantize_b_4x4(b1, d1);
|
|
|
|
vp9_regular_quantize_b_4x4(b2, d2);
|
2011-05-09 09:09:41 +02:00
|
|
|
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/* Converts a quantizer step d into a scaled reciprocal plus a shift count.
 *
 *   quant  receives (1 + 2^(16 + l) / d) - 2^16, narrowed to int16_t
 *   shift  receives l, the number of right shifts needed to bring d down
 *          to 1 (i.e. floor(log2(d)) for d >= 1)
 *   d      quantizer step; callers are expected to pass a positive value
 *          (d == 0 would divide by zero). */
static void invert_quant(int16_t *quant,
                         uint8_t *shift, int d) {
  unsigned remaining = d;
  unsigned recip;
  int log2_d = 0;

  /* Count how many halvings it takes to reduce d to 1. */
  while (remaining > 1) {
    remaining >>= 1;
    ++log2_d;
  }

  /* Reciprocal scaled by 2^(16 + log2_d); the +1 biases it upwards. */
  recip = 1 + (1 << (16 + log2_d)) / d;

  /* Only the part of the reciprocal above/below 2^16 is stored. */
  *quant = (int16_t)(recip - (1 << 16));
  *shift = log2_d;
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Builds the encoder's quantizer lookup tables for every Q index in
 * [0, QINDEX_RANGE).
 *
 * For both the Y1 and UV planes, and for each of the 16 coefficient
 * positions of a 4x4 block, this fills in:
 *   - quant / quant_shift (via invert_quant),
 *   - zbin   = (zbin factor  * step + 64) >> 7,
 *   - round  = (round factor * step) >> 7,
 *   - dequant = step,
 *   - zrun_zbin_boost = step scaled by the zbin_boost table.
 * Position 0 uses the DC quantizer step, positions 1..15 the AC step. */
void vp9_init_quantizer(VP9_COMP *cpi) {
  /* Zero-bin boost weights; scaled by the quantizer step and >> 7 below. */
  static const int zbin_boost[16] = { 0,  0,  0,  8,  8,  8,  10, 12,
                                      14, 16, 20, 24, 28, 32, 36, 40 };
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    int zbin_factor;
    int round_factor = 48;
    int step;
    int pos;

    /* Zero-bin factor depends on the size of the unadjusted DC step. */
    if (vp9_dc_quant(qindex, 0) < 148) {
      zbin_factor = 84;
    } else {
      zbin_factor = 80;
    }

#if CONFIG_LOSSLESS
    /* In lossless mode, Q index 0 uses factors of 64 for both the
     * zero bin and the rounding. */
    if (cpi->oxcf.lossless && qindex == 0) {
      zbin_factor = 64;
      round_factor = 64;
    }
#endif

    /* DC entry (position 0), Y1 plane. */
    step = vp9_dc_quant(qindex, cpi->common.y1dc_delta_q);
    invert_quant(&cpi->Y1quant[qindex][0],
                 &cpi->Y1quant_shift[qindex][0], step);
    cpi->Y1zbin[qindex][0] = ((zbin_factor * step) + 64) >> 7;
    cpi->Y1round[qindex][0] = (round_factor * step) >> 7;
    cpi->common.Y1dequant[qindex][0] = step;
    cpi->zrun_zbin_boost_y1[qindex][0] = (step * zbin_boost[0]) >> 7;

    /* DC entry (position 0), UV planes. */
    step = vp9_dc_uv_quant(qindex, cpi->common.uvdc_delta_q);
    invert_quant(&cpi->UVquant[qindex][0],
                 &cpi->UVquant_shift[qindex][0], step);
    cpi->UVzbin[qindex][0] = ((zbin_factor * step) + 64) >> 7;
    cpi->UVround[qindex][0] = (round_factor * step) >> 7;
    cpi->common.UVdequant[qindex][0] = step;
    cpi->zrun_zbin_boost_uv[qindex][0] = (step * zbin_boost[0]) >> 7;

    /* AC entries for the remaining 4x4 positions. */
    for (pos = 1; pos < 16; ++pos) {
      /* Table slot for this position, taken from the default 4x4 zig-zag
       * table.  Note that zrun_zbin_boost_* is indexed by pos itself,
       * while every other table is indexed by slot. */
      const int slot = vp9_default_zig_zag1d_4x4[pos];

      step = vp9_ac_yquant(qindex);
      invert_quant(&cpi->Y1quant[qindex][slot],
                   &cpi->Y1quant_shift[qindex][slot], step);
      cpi->Y1zbin[qindex][slot] = ((zbin_factor * step) + 64) >> 7;
      cpi->Y1round[qindex][slot] = (round_factor * step) >> 7;
      cpi->common.Y1dequant[qindex][slot] = step;
      cpi->zrun_zbin_boost_y1[qindex][pos] =
          ((step * zbin_boost[pos]) + 64) >> 7;

      step = vp9_ac_uv_quant(qindex, cpi->common.uvac_delta_q);
      invert_quant(&cpi->UVquant[qindex][slot],
                   &cpi->UVquant_shift[qindex][slot], step);
      cpi->UVzbin[qindex][slot] = ((zbin_factor * step) + 64) >> 7;
      cpi->UVround[qindex][slot] = (round_factor * step) >> 7;
      cpi->common.UVdequant[qindex][slot] = step;
      cpi->zrun_zbin_boost_uv[qindex][pos] =
          ((step * zbin_boost[pos]) + 64) >> 7;
    }
  }
}
|
2011-05-19 17:04:03 +02:00
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Per-macroblock quantizer setup.
 *
 * Picks the Q index for macroblock x (taking the segment-level ALT_Q
 * feature into account), then points each of its 24 blocks at the matching
 * rows of the tables filled in by vp9_init_quantizer: blocks 0..15 use the
 * Y1 tables, blocks 16..23 the UV tables.  Each block also gets its
 * zbin_extra value and its segment skip flag.  The chosen Q index is stored
 * in x->e_mbd.q_index. */
void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  int segment_id = xd->mode_info_context->mbmi.segment_id;
  int qindex;
  int extra;
  int blk;

  /* Choose the macroblock's Q index, allowing for a segment-level change. */
  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
    /* No segment override: use the frame's base index. */
    qindex = cpi->common.base_qindex;
  } else if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
    /* Segment data holds an absolute Q index. */
    qindex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
  } else {
    /* Segment data holds a delta relative to the base index. */
    qindex = cpi->common.base_qindex +
             vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);

    /* Keep the result within [0, MAXQ]. */
    if (qindex < 0) {
      qindex = 0;
    } else if (qindex > MAXQ) {
      qindex = MAXQ;
    }
  }

  /* Luma blocks. */
  extra = (cpi->common.Y1dequant[qindex][1] *
           (cpi->zbin_over_quant +
            cpi->zbin_mode_boost +
            x->act_zbin_adj)) >> 7;

  for (blk = 0; blk < 16; ++blk) {
    x->block[blk].quant = cpi->Y1quant[qindex];
    x->block[blk].quant_shift = cpi->Y1quant_shift[qindex];
    x->block[blk].zbin = cpi->Y1zbin[qindex];
    x->block[blk].round = cpi->Y1round[qindex];
    xd->block[blk].dequant = cpi->common.Y1dequant[qindex];
    x->block[blk].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[qindex];
    x->block[blk].zbin_extra = (int16_t)extra;

    /* Record whether the segment skip feature is on for this block. */
    x->block[blk].skip_block =
        vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
  }

  /* Chroma blocks. */
  extra = (cpi->common.UVdequant[qindex][1] *
           (cpi->zbin_over_quant +
            cpi->zbin_mode_boost +
            x->act_zbin_adj)) >> 7;

  for (blk = 16; blk < 24; ++blk) {
    x->block[blk].quant = cpi->UVquant[qindex];
    x->block[blk].quant_shift = cpi->UVquant_shift[qindex];
    x->block[blk].zbin = cpi->UVzbin[qindex];
    x->block[blk].round = cpi->UVround[qindex];
    xd->block[blk].dequant = cpi->common.UVdequant[qindex];
    x->block[blk].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex];
    x->block[blk].zbin_extra = (int16_t)extra;

    /* Record whether the segment skip feature is on for this block. */
    x->block[blk].skip_block =
        vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
  }

  /* Remember this macroblock's Q index for vp9_update_zbin_extra(). */
  xd->q_index = qindex;
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Recomputes zbin_extra for all 24 blocks of macroblock x, using the Q
 * index previously stored in x->e_mbd.q_index.  Blocks 0..15 derive the
 * value from the Y1 dequant table, blocks 16..23 from the UV one; in both
 * cases entry [1] of the table row is scaled by the sum of the encoder's
 * zbin_over_quant, zbin_mode_boost and the macroblock's act_zbin_adj. */
void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) {
  int qindex = x->e_mbd.q_index;
  int extra;
  int blk;

  /* Luma blocks. */
  extra = (cpi->common.Y1dequant[qindex][1] *
           (cpi->zbin_over_quant +
            cpi->zbin_mode_boost +
            x->act_zbin_adj)) >> 7;
  for (blk = 0; blk < 16; ++blk) {
    x->block[blk].zbin_extra = (int16_t)extra;
  }

  /* Chroma blocks. */
  extra = (cpi->common.UVdequant[qindex][1] *
           (cpi->zbin_over_quant +
            cpi->zbin_mode_boost +
            x->act_zbin_adj)) >> 7;
  for (blk = 16; blk < 24; ++blk) {
    x->block[blk].zbin_extra = (int16_t)extra;
  }
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
/* Frame-level quantizer setup: resets the zbin mode boost to its default
 * of zero and then runs the macroblock-level setup on cpi->mb. */
void vp9_frame_init_quantizer(VP9_COMP *cpi) {
  /* Default case: no zbin mode boost. */
  cpi->zbin_mode_boost = 0;

  /* Set up the quantizer for the encoder's macroblock context. */
  vp9_mb_init_quantizer(cpi, &cpi->mb);
}
|
|
|
|
|
2012-10-31 01:53:32 +01:00
|
|
|
void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) {
|
|
|
|
VP9_COMMON *cm = &cpi->common;
|
2011-05-19 17:04:03 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
cm->base_qindex = Q;
|
2012-01-20 01:56:46 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// if any of the delta_q values are changing update flag will
|
|
|
|
// have to be set.
|
|
|
|
cm->y1dc_delta_q = 0;
|
|
|
|
cm->uvdc_delta_q = 0;
|
|
|
|
cm->uvac_delta_q = 0;
|
2011-05-19 17:04:03 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
// quantizer has to be reinitialized if any delta_q changes.
|
|
|
|
// As there are not any here for now this is inactive code.
|
|
|
|
// if(update)
|
2012-10-30 22:25:33 +01:00
|
|
|
// vp9_init_quantizer(cpi);
|
2011-05-19 17:04:03 +02:00
|
|
|
}
|