Move dequant from BLOCKD to per-plane MACROBLOCKD

This data can vary per-plane, but not per-block.

Change-Id: I1971b0b2c2e697d2118e38b54ef446e52f63c65a
John Koleszar 2013-04-24 14:48:17 -07:00
parent 4bd0f4f646
commit 15255eef82
10 changed files with 34 additions and 81 deletions
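
Before the per-file hunks, a minimal sketch of the layout change (struct and field names are taken from the vp9_blockd.h hunks below; unrelated members are elided, so this is illustrative only, not the full definitions):

#include <stdint.h>

/* Old layout: every BLOCKD carried its own dequant pointer, so the same
   table was duplicated across all 16 Y and 8 UV blocks of a macroblock. */
typedef struct blockd {
  int16_t *diff;
  int16_t *dequant;                    /* removed by this change */
  /* ... other per-block fields ... */
} BLOCKD;

/* New layout: the pointer lives once per plane (Y, U, V), and callers index
   xd->plane[p].dequant instead of xd->block[b].dequant. */
struct macroblockd_plane {
  /* ... other per-plane fields ... */
  int16_t *dequant;
};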

View File

@@ -280,7 +280,6 @@ typedef struct {
 typedef struct blockd {
   int16_t *diff;
-  int16_t *dequant;
   /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
   uint8_t **base_pre;
@@ -335,6 +334,7 @@ struct macroblockd_plane {
   int subsampling_y;
   struct buf_2d dst;
   struct buf_2d pre[2];
+  int16_t *dequant;
 };
 #define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))

View File

@@ -206,11 +206,9 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) {
   const int qindex = get_qindex(mb, segment_id, pc->base_qindex);
   mb->q_index = qindex;
-  for (i = 0; i < 16; i++)
-    mb->block[i].dequant = pc->y_dequant[qindex];
-  for (i = 16; i < 24; i++)
-    mb->block[i].dequant = pc->uv_dequant[qindex];
+  mb->plane[0].dequant = pc->y_dequant[qindex];
+  for (i = 1; i < MAX_MB_PLANE; i++)
+    mb->plane[i].dequant = pc->uv_dequant[qindex];
   if (mb->lossless) {
     assert(qindex == 0);
@@ -354,7 +352,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) {
     xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
         vp9_find_bpred_context(xd, b);
     if (!xd->mode_info_context->mbmi.mb_skip_coeff)
-      vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i, b->dequant);
+      vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i,
+                           xd->plane[0].dequant);
 #endif
     vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
                          b->dst_stride);
@@ -363,7 +362,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) {
   }
 #if CONFIG_NEWBINTRAMODES
   if (!xd->mode_info_context->mbmi.mb_skip_coeff)
-    vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->block[16].dequant);
+    vp9_decode_mb_tokens_4x4_uv(pbi, xd, r, xd->plane[1].dequant);
 #endif
   vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
   xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->plane[1].dst.buf,
@@ -596,7 +595,7 @@ static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
     mb_init_dequantizer(pbi, xd);
     // dequantization and idct
-    eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->block[0].dequant);
+    eobtotal = vp9_decode_tokens(pbi, xd, r, bsize, xd->plane[0].dequant);
     if (eobtotal == 0) { // skip loopfilter
       for (n = 0; n < bw * bh; n++) {
         const int x_idx = n & (bw - 1), y_idx = n >> bwl;
@@ -671,7 +670,7 @@ static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
     if (mode != I4X4_PRED)
 #endif
       eobtotal = vp9_decode_tokens(pbi, xd, r, BLOCK_SIZE_MB16X16,
-                                   xd->block[0].dequant);
+                                   xd->plane[0].dequant);
   }
 }

View File

@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "vpx_ports/asm_offsets.h"
-#include "vpx_config.h"
-#include "vp9/encoder/vp9_block.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_treewriter.h"
-#include "vp9/encoder/vp9_tokenize.h"
-BEGIN
-/* regular quantize */
-DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
-END
-/* add asserts for any offset that is not supported by assembly code
- * add asserts for any size that is not supported by assembly code
- */

View File

@@ -509,7 +509,7 @@ void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-    optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh);
   }
 }
@@ -532,7 +532,7 @@ void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-    optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh);
   }
 }
@@ -560,7 +560,7 @@ void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-    optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh);
   }
 }
@@ -585,7 +585,7 @@ void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
   for (n = 0; n < bw * bh; n++) {
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-    optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+    optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
                ta + x_idx, tl + y_idx, TX_4X4, bh * bw);
   }
 }
@@ -599,7 +599,7 @@ void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
   assert(bsize == BLOCK_SIZE_SB64X64);
   for (b = 256; b < 384; b += 64) {
-    const int cidx = b >= 320 ? 20 : 16;
+    const int plane = 1 + (b >= 320);
     a = ta + vp9_block2above_sb64[TX_32X32][b];
     l = tl + vp9_block2left_sb64[TX_32X32][b];
     a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
@@ -610,7 +610,7 @@ void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
     l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
     a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
     l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
-    optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+    optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant,
                &a_ec, &l_ec, TX_32X32, 256);
   }
 }
@@ -638,11 +638,10 @@ void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
   }
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_16X16, bh * bw * 64);
     }
@@ -671,11 +670,10 @@ void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
   }
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_8X8, bh * bw * 16);
     }
@@ -708,11 +706,10 @@ void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
   }
   for (plane = 0; plane < 2; plane++) {
-    const int cidx = 16 + plane * 4;
     for (n = 0; n < bw * bh; n++) {
       const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
       optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV,
-                 x->e_mbd.block[cidx].dequant,
+                 x->e_mbd.plane[plane + 1].dequant,
                  &ta[plane][x_idx], &tl[plane][y_idx],
                  TX_4X4, bh * bw * 4);
     }

View File

@@ -28,7 +28,6 @@ static INLINE int plane_idx(int plane) {
 void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
   MACROBLOCKD *const xd = &mb->e_mbd;
-  BLOCKD *const d = &xd->block[0];
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
@@ -41,7 +40,7 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
   int16_t *round_ptr = mb->plane[0].round;
   int16_t *quant_ptr = mb->plane[0].quant;
   uint8_t *quant_shift_ptr = mb->plane[0].quant_shift;
-  int16_t *dequant_ptr = d->dequant;
+  int16_t *dequant_ptr = xd->plane[0].dequant;
   int zbin_oq_value = mb->plane[0].zbin_extra;
   const int *pt_scan = get_scan_4x4(tx_type);
@@ -84,7 +83,6 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
@@ -99,7 +97,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) {
   int16_t *round_ptr = mb->plane[pb_idx.plane].round;
   int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
   uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-  int16_t *dequant_ptr = d->dequant;
+  int16_t *dequant_ptr = xd->plane[0].dequant;
   int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -152,7 +150,6 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
                                              pb_idx.block, 16);
   int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
                                     pb_idx.block, 16);
-  BLOCKD *const d = &xd->block[c_idx];
   const int *pt_scan = get_scan_8x8(tx_type);
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -171,7 +168,7 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
     int16_t *round_ptr = mb->plane[pb_idx.plane].round;
     int16_t *quant_ptr = mb->plane[pb_idx.plane].quant;
     uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-    int16_t *dequant_ptr = d->dequant;
+    int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
     int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
     eob = -1;
@@ -286,7 +283,6 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
   const int *pt_scan = get_scan_16x16(tx_type);
   if (c_idx == 0) assert(pb_idx.plane == 0);
@@ -301,7 +297,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
            mb->plane[pb_idx.plane].quant_shift,
            BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
            BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
-           d->dequant,
+           xd->plane[pb_idx.plane].dequant,
            mb->plane[pb_idx.plane].zbin_extra,
            &xd->plane[pb_idx.plane].eobs[pb_idx.block],
            pt_scan, 1);
@@ -311,7 +307,6 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
   const int c_idx = plane_idx(pb_idx.plane);
-  BLOCKD *const d = &xd->block[c_idx];
   if (c_idx == 0) assert(pb_idx.plane == 0);
   if (c_idx == 16) assert(pb_idx.plane == 1);
@@ -325,7 +320,7 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
            mb->plane[pb_idx.plane].quant_shift,
            BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
            BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
-           d->dequant,
+           xd->plane[pb_idx.plane].dequant,
            mb->plane[pb_idx.plane].zbin_extra,
            &xd->plane[pb_idx.plane].eobs[pb_idx.block],
            vp9_default_zig_zag1d_32x32, 2);
@@ -528,8 +523,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[0].round = cpi->Y1round[qindex];
   x->plane[0].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[qindex];
   x->plane[0].zbin_extra = (int16_t)zbin_extra;
-  for (i = 0; i < 16; i++)
-    x->e_mbd.block[i].dequant = cpi->common.y_dequant[qindex];
+  x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
   // UV
   zbin_extra = (cpi->common.uv_dequant[qindex][1] *
@@ -542,9 +536,8 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
     x->plane[i].round = cpi->UVround[qindex];
     x->plane[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[qindex];
     x->plane[i].zbin_extra = (int16_t)zbin_extra;
+    x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
   }
-  for (i = 16; i < 24; i++)
-    x->e_mbd.block[i].dequant = cpi->common.uv_dequant[qindex];
   x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);

View File

@@ -3050,7 +3050,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       // Hence quantizer step is also 8 times. To get effective quantizer
       // we need to divide by 8 before sending to modeling function.
       model_rd_from_var_lapndz(var, 16 * bw * 16 * bh,
-                               xd->block[0].dequant[1] >> 3,
+                               xd->plane[0].dequant[1] >> 3,
                                &tmp_rate_y, &tmp_dist_y);
       var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
                                           x->plane[1].src.stride,
@@ -3058,7 +3058,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                           xd->plane[1].dst.stride,
                                           &sse);
       model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
-                               xd->block[16].dequant[1] >> 3,
+                               xd->plane[1].dequant[1] >> 3,
                                &tmp_rate_u, &tmp_dist_u);
       var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
                                           x->plane[1].src.stride,
@@ -3066,7 +3066,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                           xd->plane[1].dst.stride,
                                           &sse);
       model_rd_from_var_lapndz(var, 8 * bw * 8 * bh,
-                               xd->block[20].dequant[1] >> 3,
+                               xd->plane[2].dequant[1] >> 3,
                                &tmp_rate_v, &tmp_dist_v);
       rd = RDCOST(x->rdmult, x->rddiv,
                   rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3138,17 +3138,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       // Note our transform coeffs are 8 times an orthogonal transform.
      // Hence quantizer step is also 8 times. To get effective quantizer
       // we need to divide by 8 before sending to modeling function.
-      model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
+      model_rd_from_var_lapndz(var, 16 * 16, xd->plane[0].dequant[1] >> 3,
                                &tmp_rate_y, &tmp_dist_y);
       var = vp9_variance8x8(x->plane[1].src.buf, x->plane[1].src.stride,
                             xd->plane[1].dst.buf, xd->plane[1].dst.stride,
                             &sse);
-      model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
+      model_rd_from_var_lapndz(var, 8 * 8, xd->plane[1].dequant[1] >> 3,
                                &tmp_rate_u, &tmp_dist_u);
       var = vp9_variance8x8(x->plane[2].src.buf, x->plane[1].src.stride,
                             xd->plane[2].dst.buf, xd->plane[1].dst.stride,
                             &sse);
-      model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
+      model_rd_from_var_lapndz(var, 8 * 8, xd->plane[2].dequant[1] >> 3,
                                &tmp_rate_v, &tmp_dist_v);
       rd = RDCOST(x->rdmult, x->rddiv,
                   rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
@@ -3225,8 +3225,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       x->skip = 1;
     else if (x->encode_breakout) {
       unsigned int var, sse;
-      int threshold = (xd->block[0].dequant[1]
-                       * xd->block[0].dequant[1] >> 4);
+      int threshold = (xd->plane[0].dequant[1]
+                       * xd->plane[0].dequant[1] >> 4);
       if (threshold < x->encode_breakout)
         threshold = x->encode_breakout;
@@ -3244,7 +3244,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       }
       if ((int)sse < threshold) {
-        unsigned int q2dc = xd->block[0].dequant[0];
+        unsigned int q2dc = xd->plane[0].dequant[0];
         /* If there is no codeable 2nd order dc
            or a very small uniform pixel change change */
         if ((sse - var < q2dc * q2dc >> 4) ||

View File

@@ -9,7 +9,6 @@
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 ; void vp9_regular_quantize_b_sse2 | arg

View File

@@ -9,7 +9,6 @@
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 ; void vp9_regular_quantize_b_sse4 | arg

View File

@@ -9,7 +9,6 @@
 %include "vpx_ports/x86_abi_support.asm"
-%include "vp9_asm_enc_offsets.asm"
 ; void vp9_fast_quantize_b_ssse3 | arg

View File

@@ -26,7 +26,6 @@ VP9_CX_SRCS-yes += vp9_cx_iface.c
 #INCLUDES += common
 #INCLUDES += encoder
-VP9_CX_SRCS-yes += encoder/vp9_asm_enc_offsets.c
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
 VP9_CX_SRCS-yes += encoder/vp9_dct.c
@@ -117,6 +116,3 @@ endif
 VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
-$(eval $(call asm_offsets_template,\
-    vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/vp9_asm_enc_offsets.c))