Reduced the size of Y1Dequant and friends to [128][2]

This patch removes the local copies of the dequantize
constants and implements John's idea as described
in "Make a local copy of the dequantized data" commit.

Change-Id: Ic6b7d681f00bf63263f71ff1e39ab2f80729e8b2
This commit is contained in:
Scott LaVarnway 2012-01-04 11:56:50 -05:00 committed by John Koleszar
parent 5bfa29b6c5
commit 5f25d4c175
12 changed files with 102 additions and 100 deletions

View File

@ -23,22 +23,20 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
#if HAVE_ARMV7
void vp8_dequantize_b_neon(BLOCKD *d)
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
short *DQC = d->dequant;
vp8_dequantize_b_loop_neon(Q, DQC, DQ);
}
#endif
#if HAVE_ARMV6
void vp8_dequantize_b_v6(BLOCKD *d)
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
{
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
short *DQC = d->dequant;
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
}

View File

@ -209,6 +209,11 @@ typedef struct MacroBlockD
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
DECLARE_ALIGNED(16, short, dequant_y1[16]);
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
DECLARE_ALIGNED(16, short, dequant_y2[16]);
DECLARE_ALIGNED(16, short, dequant_uv[16]);
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
int fullpixel_mask;

View File

@ -14,12 +14,11 @@
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
void vp8_dequantize_b_c(BLOCKD *d)
void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
short *DQC = d->dequant;
for (i = 0; i < 16; i++)
{

View File

@ -14,7 +14,7 @@
#include "vp8/common/blockd.h"
#define prototype_dequant_block(sym) \
void sym(BLOCKD *x)
void sym(BLOCKD *x, short *DQC)
#define prototype_dequant_idct_add(sym) \
void sym(short *input, short *dq, \

View File

@ -36,13 +36,7 @@ static void eob_adjust(char *eobs, short *diff)
static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
const VP8_COMMON_RTCD *rtcd)
{
short *DQC = xd->block[0].dequant;
/* save the dc dequant constant in case it is overridden */
short dc_dequant_temp = DQC[0];
#if CONFIG_MULTITHREAD
DECLARE_ALIGNED(16, short, local_dequant[16]);
#endif
short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
@ -59,22 +53,11 @@ static void vp8_inverse_transform_mby(MACROBLOCKD *xd,
}
eob_adjust(xd->eobs, xd->qcoeff);
#if CONFIG_MULTITHREAD
DQC = local_dequant;
vpx_memcpy(DQC, xd->block[0].dequant,
sizeof(local_dequant));
#endif
/* override the dc dequant constant */
DQC[0] = 1;
DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block)
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
/* restore the dc dequant constant */
DQC[0] = dc_dequant_temp;
}
#endif

View File

@ -93,9 +93,9 @@ typedef struct VP8Common
{
struct vpx_internal_error_info error;
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
int Width;
int Height;

View File

@ -14,12 +14,12 @@
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
void vp8_dequantize_b_mmx(BLOCKD *d)
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
{
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
short *q = (short *) d->dequant;
vp8_dequantize_b_impl_mmx(sq, dq, q);
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
}
void vp8_dequant_idct_add_y_block_mmx

View File

@ -42,7 +42,6 @@
void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
{
int i;
int Q;
VP8_COMMON *const pc = & pbi->common;
@ -52,15 +51,9 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
/* all the ac values = ; */
for (i = 1; i < 16; i++)
{
int rc = vp8_default_zig_zag1d[i];
pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
}
pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q);
pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
}
}
@ -88,19 +81,19 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
else
QIndex = pc->base_qindex;
/* Set up the block level dequant pointers */
for (i = 0; i < 16; i++)
/* Set up the macroblock dequant constants */
xd->dequant_y1_dc[0] = 1;
xd->dequant_y1[0] = pc->Y1dequant[QIndex][0];
xd->dequant_y2[0] = pc->Y2dequant[QIndex][0];
xd->dequant_uv[0] = pc->UVdequant[QIndex][0];
for (i = 1; i < 16; i++)
{
xd->block[i].dequant = pc->Y1dequant[QIndex];
xd->dequant_y1_dc[i] =
xd->dequant_y1[i] = pc->Y1dequant[QIndex][1];
xd->dequant_y2[i] = pc->Y2dequant[QIndex][1];
xd->dequant_uv[i] = pc->UVdequant[QIndex][1];
}
for (i = 16; i < 24; i++)
{
xd->block[i].dequant = pc->UVdequant[QIndex];
}
xd->block[24].dequant = pc->Y2dequant[QIndex];
}
#if CONFIG_RUNTIME_CPU_DETECT
@ -180,6 +173,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
else
{
short *DQC = xd->dequant_y1;
/* clear out residual eob info */
if(xd->mode_info_context->mbmi.mb_skip_coeff)
vpx_memset(xd->eobs, 0, 25);
@ -200,13 +195,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
(b->qcoeff, DQC,
*(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
(b->qcoeff[0] * b->dequant[0],
(b->qcoeff[0] * DQC[0],
*(b->base_dst) + b->dst, b->dst_stride,
*(b->base_dst) + b->dst, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
@ -233,10 +228,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* dequantization and idct */
if (mode != B_PRED)
{
short *DQC = xd->block[0].dequant;
/* save the dc dequant constant in case it is overridden */
short dc_dequant_temp = DQC[0];
short *DQC = xd->dequant_y1;
if (mode != SPLITMV)
{
@ -245,7 +237,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b,
xd->dequant_y2);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@ -260,7 +253,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
else
{
b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0],
xd->qcoeff);
((int *)b->qcoeff)[0] = 0;
@ -269,20 +262,17 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* override the dc dequant constant in order to preserve the
* dc components
*/
DQC[0] = 1;
DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
(xd->qcoeff, xd->block[0].dequant,
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
/* restore the dc dequant constant */
DQC[0] = dc_dequant_temp;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
(xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}

View File

@ -37,7 +37,7 @@ extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
VP8_COMMON *const pc = & pbi->common;
int i, j;
int i;
for (i = 0; i < count; i++)
{
@ -77,10 +77,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->current_bc = &pbi->bc2;
for (j = 0; j < 25; j++)
{
mbd->block[j].dequant = xd->block[j].dequant;
}
vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
mbd->fullpixel_mask = 0xffffffff;
if(pc->full_pixel)
@ -177,6 +177,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* dequantization and idct */
if (xd->mode_info_context->mbmi.mode == B_PRED)
{
short *DQC = xd->dequant_y1;
for (i = 0; i < 16; i++)
{
BLOCKD *b = &xd->block[i];
@ -190,13 +192,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
if (xd->eobs[i] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add)
(b->qcoeff, b->dequant,
(b->qcoeff, DQC,
*(b->base_dst) + b->dst, b->dst_stride);
}
else
{
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
(b->qcoeff[0] * b->dequant[0],
(b->qcoeff[0] * DQC[0],
*(b->base_dst) + b->dst, b->dst_stride,
*(b->base_dst) + b->dst, b->dst_stride);
((int *)b->qcoeff)[0] = 0;
@ -206,9 +208,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
else
{
short *DQC = xd->block[0].dequant;
DECLARE_ALIGNED(16, short, local_dequant[16]);
short *DQC = xd->dequant_y1;
if (xd->mode_info_context->mbmi.mode != SPLITMV)
{
@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b);
DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b, xd->dequant_y2);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@ -232,20 +232,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
else
{
b->dqcoeff[0] = b->qcoeff[0] * b->dequant[0];
b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], xd->qcoeff);
((int *)b->qcoeff)[0] = 0;
}
/* make a local copy of the dequant constants */
vpx_memcpy(local_dequant, xd->block[0].dequant,
sizeof(local_dequant));
/* override the dc dequant constant */
local_dequant[0] = 1;
/* use the new dequant constants */
DQC = local_dequant;
DQC = xd->dequant_y1_dc;
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block)
@ -255,7 +248,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
}
DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
(xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}

View File

@ -1120,7 +1120,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
(xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
return rate;
@ -1305,7 +1305,7 @@ int vp8cx_encode_inter_macroblock
vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd));
DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block)
(xd->qcoeff+16*16, xd->block[16].dequant,
(xd->qcoeff+16*16, xd->dequant_uv,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
}

View File

@ -384,10 +384,22 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
for (i = 0; i < 25; i++)
{
zd->block[i].dequant = xd->block[i].dequant;
}
vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
#if 1
/*TODO: Remove dequant from BLOCKD. This is a temporary solution until
* the quantizer code uses a passed in pointer to the dequant constants.
* This will also require modifications to the x86 and neon assembly.
* */
for (i = 0; i < 16; i++)
zd->block[i].dequant = zd->dequant_y1;
for (i = 16; i < 24; i++)
zd->block[i].dequant = zd->dequant_uv;
zd->block[24].dequant = zd->dequant_y2;
#endif
}
}

View File

@ -504,7 +504,6 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1];
cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1];
cpi->Y1round[Q][i] = cpi->Y1round[Q][1];
cpi->common.Y1dequant[Q][i] = cpi->common.Y1dequant[Q][1];
cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] *
zbin_boost[i]) >> 7;
@ -513,7 +512,6 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1];
cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1];
cpi->Y2round[Q][i] = cpi->Y2round[Q][1];
cpi->common.Y2dequant[Q][i] = cpi->common.Y2dequant[Q][1];
cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] *
zbin_boost[i]) >> 7;
@ -522,7 +520,6 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1];
cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1];
cpi->UVround[Q][i] = cpi->UVround[Q][1];
cpi->common.UVdequant[Q][i] = cpi->common.UVdequant[Q][1];
cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] *
zbin_boost[i]) >> 7;
}
@ -641,6 +638,31 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
*/
if (!ok_to_skip || QIndex != x->q_index)
{
xd->dequant_y1_dc[0] = 1;
xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0];
xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0];
xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0];
for (i = 1; i < 16; i++)
{
xd->dequant_y1_dc[i] =
xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1];
xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1];
xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1];
}
#if 1
/*TODO: Remove dequant from BLOCKD. This is a temporary solution until
* the quantizer code uses a passed in pointer to the dequant constants.
* This will also require modifications to the x86 and neon assembly.
* */
for (i = 0; i < 16; i++)
x->e_mbd.block[i].dequant = xd->dequant_y1; //cpi->common.Y1dequant[QIndex];
for (i = 16; i < 24; i++)
x->e_mbd.block[i].dequant = xd->dequant_uv; //cpi->common.UVdequant[QIndex];
x->e_mbd.block[24].dequant = xd->dequant_y2; //cpi->common.Y2dequant[QIndex];
#endif
// Y
zbin_extra = ZBIN_EXTRA_Y;
@ -651,7 +673,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
x->block[i].zbin_extra = (short)zbin_extra;
}
@ -666,7 +687,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
x->block[i].round = cpi->UVround[QIndex];
x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
x->block[i].zbin_extra = (short)zbin_extra;
}
@ -679,7 +699,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
x->block[24].zbin = cpi->Y2zbin[QIndex];
x->block[24].round = cpi->Y2round[QIndex];
x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
x->block[24].zbin_extra = (short)zbin_extra;
@ -689,6 +708,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip)
cpi->last_zbin_over_quant = cpi->zbin_over_quant;
cpi->last_zbin_mode_boost = cpi->zbin_mode_boost;
x->last_act_zbin_adj = x->act_zbin_adj;
}
else if(cpi->last_zbin_over_quant != cpi->zbin_over_quant
|| cpi->last_zbin_mode_boost != cpi->zbin_mode_boost