Moving tx_type == DCT_DCT checks inside iht_add functions.

The tx_type == DCT_DCT check is an implementation detail of the iht_add
functions, so it now lives inside them instead of at every call site. Also
adds a dequant_add_y function with an explicit DCT_DCT check inside.

Change-Id: Ia3cb0225601752cdef0ff6f0acd3a09d9dbd8938
This commit is contained in:
Dmitry Kovalev 2013-04-16 11:02:57 -07:00
parent e87c7f0930
commit 3609856ac5
2 changed files with 74 additions and 107 deletions

View File

@ -204,15 +204,9 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
const TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
if (tx_type != DCT_DCT) {
vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
xd->block[0].dequant, xd->dst.y_buffer,
xd->dst.y_stride, xd->plane[0].eobs[0]);
} else {
vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant,
xd->dst.y_buffer, xd->dst.y_stride,
xd->plane[0].eobs[0]);
}
vp9_dequant_iht_add_16x16_c(tx_type, xd->plane[0].qcoeff,
xd->block[0].dequant, xd->dst.y_buffer,
xd->dst.y_stride, xd->plane[0].eobs[0]);
vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.uv_stride,
@ -244,12 +238,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra8x8_predict(xd, b, i8x8mode, dst, stride);
}
tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride,
xd->plane[0].eobs[idx]);
} else {
vp9_dequant_idct_add_8x8(q, dq, dst, stride, xd->plane[0].eobs[idx]);
}
vp9_dequant_iht_add_8x8_c(tx_type, q, dq, dst, stride,
xd->plane[0].eobs[idx]);
}
} else {
vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
@ -295,6 +285,22 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
}
}
/* Dequantize, inverse-transform and add one block of plane 0 to its
 * destination.
 *
 * xd       macroblock descriptor; supplies block[idx] (dequant table and
 *          destination pointer/stride) and plane[0] (coefficients and eobs).
 * tx_type  transform type selected by the caller for this block.
 * idx      block index; coefficients are read at an offset of 16 per block.
 *
 * The DCT_DCT case is dispatched through the xd->itxm_add function pointer
 * rather than being forwarded to vp9_dequant_iht_add_c, so whichever
 * implementation the caller installed on xd is the one that runs.
 */
static INLINE void dequant_add_y(MACROBLOCKD *xd, TX_TYPE tx_type, int idx) {
  BLOCKD *const blk = &xd->block[idx];
  struct mb_plane *const plane0 = &xd->plane[0];

  if (tx_type == DCT_DCT) {
    /* Plain DCT: use the per-macroblock inverse-transform-and-add hook. */
    xd->itxm_add(BLOCK_OFFSET(plane0->qcoeff, idx, 16),
                 blk->dequant, *(blk->base_dst) + blk->dst,
                 blk->dst_stride, plane0->eobs[idx]);
    return;
  }

  /* Any other transform type goes through the hybrid-transform path. */
  vp9_dequant_iht_add_c(tx_type,
                        BLOCK_OFFSET(plane0->qcoeff, idx, 16),
                        blk->dequant, *(blk->base_dst) + blk->dst,
                        blk->dst_stride, plane0->eobs[idx]);
}
static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
TX_TYPE tx_type;
@ -310,19 +316,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra8x8_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
b->dst_stride);
for (j = 0; j < 4; j++) {
b = &xd->block[ib + iblock[j]];
tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
if (tx_type != DCT_DCT) {
vp9_dequant_iht_add_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
b->dequant, *(b->base_dst) + b->dst,
b->dst_stride,
xd->plane[0].eobs[ib + iblock[j]]);
} else {
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[0].eobs[ib + iblock[j]]);
}
dequant_add_y(xd, tx_type, ib + iblock[j]);
}
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, *(b->base_dst) + b->dst,
@ -350,16 +345,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_intra4x4_predict(xd, b, b_mode, *(b->base_dst) + b->dst,
b->dst_stride);
tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_dequant_iht_add_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
b->dequant, *(b->base_dst) + b->dst,
b->dst_stride, xd->plane[0].eobs[i]);
} else {
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[0].eobs[i]);
}
dequant_add_y(xd, tx_type, i);
}
#if CONFIG_NEWBINTRAMODES
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
@ -380,18 +366,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.v_buffer, xd->dst.uv_stride, xd->plane[2].eobs);
} else {
for (i = 0; i < 16; i++) {
BLOCKD *b = &xd->block[i];
tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_dequant_iht_add_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
b->dequant, *(b->base_dst) + b->dst,
b->dst_stride, xd->plane[0].eobs[i]);
} else {
xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
b->dequant, *(b->base_dst) + b->dst, b->dst_stride,
xd->plane[0].eobs[i]);
}
dequant_add_y(xd, tx_type, i);
}
xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.uv_stride,
@ -451,19 +427,12 @@ static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
const TX_TYPE tx_type = get_tx_type_16x16(mb,
(y_idx * (4 * bw) + x_idx) * 4);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
mb->block[0].dequant ,
mb->dst.y_buffer + y_offset, mb->dst.y_stride,
mb->plane[0].eobs[n * 16]);
} else {
vp9_dequant_iht_add_16x16_c(tx_type,
BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
mb->block[0].dequant,
mb->dst.y_buffer + y_offset,
mb->dst.y_stride,
mb->plane[0].eobs[n * 16]);
}
vp9_dequant_iht_add_16x16_c(tx_type,
BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
mb->block[0].dequant,
mb->dst.y_buffer + y_offset,
mb->dst.y_stride,
mb->plane[0].eobs[n * 16]);
}
}
@ -503,18 +472,12 @@ static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
const TX_TYPE tx_type = get_tx_type_8x8(xd,
(y_idx * (2 * bw) + x_idx) * 2);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_8x8(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
xd->block[0].dequant,
xd->dst.y_buffer + y_offset,
xd->dst.y_stride, xd->plane[0].eobs[n * 4]);
} else {
vp9_dequant_iht_add_8x8_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
xd->block[0].dequant,
xd->dst.y_buffer + y_offset, xd->dst.y_stride,
xd->plane[0].eobs[n * 4]);
}
vp9_dequant_iht_add_8x8_c(tx_type,
BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
xd->block[0].dequant,
xd->dst.y_buffer + y_offset, xd->dst.y_stride,
xd->plane[0].eobs[n * 4]);
}
}

View File

@ -75,32 +75,40 @@ void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
void vp9_dequant_iht_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *dest, int stride, int eob) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
for (i = 0; i < 16; i++)
input[i] *= dq[i];
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add(input, dq, dest, stride, eob);
} else {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
vp9_short_iht4x4(input, output, 4, tx_type);
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
for (i = 0; i < 16; i++)
input[i] *= dq[i];
vp9_short_iht4x4(input, output, 4, tx_type);
vpx_memset(input, 0, 32);
vp9_add_residual_4x4(output, dest, stride);
}
}
void vp9_dequant_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, uint8_t *dest,
int stride, int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_8x8(input, dq, dest, stride, eob);
} else {
if (eob > 0) {
int i;
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
if (eob > 0) {
int i;
input[0] *= dq[0];
for (i = 1; i < 64; i++)
input[i] *= dq[1];
input[0] *= dq[0];
for (i = 1; i < 64; i++)
input[i] *= dq[1];
vp9_short_iht8x8(input, output, 8, tx_type);
vpx_memset(input, 0, 128);
vp9_add_residual_8x8(output, dest, stride);
vp9_short_iht8x8(input, output, 8, tx_type);
vpx_memset(input, 0, 128);
vp9_add_residual_8x8(output, dest, stride);
}
}
}
@ -236,26 +244,22 @@ void vp9_dequant_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *dest, int stride,
int eob) {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
if (tx_type == DCT_DCT) {
vp9_dequant_idct_add_16x16(input, dq, dest, stride, eob);
} else {
DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
if (eob > 0) {
int i;
if (eob > 0) {
int i;
input[0] *= dq[0];
input[0] *= dq[0];
for (i = 1; i < 256; i++)
input[i] *= dq[1];
// recover quantizer for 4 4x4 blocks
for (i = 1; i < 256; i++)
input[i] *= dq[1];
// inverse hybrid transform
vp9_short_iht16x16(input, output, 16, tx_type);
// the idct halves ( >> 1) the pitch
// vp9_short_idct16x16(input, output, 32);
vpx_memset(input, 0, 512);
vp9_add_residual_16x16(output, dest, stride);
vp9_short_iht16x16(input, output, 16, tx_type);
vpx_memset(input, 0, 512);
vp9_add_residual_16x16(output, dest, stride);
}
}
}