Shortcut 8x8/16x16 inverse 2D-DCT

This commit brings back the shortcut implementation of the 8x8/16x16
inverse 2D-DCT. When eob <= 10, it skips the inverse transform
operations on rows 4:7 (8x8) / 4:15 (16x16) in the first round. For
bus_cif at 1000 kbps, this provides about a 2% speed-up at speed 0.

Change-Id: I453e2d72956467d75be4ad8c04b4482ab889d572
This commit is contained in:
Jingning Han 2013-07-26 17:01:51 -07:00
parent 325e0aa650
commit 38fa487164
2 changed files with 20 additions and 2 deletions

View File

@ -95,6 +95,9 @@ void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
// DC only DCT coefficient
vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
vp9_short_idct10_8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
@ -128,6 +131,9 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) {
input[0] = 0;
vp9_add_constant_residual_16x16(out, dest, stride);
} else if (eob <= 10) {
vp9_short_idct10_16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);

View File

@ -52,10 +52,21 @@ static void inverse_transform_b_8x8_add(MACROBLOCKD *xd, int eob,
int stride) {
if (eob <= 1)
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
else
vp9_short_idct8x8_add(dqcoeff, dest, stride);
}
/* Dispatches a 16x16 inverse DCT + add for one block, choosing the routine
 * from the end-of-block position.
 *
 *   xd      - macroblock descriptor; not referenced in this function.
 *   eob     - end-of-block value used to pick the routine.
 *   dqcoeff - coefficients handed to the inverse transform.
 *   dest    - destination buffer handed to the inverse transform.
 *   stride  - stride passed through together with dest.
 *
 * eob <= 10 goes to vp9_short_idct10_16x16_add (the shortcut variant);
 * any larger eob goes to the full vp9_short_idct16x16_add.
 */
static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob,
                                          int16_t *dqcoeff, uint8_t *dest,
                                          int stride) {
  /* Handle the general case first, then fall through to the shortcut. */
  if (eob > 10) {
    vp9_short_idct16x16_add(dqcoeff, dest, stride);
    return;
  }

  vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
}
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const MACROBLOCKD *const xd = &x->e_mbd;
@ -538,7 +549,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
break;
case TX_16X16:
vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
inverse_transform_b_16x16_add(xd, pd->eobs[block], dqcoeff,
dst, pd->dst.stride);
break;
case TX_8X8:
inverse_transform_b_8x8_add(xd, pd->eobs[block], dqcoeff,
@ -691,7 +703,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
inverse_transform_b_16x16_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
}