VC1: merge idct8x8, coeff adjustments and put_pixels.

Merging these functions allows merging some loops, which makes the
results (particularly after SIMD optimizations) much faster.
This commit is contained in:
Ronald S. Bultje
2011-02-21 09:07:13 -05:00
parent 8d9ac969cb
commit f8bed30d8b
5 changed files with 170 additions and 40 deletions

View File

@@ -280,6 +280,28 @@ static int vop_dquant_decoding(VC1Context *v)
static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
static void simple_idct_put_rangered(uint8_t *dest, int line_size, DCTELEM *block)
{
int i;
ff_simple_idct(block);
for (i = 0; i < 64; i++) block[i] = (block[i] - 64) << 1;
ff_put_pixels_clamped_c(block, dest, line_size);
}
static void simple_idct_put_signed(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_simple_idct(block);
ff_put_signed_pixels_clamped_c(block, dest, line_size);
}
static void simple_idct_put_signed_rangered(uint8_t *dest, int line_size, DCTELEM *block)
{
int i;
ff_simple_idct(block);
for (i = 0; i < 64; i++) block[i] <<= 1;
ff_put_signed_pixels_clamped_c(block, dest, line_size);
}
/**
* Decode Simple/Main Profiles sequence header
* @see Figure 7-8, p16-17
@@ -337,7 +359,11 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
v->res_fasttx = get_bits1(gb);
if (!v->res_fasttx)
{
v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
v->vc1dsp.vc1_inv_trans_8x8_add = ff_simple_idct_add;
v->vc1dsp.vc1_inv_trans_8x8_put[0] = ff_simple_idct_put;
v->vc1dsp.vc1_inv_trans_8x8_put[1] = simple_idct_put_rangered;
v->vc1dsp.vc1_inv_trans_8x8_put_signed[0] = simple_idct_put_signed;
v->vc1dsp.vc1_inv_trans_8x8_put_signed[1] = simple_idct_put_signed_rangered;
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;