VP56: move vp56_edge_filter to new VP56DSPContext

Using macro templates allows the vp[56]_adjust functions to be
inlined instead of called through function pointers.  The new
function pointers enable optimised implementations of the filters.

4% faster VP6 decoding on Cortex-A8.

Originally committed as revision 22992 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Måns Rullgård
2010-04-30 21:30:22 +00:00
parent ff866063e9
commit 5e1ba34bbb
7 changed files with 126 additions and 54 deletions

View File

@@ -300,27 +300,12 @@ static void vp56_add_predictors_dc(VP56Context *s, VP56Frame ref_frame)
}
}
/**
 * Filter across one edge at 12 consecutive positions.
 *
 * At each position the two samples on either side of the edge
 * (yuv[-pix_inc] and yuv[0]) are corrected by a value derived from the four
 * samples yuv[-2*pix_inc] .. yuv[pix_inc]; that value is first passed through
 * s->adjust() together with the threshold.
 *
 * @param s         decoder context; only its adjust callback is used here
 * @param yuv       first sample on the far side of the edge
 * @param pix_inc   distance between neighbouring samples across the edge
 * @param line_inc  distance from one filtered position to the next
 * @param t         threshold handed to s->adjust()
 */
static void vp56_edge_filter(VP56Context *s, uint8_t *yuv,
                             int pix_inc, int line_inc, int t)
{
    const int pix2_inc = pix_inc * 2;
    int remaining = 12;

    while (remaining-- > 0) {
        uint8_t *before = yuv - pix_inc;
        int delta = (yuv[-pix2_inc] + 3 * (yuv[0] - *before)
                     - yuv[pix_inc] + 4) >> 3;

        delta = s->adjust(delta, t);

        /* Move the two samples next to the edge towards each other. */
        *before = av_clip_uint8(*before + delta);
        *yuv    = av_clip_uint8(*yuv - delta);

        yuv += line_inc;
    }
}
/*
 * Run the edge filter on the edges selected by dx and dy, using the
 * threshold looked up in vp56_filter_threshold for the current quantizer.
 *
 * NOTE(review): this listing appears to come from a diff whose +/- markers
 * were lost. The two vp56_edge_filter() calls look like the removed lines and
 * the two s->vp56dsp.edge_filter_*() calls like their replacements -- confirm
 * against the repository before treating all four statements as live code.
 */
static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
int stride, int dx, int dy)
{
int t = vp56_filter_threshold[s->quantizer];
/* dx != 0: filter between adjacent samples (pix_inc 1) at offset 10-dx,
 * advancing by stride between positions. */
if (dx) vp56_edge_filter(s, yuv + 10-dx , 1, stride, t);
/* dy != 0: filter between samples one stride apart at offset stride*(10-dy),
 * advancing by 1 between positions. */
if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t);
/* Same start addresses, dispatched through the vp56dsp function pointers. */
if (dx) s->vp56dsp.edge_filter_hor(yuv + 10-dx , stride, t);
if (dy) s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
}
static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
@@ -665,6 +650,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
if (avctx->idct_algo == FF_IDCT_AUTO)
avctx->idct_algo = FF_IDCT_VP3;
dsputil_init(&s->dsp, avctx);
ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
for (i=0; i<4; i++)