From 0a359cf157957f3eb37760f731fa75dd320fd659 Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Sun, 18 Jan 2009 07:14:36 +0000 Subject: [PATCH] Faster VC-1 C loopfilter using lots of xor magic Originally committed as revision 16670 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/vc1.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index e41d128f2d..115ee717d5 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -314,30 +314,39 @@ static int bitplane_decoding(uint8_t* data, int *raw_flag, VC1Context *v) * @return whether other 3 pairs should be filtered or not * @see 8.6 */ -static int vc1_filter_line(uint8_t* src, int stride, int pq){ - int a0, a1, a2, a3, d, clip, filt3 = 0; +static int av_always_inline vc1_filter_line(uint8_t* src, int stride, int pq){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; - if(FFABS(a0) < pq){ - a1 = (2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3; - a2 = (2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3; - a3 = FFMIN(FFABS(a1), FFABS(a2)); - if(a3 < FFABS(a0)){ - d = 5 * ((a0 >=0 ? a3 : -a3) - a0) / 8; - clip = (src[-1*stride] - src[ 0*stride])/2; + int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; + int a0_sign = a0 >> 31; /* Store sign */ + a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ + if(a0 < pq){ + int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3); + int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3); + if(a1 < a0 || a2 < a0){ + int clip = src[-1*stride] - src[ 0*stride]; + int clip_sign = clip >> 31; + clip = ((clip ^ clip_sign) - clip_sign)>>1; if(clip){ - filt3 = 1; - if(clip > 0) - d = av_clip(d, 0, clip); - else - d = av_clip(d, clip, 0); - src[-1*stride] = cm[src[-1*stride] - d]; - src[ 0*stride] = cm[src[ 0*stride] + d]; + int a3 = FFMIN(a1, a2); + int d = 5 * (a3 - a0); + int d_sign = (d >> 31); + d = ((d ^ d_sign) - d_sign) >> 3; + d_sign ^= a0_sign; + + if( (d_sign ^ clip_sign) | ~d ) + d = 0; + else{ + d = FFMIN(d, clip); + d = (d ^ d_sign) - d_sign; /* Restore sign */ + src[-1*stride] = cm[src[-1*stride] - d]; + src[ 0*stride] = cm[src[ 0*stride] + d]; + } + return 1; } } } - return filt3; + return 0; } /**