2001-10-11 00:13:27 +02:00
/*
2003-02-11 22:13:30 +01:00
Copyright ( C ) 2001 - 2003 Michael Niedermayer ( michaelni @ gmx . at )
2001-10-11 00:13:27 +02:00
2004-05-26 22:15:15 +02:00
AltiVec optimizations ( C ) 2004 Romain Dolbeau < romain @ dolbeau . org >
2001-10-11 00:13:27 +02:00
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
2003-03-06 14:51:18 +01:00
/**
* @ file postprocess . c
* postprocessing .
*/
2001-10-11 00:13:27 +02:00
/*
2004-05-26 22:15:15 +02:00
C MMX MMX2 3 DNow AltiVec
isVertDC Ec Ec Ec
isVertMinMaxOk Ec Ec Ec
doVertLowPass E e e Ec
doVertDefFilter Ec Ec e e Ec
2001-10-11 00:13:27 +02:00
isHorizDC Ec Ec
2001-10-24 18:39:40 +02:00
isHorizMinMaxOk a E
doHorizLowPass E e e
2001-11-19 23:20:30 +01:00
doHorizDefFilter Ec Ec e e
2004-05-26 22:15:15 +02:00
deRing E e e * Ecp
2001-10-15 05:01:08 +02:00
Vertical RKAlgo1 E a a
2001-10-25 13:42:34 +02:00
Horizontal RKAlgo1 a a
2001-11-13 03:40:56 +01:00
Vertical X1 # a E E
Horizontal X1 # a E E
2001-10-17 22:42:07 +02:00
LinIpolDeinterlace e E E *
CubicIpolDeinterlace a e e *
LinBlendDeinterlace e E E *
2003-01-23 05:19:24 +01:00
MedianDeinterlace # E Ec Ec
2001-11-14 03:46:58 +01:00
TempDeNoiser # E e e
2001-10-12 00:35:45 +02:00
2001-11-13 03:40:56 +01:00
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
2001-10-11 00:13:27 +02:00
E = Exact implementation
2001-10-17 22:42:07 +02:00
e = almost exact implementation (slightly different rounding, ...)
2001-10-11 00:13:27 +02:00
a = alternative / approximate impl
c = checked against the other implementations ( - vo md5 )
2004-05-26 22:15:15 +02:00
p = partially optimized , still some work to do
2001-10-11 00:13:27 +02:00
*/
/*
TODO :
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
2001-10-11 00:21:19 +02:00
write a faster and higher quality deblocking filter : )
2001-10-12 00:35:45 +02:00
make the mainloop more flexible ( variable number of blocks at once
( the if / else stuff per block is slowing things down )
2001-10-13 04:31:15 +02:00
compare the quality & speed of all filters
split this huge file
2001-10-30 22:14:02 +01:00
optimize c versions
2001-11-13 03:40:56 +01:00
try to unroll inner for ( x = 0 . . . loop to avoid these damn if ( x . . . checks
2001-10-11 00:13:27 +02:00
. . .
2001-10-11 00:21:19 +02:00
*/
2001-10-16 04:31:14 +02:00
//Changelog: use the CVS log
2001-10-11 00:13:27 +02:00
2003-02-11 22:13:30 +01:00
# include "config.h"
2001-10-11 00:13:27 +02:00
# include <inttypes.h>
# include <stdio.h>
2001-10-12 00:35:45 +02:00
# include <stdlib.h>
2001-10-23 01:36:35 +02:00
# include <string.h>
2001-10-26 01:34:14 +02:00
# ifdef HAVE_MALLOC_H
# include <malloc.h>
# endif
2001-10-11 00:13:27 +02:00
//#undef HAVE_MMX2
2001-10-11 00:21:19 +02:00
//#define HAVE_3DNOW
2001-10-11 00:13:27 +02:00
//#undef HAVE_MMX
2001-11-24 23:16:29 +01:00
//#undef ARCH_X86
2001-11-19 23:20:30 +01:00
//#define DEBUG_BRIGHTNESS
2003-02-14 22:27:25 +01:00
# ifdef USE_FASTMEMCPY
2004-01-31 23:58:32 +01:00
# include "fastmemcpy.h"
2002-11-04 23:45:34 +01:00
# endif
2001-10-11 00:21:19 +02:00
# include "postprocess.h"
2002-11-02 14:58:14 +01:00
# include "postprocess_internal.h"
2003-02-14 22:27:25 +01:00
# include "mangle.h" //FIXME should be supressed
2001-10-11 00:13:27 +02:00
2003-02-18 10:33:21 +01:00
/* Fallback when the platform has no memalign(): the alignment argument (a) is
 * dropped and a plain malloc() of (b) bytes is returned, so the result only
 * has malloc()'s natural alignment. */
# ifndef HAVE_MEMALIGN
# define memalign(a,b) malloc(b)
# endif
2001-10-13 17:53:24 +02:00
/* Small arithmetic helpers.
 * All of them evaluate their arguments more than once, so never pass an
 * expression with side effects (e.g. i++).
 * ABS(0) takes the negation branch and yields 0; SIGN(0) yields -1, i.e.
 * SIGN never returns 0. */
# define MIN(a,b) ((a) > (b) ? (b) : (a))
# define MAX(a,b) ((a) < (b) ? (b) : (a))
# define ABS(a) ((a) > 0 ? (a) : (-(a)))
# define SIGN(a) ((a) > 0 ? 1 : -1)
2001-10-23 01:36:35 +02:00
/* Size in bytes of the scratch buffer that pp_get_mode_by_name_and_quality()
 * copies the mode string into before tokenizing it. */
# define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter option pointer array in the mode parser; one
 * slot is reserved for the terminating NULL. */
# define OPTIONS_ARRAY_SIZE 10
2002-10-28 20:31:04 +01:00
/* Edge length in pixels of the blocks the filters operate on (used as the
 * row/column loop bound throughout this file). */
# define BLOCK_SIZE 8
/* NOTE(review): presumably the line stride of temporary block buffers used by
 * the template code; not referenced in the visible part of this file. */
# define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
2001-10-23 01:36:35 +02:00
2004-03-01 13:46:20 +01:00
/* attribute_used expands to GCC's __attribute__((used)) on GCC newer than 3.0
 * and to nothing elsewhere. It is applied below to file-static objects so the
 * compiler keeps them even when no C code references them. */
# if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
# define attribute_used __attribute__((used))
# else
# define attribute_used
# endif
2001-11-24 23:16:29 +01:00
# ifdef ARCH_X86
2004-03-01 13:46:20 +01:00
/*
 * 8-byte aligned 64-bit constants holding one small value replicated across
 * every lane: wNN = four 16-bit words of 0xNN, bNN = eight bytes of 0xNN.
 * NOTE(review): presumably referenced by name from the inline assembly in the
 * template code (hence attribute_used) -- confirm against the template.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05 = 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) attribute_used w20 = 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b00 = 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b01 = 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b02 = 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b08 = 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b80 = 0x8080808080808080ULL;
2001-11-06 17:53:47 +01:00
# endif
2001-10-11 00:13:27 +02:00
2003-03-29 14:31:12 +01:00
static uint8_t clip_table [ 3 * 256 ] ;
static uint8_t * const clip_tab = clip_table + 256 ;
2004-05-25 20:07:25 +02:00
static const int verbose = 0 ;
2001-12-26 20:45:49 +01:00
2004-03-01 13:46:20 +01:00
static const int attribute_used deringThreshold = 20 ;
2001-10-11 00:13:27 +02:00
2002-10-28 20:31:04 +01:00
2001-10-23 01:36:35 +02:00
static struct PPFilter filters [ ] =
{
{ " hb " , " hdeblock " , 1 , 1 , 3 , H_DEBLOCK } ,
{ " vb " , " vdeblock " , 1 , 2 , 4 , V_DEBLOCK } ,
2002-10-28 20:31:04 +01:00
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
{ " vr " , " rkvdeblock " , 1 , 2 , 4 , V_RK1_FILTER } , */
2001-10-23 01:36:35 +02:00
{ " h1 " , " x1hdeblock " , 1 , 1 , 3 , H_X1_FILTER } ,
{ " v1 " , " x1vdeblock " , 1 , 2 , 4 , V_X1_FILTER } ,
{ " dr " , " dering " , 1 , 5 , 6 , DERING } ,
{ " al " , " autolevels " , 0 , 1 , 2 , LEVEL_FIX } ,
2001-12-28 01:09:55 +01:00
{ " lb " , " linblenddeint " , 1 , 1 , 4 , LINEAR_BLEND_DEINT_FILTER } ,
{ " li " , " linipoldeint " , 1 , 1 , 4 , LINEAR_IPOL_DEINT_FILTER } ,
{ " ci " , " cubicipoldeint " , 1 , 1 , 4 , CUBIC_IPOL_DEINT_FILTER } ,
{ " md " , " mediandeint " , 1 , 1 , 4 , MEDIAN_DEINT_FILTER } ,
2002-10-28 20:31:04 +01:00
{ " fd " , " ffmpegdeint " , 1 , 1 , 4 , FFMPEG_DEINT_FILTER } ,
2003-03-29 14:31:12 +01:00
{ " l5 " , " lowpass5 " , 1 , 1 , 4 , LOWPASS5_DEINT_FILTER } ,
2001-11-13 03:40:56 +01:00
{ " tn " , " tmpnoise " , 1 , 7 , 8 , TEMP_NOISE_FILTER } ,
2002-01-03 15:16:13 +01:00
{ " fq " , " forcequant " , 1 , 0 , 0 , FORCE_QUANT } ,
2001-10-23 01:36:35 +02:00
{ NULL , NULL , 0 , 0 , 0 , 0 } //End Marker
} ;
/*
 * Alias table for the mode parser: pairs of (alias, expansion), terminated by
 * a single NULL. When a filter name equals an alias, the parser splices the
 * expansion into the mode string so that it is parsed as ordinary filters.
 * Aliases are matched with strcmp(), so they must not carry any padding.
 */
static char *replaceTable[] =
{
    "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
    "de",      "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
    NULL //End Marker
};
2001-10-11 00:13:27 +02:00
2002-10-28 20:31:04 +01:00
# ifdef ARCH_X86
2001-10-11 00:13:27 +02:00
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 * Nothing is read or written through p from C; the operand is passed in a
 * general-purpose register (constraint "r").
 */
static inline void prefetchnta(const void *p)
{
    asm volatile("prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 * Nothing is read or written through p from C; the operand is passed in a
 * general-purpose register (constraint "r").
 */
static inline void prefetcht0(const void *p)
{
    asm volatile("prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 * Nothing is read or written through p from C; the operand is passed in a
 * general-purpose register (constraint "r").
 */
static inline void prefetcht1(const void *p)
{
    asm volatile("prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 * Nothing is read or written through p from C; the operand is passed in a
 * general-purpose register (constraint "r").
 */
static inline void prefetcht2(const void *p)
{
    asm volatile("prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
2001-10-13 17:36:04 +02:00
# endif
2001-10-11 00:13:27 +02:00
2001-11-24 23:16:29 +01:00
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
2001-10-11 00:13:27 +02:00
2001-11-20 19:07:13 +01:00
/**
* Check if the given 8 x8 Block is mostly " flat "
*/
2004-05-26 22:15:15 +02:00
static inline int isHorizDC_C ( uint8_t src [ ] , int stride , PPContext * c )
2001-11-20 19:07:13 +01:00
{
int numEq = 0 ;
int y ;
2003-04-18 14:45:34 +02:00
const int dcOffset = ( ( c - > nonBQP * c - > ppMode . baseDcDiff ) > > 8 ) + 1 ;
2002-10-29 19:35:15 +01:00
const int dcThreshold = dcOffset * 2 + 1 ;
2003-04-18 14:45:34 +02:00
2001-11-20 19:07:13 +01:00
for ( y = 0 ; y < BLOCK_SIZE ; y + + )
{
2002-10-28 20:31:04 +01:00
if ( ( ( unsigned ) ( src [ 0 ] - src [ 1 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 1 ] - src [ 2 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 2 ] - src [ 3 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 3 ] - src [ 4 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 4 ] - src [ 5 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 5 ] - src [ 6 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
if ( ( ( unsigned ) ( src [ 6 ] - src [ 7 ] + dcOffset ) ) < dcThreshold ) numEq + + ;
src + = stride ;
}
return numEq > c - > ppMode . flatnessThreshold ;
}
/**
 * Check if the middle 8x8 block in the given 8x16 block is flat.
 *
 * The block starts 4 rows below src. In each of its 8 columns the 7 pairs of
 * vertically adjacent pixels are compared; a pair counts as equal when the
 * two pixels differ by at most dcOffset, which is derived from c->nonBQP and
 * c->ppMode.baseDcDiff.
 *
 * @param src    top-left pixel of the 8x16 area
 * @param stride distance in bytes between two rows
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @return non-zero if more than c->ppMode.flatnessThreshold pairs are equal
 */
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
    const int dcOffset = ((c->nonBQP * c->ppMode.baseDcDiff) >> 8) + 1;
    const int dcThreshold = dcOffset * 2 + 1;
    int equalPairs = 0;
    int row, col;

    src += stride * 4; /* src now points to the begin of the 8x8 block */

    for (row = 0; row < BLOCK_SIZE - 1; row++, src += stride) {
        for (col = 0; col < 8; col++) {
            /* one unsigned compare tests -dcOffset <= diff <= dcOffset */
            if (((unsigned)(src[col] - src[col + stride] + dcOffset)) < dcThreshold)
                equalPairs++;
        }
    }

    return equalPairs > c->ppMode.flatnessThreshold;
}
2004-05-26 22:15:15 +02:00
/**
 * Cheap horizontal range check of an 8x8 block.
 *
 * One pixel pair is sampled per row, using the column pairs (0,5), (2,7),
 * (4,1), (6,3) for rows 0..3 and the same pairs again for rows 4..7.
 * The check fails as soon as one pair differs by more than 2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer controlling the allowed difference
 * @return 1 if every sampled pair is within +-2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int sampledCols[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int first  = line[sampledCols[row & 3][0]];
        const int second = line[sampledCols[row & 3][1]];

        /* one unsigned compare tests -2*QP <= first-second <= 2*QP */
        if ((unsigned)(first - second + 2 * QP) > 4 * QP)
            return 0;
    }
    return 1;
}
2001-11-20 19:07:13 +01:00
2003-06-26 13:31:34 +02:00
/**
 * Cheap vertical range check of the 8x8 block that starts 4 rows below src.
 *
 * One pixel pair is sampled per column, using the row pairs (0,5), (2,7),
 * (4,1), (6,3) for columns 0..3 and the same pairs again for columns 4..7.
 * The check fails as soon as one pair differs by more than 2*QP.
 *
 * @param src    top-left pixel of the 8x16 area
 * @param stride distance in bytes between two rows
 * @param QP     quantizer controlling the allowed difference
 * @return 1 if every sampled pair is within +-2*QP, 0 otherwise
 */
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int sampledRows[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int x, k;

    src += stride * 4;

    for (x = 0; x < BLOCK_SIZE; x += 4) {
        for (k = 0; k < 4; k++) {
            const int first  = src[k + x + sampledRows[k][0] * stride];
            const int second = src[k + x + sampledRows[k][1] * stride];

            /* one unsigned compare tests -2*QP <= first-second <= 2*QP */
            if ((unsigned)(first - second + 2 * QP) > 4 * QP)
                return 0;
        }
    }
    return 1;
}
2004-05-26 22:15:15 +02:00
/**
 * Classify an 8x8 block for horizontal deblocking.
 *
 * @return 2 if isHorizDC_C() does not consider the block flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but fails the min/max check
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isHorizDC_C(src, stride, c))
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
2003-06-26 13:31:34 +02:00
/**
 * Classify a block for vertical deblocking.
 *
 * @return 2 if isVertDC_C() does not consider the block flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but fails the min/max check
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isVertDC_C(src, stride, c))
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
2004-05-26 22:15:15 +02:00
/**
 * Default horizontal deblocking filter (C version) for one 8x8 block.
 *
 * For each of the 8 rows an "energy" term is computed across the pixels
 * dst[2..5]. Rows whose absolute middle energy is below 8*c->QP are
 * corrected: dst[3] and dst[4] are moved by the same amount d in opposite
 * directions. d is derived from how much the middle energy exceeds the
 * smaller of the left and right energies, and is clamped between 0 and half
 * the dst[3]-dst[4] step so the correction never overshoots.
 *
 * @param dst    first pixel of the top row; 8 pixels per row are accessed
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for (row = 0; row < BLOCK_SIZE; row++, dst += stride) {
        const int middleEnergy = 5 * (dst[4] - dst[3]) + 2 * (dst[2] - dst[5]);
        int halfStep, leftEnergy, rightEnergy, delta;

        if (ABS(middleEnergy) >= 8 * c->QP)
            continue; /* row is left untouched */

        halfStep    = (dst[3] - dst[4]) / 2;
        leftEnergy  = 5 * (dst[2] - dst[1]) + 2 * (dst[0] - dst[3]);
        rightEnergy = 5 * (dst[6] - dst[5]) + 2 * (dst[4] - dst[7]);

        delta = ABS(middleEnergy) - MIN(ABS(leftEnergy), ABS(rightEnergy));
        delta = MAX(delta, 0);
        delta = (5 * delta + 32) >> 6;
        delta *= SIGN(-middleEnergy);

        /* clamp delta to the range between 0 and halfStep */
        if (halfStep > 0) {
            if (delta < 0)
                delta = 0;
            else if (delta > halfStep)
                delta = halfStep;
        } else {
            if (delta > 0)
                delta = 0;
            else if (delta < halfStep)
                delta = halfStep;
        }

        dst[3] -= delta;
        dst[4] += delta;
    }
}
/**
* Do a horizontal low pass filter on the 10 x8 block ( dst points to middle 8 x8 Block )
* using the 9 - Tap Filter ( 1 , 1 , 2 , 2 , 4 , 2 , 2 , 1 , 1 ) / 16 ( C version )
*/
2004-05-26 22:15:15 +02:00
static inline void doHorizLowPass_C ( uint8_t dst [ ] , int stride , PPContext * c )
2001-11-20 19:07:13 +01:00
{
int y ;
for ( y = 0 ; y < BLOCK_SIZE ; y + + )
{
2004-05-26 22:15:15 +02:00
const int first = ABS ( dst [ - 1 ] - dst [ 0 ] ) < c - > QP ? dst [ - 1 ] : dst [ 0 ] ;
const int last = ABS ( dst [ 8 ] - dst [ 7 ] ) < c - > QP ? dst [ 8 ] : dst [ 7 ] ;
2001-11-20 19:07:13 +01:00
int sums [ 9 ] ;
sums [ 0 ] = first + dst [ 0 ] ;
sums [ 1 ] = dst [ 0 ] + dst [ 1 ] ;
sums [ 2 ] = dst [ 1 ] + dst [ 2 ] ;
sums [ 3 ] = dst [ 2 ] + dst [ 3 ] ;
sums [ 4 ] = dst [ 3 ] + dst [ 4 ] ;
sums [ 5 ] = dst [ 4 ] + dst [ 5 ] ;
sums [ 6 ] = dst [ 5 ] + dst [ 6 ] ;
sums [ 7 ] = dst [ 6 ] + dst [ 7 ] ;
sums [ 8 ] = dst [ 7 ] + last ;
dst [ 0 ] = ( ( sums [ 0 ] < < 2 ) + ( ( first + sums [ 2 ] ) < < 1 ) + sums [ 4 ] + 8 ) > > 4 ;
dst [ 1 ] = ( ( dst [ 1 ] < < 2 ) + ( ( first + sums [ 0 ] + sums [ 3 ] ) < < 1 ) + sums [ 5 ] + 8 ) > > 4 ;
dst [ 2 ] = ( ( dst [ 2 ] < < 2 ) + ( ( first + sums [ 1 ] + sums [ 4 ] ) < < 1 ) + sums [ 6 ] + 8 ) > > 4 ;
dst [ 3 ] = ( ( dst [ 3 ] < < 2 ) + ( ( sums [ 2 ] + sums [ 5 ] ) < < 1 ) + sums [ 0 ] + sums [ 7 ] + 8 ) > > 4 ;
dst [ 4 ] = ( ( dst [ 4 ] < < 2 ) + ( ( sums [ 3 ] + sums [ 6 ] ) < < 1 ) + sums [ 1 ] + sums [ 8 ] + 8 ) > > 4 ;
dst [ 5 ] = ( ( dst [ 5 ] < < 2 ) + ( ( last + sums [ 7 ] + sums [ 4 ] ) < < 1 ) + sums [ 2 ] + 8 ) > > 4 ;
dst [ 6 ] = ( ( ( last + dst [ 6 ] ) < < 2 ) + ( ( dst [ 7 ] + sums [ 5 ] ) < < 1 ) + sums [ 3 ] + 8 ) > > 4 ;
dst [ 7 ] = ( ( sums [ 8 ] < < 2 ) + ( ( last + sums [ 6 ] ) < < 1 ) + sums [ 4 ] + 8 ) > > 4 ;
dst + = stride ;
}
}
2001-10-24 18:39:40 +02:00
/**
2001-11-24 23:16:29 +01:00
* Experimental Filter 1 ( Horizontal )
* will not damage linear gradients
* Flat blocks should look like they where passed through the ( 1 , 1 , 2 , 2 , 4 , 2 , 2 , 1 , 1 ) 9 - Tap filter
* can only smooth blocks at the expected locations ( it cant smooth them if they did move )
* MMX2 version does correct clipping C version doesnt
* not identical with the vertical one
2001-10-24 18:39:40 +02:00
*/
2001-11-24 23:16:29 +01:00
static inline void horizX1Filter ( uint8_t * src , int stride , int QP )
{
2001-11-13 03:40:56 +01:00
int y ;
2001-11-24 23:16:29 +01:00
static uint64_t * lut = NULL ;
if ( lut = = NULL )
2001-11-13 03:40:56 +01:00
{
2001-11-24 23:16:29 +01:00
int i ;
lut = ( uint64_t * ) memalign ( 8 , 256 * 8 ) ;
for ( i = 0 ; i < 256 ; i + + )
2001-11-13 03:40:56 +01:00
{
2001-11-24 23:16:29 +01:00
int v = i < 128 ? 2 * i : 2 * ( i - 256 ) ;
2001-11-13 03:40:56 +01:00
/*
2001-11-24 23:16:29 +01:00
//Simulate 112242211 9-Tap filter
uint64_t a = ( v / 16 ) & 0xFF ;
uint64_t b = ( v / 8 ) & 0xFF ;
uint64_t c = ( v / 4 ) & 0xFF ;
uint64_t d = ( 3 * v / 8 ) & 0xFF ;
2001-11-13 03:40:56 +01:00
*/
2001-11-24 23:16:29 +01:00
//Simulate piecewise linear interpolation
uint64_t a = ( v / 16 ) & 0xFF ;
uint64_t b = ( v * 3 / 16 ) & 0xFF ;
uint64_t c = ( v * 5 / 16 ) & 0xFF ;
uint64_t d = ( 7 * v / 16 ) & 0xFF ;
uint64_t A = ( 0x100 - a ) & 0xFF ;
uint64_t B = ( 0x100 - b ) & 0xFF ;
uint64_t C = ( 0x100 - c ) & 0xFF ;
uint64_t D = ( 0x100 - c ) & 0xFF ;
lut [ i ] = ( a < < 56 ) | ( b < < 48 ) | ( c < < 40 ) | ( d < < 32 ) |
( D < < 24 ) | ( C < < 16 ) | ( B < < 8 ) | ( A ) ;
//lut[i] = (v<<32) | (v<<24);
2001-11-13 03:40:56 +01:00
}
}
2001-11-24 23:16:29 +01:00
for ( y = 0 ; y < BLOCK_SIZE ; y + + )
2001-11-13 03:40:56 +01:00
{
2001-11-24 23:16:29 +01:00
int a = src [ 1 ] - src [ 2 ] ;
int b = src [ 3 ] - src [ 4 ] ;
int c = src [ 5 ] - src [ 6 ] ;
int d = MAX ( ABS ( b ) - ( ABS ( a ) + ABS ( c ) ) / 2 , 0 ) ;
if ( d < QP )
2001-11-13 03:40:56 +01:00
{
2001-11-24 23:16:29 +01:00
int v = d * SIGN ( - b ) ;
src [ 1 ] + = v / 8 ;
src [ 2 ] + = v / 4 ;
src [ 3 ] + = 3 * v / 8 ;
src [ 4 ] - = 3 * v / 8 ;
src [ 5 ] - = v / 4 ;
src [ 6 ] - = v / 8 ;
2001-11-13 03:40:56 +01:00
}
2001-11-24 23:16:29 +01:00
src + = stride ;
2001-11-13 03:40:56 +01:00
}
2001-11-24 23:16:29 +01:00
}
2001-11-27 02:12:30 +01:00
/*
 * Template instantiation.
 *
 * postprocess_template.c is included once per instruction-set variant that
 * has to be built. Before each inclusion the HAVE_xxx / ARCH_X86 macros are
 * set to describe that variant, and RENAME() is defined so every function in
 * the template gets a variant-specific suffix (_C, _altivec, _MMX, _MMX2,
 * _3DNow).
 *
 * Note: we have C, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one.
 */

/* --- step 1: decide which variants to build ------------------------------ */

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#  define COMPILE_C
#endif

#if defined (ARCH_POWERPC) && defined (HAVE_ALTIVEC)
#  define COMPILE_ALTIVEC
#  ifndef CONFIG_DARWIN
#    warning "################################################################################"
#    warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)."
#    warning "################################################################################"
#  endif //CONFIG_DARWIN
#endif //ARCH_POWERPC && HAVE_ALTIVEC

#ifdef ARCH_X86
#  if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#    define COMPILE_MMX
#  endif
#  if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#    define COMPILE_MMX2
#  endif
#  if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#    define COMPILE_3DNOW
#  endif
#endif //ARCH_X86

/* --- step 2: start from a clean slate ------------------------------------ */

#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#undef ARCH_X86

/* --- step 3: instantiate the template once per selected variant ---------- */

//Plain C version (all HAVE_xxx and ARCH_X86 are already undefined here)
#ifdef COMPILE_C
#  define RENAME(a) a ## _C
#  include "postprocess_template.c"
#endif

//AltiVec version
#if defined (ARCH_POWERPC) && defined (COMPILE_ALTIVEC)
#  undef RENAME
#  define HAVE_ALTIVEC
#  define RENAME(a) a ## _altivec
#  include "postprocess_altivec_template.c"
#  include "postprocess_template.c"
#endif //ARCH_POWERPC && COMPILE_ALTIVEC

//MMX versions
#ifdef COMPILE_MMX
#  undef RENAME
#  define HAVE_MMX
#  undef HAVE_MMX2
#  undef HAVE_3DNOW
#  define ARCH_X86
#  define RENAME(a) a ## _MMX
#  include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#  undef RENAME
#  define HAVE_MMX
#  define HAVE_MMX2
#  undef HAVE_3DNOW
#  define ARCH_X86
#  define RENAME(a) a ## _MMX2
#  include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#  undef RENAME
#  define HAVE_MMX
#  undef HAVE_MMX2
#  define HAVE_3DNOW
#  define ARCH_X86
#  define RENAME(a) a ## _3DNow
#  include "postprocess_template.c"
#endif
2001-11-24 23:16:29 +01:00
// Minor note: the HAVE_xyz macros are messed up after this line, so don't rely on them
static inline void postProcess ( uint8_t src [ ] , int srcStride , uint8_t dst [ ] , int dstStride , int width , int height ,
2002-11-02 14:58:14 +01:00
QP_STORE_T QPs [ ] , int QPStride , int isColor , pp_mode_t * vm , pp_context_t * vc )
2001-11-24 23:16:29 +01:00
{
2002-10-28 20:31:04 +01:00
PPContext * c = ( PPContext * ) vc ;
2002-11-02 14:58:14 +01:00
PPMode * ppMode = ( PPMode * ) vm ;
2002-10-28 20:31:04 +01:00
c - > ppMode = * ppMode ; //FIXME
2001-11-24 23:16:29 +01:00
// useing ifs here as they are faster than function pointers allthough the
// difference wouldnt be messureable here but its much better because
// someone might exchange the cpu whithout restarting mplayer ;)
2001-11-27 02:12:30 +01:00
# ifdef RUNTIME_CPUDETECT
2002-10-28 20:31:04 +01:00
# ifdef ARCH_X86
2001-11-24 23:16:29 +01:00
// ordered per speed fasterst first
2002-10-30 22:02:21 +01:00
if ( c - > cpuCaps & PP_CPU_CAPS_MMX2 )
2002-10-28 20:31:04 +01:00
postProcess_MMX2 ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2002-10-30 22:02:21 +01:00
else if ( c - > cpuCaps & PP_CPU_CAPS_3DNOW )
2002-10-28 20:31:04 +01:00
postProcess_3DNow ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2002-10-30 22:02:21 +01:00
else if ( c - > cpuCaps & PP_CPU_CAPS_MMX )
2002-10-28 20:31:04 +01:00
postProcess_MMX ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-24 23:16:29 +01:00
else
2002-10-28 20:31:04 +01:00
postProcess_C ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-24 23:16:29 +01:00
# else
2004-05-26 22:15:15 +02:00
# ifdef ARCH_POWERPC
# ifdef HAVE_ALTIVEC
else if ( c - > cpuCaps & PP_CPU_CAPS_ALTIVEC )
postProcess_altivec ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
else
# endif
# endif
2002-10-28 20:31:04 +01:00
postProcess_C ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-14 03:46:58 +01:00
# endif
2001-11-27 02:12:30 +01:00
# else //RUNTIME_CPUDETECT
# ifdef HAVE_MMX2
2002-10-28 20:31:04 +01:00
postProcess_MMX2 ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-27 02:12:30 +01:00
# elif defined (HAVE_3DNOW)
2002-10-28 20:31:04 +01:00
postProcess_3DNow ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-27 02:12:30 +01:00
# elif defined (HAVE_MMX)
2002-10-28 20:31:04 +01:00
postProcess_MMX ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2004-05-26 22:15:15 +02:00
# elif defined (HAVE_ALTIVEC)
postProcess_altivec ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-27 02:12:30 +01:00
# else
2002-10-28 20:31:04 +01:00
postProcess_C ( src , srcStride , dst , dstStride , width , height , QPs , QPStride , isColor , c ) ;
2001-11-27 02:12:30 +01:00
# endif
# endif //!RUNTIME_CPUDETECT
2001-11-13 03:40:56 +01:00
}
2001-11-24 23:16:29 +01:00
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
2001-10-11 00:21:19 +02:00
2001-10-23 01:36:35 +02:00
/**
 * -pp command line help.
 *
 * Describes the syntax accepted by pp_get_mode_by_name_and_quality() and lists
 * the available filters. Keep it in sync with the filters[] and replaceTable[]
 * tables: filter names are matched case-sensitively, so they must be spelled
 * here exactly as in the tables.
 *
 * NOTE(review): the column alignment inside these strings appears to have been
 * collapsed to single spaces; re-align against the upstream text if available.
 */
char *pp_help =
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
"c chrom chrominance filtering enabled\n"
"y nochrom chrominance filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
"1. difference factor: default=32, higher -> more deblocking\n"
"2. flatness threshold: default=39, lower -> more deblocking\n"
"the h & v deblocking filters share these\n"
"so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
"f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 lowpass5 deinterlacer\n"
"de default hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa fast h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
"1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
;
2001-10-23 01:36:35 +02:00
2002-11-02 14:58:14 +01:00
pp_mode_t * pp_get_mode_by_name_and_quality ( char * name , int quality )
2001-10-23 01:36:35 +02:00
{
char temp [ GET_MODE_BUFFER_SIZE ] ;
char * p = temp ;
2002-10-28 20:31:04 +01:00
char * filterDelimiters = " ,/ " ;
2001-10-23 01:36:35 +02:00
char * optionDelimiters = " : " ;
2002-11-02 14:58:14 +01:00
struct PPMode * ppMode ;
2001-10-23 01:36:35 +02:00
char * filterToken ;
2002-11-02 14:58:14 +01:00
ppMode = memalign ( 8 , sizeof ( PPMode ) ) ;
ppMode - > lumMode = 0 ;
ppMode - > chromMode = 0 ;
ppMode - > maxTmpNoise [ 0 ] = 700 ;
ppMode - > maxTmpNoise [ 1 ] = 1500 ;
ppMode - > maxTmpNoise [ 2 ] = 3000 ;
ppMode - > maxAllowedY = 234 ;
ppMode - > minAllowedY = 16 ;
2003-04-18 14:50:00 +02:00
ppMode - > baseDcDiff = 256 / 8 ;
ppMode - > flatnessThreshold = 56 - 16 - 1 ;
2002-11-02 14:58:14 +01:00
ppMode - > maxClippedThreshold = 0.01 ;
ppMode - > error = 0 ;
2002-10-29 21:12:24 +01:00
2001-10-23 01:36:35 +02:00
strncpy ( temp , name , GET_MODE_BUFFER_SIZE ) ;
2002-01-13 21:51:49 +01:00
if ( verbose > 1 ) printf ( " pp: %s \n " , name ) ;
2001-11-13 03:40:56 +01:00
2001-10-23 01:36:35 +02:00
for ( ; ; ) {
char * filterName ;
2002-11-02 15:20:05 +01:00
int q = 1000000 ; //PP_QUALITY_MAX;
2001-10-23 01:36:35 +02:00
int chrom = - 1 ;
char * option ;
char * options [ OPTIONS_ARRAY_SIZE ] ;
int i ;
int filterNameOk = 0 ;
int numOfUnknownOptions = 0 ;
int enable = 1 ; //does the user want us to enabled or disabled the filter
filterToken = strtok ( p , filterDelimiters ) ;
if ( filterToken = = NULL ) break ;
2001-11-13 03:40:56 +01:00
p + = strlen ( filterToken ) + 1 ; // p points to next filterToken
2001-10-23 01:36:35 +02:00
filterName = strtok ( filterToken , optionDelimiters ) ;
2002-01-13 21:51:49 +01:00
if ( verbose > 1 ) printf ( " pp: %s::%s \n " , filterToken , filterName ) ;
2001-10-23 01:36:35 +02:00
if ( * filterName = = ' - ' )
{
enable = 0 ;
filterName + + ;
}
2001-11-13 03:40:56 +01:00
2001-10-23 01:36:35 +02:00
for ( ; ; ) { //for all options
option = strtok ( NULL , optionDelimiters ) ;
if ( option = = NULL ) break ;
2002-01-13 21:51:49 +01:00
if ( verbose > 1 ) printf ( " pp: option: %s \n " , option ) ;
2001-10-23 01:36:35 +02:00
if ( ! strcmp ( " autoq " , option ) | | ! strcmp ( " a " , option ) ) q = quality ;
else if ( ! strcmp ( " nochrom " , option ) | | ! strcmp ( " y " , option ) ) chrom = 0 ;
else if ( ! strcmp ( " chrom " , option ) | | ! strcmp ( " c " , option ) ) chrom = 1 ;
else
{
options [ numOfUnknownOptions ] = option ;
numOfUnknownOptions + + ;
}
if ( numOfUnknownOptions > = OPTIONS_ARRAY_SIZE - 1 ) break ;
}
2001-11-13 03:40:56 +01:00
options [ numOfUnknownOptions ] = NULL ;
2001-10-23 01:36:35 +02:00
/* replace stuff from the replace Table */
for ( i = 0 ; replaceTable [ 2 * i ] ! = NULL ; i + + )
{
if ( ! strcmp ( replaceTable [ 2 * i ] , filterName ) )
{
int newlen = strlen ( replaceTable [ 2 * i + 1 ] ) ;
int plen ;
int spaceLeft ;
if ( p = = NULL ) p = temp , * p = 0 ; //last filter
else p - - , * p = ' , ' ; //not last filter
plen = strlen ( p ) ;
2002-05-23 17:40:08 +02:00
spaceLeft = p - temp + plen ;
2001-10-23 01:36:35 +02:00
if ( spaceLeft + newlen > = GET_MODE_BUFFER_SIZE )
{
2002-11-02 14:58:14 +01:00
ppMode - > error + + ;
2001-10-23 01:36:35 +02:00
break ;
}
memmove ( p + newlen , p , plen + 1 ) ;
memcpy ( p , replaceTable [ 2 * i + 1 ] , newlen ) ;
filterNameOk = 1 ;
}
}
for ( i = 0 ; filters [ i ] . shortName ! = NULL ; i + + )
{
2001-11-13 03:40:56 +01:00
// printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
2001-10-23 01:36:35 +02:00
if ( ! strcmp ( filters [ i ] . longName , filterName )
| | ! strcmp ( filters [ i ] . shortName , filterName ) )
{
2002-11-02 14:58:14 +01:00
ppMode - > lumMode & = ~ filters [ i ] . mask ;
ppMode - > chromMode & = ~ filters [ i ] . mask ;
2001-10-23 01:36:35 +02:00
filterNameOk = 1 ;
if ( ! enable ) break ; // user wants to disable it
if ( q > = filters [ i ] . minLumQuality )
2002-11-02 14:58:14 +01:00
ppMode - > lumMode | = filters [ i ] . mask ;
2001-10-23 01:36:35 +02:00
if ( chrom = = 1 | | ( chrom = = - 1 & & filters [ i ] . chromDefault ) )
if ( q > = filters [ i ] . minChromQuality )
2002-11-02 14:58:14 +01:00
ppMode - > chromMode | = filters [ i ] . mask ;
2001-10-23 01:36:35 +02:00
if ( filters [ i ] . mask = = LEVEL_FIX )
{
int o ;
2002-11-02 14:58:14 +01:00
ppMode - > minAllowedY = 16 ;
ppMode - > maxAllowedY = 234 ;
2001-10-23 01:36:35 +02:00
for ( o = 0 ; options [ o ] ! = NULL ; o + + )
2001-12-28 14:56:04 +01:00
{
2001-10-23 01:36:35 +02:00
if ( ! strcmp ( options [ o ] , " fullyrange " )
| | ! strcmp ( options [ o ] , " f " ) )
{
2002-11-02 14:58:14 +01:00
ppMode - > minAllowedY = 0 ;
ppMode - > maxAllowedY = 255 ;
2001-10-23 01:36:35 +02:00
numOfUnknownOptions - - ;
}
2001-12-28 14:56:04 +01:00
}
2001-10-23 01:36:35 +02:00
}
2001-11-13 03:40:56 +01:00
else if ( filters [ i ] . mask = = TEMP_NOISE_FILTER )
{
int o ;
int numOfNoises = 0 ;
for ( o = 0 ; options [ o ] ! = NULL ; o + + )
{
char * tail ;
2002-11-02 14:58:14 +01:00
ppMode - > maxTmpNoise [ numOfNoises ] =
2001-11-13 03:40:56 +01:00
strtol ( options [ o ] , & tail , 0 ) ;
if ( tail ! = options [ o ] )
{
numOfNoises + + ;
numOfUnknownOptions - - ;
if ( numOfNoises > = 3 ) break ;
}
}
}
2001-12-28 01:09:55 +01:00
else if ( filters [ i ] . mask = = V_DEBLOCK | | filters [ i ] . mask = = H_DEBLOCK )
{
int o ;
for ( o = 0 ; options [ o ] ! = NULL & & o < 2 ; o + + )
{
char * tail ;
int val = strtol ( options [ o ] , & tail , 0 ) ;
if ( tail = = options [ o ] ) break ;
numOfUnknownOptions - - ;
2002-11-02 14:58:14 +01:00
if ( o = = 0 ) ppMode - > baseDcDiff = val ;
else ppMode - > flatnessThreshold = val ;
2001-12-28 01:09:55 +01:00
}
}
2002-01-03 15:16:13 +01:00
else if ( filters [ i ] . mask = = FORCE_QUANT )
{
int o ;
2002-11-02 14:58:14 +01:00
ppMode - > forcedQuant = 15 ;
2002-01-03 15:16:13 +01:00
for ( o = 0 ; options [ o ] ! = NULL & & o < 1 ; o + + )
{
char * tail ;
int val = strtol ( options [ o ] , & tail , 0 ) ;
if ( tail = = options [ o ] ) break ;
numOfUnknownOptions - - ;
2002-11-02 14:58:14 +01:00
ppMode - > forcedQuant = val ;
2002-01-03 15:16:13 +01:00
}
}
2001-10-23 01:36:35 +02:00
}
}
2002-11-02 14:58:14 +01:00
if ( ! filterNameOk ) ppMode - > error + + ;
ppMode - > error + = numOfUnknownOptions ;
2001-10-23 01:36:35 +02:00
}
2002-11-02 14:58:14 +01:00
if ( verbose > 1 ) printf ( " pp: lumMode=%X, chromMode=%X \n " , ppMode - > lumMode , ppMode - > chromMode ) ;
if ( ppMode - > error )
{
fprintf ( stderr , " %d errors in postprocess string \" %s \" \n " , ppMode - > error , name ) ;
free ( ppMode ) ;
return NULL ;
}
2001-10-23 01:36:35 +02:00
return ppMode ;
}
2002-11-02 14:58:14 +01:00
/**
 * Releases a mode object by handing the pointer to free().
 * @param mode mode to release; NULL is accepted and ignored
 */
void pp_free_mode(pp_mode_t *mode){
	/* free(NULL) is defined to do nothing, so no explicit guard is needed */
	free(mode);
}
2002-12-29 01:57:23 +01:00
/**
 * Replaces *p with a freshly allocated, zero-filled buffer.
 * The previous buffer (if any) is freed first; its contents are NOT preserved.
 * @param p         address of the pointer to replace
 * @param alignment alignment passed to memalign()
 * @param size      number of bytes to allocate and clear
 * On allocation failure *p is left NULL and nothing is cleared.
 */
static void reallocAlign(void **p, int alignment, int size){
	free(*p); /* free(NULL) is a no-op */
	*p = memalign(alignment, size);
	/* only clear the buffer if the allocation actually succeeded */
	if(*p)
		memset(*p, 0, size);
}
2003-04-18 14:45:34 +02:00
/**
 * (Re)allocates all work buffers of a context and records the strides used.
 * Every buffer is replaced through reallocAlign(), so previous contents are
 * discarded and the new buffers start out zeroed.
 * @param c        context whose buffers are replaced
 * @param width    picture width, used to derive the macroblock count per row
 * @param height   picture height, used to derive the macroblock row count
 * @param stride   line stride the temporary picture buffers are sized for
 * @param qpStride row stride the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
	const int mbWidth  = (width  + 15) >> 4;
	const int mbHeight = (height + 15) >> 4;
	/* initial value written to each of the 256 histogram bins */
	const int histogramSeed = width * height / 64 * 15 / 256;
	int n;

	c->stride   = stride;
	c->qpStride = qpStride;

	reallocAlign((void **)&c->tempDst,    8, stride * 24);
	reallocAlign((void **)&c->tempSrc,    8, stride * 24);
	reallocAlign((void **)&c->tempBlocks, 8, 2 * 16 * 8);
	reallocAlign((void **)&c->yHistogram, 8, 256 * sizeof(uint64_t));

	for(n = 0; n < 256; n++){
		c->yHistogram[n] = histogramSeed;
	}

	for(n = 0; n < 3; n++){
		/* the extra 17*1024 bytes are slack so that reads/writes slightly
		   past the end of the picture stay inside the allocation */
		reallocAlign((void **)&c->tempBlured[n], 8, stride * mbHeight * 16 + 17 * 1024);
		reallocAlign((void **)&c->tempBluredPast[n], 8, 256 * ((height + 7) & (~7)) / 2 + 17 * 1024); //FIXME size
	}

	reallocAlign((void **)&c->deintTemp, 8, 2 * width + 32);

	/* QP tables: two sized per (qpStride x macroblock rows), one single row */
	reallocAlign((void **)&c->nonBQPTable,   8, qpStride * mbHeight * sizeof(QP_STORE_T));
	reallocAlign((void **)&c->stdQPTable,    8, qpStride * mbHeight * sizeof(QP_STORE_T));
	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth * sizeof(QP_STORE_T));
}
2003-05-24 20:48:30 +02:00
/**
 * Initializes the global clip_table.
 * The 256 bytes at the start of the table and the 256 bytes starting at
 * entry 512 are cleared; each entry i in 256..511 is assigned the value i
 * (converted to the table's element type).
 */
static void global_init(void){
	int v = 256;

	/* the two cleared regions do not overlap the assigned range */
	memset(clip_table + 512, 0, 256);
	memset(clip_table, 0, 256);

	while(v < 512){
		clip_table[v] = v;
		v++;
	}
}
2002-12-29 01:57:23 +01:00
pp_context_t * pp_get_context ( int width , int height , int cpuCaps ) {
PPContext * c = memalign ( 32 , sizeof ( PPContext ) ) ;
int stride = ( width + 15 ) & ( ~ 15 ) ; //assumed / will realloc if needed
2003-04-18 14:45:34 +02:00
int qpStride = ( width + 15 ) / 16 + 2 ; //assumed / will realloc if needed
2002-12-29 01:57:23 +01:00
2003-03-29 14:31:12 +01:00
global_init ( ) ;
2002-12-29 01:57:23 +01:00
memset ( c , 0 , sizeof ( PPContext ) ) ;
c - > cpuCaps = cpuCaps ;
2003-01-05 20:10:42 +01:00
if ( cpuCaps & PP_FORMAT ) {
c - > hChromaSubSample = cpuCaps & 0x3 ;
c - > vChromaSubSample = ( cpuCaps > > 4 ) & 0x3 ;
} else {
c - > hChromaSubSample = 1 ;
c - > vChromaSubSample = 1 ;
}
2002-12-29 01:57:23 +01:00
2003-04-18 14:45:34 +02:00
reallocBuffers ( c , width , height , stride , qpStride ) ;
2002-12-29 01:57:23 +01:00
2002-10-28 20:31:04 +01:00
c - > frameNum = - 1 ;
2001-12-26 20:45:49 +01:00
2002-10-28 20:31:04 +01:00
return c ;
2001-12-26 20:45:49 +01:00
}
2002-10-29 19:51:52 +01:00
void pp_free_context ( void * vc ) {
2002-10-28 20:31:04 +01:00
PPContext * c = ( PPContext * ) vc ;
int i ;
for ( i = 0 ; i < 3 ; i + + ) free ( c - > tempBlured [ i ] ) ;
for ( i = 0 ; i < 3 ; i + + ) free ( c - > tempBluredPast [ i ] ) ;
free ( c - > tempBlocks ) ;
free ( c - > yHistogram ) ;
free ( c - > tempDst ) ;
free ( c - > tempSrc ) ;
free ( c - > deintTemp ) ;
2003-04-18 14:45:34 +02:00
free ( c - > stdQPTable ) ;
2002-10-29 19:35:15 +01:00
free ( c - > nonBQPTable ) ;
2002-12-29 01:57:23 +01:00
free ( c - > forcedQPTable ) ;
memset ( c , 0 , sizeof ( PPContext ) ) ;
2002-10-28 20:31:04 +01:00
free ( c ) ;
}
2002-10-29 19:51:52 +01:00
void pp_postprocess ( uint8_t * src [ 3 ] , int srcStride [ 3 ] ,
2002-10-28 20:31:04 +01:00
uint8_t * dst [ 3 ] , int dstStride [ 3 ] ,
2002-10-29 19:35:15 +01:00
int width , int height ,
2002-10-28 20:31:04 +01:00
QP_STORE_T * QP_store , int QPStride ,
2002-11-02 14:58:14 +01:00
pp_mode_t * vm , void * vc , int pict_type )
2001-10-23 01:36:35 +02:00
{
2002-10-29 19:35:15 +01:00
int mbWidth = ( width + 15 ) > > 4 ;
int mbHeight = ( height + 15 ) > > 4 ;
2002-11-02 14:58:14 +01:00
PPMode * mode = ( PPMode * ) vm ;
2002-10-29 19:35:15 +01:00
PPContext * c = ( PPContext * ) vc ;
2002-12-29 01:57:23 +01:00
int minStride = MAX ( srcStride [ 0 ] , dstStride [ 0 ] ) ;
2003-04-18 14:45:34 +02:00
if ( c - > stride < minStride | | c - > qpStride < QPStride )
reallocBuffers ( c , width , height ,
MAX ( minStride , c - > stride ) ,
MAX ( c - > qpStride , QPStride ) ) ;
2002-10-28 20:31:04 +01:00
2002-01-03 15:16:13 +01:00
if ( QP_store = = NULL | | ( mode - > lumMode & FORCE_QUANT ) )
2001-11-11 02:18:40 +01:00
{
2002-01-03 15:16:13 +01:00
int i ;
2002-12-29 01:57:23 +01:00
QP_store = c - > forcedQPTable ;
2002-10-28 20:31:04 +01:00
QPStride = 0 ;
2002-01-03 15:16:13 +01:00
if ( mode - > lumMode & FORCE_QUANT )
2002-12-29 01:57:23 +01:00
for ( i = 0 ; i < mbWidth ; i + + ) QP_store [ i ] = mode - > forcedQuant ;
2002-01-03 15:16:13 +01:00
else
2002-12-29 01:57:23 +01:00
for ( i = 0 ; i < mbWidth ; i + + ) QP_store [ i ] = 1 ;
2001-11-11 02:18:40 +01:00
}
2003-04-18 14:45:34 +02:00
//printf("pict_type:%d\n", pict_type);
if ( pict_type & PP_PICT_TYPE_QP2 ) {
int i ;
const int count = mbHeight * QPStride ;
for ( i = 0 ; i < ( count > > 2 ) ; i + + ) {
( ( uint32_t * ) c - > stdQPTable ) [ i ] = ( ( ( uint32_t * ) QP_store ) [ i ] > > 1 ) & 0x7F7F7F7F ;
}
for ( i < < = 2 ; i < count ; i + + ) {
c - > stdQPTable [ i ] = QP_store [ i ] > > 1 ;
}
QP_store = c - > stdQPTable ;
}
2002-10-29 19:35:15 +01:00
if ( 0 ) {
int x , y ;
for ( y = 0 ; y < mbHeight ; y + + ) {
for ( x = 0 ; x < mbWidth ; x + + ) {
printf ( " %2d " , QP_store [ x + y * QPStride ] ) ;
}
printf ( " \n " ) ;
}
printf ( " \n " ) ;
}
2002-10-30 00:36:37 +01:00
2003-04-18 14:45:34 +02:00
if ( ( pict_type & 7 ) ! = 3 )
2002-10-29 19:35:15 +01:00
{
2003-04-18 14:45:34 +02:00
int i ;
const int count = mbHeight * QPStride ;
for ( i = 0 ; i < ( count > > 2 ) ; i + + ) {
2004-01-04 18:29:51 +01:00
( ( uint32_t * ) c - > nonBQPTable ) [ i ] = ( ( uint32_t * ) QP_store ) [ i ] & 0x3F3F3F3F ;
2003-04-18 14:45:34 +02:00
}
for ( i < < = 2 ; i < count ; i + + ) {
2004-01-04 18:29:51 +01:00
c - > nonBQPTable [ i ] = QP_store [ i ] & 0x3F ;
2002-10-29 19:35:15 +01:00
}
}
2001-11-11 02:18:40 +01:00
2002-10-29 21:12:24 +01:00
if ( verbose > 2 )
2002-01-13 21:51:49 +01:00
{
printf ( " using npp filters 0x%X/0x%X \n " , mode - > lumMode , mode - > chromMode ) ;
}
2002-10-28 20:31:04 +01:00
postProcess ( src [ 0 ] , srcStride [ 0 ] , dst [ 0 ] , dstStride [ 0 ] ,
2003-02-20 18:30:51 +01:00
width , height , QP_store , QPStride , 0 , mode , c ) ;
2001-10-23 01:36:35 +02:00
2003-01-05 20:10:42 +01:00
width = ( width ) > > c - > hChromaSubSample ;
height = ( height ) > > c - > vChromaSubSample ;
2001-10-23 01:36:35 +02:00
2001-11-24 03:05:06 +01:00
if ( mode - > chromMode )
{
2002-10-28 20:31:04 +01:00
postProcess ( src [ 1 ] , srcStride [ 1 ] , dst [ 1 ] , dstStride [ 1 ] ,
2003-02-20 18:30:51 +01:00
width , height , QP_store , QPStride , 1 , mode , c ) ;
2002-10-28 20:31:04 +01:00
postProcess ( src [ 2 ] , srcStride [ 2 ] , dst [ 2 ] , dstStride [ 2 ] ,
2003-02-20 18:30:51 +01:00
width , height , QP_store , QPStride , 2 , mode , c ) ;
2001-11-24 03:05:06 +01:00
}
2002-10-28 20:31:04 +01:00
else if ( srcStride [ 1 ] = = dstStride [ 1 ] & & srcStride [ 2 ] = = dstStride [ 2 ] )
2001-11-24 03:05:06 +01:00
{
2002-10-29 19:35:15 +01:00
memcpy ( dst [ 1 ] , src [ 1 ] , srcStride [ 1 ] * height ) ;
memcpy ( dst [ 2 ] , src [ 2 ] , srcStride [ 2 ] * height ) ;
2001-11-24 03:05:06 +01:00
}
else
{
int y ;
2002-10-29 19:35:15 +01:00
for ( y = 0 ; y < height ; y + + )
2001-11-24 03:05:06 +01:00
{
2002-10-29 19:35:15 +01:00
memcpy ( & ( dst [ 1 ] [ y * dstStride [ 1 ] ] ) , & ( src [ 1 ] [ y * srcStride [ 1 ] ] ) , width ) ;
memcpy ( & ( dst [ 2 ] [ y * dstStride [ 2 ] ] ) , & ( src [ 2 ] [ y * srcStride [ 2 ] ] ) , width ) ;
2001-11-24 03:05:06 +01:00
}
}
2001-10-23 01:36:35 +02:00
}