motion estimation cleanup

Replace ugly macros with always_inline functions. This makes the code much more readable and more flexible, since always_inline can simply be removed, whereas the macros could not be.
About 0.5% speedup with default parameters.

Originally committed as revision 3037 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2004-04-22 03:31:29 +00:00
parent c4a1714826
commit 2750b827b3
4 changed files with 480 additions and 673 deletions

View File

@ -1575,7 +1575,7 @@ typedef struct AVCodecContext {
* - encoding: set by user * - encoding: set by user
* - decoding: set by user * - decoding: set by user
*/ */
void *me_threshold; int me_threshold;
} AVCodecContext; } AVCodecContext;

File diff suppressed because it is too large Load Diff

View File

@ -22,58 +22,32 @@
* @file motion_est_template.c * @file motion_est_template.c
* Motion estimation template. * Motion estimation template.
*/ */
//FIXME ref2_y next_pic?
//lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
//Note, the last line is there to kill these ugly unused var warnings
#define LOAD_COMMON\ #define LOAD_COMMON\
uint32_t * const score_map= s->me.score_map;\ uint32_t * const score_map= s->me.score_map;\
const int time_pp= s->pp_time;\
const int time_pb= s->pb_time;\
const int xmin= s->me.xmin;\ const int xmin= s->me.xmin;\
const int ymin= s->me.ymin;\ const int ymin= s->me.ymin;\
const int xmax= s->me.xmax;\ const int xmax= s->me.xmax;\
const int ymax= s->me.ymax;\ const int ymax= s->me.ymax;\
uint8_t * const src_y= src_data[0];\ uint8_t *mv_penalty= s->me.current_mv_penalty;\
uint8_t * const src_u= src_data[1];\ const int pred_x= s->me.pred_x;\
uint8_t * const src_v= src_data[2];\ const int pred_y= s->me.pred_y;\
uint8_t * const ref_y= ref_data[0];\
uint8_t * const ref_u= ref_data[1];\
uint8_t * const ref_v= ref_data[2];\
op_pixels_func (*hpel_put)[4];\
op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
op_pixels_func (*chroma_hpel_put)[4];\
qpel_mc_func (*qpel_put)[16];\
qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\
+ (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\
+ xmin + xmax + ymin + ymax;\
if(s->no_rounding /*FIXME b_type*/){\
hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
}else{\
hpel_put=& s->dsp.put_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
}
#ifdef CMP_HPEL
#define CHECK_HALF_MV(dx, dy, x, y)\ #define CHECK_HALF_MV(dx, dy, x, y)\
{\ {\
const int hx= 2*(x)+(dx);\ const int hx= 2*(x)+(dx);\
const int hy= 2*(y)+(dy);\ const int hy= 2*(y)+(dy);\
CMP_HPEL(d, dx, dy, x, y, size);\ d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
} }
#if 0 #if 0
static int RENAME(hpel_motion_search)(MpegEncContext * s, static int hpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin, int *mx_ptr, int *my_ptr, int dmin,
int pred_x, int pred_y, uint8_t *ref_data[3], uint8_t *ref_data[3],
int size, uint8_t * const mv_penalty) int size)
{ {
const int xx = 16 * s->mb_x + 8*(n&1); const int xx = 16 * s->mb_x + 8*(n&1);
const int yy = 16 * s->mb_y + 8*(n>>1); const int yy = 16 * s->mb_y + 8*(n>>1);
@ -94,8 +68,8 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
hpel_put=& s->dsp.put_pixels_tab[size]; hpel_put=& s->dsp.put_pixels_tab[size];
chroma_hpel_put= &s->dsp.put_pixels_tab[size+1]; chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
} }
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
cmp_sub= s->dsp.me_sub_cmp[size]; cmp_sub= s->dsp.me_sub_cmp[size];
chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
@ -138,11 +112,10 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
} }
#else #else
static int RENAME(hpel_motion_search)(MpegEncContext * s, static int hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin, int *mx_ptr, int *my_ptr, int dmin,
int pred_x, int pred_y, uint8_t *src_data[3], int src_index, int ref_index,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h)
int size, int h, uint8_t * const mv_penalty)
{ {
const int mx = *mx_ptr; const int mx = *mx_ptr;
const int my = *my_ptr; const int my = *my_ptr;
@ -151,6 +124,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
int bx=2*mx, by=2*my; int bx=2*mx, by=2*my;
LOAD_COMMON LOAD_COMMON
int flags= s->me.sub_flags;
//FIXME factorize //FIXME factorize
@ -164,7 +138,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
} }
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_HPEL(dmin, 0, 0, mx, my, size); dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
if(mx || my || size>0) if(mx || my || size>0)
dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
} }
@ -246,14 +220,16 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s,
} }
#endif #endif
static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], static int inline get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
uint8_t *ref_data[3], int stride, int uvstride, int ref_index)
uint8_t * const mv_penalty)
{ {
// const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
const int size= 0; const int size= 0;
const int h= 16; const int h= 16;
const int penalty_factor= s->me.mb_penalty_factor; const int penalty_factor= s->me.mb_penalty_factor;
const int flags= s->me.mb_flags;
const int qpel= flags & FLAG_QPEL;
const int mask= 1+2*qpel;
me_cmp_func cmp_sub, chroma_cmp_sub; me_cmp_func cmp_sub, chroma_cmp_sub;
int d; int d;
@ -267,7 +243,7 @@ static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
assert(!s->me.skip); assert(!s->me.skip);
assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
//FIXME check cbp before adding penalty for (0,0) vector //FIXME check cbp before adding penalty for (0,0) vector
if(mx || my || size>0) if(mx || my || size>0)
d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
@ -275,26 +251,19 @@ static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
return d; return d;
} }
#endif /* CMP_HPEL */
#ifdef CMP_QPEL
#define CHECK_QUARTER_MV(dx, dy, x, y)\ #define CHECK_QUARTER_MV(dx, dy, x, y)\
{\ {\
const int hx= 4*(x)+(dx);\ const int hx= 4*(x)+(dx);\
const int hy= 4*(y)+(dy);\ const int hy= 4*(y)+(dy);\
CMP_QPEL(d, dx, dy, x, y, size);\ d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
} }
static int RENAME(qpel_motion_search)(MpegEncContext * s, static int qpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin, int *mx_ptr, int *my_ptr, int dmin,
int pred_x, int pred_y, uint8_t *src_data[3], int src_index, int ref_index,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h)
int size, int h, uint8_t * const mv_penalty)
{ {
const int mx = *mx_ptr; const int mx = *mx_ptr;
const int my = *my_ptr; const int my = *my_ptr;
@ -302,13 +271,14 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
const int map_generation= s->me.map_generation; const int map_generation= s->me.map_generation;
const int subpel_quality= s->avctx->me_subpel_quality; const int subpel_quality= s->avctx->me_subpel_quality;
uint32_t *map= s->me.map; uint32_t *map= s->me.map;
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
me_cmp_func cmp_sub, chroma_cmp_sub; me_cmp_func cmp_sub, chroma_cmp_sub;
LOAD_COMMON LOAD_COMMON
int flags= s->me.sub_flags;
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
//FIXME factorize //FIXME factorize
cmp_sub= s->dsp.me_sub_cmp[size]; cmp_sub= s->dsp.me_sub_cmp[size];
@ -321,7 +291,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
} }
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_QPEL(dmin, 0, 0, mx, my, size); dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
if(mx || my || size>0) if(mx || my || size>0)
dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
} }
@ -386,7 +356,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
}else{ }else{
CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
} }
cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
@ -509,36 +479,6 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s,
return dmin; return dmin;
} }
static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[3], int stride, int uvstride,
uint8_t * const mv_penalty)
{
const int size= 0;
const int h= 16;
const int penalty_factor= s->me.mb_penalty_factor;
me_cmp_func cmp_sub, chroma_cmp_sub;
int d;
LOAD_COMMON
//FIXME factorize
cmp_sub= s->dsp.mb_cmp[size];
chroma_cmp_sub= s->dsp.mb_cmp[size+1];
assert(!s->me.skip);
assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size);
//FIXME check cbp before adding penalty for (0,0) vector
if(mx || my || size>0)
d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
return d;
}
#endif /* CMP_QPEL */
#define CHECK_MV(x,y)\ #define CHECK_MV(x,y)\
{\ {\
@ -546,7 +486,7 @@ static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
/*printf("check_mv %d %d\n", x, y);*/\ /*printf("check_mv %d %d\n", x, y);*/\
if(map[index]!=key){\ if(map[index]!=key){\
CMP(d, x, y, size);\ d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
map[index]= key;\ map[index]= key;\
score_map[index]= d;\ score_map[index]= d;\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
@ -570,7 +510,7 @@ static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
/*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\ /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
if(map[index]!=key){\ if(map[index]!=key){\
CMP(d, x, y, size);\ d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
map[index]= key;\ map[index]= key;\
score_map[index]= d;\ score_map[index]= d;\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
@ -590,27 +530,29 @@ if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x,
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
#define LOAD_COMMON2\
uint32_t *map= s->me.map;\
const int qpel= flags&FLAG_QPEL;\
const int shift= 1+qpel;\
static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
uint8_t *src_data[3], int src_index, int ref_index, int const penalty_factor,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h, int flags)
int const pred_x, int const pred_y, int const penalty_factor,
int const shift,
uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{ {
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
int next_dir=-1; int next_dir=-1;
LOAD_COMMON LOAD_COMMON
LOAD_COMMON2
int map_generation= s->me.map_generation;
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
{ /* ensure that the best point is in the MAP as h/qpel refinement needs it */ { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
if(map[index]!=key){ //this will be executed only very rarey if(map[index]!=key){ //this will be executed only very rarey
CMP(score_map[index], best[0], best[1], size); score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
map[index]= key; map[index]= key;
} }
} }
@ -634,20 +576,18 @@ static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, in
} }
} }
static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
uint8_t *src_data[3], int src_index, int ref_index, int const penalty_factor,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h, int flags)
int const pred_x, int const pred_y, int const penalty_factor,
int const shift,
uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{ {
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
int dia_size; int dia_size;
LOAD_COMMON LOAD_COMMON
LOAD_COMMON2
int map_generation= s->me.map_generation;
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
for(dia_size=1; dia_size<=4; dia_size++){ for(dia_size=1; dia_size<=4; dia_size++){
int dir; int dir;
@ -702,7 +642,7 @@ if(256*256*256*64 % (stats[0]+1)==0){
const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\ const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
/*printf("sab check %d %d\n", ax, ay);*/\ /*printf("sab check %d %d\n", ax, ay);*/\
if(map[index]!=key){\ if(map[index]!=key){\
CMP(d, ax, ay, size);\ d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
map[index]= key;\ map[index]= key;\
score_map[index]= d;\ score_map[index]= d;\
d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\ d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
@ -726,22 +666,20 @@ if(256*256*256*64 % (stats[0]+1)==0){
} }
#define MAX_SAB_SIZE 16 #define MAX_SAB_SIZE 16
static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
uint8_t *src_data[3], int src_index, int ref_index, int const penalty_factor,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h, int flags)
int const pred_x, int const pred_y, int const penalty_factor,
int const shift,
uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{ {
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
Minima minima[MAX_SAB_SIZE]; Minima minima[MAX_SAB_SIZE];
const int minima_count= ABS(s->me.dia_size); const int minima_count= ABS(s->me.dia_size);
int i, j; int i, j;
LOAD_COMMON LOAD_COMMON
LOAD_COMMON2
int map_generation= s->me.map_generation;
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
for(j=i=0; i<ME_MAP_SIZE; i++){ for(j=i=0; i<ME_MAP_SIZE; i++){
uint32_t key= map[i]; uint32_t key= map[i];
@ -807,20 +745,18 @@ static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int
return dmin; return dmin;
} }
static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
uint8_t *src_data[3], int src_index, int ref_index, int const penalty_factor,
uint8_t *ref_data[3], int stride, int uvstride, int size, int h, int flags)
int const pred_x, int const pred_y, int const penalty_factor,
int const shift,
uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
)
{ {
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
int dia_size; int dia_size;
LOAD_COMMON LOAD_COMMON
LOAD_COMMON2
int map_generation= s->me.map_generation;
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){ for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){
int dir, start, end; int dir, start, end;
@ -885,31 +821,42 @@ if(256*256*256*64 % (stats[0]+1)==0){
return dmin; return dmin;
} }
static int RENAME(epzs_motion_search)(MpegEncContext * s, static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
int *mx_ptr, int *my_ptr, int src_index, int ref_index, int const penalty_factor,
int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], int size, int h, int flags){
uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], if(s->me.dia_size==-1)
int ref_mv_scale, uint8_t * const mv_penalty) return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else if(s->me.dia_size<-1)
return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else if(s->me.dia_size<2)
return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else
return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale, int flags)
{ {
int best[2]={0, 0}; int best[2]={0, 0};
int d, dmin; int d, dmin;
const int shift= 1+s->quarter_sample;
uint32_t *map= s->me.map;
int map_generation; int map_generation;
const int penalty_factor= s->me.penalty_factor; const int penalty_factor= s->me.penalty_factor;
const int size=0; const int size=0;
const int h=16; const int h=16;
const int ref_mv_stride= s->mb_stride; //pass as arg FIXME const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
LOAD_COMMON
cmp= s->dsp.me_cmp[size]; LOAD_COMMON
chroma_cmp= s->dsp.me_cmp[size+1]; LOAD_COMMON2
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(s); map_generation= update_map_generation(s);
CMP(dmin, 0, 0, size); dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
map[0]= map_generation; map[0]= map_generation;
score_map[0]= dmin; score_map[0]= dmin;
@ -974,22 +921,7 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s,
} }
//check(best[0],best[1],0, b0) //check(best[0],best[1],0, b0)
if(s->me.dia_size==-1) dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<-1)
dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<2)
dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else
dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
//check(best[0],best[1],0, b1) //check(best[0],best[1],0, b1)
*mx_ptr= best[0]; *mx_ptr= best[0];
@ -999,29 +931,42 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s,
return dmin; return dmin;
} }
#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ //this function is dedicated to the braindamaged gcc
static int RENAME(epzs_motion_search4)(MpegEncContext * s, static inline int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
int P[10][2], int pred_x, int pred_y, int ref_mv_scale)
uint8_t *src_data[3], {
uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], //FIXME convert other functions in the same way if faster
int ref_mv_scale, uint8_t * const mv_penalty) switch(s->me.flags){
case 0:
return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0);
// case FLAG_QPEL:
// return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
default:
return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, s->me.flags);
}
}
static int epzs_motion_search4(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int P[10][2],
int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale)
{ {
int best[2]={0, 0}; int best[2]={0, 0};
int d, dmin; int d, dmin;
const int shift= 1+s->quarter_sample;
uint32_t *map= s->me.map;
int map_generation; int map_generation;
const int penalty_factor= s->me.penalty_factor; const int penalty_factor= s->me.penalty_factor;
const int size=1; const int size=1;
const int h=8; const int h=8;
const int ref_mv_stride= s->mb_stride; const int ref_mv_stride= s->mb_stride;
const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
LOAD_COMMON LOAD_COMMON
int flags= s->me.flags;
LOAD_COMMON2
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(s); map_generation= update_map_generation(s);
@ -1053,23 +998,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
} }
if(s->me.dia_size==-1) dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<-1)
dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<2)
dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else
dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
*mx_ptr= best[0]; *mx_ptr= best[0];
*my_ptr= best[1]; *my_ptr= best[1];
@ -1079,28 +1008,26 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s,
} }
//try to merge with above FIXME (needs PSNR test) //try to merge with above FIXME (needs PSNR test)
static int RENAME(epzs_motion_search2)(MpegEncContext * s, static int epzs_motion_search2(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int *mx_ptr, int *my_ptr, int P[10][2],
int P[10][2], int pred_x, int pred_y, int src_index, int ref_index, int16_t (*last_mv)[2],
uint8_t *src_data[3], int ref_mv_scale)
uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty)
{ {
int best[2]={0, 0}; int best[2]={0, 0};
int d, dmin; int d, dmin;
const int shift= 1+s->quarter_sample;
uint32_t *map= s->me.map;
int map_generation; int map_generation;
const int penalty_factor= s->me.penalty_factor; const int penalty_factor= s->me.penalty_factor;
const int size=0; //FIXME pass as arg const int size=0; //FIXME pass as arg
const int h=8; const int h=8;
const int ref_mv_stride= s->mb_stride; const int ref_mv_stride= s->mb_stride;
const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
me_cmp_func cmp, chroma_cmp; me_cmp_func cmpf, chroma_cmpf;
LOAD_COMMON LOAD_COMMON
int flags= s->me.flags;
LOAD_COMMON2
cmp= s->dsp.me_cmp[size]; cmpf= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; chroma_cmpf= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(s); map_generation= update_map_generation(s);
@ -1132,23 +1059,7 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
} }
if(s->me.dia_size==-1) dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<-1)
dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else if(s->me.dia_size<2)
dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
else
dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
pred_x, pred_y, penalty_factor,
shift, map, map_generation, size, h, mv_penalty);
*mx_ptr= best[0]; *mx_ptr= best[0];
*my_ptr= best[1]; *my_ptr= best[1];
@ -1156,4 +1067,3 @@ static int RENAME(epzs_motion_search2)(MpegEncContext * s,
// printf("%d %d %d \n", best[0], best[1], dmin); // printf("%d %d %d \n", best[0], best[1], dmin);
return dmin; return dmin;
} }
#endif /* !CMP_DIRECT */

View File

@ -200,6 +200,10 @@ typedef struct MotionEstContext{
int co_located_mv[4][2]; ///< mv from last p frame for direct mode ME int co_located_mv[4][2]; ///< mv from last p frame for direct mode ME
int direct_basis_mv[4][2]; int direct_basis_mv[4][2];
uint8_t *scratchpad; ///< data area for the me algo, so that the ME doesnt need to malloc/free uint8_t *scratchpad; ///< data area for the me algo, so that the ME doesnt need to malloc/free
uint8_t *best_mb;
uint8_t *temp_mb[2];
uint8_t *temp;
int best_bits;
uint32_t *map; ///< map to avoid duplicate evaluations uint32_t *map; ///< map to avoid duplicate evaluations
uint32_t *score_map; ///< map to store the scores uint32_t *score_map; ///< map to store the scores
int map_generation; int map_generation;
@ -207,31 +211,33 @@ typedef struct MotionEstContext{
int penalty_factor; int penalty_factor;
int sub_penalty_factor; int sub_penalty_factor;
int mb_penalty_factor; int mb_penalty_factor;
int flags;
int sub_flags;
int mb_flags;
int pre_pass; ///< = 1 for the pre pass int pre_pass; ///< = 1 for the pre pass
int dia_size; int dia_size;
int xmin; int xmin;
int xmax; int xmax;
int ymin; int ymin;
int ymax; int ymax;
int pred_x;
int pred_y;
uint8_t *src[4][4];
uint8_t *ref[4][4];
int stride;
int uvstride;
/* cmp, chroma_cmp;*/
op_pixels_func (*hpel_put)[4];
op_pixels_func (*hpel_avg)[4];
op_pixels_func (*chroma_hpel_put)[4];
qpel_mc_func (*qpel_put)[16];
qpel_mc_func (*qpel_avg)[16];
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
uint8_t *current_mv_penalty;
int (*sub_motion_search)(struct MpegEncContext * s, int (*sub_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin, int *mx_ptr, int *my_ptr, int dmin,
int pred_x, int pred_y, uint8_t *src_data[3], int src_index, int ref_index,
uint8_t *ref_data[6], int stride, int uvstride, int size, int h);
int size, int h, uint8_t * const mv_penalty);
int (*motion_search[7])(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty);
int (*pre_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2],
int ref_mv_scale, uint8_t * const mv_penalty);
int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[6], int stride, int uvstride,
uint8_t * const mv_penalty);
}MotionEstContext; }MotionEstContext;
/** /**