Merge remote branch 'origin/master' into experimental

Change-Id: I920c3ed6af244ef9032b744675d9f664e5878d0e
commit a5d3febc13
@@ -884,6 +884,8 @@ process_common_toolchain() {
         link_with_cc=gcc
         tune_cflags="-march="
         setup_gnu_toolchain
+        #for 32 bit x86 builds, -O3 did not turn on this flag
+        enabled optimizations && check_add_cflags -fomit-frame-pointer
         ;;
     esac

@@ -12,8 +12,6 @@
 #ifndef _PTHREAD_EMULATION
 #define _PTHREAD_EMULATION

-#define VPXINFINITE 10000 /* 10second. */
-
 #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD

 /* Thread management macros */
@@ -28,7 +26,7 @@
 #define pthread_t HANDLE
 #define pthread_attr_t DWORD
 #define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
+#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
 #define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
 #define thread_sleep(nms) Sleep(nms)
 #define pthread_cancel(thread) terminate_thread(thread,0)
@@ -62,7 +60,7 @@
 #define sem_t HANDLE
 #define pause(voidpara) __asm PAUSE
 #define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL)
-#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE))
+#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
 #define sem_post(sem) SetEvent(*sem)
 #define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
 #define thread_sleep(nms) Sleep(nms)
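
Context for the change above: VPXINFINITE capped every wait at 10 seconds (10000 ms), so a wedged worker made pthread_join and sem_wait fail quietly; Win32's INFINITE blocks until the handle is actually signaled. Roughly how the emulated calls expand on Windows (a minimal sketch; the worker function is hypothetical, not from the tree):

    #include <windows.h>
    #include <process.h>
    #include <stdio.h>

    /* Hypothetical worker used to exercise the emulation macros above. */
    static unsigned int __stdcall worker(void *arg)
    {
        printf("worker ran: %s\n", (const char *)arg);
        return 0;
    }

    int main(void)
    {
        HANDLE thread;
        /* Expansion of pthread_create(&thread, NULL, worker, "hi"): */
        thread = (HANDLE)_beginthreadex(NULL, 0, worker, (void *)"hi", 0, NULL);
        /* Expansion of pthread_join(thread, NULL) after this change:
           wait forever (INFINITE) instead of 10 seconds (VPXINFINITE). */
        WaitForSingleObject(thread, INFINITE);
        CloseHandle(thread);
        return 0;
    }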
@@ -76,7 +76,6 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
     pbi->common.current_video_frame = 0;
     pbi->ready_for_new_data = 1;

-    pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
 #if CONFIG_MULTITHREAD
     pbi->max_threads = oxcf->max_threads;
     vp8_decoder_create_threads(pbi);
@@ -252,7 +251,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
-    struct vpx_usec_timer timer;

     /*if(pbi->ready_for_new_data == 0)
         return -1;*/
@@ -317,8 +315,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign

     pbi->common.error.setjmp = 1;

-    vpx_usec_timer_start(&timer);
-
     /*cm->current_video_frame++;*/
     pbi->Source = source;
     pbi->source_sz = size;
@@ -379,15 +375,9 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign

     if(pbi->common.filter_level)
     {
-        struct vpx_usec_timer lpftimer;
-        vpx_usec_timer_start(&lpftimer);
         /* Apply the loop filter if appropriate. */
-
         vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);

-        vpx_usec_timer_mark(&lpftimer);
-        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-
         cm->last_frame_type = cm->frame_type;
         cm->last_filter_type = cm->filter_type;
         cm->last_sharpness_level = cm->sharpness_level;
@@ -398,11 +388,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign

     vp8_clear_system_state();

-    vpx_usec_timer_mark(&timer);
-    pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
-    pbi->time_decoding += pbi->decode_microseconds;
-
     /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/

     if (cm->show_frame)
@@ -81,12 +81,6 @@ typedef struct VP8Decompressor
     const unsigned char *Source;
     unsigned int source_sz;

-    unsigned int CPUFreq;
-    unsigned int decode_microseconds;
-    unsigned int time_decoding;
-    unsigned int time_loop_filtering;
-
-
 #if CONFIG_MULTITHREAD
     /* variable for threading */

@@ -34,7 +34,7 @@ typedef struct
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     short *quant;
     short *quant_fast;
-    short *quant_shift;
+    unsigned char *quant_shift;
     short *zbin;
     short *zrun_zbin_boost;
     short *round;
@@ -147,7 +147,7 @@ static const int qzbin_factors_y2[129] =
 #define EXACT_QUANT
 #ifdef EXACT_QUANT
 static void vp8cx_invert_quant(int improved_quant, short *quant,
-                               short *shift, short d)
+                               unsigned char *shift, short d)
 {
     if(improved_quant)
     {
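
For context, the quantizer replaces a divide by the step size d with a multiply plus a per-coefficient right shift, and the shift count is at most floor(log2(d)), which comfortably fits in an unsigned char; that is what makes this narrowing safe. A rough sketch of the reciprocal idea (illustrative only, not the project's exact routine):

    #include <stdio.h>

    /* Sketch of reciprocal quantization: x/d becomes a multiply and a
       shift. quant approximates 2^(16+shift)/d, shift = floor(log2(d)). */
    void invert_quant_sketch(unsigned int *quant, unsigned char *shift, short d)
    {
        unsigned int t = d;
        int l = 0;
        while (t > 1) { t >>= 1; l++; }
        *quant = ((1u << (16 + l)) + d - 1) / d;
        *shift = (unsigned char)l;
    }

    int main(void)
    {
        unsigned int q;
        unsigned char s;
        short d = 84;          /* a typical dequant step, for illustration */
        int x = 1000;          /* coefficient magnitude to quantize */
        invert_quant_sketch(&q, &s, d);
        /* (x * q) >> (16 + s) approximates x / d */
        printf("%d/%d: exact %d, via multiply %u\n", x, d, x / d, (x * q) >> (16 + s));
        return 0;
    }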
@@ -194,13 +194,13 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse;}}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
 #define MIN(x,y) (((x)<(y))?(x):(y))
 #define MAX(x,y) (((x)>(y))?(x):(y))

 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }

-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
     unsigned char *z = (*(b->base_src) + b->src);
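
The hunks that follow thread the same new unsigned int *sse1 out-parameter through every sub-pixel search routine: whenever a candidate beats the current best, the search latches that candidate's SSE alongside its distortion, so callers get both without recomputing. A minimal sketch of the pattern (hypothetical, simplified types):

    #include <limits.h>

    /* Sketch of the out-parameter pattern: an auxiliary statistic (sse)
       is latched whenever the primary score (distortion) improves. */
    int best_candidate(const int err[], const unsigned int sse[], int n,
                       int *distortion, unsigned int *sse1)
    {
        int i, best = INT_MAX;
        for (i = 0; i < n; i++)
            if (err[i] < best)
            {
                best = err[i];
                *distortion = err[i];
                *sse1 = sse[i];   /* mirrors "*sse1 = sse;" in the hunks below */
            }
        return best;
    }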
@@ -226,7 +226,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     bestmv->col <<= 3;

     // calculate central point error
-    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = besterr;
     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

@@ -316,7 +316,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 #undef CHECK_BETTER
 #undef MIN
 #undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -345,7 +345,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     startmv = *bestmv;

     // calculate central point error
-    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = bestmse;
     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

@@ -360,6 +360,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col += 8;
@@ -371,6 +372,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }

     // go up then down and check error
@@ -384,6 +386,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.row += 8;
@@ -395,6 +398,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }


@@ -436,6 +440,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

     // }
@@ -473,6 +478,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col += 4;
@@ -484,6 +490,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }

     // go up then down and check error
@@ -507,6 +514,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.row += 4;
@@ -518,6 +526,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }


@@ -608,12 +617,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

     return bestmse;
 }

-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -640,7 +650,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     startmv = *bestmv;

     // calculate central point error
-    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = bestmse;
     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

@@ -655,6 +665,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col += 8;
@@ -666,6 +677,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }

     // go up then down and check error
@@ -679,6 +691,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.row += 8;
@@ -690,6 +703,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }

     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
@@ -741,6 +755,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col += 8;
@@ -752,6 +767,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col = (this_mv.col - 8) | 4;
@@ -764,6 +780,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

     this_mv.col += 8;
@@ -775,6 +792,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }

 #endif
@@ -49,7 +49,7 @@ extern int vp8_hex_search

 typedef int (fractional_mv_step_fp)
     (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
-     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion);
+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse);
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
@@ -244,17 +244,17 @@ typedef struct
 {

     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
-    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
-    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);

     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
-    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);

@@ -50,7 +50,7 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);


-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse)
 {
     (void) b;
     (void) d;
@@ -59,6 +59,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
     (void) vfp;
     (void) mvcost;
     (void) distortion;
+    (void) sse;
     bestmv->row <<= 3;
     bestmv->col <<= 3;
     return 0;
@@ -443,7 +444,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
     int bestsme;
     //int all_rds[MAX_MODES]; // Experimental debug code.
     int best_mode_index = 0;
-    int sse = INT_MAX;
+    unsigned int sse = INT_MAX;

     MV mvp;
     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
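
Here sse becomes unsigned int because the variance functions report SSE through an unsigned int pointer; the old int variable needed a pointer cast (dropped a couple of hunks below) that hid a genuine signedness mismatch. A tiny sketch of the problem (hypothetical function):

    /* Sketch: a vf-style reporter writes an unsigned SSE through the
       pointer. With "int sse", the old (unsigned int *)&sse cast still
       compiled, but large SSE values read back negative. */
    void report_sse(unsigned int *sse)
    {
        *sse = 3000000000u;   /* plausible for large residuals; > INT_MAX */
    }

    /* usage: unsigned int sse; report_sse(&sse);  no cast, no surprise */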
@@ -796,7 +797,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
     }

     if (bestsme < INT_MAX)
-        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2);
+        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse);

     mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
     mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -827,7 +828,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
     x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;

     if((this_mode != NEWMV) || !(have_subp_search))
-        distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
+        distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse);

     this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

@@ -27,7 +27,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     short *zbin_ptr = b->zbin;
     short *round_ptr = b->round;
     short *quant_ptr = b->quant_fast;
-    short *quant_shift_ptr = b->quant_shift;
+    unsigned char *quant_shift_ptr = b->quant_shift;
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = d->dequant;
@@ -112,7 +112,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     short *zbin_ptr = b->zbin;
     short *round_ptr = b->round;
     short *quant_ptr = b->quant;
-    short *quant_shift_ptr = b->quant_shift;
+    unsigned char *quant_shift_ptr = b->quant_shift;
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = d->dequant;
@@ -166,7 +166,7 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
     int sz;
     short *coeff_ptr;
     short *quant_ptr;
-    short *quant_shift_ptr;
+    unsigned char *quant_shift_ptr;
     short *qcoeff_ptr;
     short *dqcoeff_ptr;
     short *dequant_ptr;
@@ -1271,13 +1271,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
             if (bestsme < INT_MAX)
             {
                 int distortion;
+                unsigned int sse;

                 if (!cpi->common.full_pixel)
                     cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
-                        bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion);
+                        bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion, &sse);
                 else
                     vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
-                        bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion);
+                        bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion, &sse);
             }
         } /* NEW4X4 */

@@ -2255,9 +2256,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     x->mv_row_max = tmp_row_max;

     if (bestsme < INT_MAX)
-    {
-        int dis; /* TODO: use dis in distortion calculation later. */
-        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis);
+    {
+        int dis; /* TODO: use dis in distortion calculation later. */
+        unsigned int sse;
+        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
     }

     mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
@@ -2304,7 +2306,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     }
     else if (x->encode_breakout)
     {
-        int sum, sse;
+        int sum;
+        unsigned int sse;
         int threshold = (xd->block[0].dequant[1]
                     * xd->block[0].dequant[1] >>4);

@@ -2313,7 +2316,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int

         VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
             (x->src.y_buffer, x->src.y_stride,
-            x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
+            x->e_mbd.predictor, 16, &sse, &sum);

         if (sse < threshold)
         {
@@ -2337,8 +2340,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             distortion_uv = sse2;

             disable_skip = 1;
-            this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
-                             distortion2);
+            this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

             break;
         }
@@ -290,8 +290,8 @@ void ssim_parms_8x8_c
     }
 }

-const static long long c1 = 426148; // (256^2*(.01*255)^2
-const static long long c2 = 3835331; //(256^2*(.03*255)^2
+const static long long cc1 = 26634; // (64^2*(.01*255)^2
+const static long long cc2 = 239708; // (64^2*(.03*255)^2

 static double similarity
 (
@@ -303,10 +303,19 @@ static double similarity
     int count
 )
 {
-    long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
-
-    long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
-        (count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
+    long long ssim_n, ssim_d;
+    long long c1, c2;
+
+    //scale the constants by number of pixels
+    c1 = (cc1*count*count)>>12;
+    c2 = (cc2*count*count)>>12;
+
+    ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
+          (long long) 2*sum_s*sum_r+c2);
+
+    ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
+        ((long long)count*sum_sq_s-(long long)sum_s*sum_s +
+         (long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;

     return ssim_n * 1.0 / ssim_d;
 }
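
For reference, similarity() is an integer form of the SSIM kernel, SSIM = ((2*mu_s*mu_r + C1)(2*cov + C2)) / ((mu_s^2 + mu_r^2 + C1)(var_s + var_r + C2)), evaluated on raw pixel sums: with n pixels, n*sum_sxr - sum_s*sum_r is n^2 times the covariance, which is why the constants now get rescaled by count*count. A floating-point cross-check of the same kernel (a sketch, not the project's API):

    /* Floating-point cross-check of the integer SSIM kernel above.
       sum_s/sum_r are pixel sums, sum_sq_* sums of squares, sum_sxr the
       cross sum, over n pixels. C1/C2 follow the usual SSIM definition. */
    double ssim_check(double sum_s, double sum_r, double sum_sq_s,
                      double sum_sq_r, double sum_sxr, double n)
    {
        const double c1 = (0.01 * 255) * (0.01 * 255);
        const double c2 = (0.03 * 255) * (0.03 * 255);
        double mu_s = sum_s / n, mu_r = sum_r / n;
        double var_s = sum_sq_s / n - mu_s * mu_s;
        double var_r = sum_sq_r / n - mu_r * mu_r;
        double cov   = sum_sxr / n - mu_s * mu_r;
        return ((2 * mu_s * mu_r + c1) * (2 * cov + c2)) /
               ((mu_s * mu_s + mu_r * mu_r + c1) * (var_s + var_r + c2));
    }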
@@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
             const vp8_variance_rtcd_vtable_t *rtcd)
 {
     unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    double ssim3;
-    long long ssim_n;
-    long long ssim_d;
+    long long ssim3;
+    long long ssim_n,ssim_n1,ssim_n2;
+    long long ssim_d,ssim_d1,ssim_d2;
+    long long ssim_t1,ssim_t2;
+    long long c1, c2;
+
+    // normalize by 256/64
+    c1 = cc1*16;
+    c2 = cc2*16;

     rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
-    ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
+    ssim_n1 = (2*sum_s*sum_r+ c1);

-    ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
-        (256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
+    ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
+
+    ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);

-    ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
-    return (long)( 256*ssim3 * ssim3 );
+    ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
+               (long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
+
+    ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
+    ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
+
+    ssim3 = 256 *ssim_t1 * ssim_t2;
+    if(ssim3 <0 )
+        ssim3=0;
+    return (long)( ssim3 );
 }
 // TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
 // such that the window regions overlap block boundaries to penalize blocking
@@ -361,18 +385,20 @@ double vp8_ssim2
 )
 {
     int i,j;
-
+    int samples =0;
     double ssim_total=0;

-    // we can sample points as frequently as we like start with 1 per 8x8
-    for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
+    // we can sample points as frequently as we like start with 1 per 4x4
+    for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
     {
-        for(j=0; j < width; j+=8 )
+        for(j=0; j < width-8; j+=4 )
         {
-            ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
+            double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
+            ssim_total += v;
+            samples++;
         }
     }
-    ssim_total /= (width/8 * height /8);
+    ssim_total /= samples;
     return ssim_total;

 }
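
The rewritten loop scores an 8x8 SSIM window every 4 pixels instead of every 8, clamps the bounds to height-8/width-8 so windows stay inside the frame, indexes img1+j/img2+j (the old loop passed img1 unchanged and so scored the same leftmost window for every column), and divides by the number of windows actually visited. A small arithmetic check of the window counts (assumed example frame size):

    #include <stdio.h>

    /* Sketch: number of 8x8 windows visited with a 4-pixel step and a
       (dim - 8) bound, versus the old one-per-8x8 divisor. */
    int main(void)
    {
        int width = 176, height = 144;   /* assumed example dimensions */
        int rows = 0, cols = 0, i, j;
        for (i = 0; i < height - 8; i += 4) rows++;
        for (j = 0; j < width - 8; j += 4) cols++;
        printf("%d x %d = %d windows (old divisor: %d)\n",
               rows, cols, rows * cols, (width / 8) * (height / 8));
        return 0;
    }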
@@ -405,4 +431,4 @@ double vp8_calc_ssim
     *weight = 1;

     return ssimv;
-}
+}
@@ -209,10 +209,11 @@ static int vp8_temporal_filter_find_matching_mb_c
     //if (bestsme > error_thresh && bestsme < INT_MAX)
     {
         int distortion;
+        unsigned int sse;
         bestsme = cpi->find_fractional_mv_step(x, b, d,
             &d->bmi.mv.as_mv, &best_ref_mv1,
             x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
-            mvcost, &distortion);
+            mvcost, &distortion, &sse);
     }
 #endif

@@ -142,7 +142,7 @@ sym(vp8_regular_quantize_b_sse2):
     movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]

     ; downshift by quant_shift[rc]
-    movsx ecx, WORD PTR[rax + %1 * 2] ; quant_shift_ptr[rc]
+    movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
     sar edi, cl ; also sets Z bit
     je rq_zigzag_loop_%1 ; !y
     mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
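
The assembly keeps pace with the C-side type change: quant_shift is now an array of bytes, so the shift count is loaded as a byte (BYTE PTR into cx) and the table index is no longer scaled by 2. Roughly the scalar idea behind that step, sketched in C (names are illustrative, not the project's, and the rounding/zbin logic is omitted):

    /* Sketch: quantize one coefficient with a 16-bit multiply-high plus
       a per-coefficient byte-wide shift count. */
    short quantize_one(short coeff, short quant, unsigned char quant_shift)
    {
        int y = ((int)coeff * quant) >> 16;   /* multiply-high */
        y >>= quant_shift;                    /* the BYTE-wide shift count */
        return (short)y;
    }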
@@ -16,12 +16,12 @@
     paddusw xmm14, xmm4 ; sum_r
     movdqa xmm1, xmm3
     pmaddwd xmm1, xmm1
-    paddq xmm13, xmm1 ; sum_sq_s
+    paddd xmm13, xmm1 ; sum_sq_s
     movdqa xmm2, xmm4
     pmaddwd xmm2, xmm2
-    paddq xmm12, xmm2 ; sum_sq_r
+    paddd xmm12, xmm2 ; sum_sq_r
     pmaddwd xmm3, xmm4
-    paddq xmm11, xmm3 ; sum_sxr
+    paddd xmm11, xmm3 ; sum_sxr
 %endmacro
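
pmaddwd produces packed 32-bit sums, so accumulating with paddq (which treats the register as two 64-bit lanes) would merge neighboring dword lanes and corrupt the totals; paddd keeps the four 32-bit lanes separate, and the movq-to-movd store changes later in this file match the now 32-bit results. The same accumulation expressed with intrinsics (a sketch; assumes SSE2 and eight 16-bit values per register):

    #include <emmintrin.h>

    /* Sketch of the fixed accumulation: multiply-add 16-bit pixels into
       32-bit lanes, then accumulate lane-wise with a dword add. */
    __m128i accumulate_sq(__m128i acc, __m128i s16)
    {
        __m128i sq = _mm_madd_epi16(s16, s16);  /* pmaddwd: pairs -> dwords */
        return _mm_add_epi32(acc, sq);          /* paddd: per-dword, no lane merge */
    }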
@@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
     push rbp
     mov rbp, rsp
     SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM
     push rsi
     push rdi
     ; end prolog
@@ -115,19 +116,20 @@ NextRow:
     SUM_ACROSS_Q xmm11

     mov rdi,arg(4)
-    movq [rdi], xmm15;
+    movd [rdi], xmm15;
     mov rdi,arg(5)
-    movq [rdi], xmm14;
+    movd [rdi], xmm14;
     mov rdi,arg(6)
-    movq [rdi], xmm13;
+    movd [rdi], xmm13;
     mov rdi,arg(7)
-    movq [rdi], xmm12;
+    movd [rdi], xmm12;
     mov rdi,arg(8)
-    movq [rdi], xmm11;
+    movd [rdi], xmm11;

     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop rbp
     ret
@@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
     push rbp
     mov rbp, rsp
     SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM
     push rsi
     push rdi
     ; end prolog
@@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
 NextRow2:

     ;grab source and reference pixels
-    movq xmm5, [rsi]
-    movq xmm6, [rdi]
-
-    movdqa xmm3, xmm5
-    movdqa xmm4, xmm6
+    movq xmm3, [rsi]
+    movq xmm4, [rdi]
     punpcklbw xmm3, xmm0 ; low_s
     punpcklbw xmm4, xmm0 ; low_r

@@ -197,19 +197,20 @@ NextRow2:
     SUM_ACROSS_Q xmm11

     mov rdi,arg(4)
-    movq [rdi], xmm15;
+    movd [rdi], xmm15;
     mov rdi,arg(5)
-    movq [rdi], xmm14;
+    movd [rdi], xmm14;
     mov rdi,arg(6)
-    movq [rdi], xmm13;
+    movd [rdi], xmm13;
     mov rdi,arg(7)
-    movq [rdi], xmm12;
+    movd [rdi], xmm12;
     mov rdi,arg(8)
-    movq [rdi], xmm11;
+    movd [rdi], xmm11;

     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop rbp
     ret
@@ -151,8 +151,8 @@ x86_readtsc(void)
         __asm__ __volatile__ ("pause \n\t")
 #else
 #if ARCH_X86_64
-/* No pause intrinsic for windows x64 */
-#define x86_pause_hint()
+#define x86_pause_hint()\
+    _mm_pause();
 #else
 #define x86_pause_hint()\
     __asm pause
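
64-bit MSVC has no inline assembler, so the spin-wait hint has to come from an intrinsic; the new definition makes x86_pause_hint() emit pause via _mm_pause() instead of compiling to nothing. A sketch of a spin-wait that uses the hint (assumes <emmintrin.h> provides the intrinsic, as on MSVC and GCC):

    #include <emmintrin.h>

    /* Sketch: polite spin-wait on a flag, issuing the pause hint each
       iteration so the CPU can relax the pipeline and sibling thread. */
    void spin_until_set(volatile int *flag)
    {
        while (!*flag)
            _mm_pause();
    }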