diff --git a/vp8/common/extend.c b/vp8/common/extend.c index 5b8b4e475..036bafc5d 100644 --- a/vp8/common/extend.c +++ b/vp8/common/extend.c @@ -38,7 +38,7 @@ static void copy_and_extend_plane dest_ptr1 = d - el; dest_ptr2 = d + w; - for (i = 0; i < h - 0 + 1; i++) + for (i = 0; i < h; i++) { vpx_memset(dest_ptr1, src_ptr1[0], el); vpx_memcpy(dest_ptr1 + el, src_ptr1, w); diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm index 7f6fd93e4..0ec18de76 100644 --- a/vp8/common/x86/subpixel_ssse3.asm +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -194,10 +194,6 @@ sym(vp8_filter_block1d16_h6_ssse3): mov rdi, arg(2) ;output_ptr -;; -;; cmp esi, DWORD PTR [rax] -;; je vp8_filter_block1d16_h4_ssse3 - mov rsi, arg(0) ;src_ptr movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 @@ -271,61 +267,7 @@ filter_block1d16_h6_rowloop_ssse3: pop rdi pop rsi RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -vp8_filter_block1d16_h4_ssse3: - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rdx, dword ptr arg(3) ;output_pitch - -filter_block1d16_h4_rowloop_ssse3: - movdqu xmm1, XMMWORD PTR [rsi - 2] - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2b)] - pshufb xmm2, [GLOBAL(shuf3b)] - pmaddubsw xmm1, xmm5 - - movdqu xmm3, XMMWORD PTR [rsi + 6] - - pmaddubsw xmm2, xmm6 - movdqa xmm0, xmm3 - pshufb xmm3, [GLOBAL(shuf3b)] - pshufb xmm0, [GLOBAL(shuf2b)] - - paddsw xmm1, [GLOBAL(rd)] - paddsw xmm1, xmm2 - - pmaddubsw xmm0, xmm5 - pmaddubsw xmm3, xmm6 - - psraw xmm1, 7 - packuswb xmm1, xmm1 - lea rsi, [rsi + rax] - paddsw xmm3, xmm0 - paddsw xmm3, [GLOBAL(rd)] - psraw xmm3, 7 - packuswb xmm3, xmm3 - - punpcklqdq xmm1, xmm3 - - movdqa XMMWORD Ptr [rdi], xmm1 - - add rdi, rdx - dec rcx - jnz filter_block1d16_h4_rowloop_ssse3 - - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c old mode 100755 new mode 100644 diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index f7e7c03fe..eb840d70b 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -194,13 +194,13 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best +#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse;}}, v=INT_MAX;)// checks if (r,c) has better score than previous best #define MIN(x,y) (((x)<(y))?(x):(y)) #define MAX(x,y) (((x)>(y))?(x):(y)) //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; } -int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) +int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) { unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; unsigned char *z = (*(b->base_src) + b->src); @@ -214,6 +214,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned int whichdir; unsigned int halfiters = 4; unsigned int quarteriters = 4; + int thismse; int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1)); int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1)); @@ -226,6 +227,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, // calculate central point error besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) @@ -314,7 +316,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #undef CHECK_BETTER #undef MIN #undef MAX -int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) +int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) { int bestmse = INT_MAX; MV startmv; @@ -325,6 +327,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, int left, right, up, down, diag; unsigned int sse; int whichdir ; + int thismse; // Trap uncodable vectors @@ -332,6 +335,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, { bestmv->row <<= 3; bestmv->col <<= 3; + *distortion = INT_MAX; return INT_MAX; } @@ -342,50 +346,55 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, // calculate central point error bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // go left then right and check error this_mv.row = startmv.row; this_mv.col = ((startmv.col - 8) | 4); - left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; + *distortion = thismse; } this_mv.col += 8; - right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); - right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; + *distortion = thismse; } // go up then down and check error this_mv.col = startmv.col; this_mv.row = ((startmv.row - 8) | 4); - up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; + *distortion = thismse; } this_mv.row += 8; - down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse); - down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; + *distortion = thismse; } @@ -400,32 +409,33 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, case 0: this_mv.col = (this_mv.col - 8) | 4; this_mv.row = (this_mv.row - 8) | 4; - diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); break; case 1: this_mv.col += 4; this_mv.row = (this_mv.row - 8) | 4; - diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); break; case 2: this_mv.col = (this_mv.col - 8) | 4; this_mv.row += 4; - diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); break; case 3: default: this_mv.col += 4; this_mv.row += 4; - diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); + thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); break; } - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } // } @@ -448,30 +458,32 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col = startmv.col - 2; - left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); } - left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; + *distortion = thismse; } this_mv.col += 4; - right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; + *distortion = thismse; } // go up then down and check error @@ -480,30 +492,32 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.row & 7) { this_mv.row = startmv.row - 2; - up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.row = (startmv.row - 8) | 6; - up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } - up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; + *distortion = thismse; } this_mv.row += 4; - down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; + *distortion = thismse; } @@ -525,12 +539,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col -= 2; - diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; + thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; } } else @@ -540,12 +554,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col -= 2; - diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); + thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); } } @@ -556,12 +570,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.row & 7) { this_mv.row -= 2; - diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.row = (startmv.row - 8) | 6; - diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } break; @@ -571,36 +585,35 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col -= 2; - diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; + thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; } break; case 3: this_mv.col += 2; this_mv.row += 2; - diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); break; } - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } -// } - return bestmse; } -int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) +int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) { int bestmse = INT_MAX; MV startmv; @@ -610,12 +623,14 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm unsigned char *z = (*(b->base_src) + b->src); int left, right, up, down, diag; unsigned int sse; + int thismse; // Trap uncodable vectors if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL)) { bestmv->row <<= 3; bestmv->col <<= 3; + *distortion = INT_MAX; return INT_MAX; } @@ -626,50 +641,55 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm // calculate central point error bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // go left then right and check error this_mv.row = startmv.row; this_mv.col = ((startmv.col - 8) | 4); - left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); + left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; + *distortion = thismse; } this_mv.col += 8; - right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); - right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); + right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; + *distortion = thismse; } // go up then down and check error this_mv.col = startmv.col; this_mv.row = ((startmv.row - 8) | 4); - up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; + *distortion = thismse; } this_mv.row += 8; - down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse); - down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse); + down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; + *distortion = thismse; } // somewhat strangely not doing all the diagonals for half pel is slower than doing them. @@ -713,44 +733,48 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm #else this_mv.col = (this_mv.col - 8) | 4; this_mv.row = (this_mv.row - 8) | 4; - diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } this_mv.col += 8; - diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } this_mv.col = (this_mv.col - 8) | 4; this_mv.row = startmv.row + 4; - diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } this_mv.col += 8; - diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); - diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); + diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; + *distortion = thismse; } #endif diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 5efcec296..72faf8ea6 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -49,7 +49,7 @@ extern int vp8_hex_search typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, - int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]); + int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion); extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; extern fractional_mv_step_fp vp8_find_best_sub_pixel_step; extern fractional_mv_step_fp vp8_find_best_half_pixel_step; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 4b3c6c632..111cd74ba 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -50,7 +50,7 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv); -int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) +int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) { (void) b; (void) d; @@ -58,6 +58,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, (void) error_per_bit; (void) vfp; (void) mvcost; + (void) distortion; bestmv->row <<= 3; bestmv->col <<= 3; return 0; @@ -459,6 +460,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re int skip_mode[4] = {0, 0, 0, 0}; + int have_subp_search = cpi->sf.half_pixel_search; /* In real-time mode, when Speed >= 15, no sub-pixel search. */ + vpx_memset(mode_mv, 0, sizeof(mode_mv)); vpx_memset(nearest_mv, 0, sizeof(nearest_mv)); vpx_memset(near_mv, 0, sizeof(near_mv)); @@ -639,10 +642,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re switch (this_mode) { case B_PRED: - distortion2 = *returndistortion; // Best so far passed in as breakout value to vp8_pick_intra4x4mby_modes - vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2); - rate2 += rate; - distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); + // Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout + distortion2 = *returndistortion; + vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, + &rate, &distortion2); if (distortion2 == INT_MAX) { @@ -650,6 +653,11 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re } else { + rate2 += rate; + distortion2 = VARIANCE_INVOKE + (&cpi->rtcd.variance, get16x16prederror)( + x->src.y_buffer, x->src.y_stride, + x->e_mbd.predictor, 16, 0x7fffffff); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) @@ -788,7 +796,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re } if (bestsme < INT_MAX) - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost); + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -818,7 +826,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re x->e_mbd.block[0].bmi.mode = this_mode; x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int; - distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse)); + if((this_mode != NEWMV) || !(have_subp_search)) + distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse)); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 354933de2..f775fadd9 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1270,12 +1270,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { + int distortion; + if (!cpi->common.full_pixel) cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost); + bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion); else vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost); + bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion); } } /* NEW4X4 */ @@ -2253,8 +2255,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->mv_row_max = tmp_row_max; if (bestsme < INT_MAX) - // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost); // normal mvc=11 - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost); + { + int dis; /* TODO: use dis in distortion calculation later. */ + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis); + } mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 8cd9f4711..5ede33f4a 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -208,10 +208,11 @@ static int vp8_temporal_filter_find_matching_mb_c // Try sub-pixel MC? //if (bestsme > error_thresh && bestsme < INT_MAX) { + int distortion; bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], - mvcost); + mvcost, &distortion); } #endif diff --git a/vpxenc.c b/vpxenc.c old mode 100755 new mode 100644