Add joint motion search in comp_inter_inter mode (experiment)

In the current code, motion vectors obtained in single prediction mode
are used directly in compound prediction mode. These motion vectors may
not give an accurate prediction since they were searched independently
of each other. In this patch, following Pascal's suggestion, we do a
joint motion search in compound prediction mode to find better motion
vectors in this situation.
Test results:
Overall PSNR: 0.570% (derf), 0.918% (stdhd);
SSIM: 0.572% (derf), 1.009% (stdhd);

The encoder is a little slower. This can be improved later, since the
motion search still uses some C code.

Change-Id: Ib30c9240f6c56c9b070867b4ca89412a76d9f3c6
Yunqing Wang 2013-05-07 09:45:28 -07:00
parent 4305dd4778
commit 9f5811c2da
9 changed files with 837 additions and 33 deletions
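At a high level, the joint search added to handle_inter_mode() in vp9_rdopt.c alternates between the two references: it holds one reference's prediction fixed (built from that reference's current motion vector) and re-searches the other reference's motion vector against the compound (averaged) prediction. A minimal sketch of that loop, using placeholder helper names rather than the real entry points (the real ones are vp9_build_inter_predictor, vp9_refining_search_8p_c and vp9_find_best_sub_pixel_comp, all shown in the diff below):

  /* Sketch only; see handle_inter_mode() in vp9_rdopt.c for the real code. */
  for (ite = 0; ite < 2; ++ite) {
    int id = ite % 2;                    /* 0: refine ref0's mv, 1: refine ref1's mv */
    /* Build the fixed prediction from the other reference at its current mv. */
    build_prediction(second_pred, ref[!id], mv[!id]);
    /* Full-pel 8-neighbor refinement of mv[id] against the averaged prediction. */
    full_pel_refine(&mv[id], second_pred);
    /* Sub-pel (1/2, 1/4, 1/8 pel) refinement of mv[id], same criterion. */
    sub_pel_refine(&mv[id], second_pred);
  }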

configure

@@ -247,6 +247,7 @@ EXPERIMENT_LIST="
multiple_arf
non420
ab4x4
comp_inter_joint_search
"
CONFIG_LIST="
external_build
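As with the other EXPERIMENT_LIST entries, the new tool is compiled out by default; it should be enabled at build time with the usual experiment switches (assuming the standard convention of replacing underscores with dashes in the flag name):

  ./configure --enable-experimental --enable-comp-inter-joint-search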


@@ -36,6 +36,7 @@ typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_SB32X64,
BLOCK_SIZE_SB64X32,
BLOCK_SIZE_SB64X64,
BLOCK_SIZE_TYPES
} BLOCK_SIZE_TYPE;
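The new trailing enumerator simply counts the block-size types, so it can be used to size per-block-size lookup tables. The joint-search code in vp9_rdopt.c below relies on exactly that, e.g.:

  const int b_sz[BLOCK_SIZE_TYPES][2] = { {4, 4}, {8, 8}, /* ... */ {64, 64} };
  /* width = b_sz[bsize][0], height = b_sz[bsize][1] for a given BLOCK_SIZE_TYPE bsize */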
typedef enum PARTITION_TYPE {


@@ -337,41 +337,74 @@ vp9_variance4x4_mmx=vp9_variance4x4_mmx
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance64x64 sse2
prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance64x64
prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x64
prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x64
prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance64x32
prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance64x32
prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x16
prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x16
prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x32
prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance16x32
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x32 sse2
prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance32x32
prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x16 sse2 mmx ssse3
prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance16x16
prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance8x16 sse2 mmx
vp9_sub_pixel_variance8x16_sse2=vp9_sub_pixel_variance8x16_wmt
prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance8x16
prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x8 sse2 mmx ssse3
vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_ssse3;
vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_wmt
prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance16x8
prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance8x8 sse2 mmx
vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt
prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance8x8
prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance4x4 sse2 mmx
vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance4x4
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad64x64 sse2
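Each prototype/specialize pair in this file is expanded by the rtcd generator into a declaration plus a dispatch alias. Since none of the new _avg_ variants list a SIMD specialization yet, they all resolve to the C implementations; the generated header ends up containing roughly the following (a sketch of the generator output, not text from this patch):

  unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride,
                                                 int xoffset, int yoffset,
                                                 const uint8_t *ref_ptr, int ref_stride,
                                                 unsigned int *sse,
                                                 const uint8_t *second_pred);
  #define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c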


@@ -413,6 +413,201 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
return besterr;
}
#if CONFIG_COMP_INTER_JOINT_SEARCH
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
z, src_stride, &sse, second_pred)
int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1,
const uint8_t *second_pred, int w, int h) {
uint8_t *z = x->plane[0].src.buf;
int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
int rr, rc, br, bc, hstep;
int tr, tc;
unsigned int besterr = INT_MAX;
unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
unsigned int eighthiters = 4;
int thismse;
int maxc, minc, maxr, minr;
int y_stride;
int offset;
int usehp = xd->allow_high_precision_mv;
uint8_t *comp_pred = vpx_memalign(16, w * h * sizeof(uint8_t));
uint8_t *y = xd->plane[0].pre[0].buf +
(bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
bestmv->as_mv.col;
y_stride = xd->plane[0].pre[0].stride;
rr = ref_mv->as_mv.row;
rc = ref_mv->as_mv.col;
br = bestmv->as_mv.row << 3;
bc = bestmv->as_mv.col << 3;
hstep = 4;
minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
((1 << MV_MAX_BITS) - 1));
maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
((1 << MV_MAX_BITS) - 1));
minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
((1 << MV_MAX_BITS) - 1));
maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
((1 << MV_MAX_BITS) - 1));
tr = br;
tc = bc;
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// central mv
bestmv->as_mv.row <<= 3;
bestmv->as_mv.col <<= 3;
// calculate central point error
// TODO(yunqingwang): the central point error was already calculated in the
// full-pixel search and could be passed into this function.
comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
error_per_bit, xd->allow_high_precision_mv);
// Each subsequent iteration checks at least one point in common with
// the last iteration (could be two if the diagonal was selected).
while (--halfiters) {
// 1/2 pel
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
// Each subsequent iteration checks at least one point in common with
// the last iteration (could be two if the diagonal was selected).
// 1/4 pel
hstep >>= 1;
while (--quarteriters) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
if (xd->allow_high_precision_mv) {
usehp = vp9_use_nmv_hp(&ref_mv->as_mv);
} else {
usehp = 0;
}
if (usehp) {
hstep >>= 1;
while (--eighthiters) {
CHECK_BETTER(left, tr, tc - hstep);
CHECK_BETTER(right, tr, tc + hstep);
CHECK_BETTER(up, tr - hstep, tc);
CHECK_BETTER(down, tr + hstep, tc);
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
switch (whichdir) {
case 0:
CHECK_BETTER(diag, tr - hstep, tc - hstep);
break;
case 1:
CHECK_BETTER(diag, tr - hstep, tc + hstep);
break;
case 2:
CHECK_BETTER(diag, tr + hstep, tc - hstep);
break;
case 3:
CHECK_BETTER(diag, tr + hstep, tc + hstep);
break;
}
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
tr = br;
tc = bc;
}
}
bestmv->as_mv.row = br;
bestmv->as_mv.col = bc;
vpx_free(comp_pred);
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
#endif // CONFIG_COMP_INTER_JOINT_SEARCH
#undef MVC
#undef PRE
#undef DIST
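vp9_find_best_sub_pixel_comp() reuses the CHECK_BETTER/MVC/PRE/SP machinery already defined earlier in vp9_mcomp.c; only DIST is redefined above so that each candidate position is scored against the averaged (compound) prediction via vfp->svaf. For reference, CHECK_BETTER behaves roughly like this (a simplified sketch, not the exact macro in the file):

  /* Evaluate position (r, c) if it is inside the mv limits, and keep it if
   * distortion plus mv cost beats the best found so far. */
  #define CHECK_BETTER(v, r, c)                                        \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) {            \
      thismse = DIST(r, c);           /* compound sub-pel variance */  \
      if ((v = MVC(r, c) + thismse) < besterr) {                       \
        besterr = v; br = r; bc = c;                                   \
        *distortion = thismse; *sse1 = sse;                            \
      }                                                                \
    } else {                                                           \
      v = INT_MAX;                                                     \
    }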
@@ -2132,7 +2327,109 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
return INT_MAX;
}
#if CONFIG_COMP_INTER_JOINT_SEARCH
/* This function is called when we do joint motion search in comp_inter_inter
* mode.
*/
int vp9_refining_search_8p_c(MACROBLOCK *x,
int_mv *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2], int_mv *center_mv,
const uint8_t *second_pred, int w, int h) {
const MACROBLOCKD* const xd = &x->e_mbd;
MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
{-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
int i, j;
int this_row_offset, this_col_offset;
int what_stride = x->plane[0].src.stride;
int in_what_stride = xd->plane[0].pre[0].stride;
uint8_t *what = x->plane[0].src.buf;
uint8_t *best_address = xd->plane[0].pre[0].buf +
(ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
ref_mv->as_mv.col;
uint8_t *check_here;
unsigned int thissad;
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int_mv fcenter_mv;
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
/* Compound pred buffer */
uint8_t *comp_pred = vpx_memalign(16, w * h * sizeof(uint8_t));
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
/* Get compound pred by averaging two pred blocks. */
comp_avg_pred(comp_pred, second_pred, w, h, best_address, in_what_stride);
bestsad = fn_ptr->sdf(what, what_stride, comp_pred, w, 0x7fffffff) +
mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
for (j = 0; j < 8; j++) {
this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
if ((this_col_offset > x->mv_col_min) &&
(this_col_offset < x->mv_col_max) &&
(this_row_offset > x->mv_row_min) &&
(this_row_offset < x->mv_row_max)) {
check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
best_address;
/* Get compound block and use it to calculate SAD. */
comp_avg_pred(comp_pred, second_pred, w, h, check_here,
in_what_stride);
thissad = fn_ptr->sdf(what, what_stride, comp_pred, w, bestsad);
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = j;
}
}
}
}
if (best_site == -1) {
break;
} else {
ref_mv->as_mv.row += neighbors[best_site].row;
ref_mv->as_mv.col += neighbors[best_site].col;
best_address += (neighbors[best_site].row) * in_what_stride +
neighbors[best_site].col;
}
}
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
if (bestsad < INT_MAX) {
int besterr;
comp_avg_pred(comp_pred, second_pred, w, h, best_address, in_what_stride);
besterr = fn_ptr->vf(what, what_stride, comp_pred, w,
(unsigned int *)(&thissad)) +
mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
xd->allow_high_precision_mv);
vpx_free(comp_pred);
return besterr;
} else {
vpx_free(comp_pred);
return INT_MAX;
}
}
#endif // CONFIG_COMP_INTER_JOINT_SEARCH
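In the refinement loop above, each of the eight neighboring full-pel positions is scored against the compound prediction rather than against a single reference. Informally, the quantity being minimized is (a restatement of the code above, not a new formula):

  /* cost(mv) = SAD(src, (pred_of_ref_being_searched(mv) + second_pred + 1) >> 1)
   *          + mvsad_err_cost(mv, predicted_mv) * error_per_bit
   * The per-pixel averaging is exactly what comp_avg_pred() computes, and
   * second_pred holds the other reference's prediction at its currently fixed mv. */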
#ifdef ENTROPY_STATS
void print_mode_context(VP9_COMMON *pc) {


@@ -79,5 +79,21 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
int_mv *center_mv);
#if CONFIG_COMP_INTER_JOINT_SEARCH
int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1,
const uint8_t *second_pred,
int w, int h);
int vp9_refining_search_8p_c(MACROBLOCK *x,
int_mv *ref_mv, int error_per_bit,
int search_range, vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
int_mv *center_mv, const uint8_t *second_pred,
int w, int h);
#endif // CONFIG_COMP_INTER_JOINT_SEARCH
#endif // VP9_ENCODER_VP9_MCOMP_H_


@@ -1516,10 +1516,11 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
for (i = 0; i < MAX_MODES; i++)
cpi->rd_thresh_mult[i] = 128;
#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
#define BFP(BT, SDF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF)\
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \
cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \
cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
@@ -1528,57 +1529,64 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->fn_ptr[BT].sdx4df = SDX4DF;
BFP(BLOCK_32X16, vp9_sad32x16, vp9_variance32x16, vp9_sub_pixel_variance32x16,
NULL, NULL,
vp9_sub_pixel_avg_variance32x16, NULL, NULL,
NULL, NULL, NULL,
vp9_sad32x16x4d)
BFP(BLOCK_16X32, vp9_sad16x32, vp9_variance16x32, vp9_sub_pixel_variance16x32,
NULL, NULL,
vp9_sub_pixel_avg_variance16x32, NULL, NULL,
NULL, NULL, NULL,
vp9_sad16x32x4d)
BFP(BLOCK_64X32, vp9_sad64x32, vp9_variance64x32, vp9_sub_pixel_variance64x32,
NULL, NULL,
vp9_sub_pixel_avg_variance64x32, NULL, NULL,
NULL, NULL, NULL,
vp9_sad64x32x4d)
BFP(BLOCK_32X64, vp9_sad32x64, vp9_variance32x64, vp9_sub_pixel_variance32x64,
NULL, NULL,
vp9_sub_pixel_avg_variance32x64, NULL, NULL,
NULL, NULL, NULL,
vp9_sad32x64x4d)
BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32,
vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v,
vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h,
vp9_variance_halfpixvar32x32_v,
vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
vp9_sad32x32x4d)
BFP(BLOCK_64X64, vp9_sad64x64, vp9_variance64x64, vp9_sub_pixel_variance64x64,
vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v,
vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h,
vp9_variance_halfpixvar64x64_v,
vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
vp9_sad64x64x4d)
BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16,
vp9_variance_halfpixvar16x16_h, vp9_variance_halfpixvar16x16_v,
vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
vp9_sad16x16x4d)
vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h,
vp9_variance_halfpixvar16x16_v,
vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
vp9_sad16x16x4d)
BFP(BLOCK_16X8, vp9_sad16x8, vp9_variance16x8, vp9_sub_pixel_variance16x8,
NULL, NULL, NULL, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL,
vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
BFP(BLOCK_8X16, vp9_sad8x16, vp9_variance8x16, vp9_sub_pixel_variance8x16,
NULL, NULL, NULL, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL,
vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8,
NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
NULL, NULL, NULL, NULL, NULL, NULL)
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
NULL, NULL, NULL, NULL, NULL, NULL)
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
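The extra SVAF argument slots the new sub-pixel avg-variance functions into the per-block-size function table. Expanding the BLOCK_16X16 invocation above by hand (shown only for illustration), it assigns:

  cpi->fn_ptr[BLOCK_16X16].sdf  = vp9_sad16x16;
  cpi->fn_ptr[BLOCK_16X16].vf   = vp9_variance16x16;
  cpi->fn_ptr[BLOCK_16X16].svf  = vp9_sub_pixel_variance16x16;
  cpi->fn_ptr[BLOCK_16X16].svaf = vp9_sub_pixel_avg_variance16x16;  /* new slot */
  cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = vp9_variance_halfpixvar16x16_h;
  cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = vp9_variance_halfpixvar16x16_v;
  cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = vp9_variance_halfpixvar16x16_hv;
  cpi->fn_ptr[BLOCK_16X16].sdx3f  = vp9_sad16x16x3;
  cpi->fn_ptr[BLOCK_16X16].sdx8f  = vp9_sad16x16x8;
  cpi->fn_ptr[BLOCK_16X16].sdx4df = vp9_sad16x16x4d;

The svaf entry is what vp9_find_best_sub_pixel_comp() ends up calling through its DIST macro (vfp->svaf).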


@@ -1807,8 +1807,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
[MAX_REF_FRAMES],
YV12_BUFFER_CONFIG *scaled_ref_frame,
int mi_row, int mi_col) {
YV12_BUFFER_CONFIG **scaled_ref_frame,
int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES]) {
const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
VP9_COMMON *cm = &cpi->common;
@@ -1838,6 +1839,152 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
if (is_comp_pred) {
#if CONFIG_COMP_INTER_JOINT_SEARCH
const int b_sz[BLOCK_SIZE_TYPES][2] = {
{4, 4},
{8, 8},
{8, 16},
{16, 8},
{16, 16},
{16, 32},
{32, 16},
{32, 32},
{32, 64},
{64, 32},
{64, 64}
};
int ite;
// Prediction buffer from second frame.
uint8_t *second_pred = vpx_memalign(16, b_sz[bsize][0] *
b_sz[bsize][1] * sizeof(uint8_t));
// Do joint motion search in compound mode to get more accurate mv.
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
struct buf_2d scaled_first_yv12;
if (scaled_ref_frame[0]) {
int i;
// Swap out the reference frame for a version that's been scaled to
// match the resolution of the current frame, allowing the existing
// motion search code to be used without additional modifications.
for (i = 0; i < MAX_MB_PLANE; i++)
backup_yv12[i] = xd->plane[i].pre[0];
setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
NULL, NULL);
}
if (scaled_ref_frame[1]) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
backup_second_yv12[i] = xd->plane[i].pre[1];
setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
NULL, NULL);
}
xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
mi_row, mi_col);
xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
mi_row, mi_col);
scaled_first_yv12 = xd->plane[0].pre[0];
// Initialize mv using single prediction mode result.
frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int;
// Iteration: the joint search is done once for each ref frame.
// We tried running the search multiple times iteratively, breaking out
// when no better mv could be found, but tests didn't show a noticeable
// improvement.
for (ite = 0; ite < 2; ite++) {
struct buf_2d ref_yv12[2] = {xd->plane[0].pre[0],
xd->plane[0].pre[1]};
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
int_mv tmp_mv;
int search_range = 3;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
int id = ite % 2;
// Get pred block from second frame.
vp9_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, b_sz[bsize][0],
&frame_mv[NEWMV][refs[!id]],
&xd->scale_factor[!id],
b_sz[bsize][0], b_sz[bsize][1], 0,
&xd->subpix);
// Compound motion search on first ref frame.
if (id)
xd->plane[0].pre[0] = ref_yv12[id];
vp9_clamp_mv_min_max(x, &ref_mv[id]);
// Use mv result from single mode as mvp.
tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int;
tmp_mv.as_mv.col >>= 3;
tmp_mv.as_mv.row >>= 3;
// Small-range full-pixel motion search
bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
search_range,
&cpi->fn_ptr[block_size],
x->nmvjointcost, x->mvcost,
&ref_mv[id], second_pred,
b_sz[bsize][0], b_sz[bsize][1]);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
vp9_find_best_sub_pixel_comp(x, &tmp_mv,
&ref_mv[id],
x->errorperbit,
&cpi->fn_ptr[block_size],
x->nmvjointcost, x->mvcost,
&dis, &sse, second_pred,
b_sz[bsize][0], b_sz[bsize][1]);
}
frame_mv[NEWMV][refs[id]].as_int =
xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int;
if (id)
xd->plane[0].pre[0] = scaled_first_yv12;
}
// restore the predictor
if (scaled_ref_frame[0]) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
if (scaled_ref_frame[1]) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = backup_second_yv12[i];
}
vpx_free(second_pred);
#endif // CONFIG_COMP_INTER_JOINT_SEARCH
if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
return INT64_MAX;
@@ -1862,7 +2009,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
if (scaled_ref_frame) {
if (scaled_ref_frame[0]) {
int i;
// Swap out the reference frame for a version that's been scaled to
@@ -1871,7 +2018,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
backup_yv12[i] = xd->plane[i].pre[0];
setup_pre_planes(xd, scaled_ref_frame, NULL, mi_row, mi_col,
setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
NULL, NULL);
}
@@ -1914,6 +2061,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
frame_mv[NEWMV][refs[0]].as_int =
xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
single_newmv[refs[0]].as_int = tmp_mv.as_int;
// Add the new motion vector cost to our rolling cost variable
*rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
@@ -1921,7 +2069,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
96, xd->allow_high_precision_mv);
// restore the predictor, if required
if (scaled_ref_frame) {
if (scaled_ref_frame[0]) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
@@ -2205,6 +2353,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int frame_mdcounts[4][4];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
int_mv single_newmv[MAX_REF_FRAMES];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int idx_list[4] = {0,
@@ -2251,6 +2400,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
vpx_memset(&single_newmv, 0, sizeof(single_newmv));
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = INT64_MAX;
@@ -2608,7 +2758,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
mbmi->mode = this_mode;
} else {
YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
int fb;
if (mbmi->ref_frame == LAST_FRAME) {
@@ -2620,7 +2770,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
scaled_ref_frame[0] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
if (comp_pred) {
if (mbmi->second_ref_frame == LAST_FRAME) {
fb = cpi->lst_fb_idx;
} else if (mbmi->second_ref_frame == GOLDEN_FRAME) {
fb = cpi->gld_fb_idx;
} else {
fb = cpi->alt_fb_idx;
}
if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
scaled_ref_frame[1] = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
}
this_rd = handle_inter_mode(cpi, x, bsize,
mdcounts, txfm_cache,
@@ -2630,7 +2793,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&rate_uv, &distortion_uv,
&mode_excluded, &disable_skip,
mode_index, &tmp_best_filter, frame_mv,
scaled_ref_frame, mi_row, mi_col);
scaled_ref_frame, mi_row, mi_col,
single_newmv);
if (this_rd == INT64_MAX)
continue;
}
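The new single_newmv[] array exists only to seed the joint search: the single-prediction NEWMV search records its result per reference frame, and the compound path starts its alternating iterations from those cached vectors instead of re-deriving them. In outline (paraphrasing the hunks above):

  /* single-prediction NEWMV path, per reference: */
  single_newmv[refs[0]].as_int = tmp_mv.as_int;
  /* ... later, compound NEWMV path, before the joint iterations: */
  frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int;
  frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int;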


@@ -12,6 +12,7 @@
#define VP9_ENCODER_VP9_VARIANCE_H_
#include "vpx/vpx_integer.h"
// #include "./vpx_config.h"
typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
@@ -50,6 +51,15 @@ typedef unsigned int (*vp9_subpixvariance_fn_t)(const uint8_t *src_ptr,
int Refstride,
unsigned int *sse);
typedef unsigned int (*vp9_subp_avg_variance_fn_t)(const uint8_t *src_ptr,
int source_stride,
int xoffset,
int yoffset,
const uint8_t *ref_ptr,
int Refstride,
unsigned int *sse,
const uint8_t *second_pred);
typedef void (*vp9_ssimpf_fn_t)(uint8_t *s, int sp, uint8_t *r,
int rp, unsigned long *sum_s,
unsigned long *sum_r, unsigned long *sum_sq_s,
@@ -64,15 +74,33 @@ typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr,
int ref_stride);
typedef struct vp9_variance_vtable {
vp9_sad_fn_t sdf;
vp9_variance_fn_t vf;
vp9_subpixvariance_fn_t svf;
vp9_variance_fn_t svf_halfpix_h;
vp9_variance_fn_t svf_halfpix_v;
vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi1_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
vp9_sad_fn_t sdf;
vp9_variance_fn_t vf;
vp9_subpixvariance_fn_t svf;
vp9_subp_avg_variance_fn_t svaf;
vp9_variance_fn_t svf_halfpix_h;
vp9_variance_fn_t svf_halfpix_v;
vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi1_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
// #if CONFIG_COMP_INTER_JOINT_SEARCH
static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int weight,
int height, uint8_t *ref, int ref_stride) {
int i, j;
for (i = 0; i < height; i++) {
for (j = 0; j < weight; j++) {
int tmp;
tmp = pred[j] + ref[j];
comp_pred[j] = (tmp + 1) >> 1;
}
comp_pred += weight;
pred += weight;
ref += ref_stride;
}
}
// #endif // CONFIG_COMP_INTER_JOINT_SEARCH
#endif // VP9_ENCODER_VP9_VARIANCE_H_
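comp_avg_pred() computes the usual rounded average (a + b + 1) >> 1, i.e. ties round up, matching how compound predictions are averaged elsewhere. Note that the 'weight' parameter is really the block width in pixels, and 'pred' is read with a stride equal to that width. A tiny worked example (values and buffer names chosen purely for illustration; second_pred, ref_buf and ref_stride stand for whatever the caller already has):

  /* pred[j] = 10, ref[j] = 13  ->  comp_pred[j] = (10 + 13 + 1) >> 1 = 12 */
  uint8_t comp[16 * 16];
  comp_avg_pred(comp, second_pred, 16, 16, ref_buf, ref_stride);  /* 16x16 block */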


@@ -13,6 +13,7 @@
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_subpelvar.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
@@ -58,6 +59,29 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
return vp9_variance64x32_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
uint8_t temp2[68 * 64];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
return vp9_variance64x32_c(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -92,6 +116,29 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
return vp9_variance32x64_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
uint8_t temp2[68 * 64];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
return vp9_variance32x64_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -126,6 +173,29 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
return vp9_variance32x16_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
uint8_t temp2[36 * 32];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
return vp9_variance32x16_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -160,6 +230,29 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
return vp9_variance16x32_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
uint8_t temp2[36 * 32];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
return vp9_variance16x32_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -317,6 +410,31 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint8_t temp2[20 * 16];
const int16_t *hfilter, *vfilter;
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 4, hfilter);
// Now filter vertically
var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
return vp9_variance4x4_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
int src_pixels_per_line,
@@ -339,6 +457,29 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
return vp9_variance8x8_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -360,6 +501,30 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[17 * 16];
uint8_t temp2[20 * 16];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
return vp9_variance16x16_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -381,6 +546,29 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
uint8_t temp2[68 * 64];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
return vp9_variance64x64_c(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -402,6 +590,29 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
uint8_t temp2[36 * 32];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
return vp9_variance32x32_c(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -543,6 +754,29 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
return vp9_variance16x8_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -564,3 +798,25 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
const int16_t *hfilter, *vfilter;
hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
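All the new _avg_ variants follow the same pattern as their existing counterparts, with one extra averaging step before the variance call. Taking vp9_sub_pixel_avg_variance16x16_c above as the example, the data flow is:

  /* src    --(horizontal 2-tap bilinear, 17 rows x 16 cols)--> fdata3
   * fdata3 --(vertical 2-tap bilinear, 16 x 16)-------------->  temp2
   * temp2 + second_pred --(comp_avg_pred)-------------------->  temp3
   * return vp9_variance16x16_c(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
   * The extra row produced by the first pass feeds the second tap of the
   * vertical filter. */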