Remove memcpy() in handle_inter_mode() filter selection.
Encode time of the first 50 frames of bus (speed 0) @ 1500kbps goes from 2min4.9s to 2min3.1s, i.e. a 1.4% speedup overall.

Change-Id: Ibe8b08d159797504c5d0c5122de1b6da3b6595e0
This commit is contained in:
parent
ed995afba1
commit
fcf7998a47
@ -2569,11 +2569,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
|
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
|
||||||
int_mv cur_mv[2];
|
int_mv cur_mv[2];
|
||||||
int64_t this_rd = 0;
|
int64_t this_rd = 0;
|
||||||
unsigned char tmp_buf[MAX_MB_PLANE][64 * 64];
|
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
|
||||||
int pred_exists = 0;
|
int pred_exists = 0;
|
||||||
int interpolating_intpel_seen = 0;
|
int interpolating_intpel_seen = 0;
|
||||||
int intpel_mv;
|
int intpel_mv;
|
||||||
int64_t rd, best_rd = INT64_MAX;
|
int64_t rd, best_rd = INT64_MAX;
|
||||||
|
int best_needs_copy = 0;
|
||||||
|
uint8_t *orig_dst[MAX_MB_PLANE];
|
||||||
|
int orig_dst_stride[MAX_MB_PLANE];
|
||||||
|
|
||||||
switch (this_mode) {
|
switch (this_mode) {
|
||||||
int rate_mv;
|
int rate_mv;
|
||||||
@ -2630,6 +2633,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
mbmi->mv[i].as_int = cur_mv[i].as_int;
|
mbmi->mv[i].as_int = cur_mv[i].as_int;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// do first prediction into the destination buffer. Do the next
|
||||||
|
// prediction into a temporary buffer. Then keep track of which one
|
||||||
|
// of these currently holds the best predictor, and use the other
|
||||||
|
// one for future predictions. In the end, copy from tmp_buf to
|
||||||
|
// dst if necessary.
|
||||||
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||||
|
orig_dst[i] = xd->plane[i].dst.buf;
|
||||||
|
orig_dst_stride[i] = xd->plane[i].dst.stride;
|
||||||
|
}
|
||||||
|
|
||||||
/* We don't include the cost of the second reference here, because there
|
/* We don't include the cost of the second reference here, because there
|
||||||
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
|
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
|
||||||
* words if you present them in that order, the second one is always known
|
* words if you present them in that order, the second one is always known
|
||||||
@ -2657,7 +2670,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
|
|
||||||
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
|
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
|
||||||
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
||||||
int rs;
|
int rs, j;
|
||||||
int64_t rs_rd;
|
int64_t rs_rd;
|
||||||
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
|
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
|
||||||
const int is_intpel_interp = intpel_mv &&
|
const int is_intpel_interp = intpel_mv &&
|
||||||
@ -2679,6 +2692,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
} else {
|
} else {
|
||||||
int rate_sum = 0;
|
int rate_sum = 0;
|
||||||
int64_t dist_sum = 0;
|
int64_t dist_sum = 0;
|
||||||
|
if ((cm->mcomp_filter_type == SWITCHABLE &&
|
||||||
|
i && !best_needs_copy) ||
|
||||||
|
(cm->mcomp_filter_type != SWITCHABLE &&
|
||||||
|
cm->mcomp_filter_type != mbmi->interp_filter)) {
|
||||||
|
for (j = 0; j < MAX_MB_PLANE; j++) {
|
||||||
|
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
|
||||||
|
xd->plane[j].dst.stride = 64;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (j = 0; j < MAX_MB_PLANE; j++) {
|
||||||
|
xd->plane[j].dst.buf = orig_dst[j];
|
||||||
|
xd->plane[j].dst.stride = orig_dst_stride[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
||||||
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
|
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
|
||||||
cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
|
cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
|
||||||
@ -2699,27 +2726,23 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
if (newbest) {
|
if (newbest) {
|
||||||
best_rd = rd;
|
best_rd = rd;
|
||||||
*best_filter = mbmi->interp_filter;
|
*best_filter = mbmi->interp_filter;
|
||||||
|
if (cm->mcomp_filter_type == SWITCHABLE && i &&
|
||||||
|
!(interpolating_intpel_seen && is_intpel_interp))
|
||||||
|
best_needs_copy = !best_needs_copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
|
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
|
||||||
(cm->mcomp_filter_type != SWITCHABLE &&
|
(cm->mcomp_filter_type != SWITCHABLE &&
|
||||||
cm->mcomp_filter_type == mbmi->interp_filter)) {
|
cm->mcomp_filter_type == mbmi->interp_filter)) {
|
||||||
int p;
|
|
||||||
|
|
||||||
for (p = 0; p < MAX_MB_PLANE; p++) {
|
|
||||||
struct macroblockd_plane *pd = &xd->plane[p];
|
|
||||||
const int bw = plane_block_width(bsize, pd);
|
|
||||||
const int bh = plane_block_height(bsize, pd);
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < bh; i++)
|
|
||||||
vpx_memcpy(&tmp_buf[p][64 * i], pd->dst.buf + i * pd->dst.stride,
|
|
||||||
bw);
|
|
||||||
}
|
|
||||||
pred_exists = 1;
|
pred_exists = 1;
|
||||||
}
|
}
|
||||||
interpolating_intpel_seen |= is_intpel_interp;
|
interpolating_intpel_seen |= is_intpel_interp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||||
|
xd->plane[i].dst.buf = orig_dst[i];
|
||||||
|
xd->plane[i].dst.stride = orig_dst_stride[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the appropriate filter
|
// Set the appropriate filter
|
||||||
@ -2727,18 +2750,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
cm->mcomp_filter_type : *best_filter;
|
cm->mcomp_filter_type : *best_filter;
|
||||||
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
|
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
|
||||||
|
|
||||||
|
|
||||||
if (pred_exists) {
|
if (pred_exists) {
|
||||||
int p;
|
if (best_needs_copy) {
|
||||||
|
// again temporarily set the buffers to local memory to prevent a memcpy
|
||||||
for (p = 0; p < MAX_MB_PLANE; p++) {
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||||
struct macroblockd_plane *pd = &xd->plane[p];
|
xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
|
||||||
const int bw = plane_block_width(bsize, pd);
|
xd->plane[i].dst.stride = 64;
|
||||||
const int bh = plane_block_height(bsize, pd);
|
}
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < bh; i++)
|
|
||||||
vpx_memcpy(pd->dst.buf + i * pd->dst.stride, &tmp_buf[p][64 * i], bw);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Handles the special case when a filter that is not in the
|
// Handles the special case when a filter that is not in the
|
||||||
@ -2812,6 +2830,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
if (*rate_y == INT_MAX) {
|
if (*rate_y == INT_MAX) {
|
||||||
*rate2 = INT_MAX;
|
*rate2 = INT_MAX;
|
||||||
*distortion = INT64_MAX;
|
*distortion = INT64_MAX;
|
||||||
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||||
|
xd->plane[i].dst.buf = orig_dst[i];
|
||||||
|
xd->plane[i].dst.stride = orig_dst_stride[i];
|
||||||
|
}
|
||||||
return INT64_MAX;
|
return INT64_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2835,6 +2857,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||||
|
xd->plane[i].dst.buf = orig_dst[i];
|
||||||
|
xd->plane[i].dst.stride = orig_dst_stride[i];
|
||||||
|
}
|
||||||
|
|
||||||
return this_rd; // if 0, this will be re-calculated by caller
|
return this_rd; // if 0, this will be re-calculated by caller
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user