Convert motion search config from AoS (array of structures) to SoA (structure of arrays)
This is a prerequisite for vectorizing vp9_diamond_search_sad_c.
Change-Id: I49cd9148782410ca8b16e8a468ca9e7c6d088410
This commit is contained in:
@@ -103,17 +103,17 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
|
|||||||
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
|
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
|
||||||
int len, ss_count = 1;
|
int len, ss_count = 1;
|
||||||
|
|
||||||
cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
|
cfg->ss_mv[0].col = 0;
|
||||||
cfg->ss[0].offset = 0;
|
cfg->ss_mv[0].row = 0;
|
||||||
|
cfg->ss_os[0] = 0;
|
||||||
|
|
||||||
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
|
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
|
||||||
// Generate offsets for 4 search sites per step.
|
// Generate offsets for 4 search sites per step.
|
||||||
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
|
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i, ++ss_count) {
|
||||||
search_site *const ss = &cfg->ss[ss_count++];
|
cfg->ss_mv[ss_count] = ss_mvs[i];
|
||||||
ss->mv = ss_mvs[i];
|
cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
|
||||||
ss->offset = ss->mv.row * stride + ss->mv.col;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,8 +124,9 @@ void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
|
|||||||
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
|
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
|
||||||
int len, ss_count = 1;
|
int len, ss_count = 1;
|
||||||
|
|
||||||
cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
|
cfg->ss_mv[0].col = 0;
|
||||||
cfg->ss[0].offset = 0;
|
cfg->ss_mv[0].row = 0;
|
||||||
|
cfg->ss_os[0] = 0;
|
||||||
|
|
||||||
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
|
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
|
||||||
// Generate offsets for 8 search sites per step.
|
// Generate offsets for 8 search sites per step.
|
||||||
@@ -134,10 +135,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
|
|||||||
{-len, -len}, {-len, len}, {len, -len}, {len, len}
|
{-len, -len}, {-len, len}, {len, -len}, {len, len}
|
||||||
};
|
};
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < 8; ++i, ++ss_count) {
|
||||||
search_site *const ss = &cfg->ss[ss_count++];
|
cfg->ss_mv[ss_count] = ss_mvs[i];
|
||||||
ss->mv = ss_mvs[i];
|
cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
|
||||||
ss->offset = ss->mv.row * stride + ss->mv.col;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1623,7 +1623,9 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
|
|||||||
// 0 = initial step (MAX_FIRST_STEP) pel
|
// 0 = initial step (MAX_FIRST_STEP) pel
|
||||||
// 1 = (MAX_FIRST_STEP/2) pel,
|
// 1 = (MAX_FIRST_STEP/2) pel,
|
||||||
// 2 = (MAX_FIRST_STEP/4) pel...
|
// 2 = (MAX_FIRST_STEP/4) pel...
|
||||||
const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
|
// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
|
||||||
|
const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
|
||||||
|
const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
|
||||||
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
|
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
|
||||||
|
|
||||||
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
|
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
|
||||||
@@ -1649,10 +1651,10 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
|
|||||||
|
|
||||||
// All_in is true if every one of the points we are checking are within
|
// All_in is true if every one of the points we are checking are within
|
||||||
// the bounds of the image.
|
// the bounds of the image.
|
||||||
all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
|
all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min);
|
||||||
all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
|
all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max);
|
||||||
all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
|
all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min);
|
||||||
all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
|
all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max);
|
||||||
|
|
||||||
// If all the pixels are within the bounds we don't check whether the
|
// If all the pixels are within the bounds we don't check whether the
|
||||||
// search point is valid in this loop, otherwise we check each point
|
// search point is valid in this loop, otherwise we check each point
|
||||||
@@ -1664,15 +1666,15 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
|
|||||||
unsigned char const *block_offset[4];
|
unsigned char const *block_offset[4];
|
||||||
|
|
||||||
for (t = 0; t < 4; t++)
|
for (t = 0; t < 4; t++)
|
||||||
block_offset[t] = ss[i + t].offset + best_address;
|
block_offset[t] = ss_os[i + t] + best_address;
|
||||||
|
|
||||||
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
|
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
|
||||||
sad_array);
|
sad_array);
|
||||||
|
|
||||||
for (t = 0; t < 4; t++, i++) {
|
for (t = 0; t < 4; t++, i++) {
|
||||||
if (sad_array[t] < bestsad) {
|
if (sad_array[t] < bestsad) {
|
||||||
const MV this_mv = {best_mv->row + ss[i].mv.row,
|
const MV this_mv = {best_mv->row + ss_mv[i].row,
|
||||||
best_mv->col + ss[i].mv.col};
|
best_mv->col + ss_mv[i].col};
|
||||||
sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
|
sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
|
||||||
sad_per_bit);
|
sad_per_bit);
|
||||||
if (sad_array[t] < bestsad) {
|
if (sad_array[t] < bestsad) {
|
||||||
@@ -1685,11 +1687,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
|
|||||||
} else {
|
} else {
|
||||||
for (j = 0; j < cfg->searches_per_step; j++) {
|
for (j = 0; j < cfg->searches_per_step; j++) {
|
||||||
// Trap illegal vectors
|
// Trap illegal vectors
|
||||||
const MV this_mv = {best_mv->row + ss[i].mv.row,
|
const MV this_mv = {best_mv->row + ss_mv[i].row,
|
||||||
best_mv->col + ss[i].mv.col};
|
best_mv->col + ss_mv[i].col};
|
||||||
|
|
||||||
if (is_mv_in(x, &this_mv)) {
|
if (is_mv_in(x, &this_mv)) {
|
||||||
const uint8_t *const check_here = ss[i].offset + best_address;
|
const uint8_t *const check_here = ss_os[i] + best_address;
|
||||||
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
|
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
|
||||||
in_what_stride);
|
in_what_stride);
|
||||||
|
|
||||||
@@ -1705,25 +1707,25 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (best_site != last_site) {
|
if (best_site != last_site) {
|
||||||
best_mv->row += ss[best_site].mv.row;
|
best_mv->row += ss_mv[best_site].row;
|
||||||
best_mv->col += ss[best_site].mv.col;
|
best_mv->col += ss_mv[best_site].col;
|
||||||
best_address += ss[best_site].offset;
|
best_address += ss_os[best_site];
|
||||||
last_site = best_site;
|
last_site = best_site;
|
||||||
#if defined(NEW_DIAMOND_SEARCH)
|
#if defined(NEW_DIAMOND_SEARCH)
|
||||||
while (1) {
|
while (1) {
|
||||||
const MV this_mv = {best_mv->row + ss[best_site].mv.row,
|
const MV this_mv = {best_mv->row + ss_mv[best_site].row,
|
||||||
best_mv->col + ss[best_site].mv.col};
|
best_mv->col + ss_mv[best_site].col};
|
||||||
if (is_mv_in(x, &this_mv)) {
|
if (is_mv_in(x, &this_mv)) {
|
||||||
const uint8_t *const check_here = ss[best_site].offset + best_address;
|
const uint8_t *const check_here = ss_os[best_site] + best_address;
|
||||||
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
|
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
|
||||||
in_what_stride);
|
in_what_stride);
|
||||||
if (thissad < bestsad) {
|
if (thissad < bestsad) {
|
||||||
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
|
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
|
||||||
if (thissad < bestsad) {
|
if (thissad < bestsad) {
|
||||||
bestsad = thissad;
|
bestsad = thissad;
|
||||||
best_mv->row += ss[best_site].mv.row;
|
best_mv->row += ss_mv[best_site].row;
|
||||||
best_mv->col += ss[best_site].mv.col;
|
best_mv->col += ss_mv[best_site].col;
|
||||||
best_address += ss[best_site].offset;
|
best_address += ss_os[best_site];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -31,14 +31,10 @@ extern "C" {
|
|||||||
// for Block_16x16
|
// for Block_16x16
|
||||||
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
|
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
|
||||||
|
|
||||||
// motion search site
|
|
||||||
typedef struct search_site {
|
|
||||||
MV mv;
|
|
||||||
int offset;
|
|
||||||
} search_site;
|
|
||||||
|
|
||||||
typedef struct search_site_config {
|
typedef struct search_site_config {
|
||||||
search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
|
// motion search sites
|
||||||
|
MV ss_mv[8 * MAX_MVSEARCH_STEPS + 1]; // Motion vector
|
||||||
|
intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1]; // Offset
|
||||||
int ss_count;
|
int ss_count;
|
||||||
int searches_per_step;
|
int searches_per_step;
|
||||||
} search_site_config;
|
} search_site_config;
|
||||||
|
Reference in New Issue
Block a user