Convert motion search config from AoS to SoA

This is a prerequisite for vectorizing vp9_diamond_search_sad_c.

Change-Id: I49cd9148782410ca8b16e8a468ca9e7c6d088410
This commit is contained in:
Geza Lore
2015-10-26 09:39:38 +00:00
parent dc9d36c0a6
commit 965a8dea0b
2 changed files with 37 additions and 39 deletions

View File

@@ -103,17 +103,17 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1; int len, ss_count = 1;
cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; cfg->ss_mv[0].col = 0;
cfg->ss[0].offset = 0; cfg->ss_mv[0].row = 0;
cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) { for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step. // Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i; int i;
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i, ++ss_count) {
search_site *const ss = &cfg->ss[ss_count++]; cfg->ss_mv[ss_count] = ss_mvs[i];
ss->mv = ss_mvs[i]; cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
ss->offset = ss->mv.row * stride + ss->mv.col;
} }
} }
@@ -124,8 +124,9 @@ void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len, ss_count = 1; int len, ss_count = 1;
cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; cfg->ss_mv[0].col = 0;
cfg->ss[0].offset = 0; cfg->ss_mv[0].row = 0;
cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) { for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step. // Generate offsets for 8 search sites per step.
@@ -134,10 +135,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
{-len, -len}, {-len, len}, {len, -len}, {len, len} {-len, -len}, {-len, len}, {len, -len}, {len, len}
}; };
int i; int i;
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i, ++ss_count) {
search_site *const ss = &cfg->ss[ss_count++]; cfg->ss_mv[ss_count] = ss_mvs[i];
ss->mv = ss_mvs[i]; cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
ss->offset = ss->mv.row * stride + ss->mv.col;
} }
} }
@@ -1623,7 +1623,9 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// 0 = initial step (MAX_FIRST_STEP) pel // 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel, // 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel... // 2 = (MAX_FIRST_STEP/4) pel...
const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
@@ -1649,10 +1651,10 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// All_in is true if every one of the points we are checking are within // All_in is true if every one of the points we are checking are within
// the bounds of the image. // the bounds of the image.
all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min);
all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max);
all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min);
all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max);
// If all the pixels are within the bounds we don't check whether the // If all the pixels are within the bounds we don't check whether the
// search point is valid in this loop, otherwise we check each point // search point is valid in this loop, otherwise we check each point
@@ -1664,15 +1666,15 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
unsigned char const *block_offset[4]; unsigned char const *block_offset[4];
for (t = 0; t < 4; t++) for (t = 0; t < 4; t++)
block_offset[t] = ss[i + t].offset + best_address; block_offset[t] = ss_os[i + t] + best_address;
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
sad_array); sad_array);
for (t = 0; t < 4; t++, i++) { for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) { if (sad_array[t] < bestsad) {
const MV this_mv = {best_mv->row + ss[i].mv.row, const MV this_mv = {best_mv->row + ss_mv[i].row,
best_mv->col + ss[i].mv.col}; best_mv->col + ss_mv[i].col};
sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
sad_per_bit); sad_per_bit);
if (sad_array[t] < bestsad) { if (sad_array[t] < bestsad) {
@@ -1685,11 +1687,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
} else { } else {
for (j = 0; j < cfg->searches_per_step; j++) { for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors // Trap illegal vectors
const MV this_mv = {best_mv->row + ss[i].mv.row, const MV this_mv = {best_mv->row + ss_mv[i].row,
best_mv->col + ss[i].mv.col}; best_mv->col + ss_mv[i].col};
if (is_mv_in(x, &this_mv)) { if (is_mv_in(x, &this_mv)) {
const uint8_t *const check_here = ss[i].offset + best_address; const uint8_t *const check_here = ss_os[i] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride); in_what_stride);
@@ -1705,25 +1707,25 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
} }
} }
if (best_site != last_site) { if (best_site != last_site) {
best_mv->row += ss[best_site].mv.row; best_mv->row += ss_mv[best_site].row;
best_mv->col += ss[best_site].mv.col; best_mv->col += ss_mv[best_site].col;
best_address += ss[best_site].offset; best_address += ss_os[best_site];
last_site = best_site; last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH) #if defined(NEW_DIAMOND_SEARCH)
while (1) { while (1) {
const MV this_mv = {best_mv->row + ss[best_site].mv.row, const MV this_mv = {best_mv->row + ss_mv[best_site].row,
best_mv->col + ss[best_site].mv.col}; best_mv->col + ss_mv[best_site].col};
if (is_mv_in(x, &this_mv)) { if (is_mv_in(x, &this_mv)) {
const uint8_t *const check_here = ss[best_site].offset + best_address; const uint8_t *const check_here = ss_os[best_site] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride); in_what_stride);
if (thissad < bestsad) { if (thissad < bestsad) {
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) { if (thissad < bestsad) {
bestsad = thissad; bestsad = thissad;
best_mv->row += ss[best_site].mv.row; best_mv->row += ss_mv[best_site].row;
best_mv->col += ss[best_site].mv.col; best_mv->col += ss_mv[best_site].col;
best_address += ss[best_site].offset; best_address += ss_os[best_site];
continue; continue;
} }
} }

View File

@@ -31,14 +31,10 @@ extern "C" {
// for Block_16x16 // for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
// motion search site
typedef struct search_site {
MV mv;
int offset;
} search_site;
typedef struct search_site_config { typedef struct search_site_config {
search_site ss[8 * MAX_MVSEARCH_STEPS + 1]; // motion search sites
MV ss_mv[8 * MAX_MVSEARCH_STEPS + 1]; // Motion vector
intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1]; // Offset
int ss_count; int ss_count;
int searches_per_step; int searches_per_step;
} search_site_config; } search_site_config;