loopfilter improvements

Local variable offsets are now consistent for the functions,
removed unused parameters, reworked the assembly to eliminate
stalls/instructions.

Change-Id: Iaa37668f8a9bb8754df435f6a51c3a08d547f879
This commit is contained in:
Scott LaVarnway 2012-04-12 14:22:47 -04:00
parent d9ca52452b
commit e0a80519c7
3 changed files with 418 additions and 491 deletions

View File

@ -210,6 +210,8 @@ void vp8_loop_filter_frame
int mb_row; int mb_row;
int mb_col; int mb_col;
int mb_rows = cm->mb_rows;
int mb_cols = cm->mb_cols;
int filter_level; int filter_level;
@ -217,6 +219,8 @@ void vp8_loop_filter_frame
/* Point at base of Mb MODE_INFO list */ /* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi; const MODE_INFO *mode_info_context = cm->mi;
int post_y_stride = post->y_stride;
int post_uv_stride = post->uv_stride;
/* Initialize the loop filter for this frame. */ /* Initialize the loop filter for this frame. */
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
@ -227,23 +231,23 @@ void vp8_loop_filter_frame
v_ptr = post->v_buffer; v_ptr = post->v_buffer;
/* vp8_filter each macro block */ /* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) if (cm->filter_type == NORMAL_LOOPFILTER)
{ {
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) for (mb_row = 0; mb_row < mb_rows; mb_row++)
{ {
int skip_lf = (mode_info_context->mbmi.mode != B_PRED && for (mb_col = 0; mb_col < mb_cols; mb_col++)
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{ {
if (cm->filter_type == NORMAL_LOOPFILTER) int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{ {
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level]; lfi.mblim = lfi_n->mblim[filter_level];
@ -253,54 +257,87 @@ void vp8_loop_filter_frame
if (mb_col > 0) if (mb_col > 0)
vp8_loop_filter_mbv vp8_loop_filter_mbv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_bv vp8_loop_filter_bv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
/* don't apply across umv border */ /* don't apply across umv border */
if (mb_row > 0) if (mb_row > 0)
vp8_loop_filter_mbh vp8_loop_filter_mbh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_bh vp8_loop_filter_bh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
} }
else
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
}
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */
}
}
else /* SIMPLE_LOOPFILTER */
{
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{ {
const unsigned char * mblim = lfi_n->mblim[filter_level];
const unsigned char * blim = lfi_n->blim[filter_level];
if (mb_col > 0) if (mb_col > 0)
vp8_loop_filter_simple_mbv vp8_loop_filter_simple_mbv
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); (y_ptr, post_y_stride, mblim);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_simple_bv vp8_loop_filter_simple_bv
(y_ptr, post->y_stride, lfi_n->blim[filter_level]); (y_ptr, post_y_stride, blim);
/* don't apply across umv border */ /* don't apply across umv border */
if (mb_row > 0) if (mb_row > 0)
vp8_loop_filter_simple_mbh vp8_loop_filter_simple_mbh
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); (y_ptr, post_y_stride, mblim);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_simple_bh vp8_loop_filter_simple_bh
(y_ptr, post->y_stride, lfi_n->blim[filter_level]); (y_ptr, post_y_stride, blim);
} }
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
} }
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
y_ptr += 16; mode_info_context++; /* Skip border mb */
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
} }
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,10 @@
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count) const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_nc(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh)
#define prototype_simple_loopfilter(sym) \ #define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit) void sym(unsigned char *y, int ystride, const unsigned char *blimit)
@ -30,11 +34,11 @@ prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_bv_y_sse2); prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
prototype_loopfilter(vp8_loop_filter_bh_y_sse2); prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
#else #else
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2); prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2);
#endif #endif
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2); prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
@ -124,7 +128,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi) int y_stride, int uv_stride, loop_filter_info *lfi)
{ {
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr) if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
@ -135,7 +139,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi) int y_stride, int uv_stride, loop_filter_info *lfi)
{ {
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr) if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
@ -149,9 +153,9 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64 #if ARCH_X86_64
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else #else
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif #endif
if (u_ptr) if (u_ptr)
@ -174,9 +178,9 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64 #if ARCH_X86_64
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else #else
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif #endif
if (u_ptr) if (u_ptr)