loopfilter improvements
Local variable offsets are now consistent across the functions, unused parameters have been removed, and the assembly has been reworked to eliminate stalls and instructions. Change-Id: Iaa37668f8a9bb8754df435f6a51c3a08d547f879
This commit is contained in:
parent
d9ca52452b
commit
e0a80519c7
@ -210,6 +210,8 @@ void vp8_loop_filter_frame
|
|||||||
|
|
||||||
int mb_row;
|
int mb_row;
|
||||||
int mb_col;
|
int mb_col;
|
||||||
|
int mb_rows = cm->mb_rows;
|
||||||
|
int mb_cols = cm->mb_cols;
|
||||||
|
|
||||||
int filter_level;
|
int filter_level;
|
||||||
|
|
||||||
@ -217,6 +219,8 @@ void vp8_loop_filter_frame
|
|||||||
|
|
||||||
/* Point at base of Mb MODE_INFO list */
|
/* Point at base of Mb MODE_INFO list */
|
||||||
const MODE_INFO *mode_info_context = cm->mi;
|
const MODE_INFO *mode_info_context = cm->mi;
|
||||||
|
int post_y_stride = post->y_stride;
|
||||||
|
int post_uv_stride = post->uv_stride;
|
||||||
|
|
||||||
/* Initialize the loop filter for this frame. */
|
/* Initialize the loop filter for this frame. */
|
||||||
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
|
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
|
||||||
@ -227,23 +231,23 @@ void vp8_loop_filter_frame
|
|||||||
v_ptr = post->v_buffer;
|
v_ptr = post->v_buffer;
|
||||||
|
|
||||||
/* vp8_filter each macro block */
|
/* vp8_filter each macro block */
|
||||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||||
{
|
{
|
||||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||||
{
|
{
|
||||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||||
mode_info_context->mbmi.mode != SPLITMV &&
|
|
||||||
mode_info_context->mbmi.mb_skip_coeff);
|
|
||||||
|
|
||||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
|
||||||
const int seg = mode_info_context->mbmi.segment_id;
|
|
||||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
|
||||||
|
|
||||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
|
||||||
|
|
||||||
if (filter_level)
|
|
||||||
{
|
{
|
||||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||||
|
mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
|
mode_info_context->mbmi.mb_skip_coeff);
|
||||||
|
|
||||||
|
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||||
|
const int seg = mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
|
|
||||||
|
if (filter_level)
|
||||||
{
|
{
|
||||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||||
lfi.mblim = lfi_n->mblim[filter_level];
|
lfi.mblim = lfi_n->mblim[filter_level];
|
||||||
@ -253,54 +257,87 @@ void vp8_loop_filter_frame
|
|||||||
|
|
||||||
if (mb_col > 0)
|
if (mb_col > 0)
|
||||||
vp8_loop_filter_mbv
|
vp8_loop_filter_mbv
|
||||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
if (!skip_lf)
|
||||||
vp8_loop_filter_bv
|
vp8_loop_filter_bv
|
||||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
/* don't apply across umv border */
|
||||||
if (mb_row > 0)
|
if (mb_row > 0)
|
||||||
vp8_loop_filter_mbh
|
vp8_loop_filter_mbh
|
||||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||||
|
|
||||||
if (!skip_lf)
|
if (!skip_lf)
|
||||||
vp8_loop_filter_bh
|
vp8_loop_filter_bh
|
||||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
y_ptr += 16;
|
||||||
|
u_ptr += 8;
|
||||||
|
v_ptr += 8;
|
||||||
|
|
||||||
|
mode_info_context++; /* step to next MB */
|
||||||
|
}
|
||||||
|
y_ptr += post_y_stride * 16 - post->y_width;
|
||||||
|
u_ptr += post_uv_stride * 8 - post->uv_width;
|
||||||
|
v_ptr += post_uv_stride * 8 - post->uv_width;
|
||||||
|
|
||||||
|
mode_info_context++; /* Skip border mb */
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else /* SIMPLE_LOOPFILTER */
|
||||||
|
{
|
||||||
|
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||||
|
{
|
||||||
|
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||||
|
{
|
||||||
|
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||||
|
mode_info_context->mbmi.mode != SPLITMV &&
|
||||||
|
mode_info_context->mbmi.mb_skip_coeff);
|
||||||
|
|
||||||
|
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||||
|
const int seg = mode_info_context->mbmi.segment_id;
|
||||||
|
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||||
|
|
||||||
|
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||||
|
if (filter_level)
|
||||||
{
|
{
|
||||||
|
const unsigned char * mblim = lfi_n->mblim[filter_level];
|
||||||
|
const unsigned char * blim = lfi_n->blim[filter_level];
|
||||||
|
|
||||||
if (mb_col > 0)
|
if (mb_col > 0)
|
||||||
vp8_loop_filter_simple_mbv
|
vp8_loop_filter_simple_mbv
|
||||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
(y_ptr, post_y_stride, mblim);
|
||||||
|
|
||||||
if (!skip_lf)
|
if (!skip_lf)
|
||||||
vp8_loop_filter_simple_bv
|
vp8_loop_filter_simple_bv
|
||||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
(y_ptr, post_y_stride, blim);
|
||||||
|
|
||||||
/* don't apply across umv border */
|
/* don't apply across umv border */
|
||||||
if (mb_row > 0)
|
if (mb_row > 0)
|
||||||
vp8_loop_filter_simple_mbh
|
vp8_loop_filter_simple_mbh
|
||||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
(y_ptr, post_y_stride, mblim);
|
||||||
|
|
||||||
if (!skip_lf)
|
if (!skip_lf)
|
||||||
vp8_loop_filter_simple_bh
|
vp8_loop_filter_simple_bh
|
||||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
(y_ptr, post_y_stride, blim);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
y_ptr += 16;
|
||||||
|
u_ptr += 8;
|
||||||
|
v_ptr += 8;
|
||||||
|
|
||||||
|
mode_info_context++; /* step to next MB */
|
||||||
}
|
}
|
||||||
|
y_ptr += post_y_stride * 16 - post->y_width;
|
||||||
|
u_ptr += post_uv_stride * 8 - post->uv_width;
|
||||||
|
v_ptr += post_uv_stride * 8 - post->uv_width;
|
||||||
|
|
||||||
y_ptr += 16;
|
mode_info_context++; /* Skip border mb */
|
||||||
u_ptr += 8;
|
|
||||||
v_ptr += 8;
|
|
||||||
|
|
||||||
mode_info_context++; /* step to next MB */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
y_ptr += post->y_stride * 16 - post->y_width;
|
|
||||||
u_ptr += post->uv_stride * 8 - post->uv_width;
|
|
||||||
v_ptr += post->uv_stride * 8 - post->uv_width;
|
|
||||||
|
|
||||||
mode_info_context++; /* Skip border mb */
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,10 @@
|
|||||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||||
|
|
||||||
|
#define prototype_loopfilter_nc(sym) \
|
||||||
|
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||||
|
const unsigned char *limit, const unsigned char *thresh)
|
||||||
|
|
||||||
#define prototype_simple_loopfilter(sym) \
|
#define prototype_simple_loopfilter(sym) \
|
||||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||||
|
|
||||||
@ -30,11 +34,11 @@ prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
|||||||
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
|
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
|
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
|
||||||
#else
|
#else
|
||||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2);
|
||||||
#endif
|
#endif
|
||||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2);
|
||||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||||
|
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||||
@ -124,7 +128,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned
|
|||||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
@ -135,7 +139,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||||
{
|
{
|
||||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||||
@ -149,9 +153,9 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
#else
|
#else
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
@ -174,9 +178,9 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||||
#else
|
#else
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user