Improve SSE2 loopfilter functions
Restructured and rewrote SSE2 loopfilter functions. Combined u and v into one function to take advantage of SSE2 128-bit registers. Tests on test clips showed a 4% decoder performance improvement on Linux desktop. Change-Id: Iccc6669f09e17f2224da715f7547d6f93b0a4987
This commit is contained in:
parent
f1a3b1e0d9
commit
bead039d4d
@ -14,16 +14,6 @@
|
|||||||
#include "loopfilter.h"
|
#include "loopfilter.h"
|
||||||
#include "onyxc_int.h"
|
#include "onyxc_int.h"
|
||||||
|
|
||||||
typedef void loop_filter_uvfunction
|
|
||||||
(
|
|
||||||
unsigned char *u, // source pointer
|
|
||||||
int p, // pitch
|
|
||||||
const signed char *flimit,
|
|
||||||
const signed char *limit,
|
|
||||||
const signed char *thresh,
|
|
||||||
unsigned char *v
|
|
||||||
);
|
|
||||||
|
|
||||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||||
|
@ -117,5 +117,14 @@ typedef struct
|
|||||||
#define LF_INVOKE(ctx,fn) vp8_lf_##fn
|
#define LF_INVOKE(ctx,fn) vp8_lf_##fn
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef void loop_filter_uvfunction
|
||||||
|
(
|
||||||
|
unsigned char *u, // source pointer
|
||||||
|
int p, // pitch
|
||||||
|
const signed char *flimit,
|
||||||
|
const signed char *limit,
|
||||||
|
const signed char *thresh,
|
||||||
|
unsigned char *v
|
||||||
|
);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -34,6 +34,11 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
|||||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
||||||
|
|
||||||
|
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||||
|
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||||
|
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
|
||||||
|
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
||||||
|
|
||||||
#if HAVE_MMX
|
#if HAVE_MMX
|
||||||
// Horizontal MB filtering
|
// Horizontal MB filtering
|
||||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||||
@ -157,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||||
|
|
||||||
if (v_ptr)
|
|
||||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -183,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||||
|
|
||||||
if (v_ptr)
|
|
||||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -211,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||||
|
|
||||||
if (v_ptr)
|
|
||||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -241,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||||
|
|
||||||
if (u_ptr)
|
if (u_ptr)
|
||||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||||
|
|
||||||
if (v_ptr)
|
|
||||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user