Various VP8 x86 deblocking speedups
Add SSSE3 versions and improve the SSE2 versions a bit. The SSE2/SSSE3 mbedge h functions are currently broken, so explicitly disable them. Originally committed as revision 24403 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
@@ -223,64 +223,31 @@ extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride)
|
||||
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
|
||||
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
|
||||
|
||||
extern void ff_vp8_v_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim);
|
||||
extern void ff_vp8_v_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
|
||||
extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim);
|
||||
extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim);
|
||||
extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
|
||||
extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim);
|
||||
/*
 * Declare the extern prototypes of the loop-filter routines for one name
 * suffix NAME. For both the v and the h variant this covers the simple,
 * 16y inner, 8uv inner, 16y mbedge and 8uv mbedge filters, each pasted
 * together with NAME via ## (e.g. ff_vp8_v_loop_filter_simple_<NAME>).
 */
#define DECLARE_LOOP_FILTER(NAME)\
|
||||
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
|
||||
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
|
||||
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
|
||||
int e, int i, int hvt);\
|
||||
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
|
||||
int e, int i, int hvt);\
|
||||
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
|
||||
int s, int e, int i, int hvt);\
|
||||
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
|
||||
int s, int e, int i, int hvt);\
|
||||
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
|
||||
int e, int i, int hvt);\
|
||||
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
|
||||
int e, int i, int hvt);\
|
||||
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
|
||||
int s, int e, int i, int hvt);\
|
||||
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
|
||||
int s, int e, int i, int hvt);
|
||||
|
||||
extern void ff_vp8_v_loop_filter16y_inner_mmx (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_inner_mmx (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
DECLARE_LOOP_FILTER(mmx)
|
||||
DECLARE_LOOP_FILTER(mmxext)
|
||||
DECLARE_LOOP_FILTER(sse2)
|
||||
DECLARE_LOOP_FILTER(ssse3)
|
||||
|
||||
extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
|
||||
extern void ff_vp8_v_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
|
||||
int e, int i, int hvt);
|
||||
|
||||
extern void ff_vp8_v_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_v_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
extern void ff_vp8_h_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
|
||||
int s, int e, int i, int hvt);
|
||||
#endif
|
||||
|
||||
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
|
||||
@@ -384,8 +351,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
||||
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
|
||||
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
|
||||
|
||||
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
|
||||
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
|
||||
//c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
|
||||
//c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
|
||||
}
|
||||
|
||||
if (mm_flags & FF_MM_SSSE3) {
|
||||
@@ -395,6 +362,19 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
||||
VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
|
||||
VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
|
||||
VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
|
||||
|
||||
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
|
||||
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
|
||||
|
||||
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
|
||||
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
|
||||
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
|
||||
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
|
||||
|
||||
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
|
||||
//c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
|
||||
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
|
||||
//c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
|
||||
}
|
||||
|
||||
if (mm_flags & FF_MM_SSE4) {
|
||||
|
Reference in New Issue
Block a user