VP8 MBedge loopfilter MMX/MMX2/SSE2 functions for both luma (width=16)

and chroma (width=8).

Originally committed as revision 24378 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Ronald S. Bultje
2010-07-20 22:58:56 +00:00
parent 92bfd7461c
commit e9e456d850
4 changed files with 687 additions and 2 deletions

View File

@@ -255,6 +255,32 @@ extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_v_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_v_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_h_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride,
int e, int i, int hvt);
extern void ff_vp8_v_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_v_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_h_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
extern void ff_vp8_h_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV,
int s, int e, int i, int hvt);
#endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
@@ -301,6 +327,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
}
/* note that 4-tap width=16 functions are missing because w=16
@@ -321,6 +352,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
}
if (mm_flags & FF_MM_SSE) {
@@ -339,11 +375,17 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
}
if (mm_flags & FF_MM_SSE2) {
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
}
if (mm_flags & FF_MM_SSSE3) {