x86/mpegvideoencdsp: improve ff_pix_sum16_sse2
~15% faster. Also add an mmxext version that takes advantage of the new code, and build it alongside the mmx version only on x86_32. Reviewed-by: Michael Niedermayer <michaelni@gmx.at> Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@@ -24,6 +24,7 @@
|
||||
#include "libavcodec/mpegvideoencdsp.h"
|
||||
|
||||
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
|
||||
int ff_pix_sum16_mmxext(uint8_t *pix, int line_size);
|
||||
int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
|
||||
int ff_pix_sum16_xop(uint8_t *pix, int line_size);
|
||||
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
|
||||
@@ -218,11 +219,17 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
#if ARCH_X86_32
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
c->pix_sum = ff_pix_sum16_mmx;
|
||||
c->pix_norm1 = ff_pix_norm1_mmx;
|
||||
}
|
||||
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->pix_sum = ff_pix_sum16_mmxext;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->pix_sum = ff_pix_sum16_sse2;
|
||||
c->pix_norm1 = ff_pix_norm1_sse2;
|
||||
|
Reference in New Issue
Block a user