From c5c2060cf597c8eb5989ca4ba68a1eaeb59f7cdb Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Thu, 24 Jan 2013 18:39:10 +0100 Subject: [PATCH 1/2] x86: h264qpel: add cpu flag checks for init function The code was copied from per cpu extension init function so the checks for supported extensions was overlooked. --- libavcodec/x86/h264_qpel.c | 145 +++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 71 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index bebf5a5f3d..9157223c84 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -21,6 +21,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" #include "libavcodec/dsputil.h" #include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" @@ -530,89 +531,91 @@ QPEL16(mmxext) void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) { +#if HAVE_YASM int high_bit_depth = bit_depth > 8; int mm_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_EXTERNAL - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); - } else if (bit_depth == 10) { + if (EXTERNAL_MMXEXT(mm_flags)) { + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); + } else if (bit_depth == 10) { #if !ARCH_X86_64 - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); #endif - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); - } -#endif - -#if HAVE_SSE2_EXTERNAL - if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { - // these functions are slower than mmx on AMD, but faster on Intel - H264_QPEL_FUNCS(0, 0, sse2); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); + } } - if (!high_bit_depth) { - H264_QPEL_FUNCS(0, 1, sse2); - H264_QPEL_FUNCS(0, 2, sse2); - H264_QPEL_FUNCS(0, 3, sse2); - H264_QPEL_FUNCS(1, 1, sse2); - H264_QPEL_FUNCS(1, 2, sse2); - H264_QPEL_FUNCS(1, 3, sse2); - H264_QPEL_FUNCS(2, 1, sse2); - H264_QPEL_FUNCS(2, 2, sse2); - H264_QPEL_FUNCS(2, 3, sse2); - H264_QPEL_FUNCS(3, 1, sse2); - H264_QPEL_FUNCS(3, 2, sse2); - H264_QPEL_FUNCS(3, 3, sse2); + if (EXTERNAL_SSE2(mm_flags)) { + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { + // these functions are slower than mmx on AMD, but faster on Intel + H264_QPEL_FUNCS(0, 0, sse2); + } + + if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } + + if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64); + H264_QPEL_FUNCS_10(2, 0, sse2_cache64); + H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + } } - if (bit_depth == 10) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); - H264_QPEL_FUNCS_10(1, 0, sse2_cache64); - H264_QPEL_FUNCS_10(2, 0, sse2_cache64); - H264_QPEL_FUNCS_10(3, 0, sse2_cache64); - } -#endif + if (EXTERNAL_SSSE3(mm_flags)) { + if (!high_bit_depth) { + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); + } -#if HAVE_SSSE3_EXTERNAL - if (!high_bit_depth) { - H264_QPEL_FUNCS(1, 0, ssse3); - H264_QPEL_FUNCS(1, 1, ssse3); - H264_QPEL_FUNCS(1, 2, ssse3); - H264_QPEL_FUNCS(1, 3, ssse3); - H264_QPEL_FUNCS(2, 0, ssse3); - H264_QPEL_FUNCS(2, 1, ssse3); - H264_QPEL_FUNCS(2, 2, ssse3); - H264_QPEL_FUNCS(2, 3, ssse3); - H264_QPEL_FUNCS(3, 0, ssse3); - H264_QPEL_FUNCS(3, 1, ssse3); - H264_QPEL_FUNCS(3, 2, ssse3); - H264_QPEL_FUNCS(3, 3, ssse3); + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); + } } - if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); - } -#endif - -#if HAVE_AVX_EXTERNAL - if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, sse2); - H264_QPEL_FUNCS_10(2, 0, sse2); - H264_QPEL_FUNCS_10(3, 0, sse2); + if (EXTERNAL_AVX(mm_flags)) { + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); + } } #endif } From 2c10e2a2f62477efaef5b641974594f7df4ca339 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 24 Jan 2013 17:47:03 +0100 Subject: [PATCH 2/2] build: Make the H.264 parser select h264qpel It is required for building the shared H.264 code. --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 87045f25ee..77600ef2b5 100755 --- a/configure +++ b/configure @@ -1633,7 +1633,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder" wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" # parsers -h264_parser_select="error_resilience golomb h264dsp h264pred mpegvideo" +h264_parser_select="error_resilience golomb h264dsp h264pred h264qpel mpegvideo" mpeg4video_parser_select="error_resilience mpegvideo" mpegvideo_parser_select="error_resilience mpegvideo" vc1_parser_select="error_resilience mpegvideo"