From 38fdf7258035eb520ca152e9bea6d95cdfaca424 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Wed, 31 Oct 2012 11:26:32 +0100 Subject: [PATCH 1/3] swscale: do not forget to swap data in formats with different endianness Otherwise during scaling it will try to interpret input in the wrong way and that leads to the test results disagreeing on different platforms and with different optimizations. Signed-off-by: Diego Biurrun --- libswscale/input.c | 42 +++++++++++++++++++++++++++++++++++ tests/ref/lavfi/pixfmts_scale | 16 ++++++------- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 142cc29a62..2e8d43f446 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -724,6 +724,15 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_YUV420P16LE: case AV_PIX_FMT_YUV422P16LE: case AV_PIX_FMT_YUV444P16LE: + case AV_PIX_FMT_YUVA444P9LE: + case AV_PIX_FMT_YUVA422P9LE: + case AV_PIX_FMT_YUVA420P9LE: + case AV_PIX_FMT_YUVA422P10LE: + case AV_PIX_FMT_YUVA444P10LE: + case AV_PIX_FMT_YUVA420P10LE: + case AV_PIX_FMT_YUVA420P16LE: + case AV_PIX_FMT_YUVA422P16LE: + case AV_PIX_FMT_YUVA444P16LE: c->chrToYV12 = bswap16UV_c; break; #else @@ -736,6 +745,15 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_YUV420P16BE: case AV_PIX_FMT_YUV422P16BE: case AV_PIX_FMT_YUV444P16BE: + case AV_PIX_FMT_YUVA444P9BE: + case AV_PIX_FMT_YUVA422P9BE: + case AV_PIX_FMT_YUVA420P9BE: + case AV_PIX_FMT_YUVA422P10BE: + case AV_PIX_FMT_YUVA444P10BE: + case AV_PIX_FMT_YUVA420P10BE: + case AV_PIX_FMT_YUVA420P16BE: + case AV_PIX_FMT_YUVA422P16BE: + case AV_PIX_FMT_YUVA444P16BE: c->chrToYV12 = bswap16UV_c; break; #endif @@ -917,6 +935,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; + case AV_PIX_FMT_YUVA444P9LE: + case AV_PIX_FMT_YUVA422P9LE: + case AV_PIX_FMT_YUVA420P9LE: + case AV_PIX_FMT_YUVA444P10LE: + case AV_PIX_FMT_YUVA422P10LE: + case AV_PIX_FMT_YUVA420P10LE: + case AV_PIX_FMT_YUVA420P16LE: + case AV_PIX_FMT_YUVA422P16LE: + case AV_PIX_FMT_YUVA444P16LE: + c->lumToYV12 = bswap16Y_c; + c->alpToYV12 = bswap16Y_c; + break; #else case AV_PIX_FMT_YUV444P9BE: case AV_PIX_FMT_YUV422P9BE: @@ -930,6 +960,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break; + case AV_PIX_FMT_YUVA444P9BE: + case AV_PIX_FMT_YUVA422P9BE: + case AV_PIX_FMT_YUVA420P9BE: + case AV_PIX_FMT_YUVA444P10BE: + case AV_PIX_FMT_YUVA422P10BE: + case AV_PIX_FMT_YUVA420P10BE: + case AV_PIX_FMT_YUVA420P16BE: + case AV_PIX_FMT_YUVA422P16BE: + case AV_PIX_FMT_YUVA444P16BE: + c->lumToYV12 = bswap16Y_c; + c->alpToYV12 = bswap16Y_c; + break; #endif case AV_PIX_FMT_YUYV422: case AV_PIX_FMT_Y400A: diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index d8ae09cf09..7be8af4a11 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -57,25 +57,25 @@ yuv444p16le a0c5d3c7bf3f181db503cf8e450d1335 yuv444p9be 9ac2643ce7f7e5c4e17c8c9fd8494d4a yuv444p9le 896a1cc9cccca1ba410dd53942d33cc4 yuva420p 8673a9131fb47de69788863f93a50eb7 -yuva420p10be cf397b35db9407496093b2ad64f3106c +yuva420p10be d92a95061809f251175f5d5e3074930e yuva420p10le 8a06c377b8aa2b2979054e074582a5b5 yuva420p16be a61d8ddb646e2d26020fc7ed2a48c1a9 yuva420p16le 90ef774f86ad3177ec57eca8744b4e09 -yuva420p9be b43d5d88a474c80abad8e887eb5a3317 +yuva420p9be f7655546446bfdc875243d7cdeb13b30 yuva420p9le ada2b719827059d70ebc57e2a3f9da92 yuva422p 3c76ebeca0a7d3aa5f8e31ef80a86ffe -yuva422p10be c12a427d2b8fc84f93fd3cf9fd5bcb14 +yuva422p10be 9a21b2f566c0761c8338edaa88006bee yuva422p10le aefcda062e7e3463c887faa9d926aca7 -yuva422p16be a31bd04c58c22690f2a7c745f34cf48f +yuva422p16be c21afa31ac18bd92e8e596b81552b52b yuva422p16le 0bc3720dba6076dcce3b74b1d3c6c4b7 -yuva422p9be b21d2aa97ff643c86bbc08b578729c39 +yuva422p9be a60ac5b8026e9621724c033fbf79dbda yuva422p9le c3eda8831e9b9c94a3eb487d33114103 yuva444p 3268c6abe5e3cdbd16552a1eddced816 -yuva444p10be 4f6eaf2bbe8a083773b9f061fec20e41 +yuva444p10be 3fbd1ece625c7aa7284b9ca3724d6abb yuva444p10le 2eeda83856df77760cd30e477e8ba00b -yuva444p16be 3587f05da58a8435aad648506562d39b +yuva444p16be ed5b07fe4d5b1137604568786777af1d yuva444p16le 3a3df23feb60d8832b566fd9765983d0 -yuva444p9be d5342be0074975ea65907f5b65c7a335 +yuva444p9be 4fc479c5b1044ad37b4e6fc6488b4f7f yuva444p9le c41849b0134670d6f6253c337defbb04 yuvj420p 30427bd6caf5bda93a173dbebe759e09 yuvj422p fc8288f64fd149573f73cf8da05d8e6d From d8eda3708023db388d80027a79d5df7ee25a5a3f Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 8 Jul 2012 19:56:10 +0200 Subject: [PATCH 2/3] x86: mmx2 ---> mmxext in function names --- libavcodec/dct-test.c | 4 +- libavcodec/dsputil.h | 2 +- libavcodec/x86/cavsdsp.c | 29 +++---- libavcodec/x86/dsputil_mmx.c | 144 ++++++++++++++++---------------- libavcodec/x86/dsputil_mmx.h | 10 +-- libavcodec/x86/dsputilenc_mmx.c | 25 ++++-- libavcodec/x86/fdct.c | 7 +- libavcodec/x86/h264_qpel.c | 54 ++++++------ libavcodec/x86/h264dsp_init.c | 13 ++- libavcodec/x86/idct_mmx_xvid.c | 11 +-- libavcodec/x86/idct_xvid.h | 6 +- libavcodec/x86/motion_est.c | 36 ++++---- libavcodec/x86/mpegvideoenc.c | 6 +- libavcodec/x86/vc1dsp_mmx.c | 57 +++++++------ libavfilter/x86/gradfun.c | 6 +- libavfilter/x86/yadif.c | 4 +- libswscale/utils.c | 21 ++--- libswscale/x86/rgb2rgb.c | 4 +- libswscale/x86/swscale.c | 4 +- libswscale/x86/yuv2rgb.c | 8 +- 20 files changed, 238 insertions(+), 213 deletions(-) diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index 848ba8a957..c480aeccf1 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -83,7 +83,7 @@ static const struct algo fdct_tab[] = { #if HAVE_MMX_INLINE { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, - { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT }, + { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT }, { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, #endif @@ -107,7 +107,7 @@ static const struct algo idct_tab[] = { #if HAVE_MMX_INLINE { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, - { "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, + { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, #endif diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index e38f7a744c..f48aa96017 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -49,7 +49,7 @@ void ff_j_rev_dct (DCTELEM *data); void ff_wmv2_idct_c(DCTELEM *data); void ff_fdct_mmx(DCTELEM *block); -void ff_fdct_mmx2(DCTELEM *block); +void ff_fdct_mmxext(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); #define H264_IDCT(depth) \ diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index b628f080e4..f94e2f3f1b 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -438,21 +438,22 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui #endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */ #if HAVE_MMXEXT_INLINE -QPEL_CAVS(put_, PUT_OP, mmx2) -QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2) +QPEL_CAVS(put_, PUT_OP, mmxext) +QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext) -CAVS_MC(put_, 8, mmx2) -CAVS_MC(put_, 16,mmx2) -CAVS_MC(avg_, 8, mmx2) -CAVS_MC(avg_, 16,mmx2) +CAVS_MC(put_, 8, mmxext) +CAVS_MC(put_, 16, mmxext) +CAVS_MC(avg_, 8, mmxext) +CAVS_MC(avg_, 16, mmxext) -static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) { +static void ff_cavsdsp_init_mmxext(CAVSDSPContext *c, AVCodecContext *avctx) +{ #define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \ - c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \ + c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \ dspfunc(put_cavs_qpel, 0, 16); dspfunc(put_cavs_qpel, 1, 8); @@ -475,7 +476,7 @@ CAVS_MC(avg_, 16,3dnow) static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) { #define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \ c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \ c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \ c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \ @@ -496,7 +497,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) int mm_flags = av_get_cpu_flags(); #if HAVE_MMXEXT_INLINE - if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx); + if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmxext(c, avctx); #endif /* HAVE_MMXEXT_INLINE */ #if HAVE_AMD3DNOW_INLINE if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx); diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 1e78c20a96..d23279b389 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -207,7 +207,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; /***********************************/ /* MMXEXT specific */ -#define DEF(x) x ## _mmx2 +#define DEF(x) x ## _mmxext /* Introduced only in MMXEXT set */ #define PAVGB "pavgb" @@ -221,11 +221,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define put_no_rnd_pixels16_mmx put_pixels16_mmx #define put_no_rnd_pixels8_mmx put_pixels8_mmx -#define put_pixels16_mmx2 put_pixels16_mmx -#define put_pixels8_mmx2 put_pixels8_mmx -#define put_pixels4_mmx2 put_pixels4_mmx -#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx -#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx +#define put_pixels16_mmxext put_pixels16_mmx +#define put_pixels8_mmxext put_pixels8_mmx +#define put_pixels4_mmxext put_pixels4_mmx +#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx +#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx #define put_pixels16_3dnow put_pixels16_mmx #define put_pixels8_3dnow put_pixels8_mmx #define put_pixels4_3dnow put_pixels4_mmx @@ -924,11 +924,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, OP(%%mm5, out, %%mm7, d) #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \ -static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int srcStride, \ - int h) \ +static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \ + uint8_t *src, \ + int dstStride, \ + int srcStride, \ + int h) \ { \ uint64_t temp; \ \ @@ -1118,11 +1118,11 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \ } \ } \ \ -static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int srcStride, \ - int h) \ +static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \ + uint8_t *src, \ + int dstStride, \ + int srcStride, \ + int h) \ { \ __asm__ volatile ( \ "pxor %%mm7, %%mm7 \n\t" \ @@ -1755,9 +1755,9 @@ QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) -QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) -QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2) -QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) +QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext) +QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext) +QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext) /***********************************/ /* bilinear qpel: not compliant to any spec, only for -lavdopts fast */ @@ -1811,10 +1811,10 @@ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1) \ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \ -QPEL_2TAP(put_, 16, mmx2) -QPEL_2TAP(avg_, 16, mmx2) -QPEL_2TAP(put_, 8, mmx2) -QPEL_2TAP(avg_, 8, mmx2) +QPEL_2TAP(put_, 16, mmxext) +QPEL_2TAP(avg_, 16, mmxext) +QPEL_2TAP(put_, 8, mmxext) +QPEL_2TAP(avg_, 8, mmxext) QPEL_2TAP(put_, 16, 3dnow) QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) @@ -2035,7 +2035,7 @@ static void name(void *mem, int stride, int h) \ } while (--h); \ } -PREFETCH(prefetch_mmx2, prefetcht0) +PREFETCH(prefetch_mmxext, prefetcht0) PREFETCH(prefetch_3dnow, prefetch) #undef PREFETCH @@ -2089,22 +2089,22 @@ CHROMA_MC(avg, 8, 10, avx) #if HAVE_INLINE_ASM /* CAVS-specific */ -void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { put_pixels8_mmx(dst, src, stride, 8); } -void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { avg_pixels8_mmx(dst, src, stride, 8); } -void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { put_pixels16_mmx(dst, src, stride, 16); } -void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { avg_pixels16_mmx(dst, src, stride, 16); } @@ -2116,10 +2116,10 @@ void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, put_pixels8_mmx(dst, src, stride, 8); } -void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, - int stride, int rnd) +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, + int stride, int rnd) { - avg_pixels8_mmx2(dst, src, stride, 8); + avg_pixels8_mmxext(dst, src, stride, 8); } static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) @@ -2456,74 +2456,74 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) } -static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, - int mm_flags) +static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, + int mm_flags) { const int bit_depth = avctx->bits_per_raw_sample; const int high_bit_depth = bit_depth > 8; #if HAVE_INLINE_ASM - c->prefetch = prefetch_mmx2; + c->prefetch = prefetch_mmxext; if (!high_bit_depth) { - c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; - c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; + c->put_pixels_tab[0][1] = put_pixels16_x2_mmxext; + c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext; - c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; + c->avg_pixels_tab[0][0] = avg_pixels16_mmxext; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext; - c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; - c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; + c->put_pixels_tab[1][1] = put_pixels8_x2_mmxext; + c->put_pixels_tab[1][2] = put_pixels8_y2_mmxext; - c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; - c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; - c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + c->avg_pixels_tab[1][0] = avg_pixels8_mmxext; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmxext; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmxext; } if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { if (!high_bit_depth) { - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmxext; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmxext; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmxext; } } if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { - c->idct_put = ff_idct_xvid_mmx2_put; - c->idct_add = ff_idct_xvid_mmx2_add; - c->idct = ff_idct_xvid_mmx2; + c->idct_put = ff_idct_xvid_mmxext_put; + c->idct_add = ff_idct_xvid_mmxext_add; + c->idct = ff_idct_xvid_mmxext; } if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 || avctx->codec_id == AV_CODEC_ID_THEORA)) { - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmxext; } #endif /* HAVE_INLINE_ASM */ if (CONFIG_H264QPEL) { #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); #endif /* HAVE_INLINE_ASM */ if (!high_bit_depth) { #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); #endif /* HAVE_INLINE_ASM */ } else if (bit_depth == 10) { #if HAVE_YASM @@ -2539,10 +2539,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, } #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmxext, ); #endif /* HAVE_INLINE_ASM */ } @@ -2861,7 +2861,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) dsputil_init_mmx(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_MMXEXT) - dsputil_init_mmx2(c, avctx, mm_flags); + dsputil_init_mmxext(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_3DNOW) dsputil_init_3dnow(c, avctx, mm_flags); diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index bd14c5ad41..a142406a6e 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -89,13 +89,13 @@ void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_s void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); -void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); +void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); -void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 43940bdf81..883d96566c 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -647,7 +647,9 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si } #undef SUM -static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { +static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy, + int line_size, int h) +{ int tmp; assert( (((int)pix) & 7) == 0); @@ -765,7 +767,9 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in } #undef SUM -static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { +static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2, + int line_size, int h) +{ int tmp; assert( (((int)pix1) & 7) == 0); @@ -844,7 +848,10 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ dst[i+0] = src1[i+0]-src2[i+0]; } -static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){ +static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1, + const uint8_t *src2, int w, + int *left, int *left_top) +{ x86_reg i=0; uint8_t l, lt; @@ -976,7 +983,7 @@ DCT_SAD_FUNC(mmx) #define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst) #define MMABS(a,z) MMABS_MMXEXT(a,z) -DCT_SAD_FUNC(mmx2) +DCT_SAD_FUNC(mmxext) #undef HSUM #undef DCT_SAD @@ -1115,7 +1122,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) if(mm_flags & AV_CPU_FLAG_SSE2){ c->fdct = ff_fdct_sse2; } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->fdct = ff_fdct_mmx2; + c->fdct = ff_fdct_mmxext; }else{ c->fdct = ff_fdct_mmx; } @@ -1148,14 +1155,14 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->sum_abs_dctelem= sum_abs_dctelem_mmx2; - c->vsad[4]= vsad_intra16_mmx2; + c->sum_abs_dctelem = sum_abs_dctelem_mmxext; + c->vsad[4] = vsad_intra16_mmxext; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->vsad[0] = vsad16_mmx2; + c->vsad[0] = vsad16_mmxext; } - c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; + c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext; } if(mm_flags & AV_CPU_FLAG_SSE2){ diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c index f9bd3f2508..b37238dfac 100644 --- a/libavcodec/x86/fdct.c +++ b/libavcodec/x86/fdct.c @@ -440,7 +440,8 @@ static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) ); } -static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out, + const int16_t *table) { __asm__ volatile ( "pshufw $0x1B, 8(%0), %%mm5 \n\t" @@ -555,7 +556,7 @@ void ff_fdct_mmx(int16_t *block) } } -void ff_fdct_mmx2(int16_t *block) +void ff_fdct_mmxext(int16_t *block) { DECLARE_ALIGNED(8, int64_t, align_tmp)[16]; int16_t *block1= (int16_t*)align_tmp; @@ -566,7 +567,7 @@ void ff_fdct_mmx2(int16_t *block) fdct_col_mmx(block, block1, 4); for(i=8;i>0;i--) { - fdct_row_mmx2(block1, block, table); + fdct_row_mmxext(block1, block, table); block1 += 8; table += 32; block += 8; diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 5a2db781d2..f978520719 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -1002,36 +1002,36 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\ }\ -#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2 -#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2 -#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2 -#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2 -#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2 -#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2 -#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2 -#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2 +#define put_pixels8_l2_sse2 put_pixels8_l2_mmxext +#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmxext +#define put_pixels16_l2_sse2 put_pixels16_l2_mmxext +#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmxext +#define put_pixels8_l2_ssse3 put_pixels8_l2_mmxext +#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmxext +#define put_pixels16_l2_ssse3 put_pixels16_l2_mmxext +#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmxext -#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2 -#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2 -#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2 -#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2 -#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2 -#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2 -#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2 -#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2 +#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmxext +#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmxext +#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmxext +#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmxext +#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmxext +#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmxext +#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmxext +#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmxext -#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2 -#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2 -#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2 -#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2 +#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmxext +#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmxext +#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmxext +#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmxext #define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2 #define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2 #define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2 #define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2 -#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2 -#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2 +#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmxext +#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmxext #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ @@ -1045,8 +1045,8 @@ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ avg_pixels16_sse2(dst, src, stride, 16); } -#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2 -#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2 +#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext +#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ @@ -1168,8 +1168,8 @@ QPEL_H264(put_, PUT_OP, 3dnow) QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) #undef PAVGB #define PAVGB "pavgb" -QPEL_H264(put_, PUT_OP, mmx2) -QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2) +QPEL_H264(put_, PUT_OP, mmxext) +QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext) QPEL_H264_V_XMM(put_, PUT_OP, sse2) QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2) QPEL_H264_HV_XMM(put_, PUT_OP, sse2) @@ -1185,7 +1185,7 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) #undef PAVGB H264_MC_4816(3dnow) -H264_MC_4816(mmx2) +H264_MC_4816(mmxext) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) #if HAVE_SSSE3_INLINE diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 3f6ded46e1..913c362ee3 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -130,18 +130,17 @@ LF_FUNCS(uint16_t, 10) #if ARCH_X86_32 LF_FUNC(v8, luma, 8, mmx2) -static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0) +static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0) { if ((tc0[0] & tc0[1]) >= 0) ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0); if ((tc0[2] & tc0[3]) >= 0) ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2); } - LF_IFUNC(v8, luma_intra, 8, mmx2) -static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, - int alpha, int beta) +static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, + int alpha, int beta) { ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta); ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta); @@ -246,9 +245,9 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2; } #if ARCH_X86_32 - c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmx2; + c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmxext; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmx2; - c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2; + c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2; #endif /* ARCH_X86_32 */ c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2; diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c index 08a627d5b9..2cf8b47d62 100644 --- a/libavcodec/x86/idct_mmx_xvid.c +++ b/libavcodec/x86/idct_mmx_xvid.c @@ -512,7 +512,8 @@ __asm__ volatile( //----------------------------------------------------------------------------- -void ff_idct_xvid_mmx2(short *block){ +void ff_idct_xvid_mmxext(short *block) +{ __asm__ volatile( //# Process each row DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) @@ -542,15 +543,15 @@ void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) ff_add_pixels_clamped_mmx(block, dest, line_size); } -void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) +void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block) { - ff_idct_xvid_mmx2(block); + ff_idct_xvid_mmxext(block); ff_put_pixels_clamped_mmx(block, dest, line_size); } -void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) +void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block) { - ff_idct_xvid_mmx2(block); + ff_idct_xvid_mmxext(block); ff_add_pixels_clamped_mmx(block, dest, line_size); } diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h index 82fa990873..79d5bf96a8 100644 --- a/libavcodec/x86/idct_xvid.h +++ b/libavcodec/x86/idct_xvid.h @@ -34,9 +34,9 @@ void ff_idct_xvid_mmx(short *block); void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block); void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block); -void ff_idct_xvid_mmx2(short *block); -void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block); -void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block); +void ff_idct_xvid_mmxext(short *block); +void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block); +void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block); void ff_idct_xvid_sse2(short *block); void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block); diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c index 6eb44d4b2d..0a0cab9cd2 100644 --- a/libavcodec/x86/motion_est.c +++ b/libavcodec/x86/motion_est.c @@ -74,7 +74,8 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ); } -static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( ".p2align 4 \n\t" @@ -120,7 +121,8 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) return ret; } -static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( ".p2align 4 \n\t" @@ -142,7 +144,8 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ); } -static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( "movq (%1), %%mm0 \n\t" @@ -167,7 +170,8 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ); } -static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( "movq "MANGLE(bone)", %%mm5 \n\t" @@ -304,7 +308,7 @@ static inline int sum_mmx(void) return ret&0xFFFF; } -static inline int sum_mmx2(void) +static inline int sum_mmxext(void) { int ret; __asm__ volatile( @@ -424,7 +428,7 @@ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, }\ PIX_SAD(mmx) -PIX_SAD(mmx2) +PIX_SAD(mmxext) #endif /* HAVE_INLINE_ASM */ @@ -447,19 +451,19 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) c->sad[1]= sad8_mmx; } if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->pix_abs[0][0] = sad16_mmx2; - c->pix_abs[1][0] = sad8_mmx2; + c->pix_abs[0][0] = sad16_mmxext; + c->pix_abs[1][0] = sad8_mmxext; - c->sad[0]= sad16_mmx2; - c->sad[1]= sad8_mmx2; + c->sad[0] = sad16_mmxext; + c->sad[1] = sad8_mmxext; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->pix_abs[0][1] = sad16_x2_mmx2; - c->pix_abs[0][2] = sad16_y2_mmx2; - c->pix_abs[0][3] = sad16_xy2_mmx2; - c->pix_abs[1][1] = sad8_x2_mmx2; - c->pix_abs[1][2] = sad8_y2_mmx2; - c->pix_abs[1][3] = sad8_xy2_mmx2; + c->pix_abs[0][1] = sad16_x2_mmxext; + c->pix_abs[0][2] = sad16_y2_mmxext; + c->pix_abs[0][3] = sad16_xy2_mmxext; + c->pix_abs[1][1] = sad8_x2_mmxext; + c->pix_abs[1][2] = sad8_y2_mmxext; + c->pix_abs[1][3] = sad8_xy2_mmxext; } } if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) { diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c index 59e3580153..8f7c2e474a 100644 --- a/libavcodec/x86/mpegvideoenc.c +++ b/libavcodec/x86/mpegvideoenc.c @@ -47,8 +47,8 @@ extern uint16_t ff_inv_zigzag_direct16[64]; #define COMPILE_TEMPLATE_SSSE3 0 #undef RENAME #undef RENAMEl -#define RENAME(a) a ## _MMX2 -#define RENAMEl(a) a ## _mmx2 +#define RENAME(a) a ## _MMXEXT +#define RENAMEl(a) a ## _mmxext #include "mpegvideoenc_template.c" #endif /* HAVE_MMXEXT_INLINE */ @@ -92,7 +92,7 @@ void ff_MPV_encode_init_x86(MpegEncContext *s) #endif #if HAVE_MMXEXT_INLINE if (INLINE_MMXEXT(mm_flags)) - s->dct_quantize = dct_quantize_MMX2; + s->dct_quantize = dct_quantize_MMXEXT; #endif #if HAVE_SSE2_INLINE if (INLINE_SSE2(mm_flags)) diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index 6b1ae37efd..b02582f615 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -467,7 +467,10 @@ VC1_MSPEL_MC(avg_) static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ }\ -static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ +static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \ + const uint8_t *src, \ + int stride, int rnd) \ +{ \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ } @@ -490,7 +493,8 @@ DECLARE_FUNCTION(3, 1) DECLARE_FUNCTION(3, 2) DECLARE_FUNCTION(3, 3) -static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (17 * dc + 4) >> 3; @@ -528,7 +532,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (17 * dc + 4) >> 3; @@ -589,7 +594,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = ( 3 * dc + 1) >> 1; @@ -627,7 +633,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (3 * dc + 1) >> 1; @@ -713,29 +720,29 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp) { - dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; - dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2; + dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext; + dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2; - dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2; + dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext; + dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmx2; - dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmx2; - dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmx2; + dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext; + dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext; + dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext; - dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2; - dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2; - dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2; + dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext; + dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext; + dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext; - dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2; - dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2; - dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2; - dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2; + dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext; + dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext; + dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext; + dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext; } #endif /* HAVE_INLINE_ASM */ diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index 424a03138b..b4ca86c617 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -30,7 +30,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; #if HAVE_MMXEXT_INLINE -static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, + const uint16_t *dithers) { intptr_t x; if (width & 3) { @@ -175,7 +177,7 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - gf->filter_line = gradfun_filter_line_mmx2; + gf->filter_line = gradfun_filter_line_mmxext; #endif #if HAVE_SSSE3_INLINE if (cpu_flags & AV_CPU_FLAG_SSSE3) diff --git a/libavfilter/x86/yadif.c b/libavfilter/x86/yadif.c index f178b32cbe..ab1d282f9d 100644 --- a/libavfilter/x86/yadif.c +++ b/libavfilter/x86/yadif.c @@ -49,7 +49,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010 #if HAVE_MMXEXT_INLINE #undef RENAME -#define RENAME(a) a ## _mmx2 +#define RENAME(a) a ## _mmxext #include "yadif_template.c" #endif @@ -61,7 +61,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - yadif->filter_line = yadif_filter_line_mmx2; + yadif->filter_line = yadif_filter_line_mmxext; #endif #if HAVE_SSE2_INLINE if (cpu_flags & AV_CPU_FLAG_SSE2) diff --git a/libswscale/utils.c b/libswscale/utils.c index 64a3a58067..e5e4d60dd2 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -600,8 +600,9 @@ fail: } #if HAVE_MMXEXT_INLINE -static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, - int16_t *filter, int32_t *filterPos, int numSplits) +static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, + int16_t *filter, int32_t *filterPos, + int numSplits) { uint8_t *fragmentA; x86_reg imm8OfPShufW1A; @@ -1043,10 +1044,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, #if HAVE_MMXEXT_INLINE // can't downscale !!! if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) { - c->lumMmxextFilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, - NULL, NULL, 8); - c->chrMmxextFilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, - NULL, NULL, NULL, 4); + c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL, + NULL, NULL, 8); + c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc, + NULL, NULL, NULL, 4); #if USE_MMAP c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize, @@ -1078,10 +1079,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); - initMMX2HScaler(dstW, c->lumXInc, c->lumMmxextFilterCode, - c->hLumFilter, c->hLumFilterPos, 8); - initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, - c->hChrFilter, c->hChrFilterPos, 4); + init_hscaler_mmxext(dstW, c->lumXInc, c->lumMmxextFilterCode, + c->hLumFilter, c->hLumFilterPos, 8); + init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, + c->hChrFilter, c->hChrFilterPos, 4); #if USE_MMAP mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ); diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 486f436702..d4f25804cc 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -99,7 +99,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "rgb2rgb_template.c" //SSE2 versions @@ -139,7 +139,7 @@ av_cold void rgb2rgb_init_x86(void) if (INLINE_AMD3DNOW(cpu_flags)) rgb2rgb_init_3DNOW(); if (INLINE_MMXEXT(cpu_flags)) - rgb2rgb_init_MMX2(); + rgb2rgb_init_MMXEXT(); if (INLINE_SSE2(cpu_flags)) rgb2rgb_init_SSE2(); #endif /* HAVE_INLINE_ASM */ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index fc74d97201..571510ae43 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -83,7 +83,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "swscale_template.c" #endif @@ -311,7 +311,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) sws_init_swScale_MMX(c); #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - sws_init_swScale_MMX2(c); + sws_init_swScale_MMXEXT(c); #endif #endif /* HAVE_INLINE_ASM */ diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 17ac3e2ffe..419d5133f9 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -63,7 +63,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "yuv2rgb_template.c" #endif /* HAVE_MMXEXT_INLINE */ @@ -81,8 +81,10 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { - case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX2; - case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX2; + case AV_PIX_FMT_RGB24: + return yuv420_rgb24_MMXEXT; + case AV_PIX_FMT_BGR24: + return yuv420_bgr24_MMXEXT; } } #endif From fa8fcab1e0d31074c0644c4ac5194474c6c26415 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 27 Jul 2012 13:43:33 +0200 Subject: [PATCH 3/3] x86: h264_chromamc_10bit: drop pointless PAVG %define It is only used in one place so there is no need for the abstraction. --- libavcodec/x86/h264_chromamc_10bit.asm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm index 4481efef7f..d24308d254 100644 --- a/libavcodec/x86/h264_chromamc_10bit.asm +++ b/libavcodec/x86/h264_chromamc_10bit.asm @@ -245,7 +245,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7 %if %0==3 movq %2, %3 %endif - PAVG %1, %2 + pavgw %1, %2 %endmacro %define CHROMAMC_AVG NOTHING @@ -260,7 +260,6 @@ CHROMA_MC4 put CHROMA_MC2 put %define CHROMAMC_AVG AVG -%define PAVG pavgw INIT_XMM sse2 CHROMA_MC8 avg %if HAVE_AVX_EXTERNAL