ffmpeg/libavcodec/x86/dsputil_mmx.h

/*
 * MMX optimized DSP utils
 * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_X86_DSPUTIL_MMX_H
#define AVCODEC_X86_DSPUTIL_MMX_H

#include <stddef.h>
#include <stdint.h>

#include "libavcodec/dsputil.h"
#include "libavutil/x86/asm.h"

/*
 * Shared constants used by the x86 SIMD code. They are only declared here;
 * the definitions live in another translation unit.
 *
 * NOTE(review): the names appear to encode the layout -- "pw" = packed
 * 16-bit words, "pb" = packed bytes, "pd" = packed doubles -- with the suffix
 * giving the replicated value (hexadecimal for the pb_ entries). Confirm
 * against the definitions before relying on this.
 *
 * xmm_reg is not declared in this header (presumably it comes from
 * libavutil/x86/asm.h and is 16 bytes wide -- TODO confirm).
 */

/* Loaded from memory by MOVQ_BONE() / MOVQ_WTWO() in non-PIC builds. */
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;

extern const xmm_reg  ff_pw_3;
extern const xmm_reg  ff_pw_4;
extern const xmm_reg  ff_pw_5;
extern const xmm_reg  ff_pw_8;
extern const uint64_t ff_pw_15;
extern const xmm_reg  ff_pw_16;
extern const xmm_reg  ff_pw_18;
extern const uint64_t ff_pw_20;
extern const xmm_reg  ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_53;
extern const xmm_reg  ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
extern const uint64_t ff_pw_255;

extern const xmm_reg  ff_pb_1;
extern const xmm_reg  ff_pb_3;
extern const uint64_t ff_pb_3F;
extern const xmm_reg  ff_pb_F8;
extern const uint64_t ff_pb_FC;

/* Two-element double arrays (16 bytes where double is 8 bytes). */
extern const double ff_pd_1[2];
extern const double ff_pd_2[2];

/*
 * SBUTTERFLY(a, b, t, n, m): asm text fragment that interleaves two registers.
 *
 *   a, b  input registers; a is overwritten with the low-half interleave,
 *         b is left unmodified
 *   t     receives a copy of a and then the high-half interleave
 *   n     punpck element-size suffix (e.g. wd, dq)
 *   m     mov suffix (e.g. q), giving the instruction "mov<m>"
 *
 * Operands are pasted verbatim into the string, so the caller supplies them
 * in final form (including any '%' prefix). The expansion is a sequence of
 * adjacent string literals meant to be placed inside an asm statement.
 *
 * The last line deliberately has no trailing line continuation, so the
 * definition ends here regardless of what follows it in the file.
 */
#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t "         \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */

/*
 * TRANSPOSE4(a, b, c, d, t): asm text fragment built from four SBUTTERFLY
 * steps (word interleave twice, then dword interleave twice, all with movq).
 *
 * a, b, c, d hold the four input rows and t is a scratch register. Per the
 * step comments below, the transposed rows end up in a, d, t, c (in that
 * order); b is overwritten with an intermediate value.
 * Operands are pasted verbatim into the asm text.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */

/*
 * MOVQ_WONE(regd): set every 16-bit word of register regd to 0x0001.
 * pcmpeqd of a register with itself sets all bits; psrlw $15 then leaves
 * only bit 0 of each word.
 * regd is given as a bare register name; the macro adds the '%' prefix.
 * The asm statement declares no operands or clobbers, so the compiler is not
 * told that regd changes.
 */
#define MOVQ_WONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

#define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
/* Clear register regd (bare name, '%' is added here) by XORing it with itself. */
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)

/*
 * MOVQ_BFE(regd): set every byte of register regd to 0xfe.
 * pcmpeqd of a register with itself sets all bits (0xff per byte); the
 * wrapping byte add of the register to itself gives 0xff + 0xff = 0xfe.
 * This is the mask value the PAVGB*_MMX macros below expect.
 * regd is given as a bare register name; the macro adds the '%' prefix.
 */
#define MOVQ_BFE(regd)                                  \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
        "paddb   %%"#regd", %%"#regd"   \n\t" ::)

/*
 * MOVQ_BONE(regd) / MOVQ_WTWO(regd): load a constant into register regd
 * (bare register name; the macro adds the '%' prefix).
 *
 * Without PIC the value is read from memory (ff_bone / ff_wtwo) through an
 * "m" input operand. With PIC the value is synthesized with register-only
 * instructions, so no global has to be addressed.
 * NOTE(review): presumably ff_bone / ff_wtwo hold the same bit patterns that
 * the PIC variants generate -- their definitions are not visible here.
 */
#ifndef PIC
#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))
#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
#else
// for shared libraries it is better to generate the constants this way
// pcmpeqd reg, reg -> all bits set (-1)

// all ones -> psrlw 15: 0x0001 per word -> packuswb: 0x01 per byte
#define MOVQ_BONE(regd)                                 \
    __asm__ volatile (                                  \
        "pcmpeqd  %%"#regd", %%"#regd"  \n\t"           \
        "psrlw          $15, %%"#regd"  \n\t"           \
        "packuswb %%"#regd", %%"#regd"  \n\t" ::)

// all ones -> psrlw 15: 0x0001 per word -> psllw 1: 0x0002 per word
#define MOVQ_WTWO(regd)                                 \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
        "psrlw         $15, %%"#regd"   \n\t"           \
        "psllw          $1, %%"#regd"   \n\t"::)

#endif

// Per-byte average of rega and regb, rounding down (asm text fragment).
// regr is used as temporary and receives the result;
// the first argument (rega) is unmodified and the second (regb) is trashed.
// regfe is supposed to contain 0xfefefefefefefefe: it clears bit 0 of every
// byte before the 64-bit psrlq so that no bit crosses into the next byte.
// Per byte: regr = (rega & regb) + (((rega ^ regb) & 0xfe) >> 1),
// which equals (rega + regb) >> 1 for unsigned bytes.
// Operands are pasted verbatim into the asm text, so callers pass them in
// final form (including any '%' prefix).
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
    "movq   "#rega", "#regr"            \n\t"                    \
    "pand   "#regb", "#regr"            \n\t"                    \
    "pxor   "#rega", "#regb"            \n\t"                    \
    "pand  "#regfe", "#regb"            \n\t"                    \
    "psrlq       $1, "#regb"            \n\t"                    \
    "paddb  "#regb", "#regr"            \n\t"

// Per-byte average of rega and regb, rounding up (asm text fragment).
// Result goes to regr; rega is unmodified and regb is trashed.
// regfe must contain 0xfe in every byte (see MOVQ_BFE).
// Per byte: regr = (rega | regb) - (((rega ^ regb) & 0xfe) >> 1),
// which equals (rega + regb + 1) >> 1 for unsigned bytes.
// Operands are pasted verbatim into the asm text.
#define PAVGB_MMX(rega, regb, regr, regfe)                       \
    "movq   "#rega", "#regr"            \n\t"                    \
    "por    "#regb", "#regr"            \n\t"                    \
    "pxor   "#rega", "#regb"            \n\t"                    \
    "pand  "#regfe", "#regb"            \n\t"                    \
    "psrlq       $1, "#regb"            \n\t"                    \
    "psubb  "#regb", "#regr"            \n\t"

// Paired form of PAVGB_MMX_NO_RND: two independent round-down byte averages
// with their instructions interleaved.
//   regr = avg(rega, regb), regp = avg(regc, regd)
// rega and regc are unmodified; regb and regd are trashed.
// The mask register is hard-coded here rather than passed as an argument:
// mm6 is supposed to contain 0xfefefefefefefefe
#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
    "movq  "#rega", "#regr"             \n\t"                    \
    "movq  "#regc", "#regp"             \n\t"                    \
    "pand  "#regb", "#regr"             \n\t"                    \
    "pand  "#regd", "#regp"             \n\t"                    \
    "pxor  "#rega", "#regb"             \n\t"                    \
    "pxor  "#regc", "#regd"             \n\t"                    \
    "pand    %%mm6, "#regb"             \n\t"                    \
    "pand    %%mm6, "#regd"             \n\t"                    \
    "psrlq      $1, "#regb"             \n\t"                    \
    "psrlq      $1, "#regd"             \n\t"                    \
    "paddb "#regb", "#regr"             \n\t"                    \
    "paddb "#regd", "#regp"             \n\t"

// Paired form of PAVGB_MMX: two independent round-up byte averages with
// their instructions interleaved.
//   regr = avg(rega, regb), regp = avg(regc, regd)
// rega and regc are unmodified; regb and regd are trashed.
// mm6 is hard-coded as the mask and must contain 0xfe in every byte.
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
    "movq  "#rega", "#regr"             \n\t"                    \
    "movq  "#regc", "#regp"             \n\t"                    \
    "por   "#regb", "#regr"             \n\t"                    \
    "por   "#regd", "#regp"             \n\t"                    \
    "pxor  "#rega", "#regb"             \n\t"                    \
    "pxor  "#regc", "#regd"             \n\t"                    \
    "pand    %%mm6, "#regb"             \n\t"                    \
    "pand    %%mm6, "#regd"             \n\t"                    \
    "psrlq      $1, "#regd"             \n\t"                    \
    "psrlq      $1, "#regb"             \n\t"                    \
    "psubb "#regb", "#regr"             \n\t"                    \
    "psubb "#regd", "#regp"             \n\t"

/*
 * Initialization entry points; implemented elsewhere.
 * NOTE(review): presumably these install x86-optimized function pointers
 * into *c based on avctx -- confirm against the implementations.
 */
void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);

/*
 * Block-to-pixel store helpers; implemented elsewhere.
 * block is read-only 16-bit input, pixels is the 8-bit destination.
 * NOTE(review): line_size is presumably the destination stride in bytes and
 * the block is presumably 8x8 coefficients -- confirm in the implementation.
 */
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);


/*
 * Pixel copy ("put") and averaging ("avg") primitives; implemented elsewhere.
 * block is the writable destination, pixels the read-only source.
 * NOTE(review): the 8/16 in the name is presumably the block width in
 * pixels, line_size the stride in bytes and h the row count -- confirm.
 */
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);

/*
 * CAVS quarter-pel "mc00" put/avg functions for 8- and 16-wide blocks;
 * implemented elsewhere.
 * NOTE(review): src is declared non-const here, unlike the put/avg pixel
 * primitives -- presumably to match a function-pointer type; confirm.
 */
void ff_put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

/*
 * VC-1 "mspel mc00" put function; implemented elsewhere.
 * NOTE(review): rnd is presumably a rounding control whose effect for the
 * mc00 case is not visible from this header -- confirm.
 */
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd);

/*
 * RV40 quarter-pel "mc33" put/avg functions for 8- and 16-wide blocks;
 * implemented elsewhere. block is the destination, pixels the source
 * (declared non-const here).
 */
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);

/*
 * IDCT routines operating on a writable coefficient block; implemented
 * elsewhere.
 * NOTE(review): presumably an in-place 8x8 inverse DCT -- confirm the block
 * size and alignment requirements in the implementation.
 */
void ff_mmx_idct(int16_t *block);
void ff_mmxext_idct(int16_t *block);


/*
 * Deinterlacing line filters; implemented elsewhere.
 * The first variant writes to dst from five read-only input lines
 * (lum_m4 .. lum_m1, lum); size is presumably the line length in pixels --
 * TODO confirm.
 */
void ff_deinterlace_line_mmx(uint8_t *dst,
                             const uint8_t *lum_m4, const uint8_t *lum_m3,
                             const uint8_t *lum_m2, const uint8_t *lum_m1,
                             const uint8_t *lum,
                             int size);

/*
 * NOTE(review): the name says "inplace", yet every pointer parameter is
 * const and there is no separate destination. Check the implementation to
 * see which of these buffers is actually written.
 */
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     const uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);

#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`/*`
			`* MMX optimized DSP utils`
			`* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 10:12:42 +01:00			`#ifndef AVCODEC_X86_DSPUTIL_MMX_H`
			`#define AVCODEC_X86_DSPUTIL_MMX_H`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 17:29:48 +01:00			`#include <stddef.h>`
add required include to make this file self-contained Originally committed as revision 11211 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-12-12 23:45:03 +01:00			`#include <stdint.h>`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 17:29:48 +01:00
Use full path for #includes from another directory. Originally committed as revision 13098 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-05-09 13:56:36 +02:00			`#include "libavcodec/dsputil.h"`
x86: rename libavutil/x86_cpu.h to libavutil/x86/asm.h This puts x86-specific things in the x86/ subdirectory where they belong. Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-08-08 14:51:52 +02:00			`#include "libavutil/x86/asm.h"`
add required include to make this file self-contained Originally committed as revision 11211 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-12-12 23:45:03 +01:00
use ff_ prefix for extern vars Originally committed as revision 11101 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:36:15 +01:00			`extern const uint64_t ff_bone;`
			`extern const uint64_t ff_wtwo;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00
For rounding in chroma MC SSSE3, use 16-byte pw_3/4 instead of reading 8 bytes and then using movlhps to dup it into the higher half of the register. Originally committed as revision 26086 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-12-24 18:23:22 +01:00			`extern const xmm_reg ff_pw_3;`
Make ff_pw_4 128 bits Originally committed as revision 24207 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-12 00:52:55 +02:00			`extern const xmm_reg ff_pw_4;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 01:48:16 +01:00			`extern const xmm_reg ff_pw_5;`
			`extern const xmm_reg ff_pw_8;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pw_15;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 01:48:16 +01:00			`extern const xmm_reg ff_pw_16;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 12:02:07 +02:00			`extern const xmm_reg ff_pw_18;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pw_20;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 01:48:16 +01:00			`extern const xmm_reg ff_pw_32;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pw_42;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 12:02:07 +02:00			`extern const uint64_t ff_pw_53;`
convert ff_pw_64 into an xmm_reg for future use in vp6 sse code Originally committed as revision 17192 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-02-13 00:48:07 +01:00			`extern const xmm_reg ff_pw_64;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pw_96;`
			`extern const uint64_t ff_pw_128;`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 00:14:22 +01:00			`extern const uint64_t ff_pw_255;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00
VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations. Originally committed as revision 24250 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-16 01:02:34 +02:00			`extern const xmm_reg ff_pb_1;`
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264 Originally committed as revision 23783 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-25 20:25:49 +02:00			`extern const xmm_reg ff_pb_3;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pb_3F;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 12:02:07 +02:00			`extern const xmm_reg ff_pb_F8;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 23:23:34 +01:00			`extern const uint64_t ff_pb_FC;`

			`extern const double ff_pd_1[2];`
			`extern const double ff_pd_2[2];`

Factorize some duplicated code from CAVS and H.264 into a common file. patch by Christophe Gisquet, christophe.gisquet free fr Originally committed as revision 11504 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-01-11 09:29:58 +01:00			`#define SBUTTERFLY(a,b,t,n,m)\`
			`"mov" #m " " #a ", " #t " \n\t" /* abcd */\`
			`"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\`
			`"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\`

			`#define TRANSPOSE4(a,b,c,d,t)\`
			`SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\`
			`SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\`
			`SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\`
			`SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */`

split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 00:14:22 +01:00			`#define MOVQ_WONE(regd) \`
Convert asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 syntax, it is not accepted in C99 unless GNU extensions are turned on (with -fasm). The latter form is accepted in any syntax as an extension (without requiring further command-line options). Sun Studio C99 compiler also does not accept asm() while accepting __asm__(), albeit reporting warnings that it's not valid C99 syntax. Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-16 15:34:09 +02:00			`__asm__ volatile ( \`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 00:14:22 +01:00			`"pcmpeqd %%" #regd ", %%" #regd " \n\t" \`
			`"psrlw $15, %%" #regd ::)`

x86: Factorize duplicated inline assembly snippets Signed-off-by: Diego Biurrun <diego@biurrun.de> 2013-04-22 11:23:47 +02:00			`#define JUMPALIGN() __asm__ volatile (".p2align 3"::)`
			`#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)`

			`#define MOVQ_BFE(regd) \`
			`__asm__ volatile ( \`
			`"pcmpeqd %%"#regd", %%"#regd" \n\t" \`
			`"paddb %%"#regd", %%"#regd" \n\t" ::)`

			`#ifndef PIC`
			`#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))`
			`#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))`
			`#else`
			`// for shared library it's better to use this way for accessing constants`
			`// pcmpeqd -> -1`
			`#define MOVQ_BONE(regd) \`
			`__asm__ volatile ( \`
			`"pcmpeqd %%"#regd", %%"#regd" \n\t" \`
			`"psrlw $15, %%"#regd" \n\t" \`
			`"packuswb %%"#regd", %%"#regd" \n\t" ::)`

			`#define MOVQ_WTWO(regd) \`
			`__asm__ volatile ( \`
			`"pcmpeqd %%"#regd", %%"#regd" \n\t" \`
			`"psrlw $15, %%"#regd" \n\t" \`
			`"psllw $1, %%"#regd" \n\t"::)`

			`#endif`

			`// using regr as temporary and for the output result`
			`// first argument is unmodified and second is trashed`
			`// regfe is supposed to contain 0xfefefefefefefefe`
			`#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"pand "#regb", "#regr" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pand "#regfe", "#regb" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"paddb "#regb", "#regr" \n\t"`

			`#define PAVGB_MMX(rega, regb, regr, regfe) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"por "#regb", "#regr" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pand "#regfe", "#regb" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psubb "#regb", "#regr" \n\t"`

			`// mm6 is supposed to contain 0xfefefefefefefefe`
			`#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"movq "#regc", "#regp" \n\t" \`
			`"pand "#regb", "#regr" \n\t" \`
			`"pand "#regd", "#regp" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pxor "#regc", "#regd" \n\t" \`
			`"pand %%mm6, "#regb" \n\t" \`
			`"pand %%mm6, "#regd" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psrlq $1, "#regd" \n\t" \`
			`"paddb "#regb", "#regr" \n\t" \`
			`"paddb "#regd", "#regp" \n\t"`

			`#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"movq "#regc", "#regp" \n\t" \`
			`"por "#regb", "#regr" \n\t" \`
			`"por "#regd", "#regp" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pxor "#regc", "#regd" \n\t" \`
			`"pand %%mm6, "#regb" \n\t" \`
			`"pand %%mm6, "#regd" \n\t" \`
			`"psrlq $1, "#regd" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psubb "#regb", "#regr" \n\t" \`
			`"psubb "#regd", "#regp" \n\t"`

dsputil: Add ff_ prefix to the dsputil_init functions Signed-off-by: Martin Storsjö <martin@martin.st> 2012-02-15 11:06:44 +01:00			`void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);`
			`void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);`
Move declarations of some mmx functions to dsputil_mmx.h Originally committed as revision 19739 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-08-29 18:55:50 +02:00
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 2013-01-20 01:02:29 +01:00			`void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
			`void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
			`void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 00:14:22 +01:00
x86: Move duplicated put_pixels{8\|16}_mmx functions into their own file 2013-04-23 17:10:59 +02:00
			`void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 17:29:48 +01:00			`void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: dsputil: Refactor some ff_{avg\|put}_pixels function declarations 2013-04-11 02:31:09 +02:00			`void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 17:29:48 +01:00
x86: cavs: Put mmx-specific code into its own init function Before, this code was labeled as mmxext and enabled both for the 3dnow and the mmxext case. 2013-04-20 22:03:19 +02:00			`void ff_put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);`
			`void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);`
			`void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);`
			`void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride);`
x86: move function prototypes to header files Originally committed as revision 22266 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-03-06 23:37:08 +01:00
dsputil: convert remaining functions to use ptrdiff_t strides Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 2013-03-08 22:40:03 +01:00			`void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd);`
x86: move function prototypes to header files Originally committed as revision 22266 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-03-06 23:37:08 +01:00
dsputil: convert remaining functions to use ptrdiff_t strides Signed-off-by: Luca Barbato <lu_zero@gentoo.org> 2013-03-08 22:40:03 +01:00			`void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);`
			`void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);`
			`void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);`
			`void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);`
rv40dsp x86: MMX/MMX2/3DNow/SSE2/SSSE3 implementations of MC Code mostly inspired by vp8's MC, however: - its MMX2 horizontal filter is worse because it can't take advantage of the coefficient redundancy - that same coefficient redundancy allows better code for non-SSSE3 versions Benchmark (rounded to tens of unit): V8x8 H8x8 2D8x8 V16x16 H16x16 2D16x16 C 445 358 985 1785 1559 3280 MMX* 219 271 478 714 929 1443 SSE2 131 158 294 425 515 892 SSSE3 120 122 248 387 390 763 End result is overall around a 15% speedup for SSSE3 version (on 6 sequences); all loop filter functions now take around 55% of decoding time, while luma MC dsp functions are around 6%, chroma ones are 1.3% and biweight around 2.3%. Signed-off-by: Diego Biurrun <diego@biurrun.de> 2012-04-19 22:36:17 +02:00
Merge commit '88bd7fdc821aaa0cbcf44cf075c62aaa42121e3f' * commit '88bd7fdc821aaa0cbcf44cf075c62aaa42121e3f': Drop DCTELEM typedef Conflicts: libavcodec/alpha/dsputil_alpha.h libavcodec/alpha/motion_est_alpha.c libavcodec/arm/dsputil_init_armv6.c libavcodec/bfin/dsputil_bfin.h libavcodec/bfin/pixels_bfin.S libavcodec/cavs.c libavcodec/cavsdec.c libavcodec/dct-test.c libavcodec/dnxhdenc.c libavcodec/dsputil.c libavcodec/dsputil.h libavcodec/dsputil_template.c libavcodec/eamad.c libavcodec/h264_cavlc.c libavcodec/h264idct_template.c libavcodec/mpeg12.c libavcodec/mpegvideo.c libavcodec/mpegvideo.h libavcodec/mpegvideo_enc.c libavcodec/ppc/dsputil_altivec.c libavcodec/proresdsp.c Merged-by: Michael Niedermayer <michaelni@gmx.at> 2013-01-23 17:44:56 +01:00			`void ff_mmx_idct(int16_t *block);`
			`void ff_mmxext_idct(int16_t *block);`
x86: move function prototypes to header files Originally committed as revision 22266 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-03-06 23:37:08 +01:00
Convert deinterlacing MMX code to YASM Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-31 16:50:51 +02:00
			`void ff_deinterlace_line_mmx(uint8_t *dst,`
			`const uint8_t *lum_m4, const uint8_t *lum_m3,`
			`const uint8_t *lum_m2, const uint8_t *lum_m1,`
			`const uint8_t *lum,`
			`int size);`

			`void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,`
			`const uint8_t *lum_m3,`
			`const uint8_t *lum_m2,`
			`const uint8_t *lum_m1,`
			`const uint8_t *lum, int size);`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 10:12:42 +01:00			`#endif /* AVCODEC_X86_DSPUTIL_MMX_H */`