dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
		
				
					committed by
					
						
						Luca Barbato
					
				
			
			
				
	
			
			
			
						parent
						
							f90ff772e7
						
					
				
				
					commit
					71155d7b41
				
			@@ -71,3 +71,5 @@ YASM-OBJS-$(CONFIG_VP8_DECODER)        += x86/vp8dsp.o
 | 
			
		||||
YASM-OBJS                              += x86/dsputil.o                 \
 | 
			
		||||
                                          x86/deinterlace.o             \
 | 
			
		||||
                                          x86/fmtconvert.o              \
 | 
			
		||||
                                          x86/hpeldsp.o                 \
 | 
			
		||||
                                          x86/mpeg4qpel.o               \
 | 
			
		||||
 
 | 
			
		||||
@@ -24,781 +24,54 @@
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
 | 
			
		||||
   clobber bug - now it will work with 2.95.2 and also with -fPIC
 | 
			
		||||
 */
 | 
			
		||||
static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifndef SKIP_FOR_3DNOW
 | 
			
		||||
static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   (%2), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $8, %2                  \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" 16(%2), %%mm0            \n\t"
 | 
			
		||||
        PAVGB" 24(%2), %%mm1            \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $4, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "pcmpeqb %%mm6, %%mm6           \n\t"
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   (%2), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $8, %2                  \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%2), %%mm2             \n\t"
 | 
			
		||||
        "movq   8(%2), %%mm3            \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm2              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm3              \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm1             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   16(%2), %%mm2           \n\t"
 | 
			
		||||
        "movq   24(%2), %%mm3           \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm2              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm3              \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm1             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $4, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   (%2), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $8, %2                  \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" 16(%2), %%mm0            \n\t"
 | 
			
		||||
        PAVGB" 24(%2), %%mm1            \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm1, (%3)             \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $4, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE  //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
#endif /* SKIP_FOR_3DNOW */
 | 
			
		||||
 | 
			
		||||
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq 8(%1), %%mm2              \n\t"
 | 
			
		||||
        "movq 8(%1, %3), %%mm3          \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        PAVGB" 9(%1), %%mm2             \n\t"
 | 
			
		||||
        PAVGB" 9(%1, %3), %%mm3         \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm2, 8(%2)              \n\t"
 | 
			
		||||
        "movq %%mm3, 8(%2, %3)          \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq 8(%1), %%mm2              \n\t"
 | 
			
		||||
        "movq 8(%1, %3), %%mm3          \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        PAVGB" 9(%1), %%mm2             \n\t"
 | 
			
		||||
        PAVGB" 9(%1, %3), %%mm3         \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm2, 8(%2)              \n\t"
 | 
			
		||||
        "movq %%mm3, 8(%2, %3)          \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifndef SKIP_FOR_3DNOW
 | 
			
		||||
static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $16, %2                 \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" 16(%2), %%mm0            \n\t"
 | 
			
		||||
        PAVGB" 24(%2), %%mm1            \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $2, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE  //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $16, %2                 \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%3), %%mm1             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%2), %%mm1             \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%3), %%mm1             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        PAVGB" 16(%2), %%mm0            \n\t"
 | 
			
		||||
        PAVGB" 24(%2), %%mm1            \n\t"
 | 
			
		||||
        PAVGB" (%3), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" 8(%3), %%mm1             \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $2, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE  //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "pcmpeqb %%mm6, %%mm6           \n\t"
 | 
			
		||||
        "testl $1, %0                   \n\t"
 | 
			
		||||
            " jz 1f                     \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "movq   (%2), %%mm2             \n\t"
 | 
			
		||||
        "movq   8(%2), %%mm3            \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm2              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm3              \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm1             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "add    $16, %2                 \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "decl   %0                      \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   (%2), %%mm2             \n\t"
 | 
			
		||||
        "movq   8(%2), %%mm3            \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm2              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm3              \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm1             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "movq   (%1), %%mm0             \n\t"
 | 
			
		||||
        "movq   8(%1), %%mm1            \n\t"
 | 
			
		||||
        "add    %4, %1                  \n\t"
 | 
			
		||||
        "movq   16(%2), %%mm2           \n\t"
 | 
			
		||||
        "movq   24(%2), %%mm3           \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm2              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm3              \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm1             \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm0              \n\t"
 | 
			
		||||
        "pxor %%mm6, %%mm1              \n\t"
 | 
			
		||||
        "movq   %%mm0, (%3)             \n\t"
 | 
			
		||||
        "movq   %%mm1, 8(%3)            \n\t"
 | 
			
		||||
        "add    %5, %3                  \n\t"
 | 
			
		||||
        "add    $32, %2                 \n\t"
 | 
			
		||||
        "subl   $2, %0                  \n\t"
 | 
			
		||||
        "jnz    1b                      \n\t"
 | 
			
		||||
#if !HAVE_EBX_AVAILABLE //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
 | 
			
		||||
        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#else
 | 
			
		||||
        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 | 
			
		||||
#endif
 | 
			
		||||
        :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
 | 
			
		||||
        :"memory");
 | 
			
		||||
//the following should be used, though better not with gcc ...
 | 
			
		||||
/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
 | 
			
		||||
        :"r"(src1Stride), "r"(dstStride)
 | 
			
		||||
        :"memory");*/
 | 
			
		||||
}
 | 
			
		||||
#endif /* SKIP_FOR_3DNOW */
 | 
			
		||||
 | 
			
		||||
/* GL: this function rounds incorrectly on overflow */
 | 
			
		||||
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    MOVQ_BONE(mm6);
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm2           \n\t"
 | 
			
		||||
        "movq 1(%1), %%mm1              \n\t"
 | 
			
		||||
        "movq 1(%1, %3), %%mm3          \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm0           \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm2           \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm2             \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq 1(%1), %%mm1              \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm2           \n\t"
 | 
			
		||||
        "movq 1(%1, %3), %%mm3          \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm0           \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm2           \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm2             \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile (
 | 
			
		||||
        "pcmpeqb %%mm6, %%mm6           \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq  (%1),     %%mm0          \n\t"
 | 
			
		||||
        "movq  (%1, %3), %%mm2          \n\t"
 | 
			
		||||
        "movq 1(%1),     %%mm1          \n\t"
 | 
			
		||||
        "movq 1(%1, %3), %%mm3          \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm3             \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm2             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        "movq  %%mm0, (%2)              \n\t"
 | 
			
		||||
        "movq  %%mm2, (%2, %3)          \n\t"
 | 
			
		||||
        "movq  (%1, %3,2), %%mm0        \n\t"
 | 
			
		||||
        "movq 1(%1, %3,2), %%mm1        \n\t"
 | 
			
		||||
        "movq  (%1, %4),   %%mm2        \n\t"
 | 
			
		||||
        "movq 1(%1, %4),   %%mm3        \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm3             \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm2             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        "movq  %%mm0, (%2, %3,2)        \n\t"
 | 
			
		||||
        "movq  %%mm2, (%2, %4)          \n\t"
 | 
			
		||||
        "lea   (%1, %3,4), %1           \n\t"
 | 
			
		||||
        "lea   (%2, %3,4), %2           \n\t"
 | 
			
		||||
        "subl  $4, %0                   \n\t"
 | 
			
		||||
        "jg 1b                          \n\t"
 | 
			
		||||
        : "+g"(h), "+r"(pixels), "+r"(block)
 | 
			
		||||
        : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
 | 
			
		||||
        : "memory"
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "sub %3, %2                     \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm0, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm0, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D" (block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* GL: this function rounds incorrectly on overflow */
 | 
			
		||||
static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    MOVQ_BONE(mm6);
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "sub %3, %2                     \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm1           \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm0, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm1           \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm0, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D" (block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile (
 | 
			
		||||
        "movq     (%1), %%mm0           \n\t"
 | 
			
		||||
        "pcmpeqb %%mm6, %%mm6           \n\t"
 | 
			
		||||
        "add        %3, %1              \n\t"
 | 
			
		||||
        "pxor    %%mm6, %%mm0           \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq  (%1),     %%mm1          \n\t"
 | 
			
		||||
        "movq  (%1, %3), %%mm2          \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "movq  %%mm0, (%2)              \n\t"
 | 
			
		||||
        "movq  %%mm1, (%2, %3)          \n\t"
 | 
			
		||||
        "movq  (%1, %3,2), %%mm1        \n\t"
 | 
			
		||||
        "movq  (%1, %4),   %%mm0        \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm0, %%mm1             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm2             \n\t"
 | 
			
		||||
        "pxor  %%mm6, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3,2)         \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %4)           \n\t"
 | 
			
		||||
        "lea   (%1, %3,4), %1           \n\t"
 | 
			
		||||
        "lea   (%2, %3,4), %2           \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jg 1b                          \n\t"
 | 
			
		||||
        :"+g"(h), "+r"(pixels), "+r" (block)
 | 
			
		||||
        :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
 | 
			
		||||
        :"memory"
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%2), %%mm0               \n\t"
 | 
			
		||||
        "movq (%2, %3), %%mm1           \n\t"
 | 
			
		||||
        PAVGB" (%1), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" (%1, %3), %%mm1          \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "movq (%2), %%mm0               \n\t"
 | 
			
		||||
        "movq (%2, %3), %%mm1           \n\t"
 | 
			
		||||
        PAVGB" (%1), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" (%1, %3), %%mm1          \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm2           \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm2         \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" (%2, %3), %%mm2          \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm2           \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm2         \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" (%2, %3), %%mm2          \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        "sub %3, %2                     \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm1             \n\t"
 | 
			
		||||
        "movq (%2, %3), %%mm3           \n\t"
 | 
			
		||||
        "movq (%2, %%"REG_a"), %%mm4    \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm4, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm0, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm0, %%mm1             \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        "movq (%2, %3), %%mm3           \n\t"
 | 
			
		||||
        "movq (%2, %%"REG_a"), %%mm4    \n\t"
 | 
			
		||||
        PAVGB" %%mm3, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm4, %%mm1             \n\t"
 | 
			
		||||
        "movq %%mm2, (%2, %3)           \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %%"REG_a")    \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a, "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Note this is not correctly rounded, but this function is only
 | 
			
		||||
 * used for B-frames so it does not matter. */
 | 
			
		||||
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    MOVQ_BONE(mm6);
 | 
			
		||||
    __asm__ volatile(
 | 
			
		||||
        "lea (%3, %3), %%"REG_a"        \n\t"
 | 
			
		||||
        "movq (%1), %%mm0               \n\t"
 | 
			
		||||
        PAVGB" 1(%1), %%mm0             \n\t"
 | 
			
		||||
         ".p2align 3                    \n\t"
 | 
			
		||||
        "1:                             \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "psubusb %%mm6, %%mm2           \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %%"REG_a"), %%mm2  \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm0             \n\t"
 | 
			
		||||
        PAVGB" %%mm2, %%mm1             \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm0              \n\t"
 | 
			
		||||
        PAVGB" (%2, %3), %%mm1          \n\t"
 | 
			
		||||
        "movq %%mm0, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "movq (%1, %3), %%mm1           \n\t"
 | 
			
		||||
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %3), %%mm1         \n\t"
 | 
			
		||||
        PAVGB" 1(%1, %%"REG_a"), %%mm0  \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "add %%"REG_a", %1              \n\t"
 | 
			
		||||
        PAVGB" %%mm1, %%mm2             \n\t"
 | 
			
		||||
        PAVGB" %%mm0, %%mm1             \n\t"
 | 
			
		||||
        PAVGB" (%2), %%mm2              \n\t"
 | 
			
		||||
        PAVGB" (%2, %3), %%mm1          \n\t"
 | 
			
		||||
        "movq %%mm2, (%2)               \n\t"
 | 
			
		||||
        "movq %%mm1, (%2, %3)           \n\t"
 | 
			
		||||
        "add %%"REG_a", %2              \n\t"
 | 
			
		||||
        "subl $4, %0                    \n\t"
 | 
			
		||||
        "jnz 1b                         \n\t"
 | 
			
		||||
        :"+g"(h), "+S"(pixels), "+D"(block)
 | 
			
		||||
        :"r" ((x86_reg)line_size)
 | 
			
		||||
        :"%"REG_a,  "memory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//FIXME the following could be optimized too ...
 | 
			
		||||
static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(put_no_rnd_pixels8_x2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block,
 | 
			
		||||
                                           const uint8_t *pixels,
 | 
			
		||||
                                           int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_put_no_rnd_pixels8_x2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(put_pixels8_y2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
 | 
			
		||||
                                    int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_put_pixels8_y2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(put_no_rnd_pixels8_y2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block,
 | 
			
		||||
                                           const uint8_t *pixels,
 | 
			
		||||
                                           int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_put_no_rnd_pixels8_y2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(avg_pixels8)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels,
 | 
			
		||||
                                 int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_avg_pixels8)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(avg_pixels8_x2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels,
 | 
			
		||||
                                    int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_avg_pixels8_x2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(avg_pixels8_y2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels,
 | 
			
		||||
                                    int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_avg_pixels8_y2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
 | 
			
		||||
    DEF(avg_pixels8_xy2)(block  , pixels  , line_size, h);
 | 
			
		||||
    DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
 | 
			
		||||
 | 
			
		||||
static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
 | 
			
		||||
                                     int line_size, int h)
 | 
			
		||||
{
 | 
			
		||||
    DEF(ff_avg_pixels8_xy2)(block,     pixels,     line_size, h);
 | 
			
		||||
    DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										465
									
								
								libavcodec/x86/hpeldsp.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										465
									
								
								libavcodec/x86/hpeldsp.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,465 @@
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
;* MMX optimized hpel functions
 | 
			
		||||
;*
 | 
			
		||||
;* This file is part of Libav.
 | 
			
		||||
;*
 | 
			
		||||
;* Libav is free software; you can redistribute it and/or
 | 
			
		||||
;* modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
;* License as published by the Free Software Foundation; either
 | 
			
		||||
;* version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
;*
 | 
			
		||||
;* Libav is distributed in the hope that it will be useful,
 | 
			
		||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
;* Lesser General Public License for more details.
 | 
			
		||||
;*
 | 
			
		||||
;* You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
;* License along with Libav; if not, write to the Free Software
 | 
			
		||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
 | 
			
		||||
%include "libavutil/x86/x86util.asm"
 | 
			
		||||
 | 
			
		||||
SECTION_RODATA
 | 
			
		||||
cextern pb_1
 | 
			
		||||
 | 
			
		||||
SECTION_TEXT
 | 
			
		||||
 | 
			
		||||
; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_PIXELS8_X2 0
 | 
			
		||||
cglobal put_pixels8_x2, 4,5
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*2]
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    PAVGB        m0, [r1+1]
 | 
			
		||||
    PAVGB        m1, [r1+r2+1]
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m1
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    PAVGB        m0, [r1+1]
 | 
			
		||||
    PAVGB        m1, [r1+r2+1]
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m1
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jne .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_PIXELS8_X2
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_PIXELS8_X2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_PIXELS_16 0
 | 
			
		||||
cglobal put_pixels16_x2, 4,5
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*2]
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m2, [r1+8]
 | 
			
		||||
    mova         m3, [r1+r2+8]
 | 
			
		||||
    PAVGB        m0, [r1+1]
 | 
			
		||||
    PAVGB        m1, [r1+r2+1]
 | 
			
		||||
    PAVGB        m2, [r1+9]
 | 
			
		||||
    PAVGB        m3, [r1+r2+9]
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m1
 | 
			
		||||
    mova     [r0+8], m2
 | 
			
		||||
    mova  [r0+r2+8], m3
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m2, [r1+8]
 | 
			
		||||
    mova         m3, [r1+r2+8]
 | 
			
		||||
    PAVGB        m0, [r1+1]
 | 
			
		||||
    PAVGB        m1, [r1+r2+1]
 | 
			
		||||
    PAVGB        m2, [r1+9]
 | 
			
		||||
    PAVGB        m3, [r1+r2+9]
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m1
 | 
			
		||||
    mova     [r0+8], m2
 | 
			
		||||
    mova  [r0+r2+8], m3
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jne .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_PIXELS_16
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_PIXELS_16
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_NO_RND_PIXELS8_X2 0
 | 
			
		||||
cglobal put_no_rnd_pixels8_x2, 4,5
 | 
			
		||||
    mova         m6, [pb_1]
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*2]
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m2, [r1+r2]
 | 
			
		||||
    mova         m1, [r1+1]
 | 
			
		||||
    mova         m3, [r1+r2+1]
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    psubusb      m0, m6
 | 
			
		||||
    psubusb      m2, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m2, m3
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m2
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m1, [r1+1]
 | 
			
		||||
    mova         m2, [r1+r2]
 | 
			
		||||
    mova         m3, [r1+r2+1]
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    psubusb      m0, m6
 | 
			
		||||
    psubusb      m2, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m2, m3
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m2
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jne .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_NO_RND_PIXELS8_X2
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_NO_RND_PIXELS8_X2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
 | 
			
		||||
cglobal put_no_rnd_pixels8_x2_exact, 4,5
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*3]
 | 
			
		||||
    pcmpeqb      m6, m6
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    mova         m2, [r1+r2]
 | 
			
		||||
    mova         m1, [r1+1]
 | 
			
		||||
    mova         m3, [r1+r2+1]
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    pxor         m3, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m2, m3
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m2
 | 
			
		||||
    mova         m0, [r1+r2*2]
 | 
			
		||||
    mova         m1, [r1+r2*2+1]
 | 
			
		||||
    mova         m2, [r1+r4]
 | 
			
		||||
    mova         m3, [r1+r4+1]
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    pxor         m3, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m2, m3
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    mova  [r0+r2*2], m0
 | 
			
		||||
    mova    [r0+r4], m2
 | 
			
		||||
    lea          r1, [r1+r2*4]
 | 
			
		||||
    lea          r0, [r0+r2*4]
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jg .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_NO_RND_PIXELS8_X2_EXACT
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_NO_RND_PIXELS8_X2_EXACT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_PIXELS8_Y2 0
 | 
			
		||||
cglobal put_pixels8_y2, 4,5
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*2]
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    sub          r0, r2
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m2, [r1+r4]
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m1, m2
 | 
			
		||||
    mova    [r0+r2], m0
 | 
			
		||||
    mova    [r0+r4], m1
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m0, [r1+r4]
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    PAVGB        m2, m1
 | 
			
		||||
    PAVGB        m1, m0
 | 
			
		||||
    mova    [r0+r2], m2
 | 
			
		||||
    mova    [r0+r4], m1
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jne .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_PIXELS8_Y2
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_PIXELS8_Y2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_NO_RND_PIXELS8_Y2 0
 | 
			
		||||
cglobal put_no_rnd_pixels8_y2, 4,5
 | 
			
		||||
    mova         m6, [pb_1]
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2+r2]
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    sub          r0, r2
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m2, [r1+r4]
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    psubusb      m1, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m1, m2
 | 
			
		||||
    mova    [r0+r2], m0
 | 
			
		||||
    mova    [r0+r4], m1
 | 
			
		||||
    mova         m1, [r1+r2]
 | 
			
		||||
    mova         m0, [r1+r4]
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    add          r1, r4
 | 
			
		||||
    psubusb      m1, m6
 | 
			
		||||
    PAVGB        m2, m1
 | 
			
		||||
    PAVGB        m1, m0
 | 
			
		||||
    mova    [r0+r2], m2
 | 
			
		||||
    mova    [r0+r4], m1
 | 
			
		||||
    add          r0, r4
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jne .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_NO_RND_PIXELS8_Y2
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_NO_RND_PIXELS8_Y2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 | 
			
		||||
%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
 | 
			
		||||
cglobal put_no_rnd_pixels8_y2_exact, 4,5
 | 
			
		||||
    movsxdifnidn r2, r2d
 | 
			
		||||
    lea          r4, [r2*3]
 | 
			
		||||
    mova         m0, [r1]
 | 
			
		||||
    pcmpeqb      m6, m6
 | 
			
		||||
    add          r1, r2
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
.loop:
 | 
			
		||||
    mova         m1, [r1]
 | 
			
		||||
    mova         m2, [r1+r2]
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    PAVGB        m0, m1
 | 
			
		||||
    PAVGB        m1, m2
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    mova       [r0], m0
 | 
			
		||||
    mova    [r0+r2], m1
 | 
			
		||||
    mova         m1, [r1+r2*2]
 | 
			
		||||
    mova         m0, [r1+r4]
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    pxor         m0, m6
 | 
			
		||||
    PAVGB        m2, m1
 | 
			
		||||
    PAVGB        m1, m0
 | 
			
		||||
    pxor         m2, m6
 | 
			
		||||
    pxor         m1, m6
 | 
			
		||||
    mova  [r0+r2*2], m2
 | 
			
		||||
    mova    [r0+r4], m1
 | 
			
		||||
    lea          r1, [r1+r2*4]
 | 
			
		||||
    lea          r0, [r0+r2*4]
 | 
			
		||||
    sub         r3d, 4
 | 
			
		||||
    jg .loop
 | 
			
		||||
    REP_RET
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
PUT_NO_RND_PIXELS8_Y2_EXACT
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
PUT_NO_RND_PIXELS8_Y2_EXACT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
;
; Averages each 8-byte row of 'pixels' (r1) into the matching row of
; 'block' (r0) using PAVGB (macro defined outside this file section) and
; writes the result back to 'block'.
;   r0 = block (read and written), r1 = pixels (read only),
;   r2 = line_size (row stride), r3d = h (row count).
; Four rows are handled per iteration and the loop only terminates when the
; counter reaches exactly zero (sub/jne), so h must be a positive multiple
; of 4.
%macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5
    ; Widen line_size through r2's own 32-bit alias. Hard-coding edx here is
    ; only correct on ABIs where r2 happens to be rdx/edx.
    movsxdifnidn r2, r2d
    lea          r4, [r2+r2]        ; r4 = 2 * line_size
.loop:
    ; rows 0 and 1
    mova         m0, [r0]
    mova         m1, [r0+r2]
    PAVGB        m0, [r1]
    PAVGB        m1, [r1+r2]
    mova       [r0], m0
    mova    [r0+r2], m1
    add          r1, r4
    add          r0, r4
    ; rows 2 and 3
    mova         m0, [r0]
    mova         m1, [r0+r2]
    PAVGB        m0, [r1]
    PAVGB        m1, [r1+r2]
    add          r1, r4
    mova       [r0], m0
    mova    [r0+r2], m1
    add          r0, r4
    sub         r3d, 4              ; four rows done
    jne .loop
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
AVG_PIXELS8
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
;
; For each 8-byte row: averages pixels[x] with pixels[x+1] (horizontal
; half-pel), then averages that result with the existing contents of
; 'block' and stores it back to 'block'. Both steps use PAVGB (macro
; defined outside this file section).
;   r0 = block (read and written), r1 = pixels (read only; 9 bytes per row
;   are touched because of the +1 offset), r2 = line_size, r3d = h.
; Four rows are handled per iteration and the loop only terminates when the
; counter reaches exactly zero (sub/jne), so h must be a positive multiple
; of 4.
%macro AVG_PIXELS8_X2 0
cglobal avg_pixels8_x2, 4,5
    ; Widen line_size through r2's own 32-bit alias. Hard-coding edx here is
    ; only correct on ABIs where r2 happens to be rdx/edx.
    movsxdifnidn r2, r2d
    lea          r4, [r2*2]         ; r4 = 2 * line_size
.loop:
    ; rows 0 and 1
    mova         m0, [r1]
    mova         m2, [r1+r2]
    PAVGB        m0, [r1+1]         ; horizontal average with right neighbour
    PAVGB        m2, [r1+r2+1]
    PAVGB        m0, [r0]           ; blend with current destination
    PAVGB        m2, [r0+r2]
    add          r1, r4
    mova       [r0], m0
    mova    [r0+r2], m2
    ; rows 2 and 3
    mova         m0, [r1]
    mova         m2, [r1+r2]
    PAVGB        m0, [r1+1]
    PAVGB        m2, [r1+r2+1]
    add          r0, r4
    add          r1, r4
    PAVGB        m0, [r0]
    PAVGB        m2, [r0+r2]
    mova       [r0], m0
    mova    [r0+r2], m2
    add          r0, r4
    sub         r3d, 4              ; four rows done
    jne .loop
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_MMX mmxext
 | 
			
		||||
AVG_PIXELS8_X2
 | 
			
		||||
INIT_MMX 3dnow
 | 
			
		||||
AVG_PIXELS8_X2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
;
; Vertical half-pel average: each output row is the PAVGB of two consecutive
; source rows, averaged once more with the bytes already in block.
;   r0 = block (biased by -line_size), r1 = pixels, r2 = line_size,
;   r3d = h, r4 = 2*line_size
; The loop emits four rows per pass and ends only when the counter is exactly
; zero, so h must be a positive multiple of 4.
%macro AVG_PIXELS8_Y2 0
cglobal avg_pixels8_y2, 4,5
    movsxdifnidn r2, r2d
    mova         m0, [r1]           ; source row 0, carried into the loop
    lea          r4, [r2+r2]
    sub          r0, r2             ; so rows are addressed as [r0+r2], [r0+r4]
.loop:
    ; ---- output rows 0 and 1 ----
    mova         m1, [r1+r2]        ; source row 1
    mova         m2, [r1+r4]        ; source row 2
    mova         m3, [r0+r2]        ; current block row 0
    mova         m4, [r0+r4]        ; current block row 1
    add          r1, r4
    PAVGB        m0, m1
    PAVGB        m0, m3
    mova    [r0+r2], m0
    PAVGB        m1, m2
    PAVGB        m1, m4
    mova    [r0+r4], m1
    ; ---- output rows 2 and 3 ----
    mova         m1, [r1+r2]        ; source row 3
    mova         m0, [r1+r4]        ; source row 4, reused as row 0 next pass
    add          r1, r4
    add          r0, r4
    mova         m3, [r0+r2]        ; current block row 2
    mova         m4, [r0+r4]        ; current block row 3
    PAVGB        m2, m1
    PAVGB        m2, m3
    mova    [r0+r2], m2
    PAVGB        m1, m0
    PAVGB        m1, m4
    mova    [r0+r4], m1
    add          r0, r4
    sub         r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_PIXELS8_Y2
INIT_MMX 3dnow
AVG_PIXELS8_Y2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
;
; Diagonal half-pel average built from chained byte averages: every source
; row is first averaged with itself shifted by one byte, consecutive rows are
; then averaged together, and the result is averaged with the bytes already
; in block.
;   r0 = block, r1 = pixels, r2 = line_size, r3d = h, r4 = 2*line_size
;   m6 = pb_1 (defined elsewhere; by its name a vector of byte 1s - confirm)
; Four rows are produced per pass and the loop ends only when the counter is
; exactly zero, so h must be a positive multiple of 4.
; The PAVGB macro (defined outside this section) is used instead of a raw
; pavgb because this macro is also instantiated for 3dnow below, in line
; with the sibling AVG_PIXELS8_* macros.
%macro AVG_PIXELS8_XY2 0
cglobal avg_pixels8_xy2, 4,5
    mova         m6, [pb_1]
    movsxdifnidn r2, r2d
    lea          r4, [r2*2]
    mova         m0, [r1]
    PAVGB        m0, [r1+1]         ; horizontally averaged source row 0
.loop:
    mova         m2, [r1+r4]
    mova         m1, [r1+r2]
    ; saturating subtract of m6 from source row 2 before averaging
    ; (presumably offsets the upward rounding of the chained averages)
    psubusb      m2, m6
    PAVGB        m1, [r1+r2+1]
    PAVGB        m2, [r1+r4+1]
    add          r1, r4
    PAVGB        m0, m1
    PAVGB        m1, m2
    PAVGB        m0, [r0]
    PAVGB        m1, [r0+r2]
    mova       [r0], m0
    mova    [r0+r2], m1
    mova         m1, [r1+r2]
    mova         m0, [r1+r4]
    PAVGB        m1, [r1+r2+1]
    PAVGB        m0, [r1+r4+1]      ; m0 carries over as row 0 of the next pass
    add          r0, r4
    add          r1, r4
    PAVGB        m2, m1
    PAVGB        m1, m0
    PAVGB        m2, [r0]
    PAVGB        m1, [r0+r2]
    mova       [r0], m2
    ; the fourth row is the m1 result computed above, not a copy of m2
    mova    [r0+r2], m1
    add          r0, r4
    sub         r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_PIXELS8_XY2
INIT_MMX 3dnow
AVG_PIXELS8_XY2
 | 
			
		||||
							
								
								
									
										558
									
								
								libavcodec/x86/mpeg4qpel.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										558
									
								
								libavcodec/x86/mpeg4qpel.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,558 @@
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
;* mpeg4 qpel
 | 
			
		||||
;* Copyright (c) 2008 Loren Merritt
 | 
			
		||||
;*
 | 
			
		||||
;* This file is part of Libav.
 | 
			
		||||
;*
 | 
			
		||||
;* Libav is free software; you can redistribute it and/or
 | 
			
		||||
;* modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
;* License as published by the Free Software Foundation; either
 | 
			
		||||
;* version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
;*
 | 
			
		||||
;* Libav is distributed in the hope that it will be useful,
 | 
			
		||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
;* Lesser General Public License for more details.
 | 
			
		||||
;*
 | 
			
		||||
;* You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
;* License along with Libav; if not, write to the Free Software
 | 
			
		||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
 | 
			
		||||
%include "libavutil/x86/x86util.asm"
 | 
			
		||||
 | 
			
		||||
SECTION_RODATA
 | 
			
		||||
cextern pb_1
 | 
			
		||||
cextern pw_3
 | 
			
		||||
cextern pw_15
 | 
			
		||||
cextern pw_16
 | 
			
		||||
cextern pw_20
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
SECTION_TEXT
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; Writes, for each of h rows, the average of 8 bytes from src1 and 8 bytes
; from src2 to dst. Both inputs are bit-inverted before PAVGB and the result
; is inverted again, which turns a round-up average into a round-down one.
;   r0 = dst, r1 = src1, r2 = src2, r3 = dstStride, r4 = src1Stride, r5d = h
; src2 is read with a fixed pitch of 8 bytes per row.
; When h is odd one row is handled up front; the main loop then emits four
; rows per pass and ends only when the counter is exactly zero.
%macro PUT_NO_RND_PIXELS8_L2 0
cglobal put_no_rnd_pixels8_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    pcmpeqb      m6, m6             ; m6 = all ones, used as inversion mask
    test        r5d, 1
    je .loop
    ; odd h: single leading row
    mova         m0, [r1]
    mova         m1, [r2]
    pxor         m0, m6
    pxor         m1, m6
    PAVGB        m0, m1
    pxor         m0, m6
    mova       [r0], m0
    add          r1, r4
    add          r2, 8
    add          r0, r3
    dec         r5d
.loop:
    ; ---- rows 0 and 1 ----
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+r4*2]
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m2, m6
    PAVGB        m0, m2
    pxor         m0, m6
    pxor         m1, m6
    pxor         m3, m6
    PAVGB        m1, m3
    pxor         m1, m6
    mova       [r0], m0
    mova    [r0+r3], m1
    lea          r0, [r0+r3*2]
    ; ---- rows 2 and 3 ----
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+r4*2]
    mova         m2, [r2+16]
    mova         m3, [r2+24]
    pxor         m0, m6
    pxor         m2, m6
    PAVGB        m0, m2
    pxor         m0, m6
    pxor         m1, m6
    pxor         m3, m6
    PAVGB        m1, m3
    pxor         m1, m6
    mova       [r0], m0
    mova    [r0+r3], m1
    lea          r0, [r0+r3*2]
    add          r2, 32
    sub         r5d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_L2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; Writes, for each of h rows, the average of 16 bytes from src1 and 16 bytes
; from src2 to dst. Both inputs are bit-inverted before PAVGB and the result
; is inverted again, which turns a round-up average into a round-down one.
;   r0 = dst, r1 = src1, r2 = src2, r3 = dstStride, r4 = src1Stride, r5d = h
; src2 is read with a fixed pitch of 16 bytes per row.
; When h is odd one row is handled up front; the main loop then emits two
; rows per pass and ends only when the counter is exactly zero.
%macro PUT_NO_RND_PIXELS16_l2 0
; All six arguments are used (h lives in r5), so six must be declared for
; cglobal to make r5 available on every calling convention.
cglobal put_no_rnd_pixels16_l2, 6,6
    ; sign-extend the two 32-bit strides from their dword aliases
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    pcmpeqb      m6, m6             ; m6 = all ones, used as inversion mask
    test        r5d, 1
    je .loop
    ; odd h: single leading row
    mova         m0, [r1]
    mova         m1, [r1+8]
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    add          r1, r4
    add          r2, 16
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    dec r5d
.loop:
    ; ---- row 0 ----
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    mova         m2, [r2]
    mova         m3, [r2+8]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    ; ---- row 1 ----
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    mova         m2, [r2+16]
    mova         m3, [r2+24]
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m2
    PAVGB        m1, m3
    pxor         m0, m6
    pxor         m1, m6
    mova       [r0], m0
    mova     [r0+8], m1
    add          r0, r3
    add          r2, 32
    sub         r5d, 2
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS16_l2
INIT_MMX 3dnow
PUT_NO_RND_PIXELS16_l2
 | 
			
		||||
 | 
			
		||||
; %1_mpeg4_qpel16_h_lowpass: horizontal low-pass filter, 16 output bytes per
; row. %1 is the name prefix of the generated function.
;   r0 = destination, r1 = source, r2 = destination stride,
;   r3 = source stride, r4d = row count (loop runs until it reaches zero)
; The including code must define PW_ROUND (rounding constant added before the
; shift by 5) and OP_MOV (store macro taking destination, value, scratch).
; Each row reads source bytes [r1] .. [r1+16] and produces four groups of
; four 16-bit results, which are packed with unsigned saturation into two
; 8-byte stores.
; One 8-byte stack slot is reserved through cglobal's stack-size argument and
; addressed as [rsp]; the temporary must not be placed below the stack
; pointer, where nothing guarantees it survives.
%macro MPEG4_QPEL16_H_LOWPASS 1
cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8
    movsxdifnidn r2, r2d
    movsxdifnidn r3, r3d
    pxor         m7, m7             ; zero, for byte -> word unpacking
.loop:
    ; ---- output pixels 0..3 ----
    mova         m0, [r1]
    mova         m1, m0
    mova         m2, m0
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    pshufw       m5, m0, 0x90
    pshufw       m6, m0, 0x41
    mova         m3, m2
    mova         m4, m2
    psllq        m2, 8
    psllq        m3, 16
    psllq        m4, 24
    punpckhbw    m2, m7
    punpckhbw    m3, m7
    punpckhbw    m4, m7
    paddw        m5, m3
    paddw        m6, m2
    paddw        m5, m5
    psubw        m6, m5
    pshufw       m5, m0, 6
    pmullw       m6, [pw_3]
    paddw        m0, m4
    paddw        m5, m1
    pmullw       m0, [pw_20]
    psubw        m0, m5
    paddw        m6, [PW_ROUND]
    paddw        m0, m6
    psraw        m0, 5
    mova      [rsp], m0             ; spill pixels 0..3 into the reserved slot
    ; ---- output pixels 4..7 ----
    mova         m0, [r1+5]
    mova         m5, m0
    mova         m6, m0
    psrlq        m0, 8
    psrlq        m5, 16
    punpcklbw    m0, m7
    punpcklbw    m5, m7
    paddw        m2, m0
    paddw        m3, m5
    paddw        m2, m2
    psubw        m3, m2
    mova         m2, m6
    psrlq        m6, 24
    punpcklbw    m2, m7
    punpcklbw    m6, m7
    pmullw       m3, [pw_3]
    paddw        m1, m2
    paddw        m4, m6
    pmullw       m1, [pw_20]
    psubw        m3, m4
    paddw        m1, [PW_ROUND]
    paddw        m3, m1
    psraw        m3, 5
    mova         m1, [rsp]          ; reload pixels 0..3
    packuswb     m1, m3
    OP_MOV     [r0], m1, m4
    ; ---- output pixels 8..11 ----
    mova         m1, [r1+9]
    mova         m4, m1
    mova         m3, m1
    psrlq        m1, 8
    psrlq        m4, 16
    punpcklbw    m1, m7
    punpcklbw    m4, m7
    paddw        m5, m1
    paddw        m0, m4
    paddw        m5, m5
    psubw        m0, m5
    mova         m5, m3
    psrlq        m3, 24
    pmullw       m0, [pw_3]
    punpcklbw    m3, m7
    paddw        m2, m3
    psubw        m0, m2
    mova         m2, m5
    punpcklbw    m2, m7
    punpckhbw    m5, m7
    paddw        m6, m2
    pmullw       m6, [pw_20]
    paddw        m0, [PW_ROUND]
    paddw        m0, m6
    psraw        m0, 5
    ; ---- output pixels 12..15 ----
    paddw        m3, m5
    pshufw       m6, m5, 0xf9
    paddw        m6, m4
    pshufw       m4, m5, 0xbe
    pshufw       m5, m5, 0x6f
    paddw        m4, m1
    paddw        m5, m2
    paddw        m6, m6
    psubw        m4, m6
    pmullw       m3, [pw_20]
    pmullw       m4, [pw_3]
    psubw        m3, m5
    paddw        m4, [PW_ROUND]
    paddw        m4, m3
    psraw        m4, 5
    packuswb     m0, m4
    OP_MOV   [r0+8], m0, m4
    add          r1, r3
    add          r0, r2
    dec r4d
    jne .loop
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
; PUT_OP dst, src[, scratch]
; Plain full-register store of src to dst. The optional third argument is
; accepted only so the macro is call-compatible with AVG_OP; it is unused.
%macro PUT_OP 2-3
    mova         %1, %2
%endmacro
 | 
			
		||||
 | 
			
		||||
; AVG_OP dst, src, scratch
; Read-modify-write store: loads the current contents of dst into scratch,
; byte-averages src with it (pavgb) and writes the result back to dst.
; Clobbers both src and scratch.
%macro AVG_OP 2-3
    mova         %3, %1             ; scratch = current destination contents
    pavgb        %2, %3             ; src = byte average of src and scratch
    mova         %1, %2             ; write the blended value back
%endmacro
 | 
			
		||||
 | 
			
		||||
; Instantiate the 16-wide horizontal low-pass filter three times for mmxext.
; PW_ROUND selects the rounding constant and OP_MOV the store macro that the
; macro body expands.
INIT_MMX mmxext

; put: rounding constant pw_16, plain store
%define PW_ROUND pw_16
%define OP_MOV   PUT_OP
MPEG4_QPEL16_H_LOWPASS put

; avg: PW_ROUND is still pw_16, only the store macro changes
%define OP_MOV   AVG_OP
MPEG4_QPEL16_H_LOWPASS avg

; put_no_rnd: rounding constant pw_15, plain store
%define PW_ROUND pw_15
%define OP_MOV   PUT_OP
MPEG4_QPEL16_H_LOWPASS put_no_rnd
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; %1_mpeg4_qpel8_h_lowpass: horizontal low-pass filter, 8 output bytes per
; row. %1 is the name prefix of the generated function.
;   r0 = destination, r1 = source, r2 = destination stride,
;   r3 = source stride, r4d = row count (loop runs until it reaches zero)
; The including code must define PW_ROUND (rounding constant added before the
; shift by 5) and OP_MOV (store macro taking destination, value, scratch).
; Each row produces two groups of four 16-bit results that are packed with
; unsigned saturation into a single 8-byte store.
%macro MPEG4_QPEL8_H_LOWPASS 1
cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
    movsxdifnidn r3, r3d
    movsxdifnidn r2, r2d
    pxor         m7, m7             ; zero, for byte -> word unpacking
.loop:
    ; ---- output pixels 0..3 ----
    mova         m0, [r1]
    mova         m1, m0
    mova         m2, m0
    mova         m3, m0
    mova         m4, m0
    punpcklbw    m0, m7             ; source bytes 0..3 as words
    punpckhbw    m1, m7             ; source bytes 4..7 as words
    psllq        m2, 8
    punpckhbw    m2, m7
    psllq        m3, 16
    punpckhbw    m3, m7
    psllq        m4, 24
    punpckhbw    m4, m7
    pshufw       m5, m0, 0x90
    pshufw       m6, m0, 0x41
    paddw        m5, m3
    paddw        m6, m2
    paddw        m5, m5
    psubw        m6, m5
    pmullw       m6, [pw_3]
    paddw        m6, [PW_ROUND]
    pshufw       m5, m0, 0x06
    paddw        m5, m1
    paddw        m0, m4
    pmullw       m0, [pw_20]
    psubw        m0, m5
    paddw        m0, m6
    psraw        m0, 5
    ; ---- output pixels 4..7 ----
    movh         m5, [r1+5]
    punpcklbw    m5, m7
    paddw        m1, m5
    pshufw       m6, m5, 0xf9
    paddw        m2, m6
    pshufw       m6, m5, 0xbe
    paddw        m3, m6
    pshufw       m5, m5, 0x6f
    paddw        m4, m5
    paddw        m2, m2
    psubw        m3, m2
    pmullw       m3, [pw_3]
    psubw        m3, m4
    pmullw       m1, [pw_20]
    paddw        m1, [PW_ROUND]
    paddw        m3, m1
    psraw        m3, 5
    packuswb     m0, m3
    OP_MOV     [r0], m0, m4
    add          r0, r2
    add          r1, r3
    dec         r4d
    jne .loop
    REP_RET
%endmacro

; Instantiate the 8-wide horizontal low-pass filter three times for mmxext.
INIT_MMX mmxext

; put: rounding constant pw_16, plain store
%define PW_ROUND pw_16
%define OP_MOV   PUT_OP
MPEG4_QPEL8_H_LOWPASS put

; avg: PW_ROUND is still pw_16, only the store macro changes
%define OP_MOV   AVG_OP
MPEG4_QPEL8_H_LOWPASS avg

; put_no_rnd: rounding constant pw_15, plain store
%define PW_ROUND pw_15
%define OP_MOV   PUT_OP
MPEG4_QPEL8_H_LOWPASS put_no_rnd
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; QPEL_V_LOW a, b, c, d, dst
; One output row of the vertical low-pass filter, working on four 16-bit
; lanes. On entry m0..m3 hold four consecutive rows; %1..%4 are memory
; operands for further rows and %5 is the destination. Computes
;   (20*(m0+m1) - (%1+%4) + 3*((%2+m3) - 2*(%3+m2)) + PW_ROUND) >> 5
; packs it to bytes with unsigned saturation and hands it to OP_MOV with m7
; as scratch. m0 is reloaded from %4, then SWAP (a macro defined outside this
; section) rotates the names m0..m3 for the next invocation.
; Clobbers m4, m5, m6 and, depending on OP_MOV, m7.
%macro QPEL_V_LOW 5
    paddw      m0, m1
    mova       m4, [pw_20]
    pmullw     m4, m0               ; m4 = 20 * (m0 + m1)
    mova       m0, %4               ; fetch the row that enters the window
    mova       m5, %1
    paddw      m5, m0
    psubw      m4, m5               ; m4 -= %1 + %4
    paddw      m4, [PW_ROUND]
    mova       m6, %3
    paddw      m6, m2
    paddw      m6, m6               ; m6 = 2 * (%3 + m2)
    mova       m5, %2
    paddw      m5, m3
    psubw      m5, m6
    pmullw     m5, [pw_3]           ; m5 = 3 * ((%2 + m3) - 2*(%3 + m2))
    paddw      m5, m4
    psraw      m5, 5
    packuswb   m5, m5
    OP_MOV     %5, m5, m7
    SWAP 0,1,2,3
%endmacro
 | 
			
		||||
 | 
			
		||||
; %1_mpeg4_qpel16_v_lowpass: vertical low-pass filter over a 16-byte-wide
; block. %1 is the name prefix of the generated function.
;   r0 = destination, r1 = source, r2 = destination stride, r3 = source stride
; The including code must define PW_ROUND and OP_MOV (see QPEL_V_LOW).
; Phase 1 (.looph) reads 17 source rows of 16 bytes, widens them to words and
;   stores them on the stack as four planes of 17 * 8 = 0x88 bytes each, one
;   plane per group of four columns.
; Phase 2 (.loopv) runs four times, once per plane; each pass emits 16 output
;   rows through QPEL_V_LOW and then moves r0 to the next four columns.
%macro MPEG4_QPEL16_V_LOWPASS 1
cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
    movsxdifnidn r3, r3d
    movsxdifnidn r2, r2d

    pxor         m7, m7             ; zero, for byte -> word unpacking
    mov          r5, rsp
    mov         r4d, 17
.looph:
    mova         m0, [r1]
    mova         m2, [r1+8]
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7             ; columns  0..3
    punpckhbw    m1, m7             ; columns  4..7
    punpcklbw    m2, m7             ; columns  8..11
    punpckhbw    m3, m7             ; columns 12..15
    mova [r5+0x000], m0
    mova [r5+0x088], m1
    mova [r5+0x110], m2
    mova [r5+0x198], m3
    add          r1, r3
    add          r5, 8
    dec         r4d
    jne .looph


    ; NOTE: r1 is repurposed from here on: r1 = 4 - 14*dstStride, the step
    ; that takes r0 from the end of one column group to the top of the next.
    mov          r1, 4
    neg          r2
    lea          r1, [r1+r2*8]
    lea          r1, [r1+r2*4]
    lea          r1, [r1+r2*2]
    neg          r2
    mov          r5, rsp
    mov         r4d, 4
.loopv:
    pxor         m7, m7
    mova         m0, [r5+0x00]
    mova         m1, [r5+0x08]
    mova         m2, [r5+0x10]
    mova         m3, [r5+0x18]
    QPEL_V_LOW [r5+0x10], [r5+0x08], [r5+0x00], [r5+0x20], [r0]
    QPEL_V_LOW [r5+0x08], [r5+0x00], [r5+0x00], [r5+0x28], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x00], [r5+0x00], [r5+0x08], [r5+0x30], [r0]
    QPEL_V_LOW [r5+0x00], [r5+0x08], [r5+0x10], [r5+0x38], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x08], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0]
    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0]
    QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0]
    QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0]
    QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0]
    QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2]

    add          r0, r1             ; back to the top row, four columns right
    add          r5, 0x88           ; next plane on the stack
    dec         r4d
    jne .loopv
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
; PUT_OPH dst, src[, scratch]
; Store of src to dst using movh instead of a full-register mova. The
; optional third argument is accepted only so the macro is call-compatible
; with AVG_OPH; it is unused.
%macro PUT_OPH 2-3
    movh         %1, %2
%endmacro
 | 
			
		||||
 | 
			
		||||
; AVG_OPH dst, src, scratch
; Read-modify-write store using movh: loads the current contents of dst into
; scratch, byte-averages src with it (pavgb) and writes the result back to
; dst. Clobbers both src and scratch.
%macro AVG_OPH 2-3
    movh         %3, %1             ; scratch = current destination contents
    pavgb        %2, %3             ; src = byte average of src and scratch
    movh         %1, %2             ; write the blended value back
%endmacro
 | 
			
		||||
 | 
			
		||||
; Instantiate the 16-wide vertical low-pass filter three times for mmxext.
; PW_ROUND selects the rounding constant and OP_MOV the store macro that
; QPEL_V_LOW expands.
INIT_MMX mmxext

; put: rounding constant pw_16, plain movh store
%define PW_ROUND pw_16
%define OP_MOV   PUT_OPH
MPEG4_QPEL16_V_LOWPASS put

; avg: PW_ROUND is still pw_16, only the store macro changes
%define OP_MOV   AVG_OPH
MPEG4_QPEL16_V_LOWPASS avg

; put_no_rnd: rounding constant pw_15, plain movh store
%define PW_ROUND pw_15
%define OP_MOV   PUT_OPH
MPEG4_QPEL16_V_LOWPASS put_no_rnd
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; %1_mpeg4_qpel8_v_lowpass: vertical low-pass filter over an 8-byte-wide
; block. %1 is the name prefix of the generated function.
;   r0 = destination, r1 = source, r2 = destination stride, r3 = source stride
; The including code must define PW_ROUND and OP_MOV (see QPEL_V_LOW).
; Phase 1 (.looph) reads 9 source rows of 8 bytes, widens them to words and
;   stores them on the stack as two planes of 9 * 8 = 0x48 bytes each, one
;   plane per group of four columns.
; Phase 2 (.loopv) runs twice, once per plane; each pass emits 8 output rows
;   through QPEL_V_LOW and then moves r0 to the next four columns.
%macro MPEG4_QPEL8_V_LOWPASS 1
cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
    movsxdifnidn r3, r3d
    movsxdifnidn r2, r2d

    pxor         m7, m7             ; zero, for byte -> word unpacking
    mov          r5, rsp
    mov         r4d, 9
.looph:
    mova         m0, [r1]
    mova         m1, m0
    punpcklbw    m0, m7             ; columns 0..3
    punpckhbw    m1, m7             ; columns 4..7
    mova  [r5+0x00], m0
    mova  [r5+0x48], m1
    add          r1, r3
    add          r5, 8
    dec         r4d
    jne .looph


    ; NOTE: r1 is repurposed from here on: r1 = 4 - 6*dstStride, the step
    ; that takes r0 from the end of one column group to the top of the next.
    mov          r1, 4
    neg          r2
    lea          r1, [r1+r2*4]
    lea          r1, [r1+r2*2]
    neg          r2
    mov          r5, rsp
    mov         r4d, 2
.loopv:
    pxor         m7, m7
    mova         m0, [r5+0x00]
    mova         m1, [r5+0x08]
    mova         m2, [r5+0x10]
    mova         m3, [r5+0x18]
    QPEL_V_LOW [r5+0x10], [r5+0x08], [r5+0x00], [r5+0x20], [r0]
    QPEL_V_LOW [r5+0x08], [r5+0x00], [r5+0x00], [r5+0x28], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x00], [r5+0x00], [r5+0x08], [r5+0x30], [r0]
    QPEL_V_LOW [r5+0x00], [r5+0x08], [r5+0x10], [r5+0x38], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x08], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2]
    lea          r0, [r0+r2*2]
    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0]
    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2]

    add          r0, r1             ; back to the top row, four columns right
    add          r5, 0x48           ; next plane on the stack
    dec         r4d
    jne .loopv
    REP_RET
%endmacro

; Instantiate the 8-wide vertical low-pass filter three times for mmxext.
INIT_MMX mmxext

; put: rounding constant pw_16, plain movh store
%define PW_ROUND pw_16
%define OP_MOV   PUT_OPH
MPEG4_QPEL8_V_LOWPASS put

; avg: PW_ROUND is still pw_16, only the store macro changes
%define OP_MOV   AVG_OPH
MPEG4_QPEL8_V_LOWPASS avg

; put_no_rnd: rounding constant pw_15, plain movh store
%define PW_ROUND pw_15
%define OP_MOV   PUT_OPH
MPEG4_QPEL8_V_LOWPASS put_no_rnd
 | 
			
		||||
@@ -697,7 +697,9 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
 | 
			
		||||
 | 
			
		||||
av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 | 
			
		||||
{
 | 
			
		||||
#if HAVE_YASM
 | 
			
		||||
        dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
 | 
			
		||||
#endif /* HAVE_YASM */
 | 
			
		||||
        dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
 | 
			
		||||
        dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
 | 
			
		||||
        dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
 | 
			
		||||
@@ -720,7 +722,9 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 | 
			
		||||
 | 
			
		||||
av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
 | 
			
		||||
{
 | 
			
		||||
#if HAVE_YASM
 | 
			
		||||
        dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext;
 | 
			
		||||
#endif /* HAVE_YASM */
 | 
			
		||||
        dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext;
 | 
			
		||||
        dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext;
 | 
			
		||||
        dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user