From b32c9ca9a3265cc8566d183dad260a056eb68fae Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Mon, 25 Oct 2010 18:02:02 +0000 Subject: [PATCH] h264dsp: merge some asm blocks Some code was initializing some xmm registers in one asm block and using them in the following block, assuming they wouldn't be changed in between blocks. Originally committed as revision 25568 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/x86/h264_qpel_mmx.c | 46 +++++++++++++++------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c index 6228922a38..3ad182091b 100644 --- a/libavcodec/x86/h264_qpel_mmx.c +++ b/libavcodec/x86/h264_qpel_mmx.c @@ -299,11 +299,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, int h=8;\ __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ - "movq %0, %%mm6 \n\t"\ - :: "m"(ff_pw_5)\ - );\ - do{\ - __asm__ volatile(\ + "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ + "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "movq 1(%0), %%mm2 \n\t"\ "movq %%mm0, %%mm1 \n\t"\ @@ -336,7 +333,7 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "punpcklbw %%mm7, %%mm5 \n\t"\ "paddw %%mm3, %%mm2 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\ - "movq %5, %%mm5 \n\t"\ + "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\ "paddw %%mm5, %%mm2 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\ "paddw %%mm2, %%mm0 \n\t"\ @@ -347,15 +344,15 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "packuswb %%mm1, %%mm0 \n\t"\ PAVGB" %%mm4, %%mm0 \n\t"\ OP(%%mm0, (%1),%%mm5, q)\ - "add %4, %0 \n\t"\ - "add %4, %1 \n\t"\ - "add %3, %2 \n\t"\ - : "+a"(src), "+c"(dst), "+d"(src2)\ - : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ - "m"(ff_pw_16)\ + "add %5, %0 \n\t"\ + "add %5, %1 \n\t"\ + "add %4, %2 \n\t"\ + "decl %3 \n\t"\ + "jg 1b \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ + : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ : "memory"\ );\ - }while(--h);\ }\ \ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ @@ -697,11 +694,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, int h=8;\ __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ - "movdqa %0, %%xmm6 \n\t"\ - :: "m"(ff_pw_5)\ - );\ - do{\ - __asm__ volatile(\ + "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\ + "1: \n\t"\ "lddqu -2(%0), %%xmm1 \n\t"\ "movdqa %%xmm1, %%xmm0 \n\t"\ "punpckhbw %%xmm7, %%xmm1 \n\t"\ @@ -721,22 +715,22 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "psllw $2, %%xmm2 \n\t"\ "movq (%2), %%xmm3 \n\t"\ "psubw %%xmm1, %%xmm2 \n\t"\ - "paddw %5, %%xmm0 \n\t"\ + "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\ "pmullw %%xmm6, %%xmm2 \n\t"\ "paddw %%xmm0, %%xmm2 \n\t"\ "psraw $5, %%xmm2 \n\t"\ "packuswb %%xmm2, %%xmm2 \n\t"\ "pavgb %%xmm3, %%xmm2 \n\t"\ OP(%%xmm2, (%1), %%xmm4, q)\ - "add %4, %0 \n\t"\ - "add %4, %1 \n\t"\ - "add %3, %2 \n\t"\ - : "+a"(src), "+c"(dst), "+d"(src2)\ - : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ - "m"(ff_pw_16)\ + "add %5, %0 \n\t"\ + "add %5, %1 \n\t"\ + "add %4, %2 \n\t"\ + "decl %3 \n\t"\ + "jg 1b \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ + : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ : "memory"\ );\ - }while(--h);\ }\ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ \