diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index 40ee6c4746..5754c10871 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -385,60 +385,6 @@ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ "pxor %%mm7, %%mm7 \n\t" -#if 0 -#define FULL_YSCALEYUV2RGB \ - "pxor %%mm7, %%mm7 \n\t"\ - "movd %6, %%mm6 \n\t" /*yalpha1*/\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "movd %7, %%mm5 \n\t" /*uvalpha1*/\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ASMALIGN(4)\ - "1: \n\t"\ - "movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "movq "AV_STRINGIFY(VOF)"(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "movq "AV_STRINGIFY(VOF)"(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ - "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ -\ -\ - "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\ - "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\ - "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ -\ -\ - "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ - "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\ - "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\ - "paddw %%mm1, %%mm3 \n\t" /* B*/\ - "paddw %%mm1, %%mm0 \n\t" /* R*/\ - "packuswb %%mm3, %%mm3 \n\t"\ -\ - "packuswb %%mm0, %%mm0 \n\t"\ - "paddw %%mm4, %%mm2 \n\t"\ - "paddw %%mm2, %%mm1 \n\t" /* G*/\ -\ - "packuswb %%mm1, %%mm1 \n\t" -#endif #define REAL_YSCALEYUV2PACKED(index, c) \ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -1213,221 +1159,6 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t * int uvalpha1=4095-uvalpha; int i; -#if 0 //isn't used - if (flags&SWS_FULL_CHR_H_INT) - { - switch(dstFormat) - { -#ifdef HAVE_MMX - case PIX_FMT_RGB32: - __asm__ volatile( - - -FULL_YSCALEYUV2RGB - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - MOVNTQ(%%mm3, (%4, %%REGa, 4)) - MOVNTQ(%%mm1, 8(%4, %%REGa, 4)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; - case PIX_FMT_BGR24: - __asm__ volatile( - -FULL_YSCALEYUV2RGB - - // lsb ... msb - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 - "psrlq $8, %%mm3 \n\t" // GR0BGR00 - "pand "MANGLE(bm00000111)", %%mm2 \n\t" // BGR00000 - "pand "MANGLE(bm11111000)", %%mm3 \n\t" // 000BGR00 - "por %%mm2, %%mm3 \n\t" // BGRBGR00 - "movq %%mm1, %%mm2 \n\t" - "psllq $48, %%mm1 \n\t" // 000000BG - "por %%mm1, %%mm3 \n\t" // BGRBGRBG - - "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 - "psrld $16, %%mm2 \n\t" // R000R000 - "psrlq $24, %%mm1 \n\t" // 0BGR0000 - "por %%mm2, %%mm1 \n\t" // RBGRR000 - - "mov %4, %%"REG_b" \n\t" - "add %%"REG_a", %%"REG_b" \n\t" - -#ifdef HAVE_MMX2 - //FIXME Alignment - "movntq %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" - "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" -#else - "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" - "psrlq $32, %%mm3 \n\t" - "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t" - "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" -#endif - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a, "%"REG_b - ); - break; - case PIX_FMT_BGR555: - __asm__ volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb "MANGLE(g5Dither)", %%mm1 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(b5Dither)", %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $2, %%mm1 \n\t" - "psllw $7, %%mm0 \n\t" - "pand "MANGLE(g15Mask)", %%mm1 \n\t" - "pand "MANGLE(r15Mask)", %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%REGa, 2)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; - case PIX_FMT_BGR565: - __asm__ volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb "MANGLE(g5Dither)", %%mm1 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(b5Dither)", %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $3, %%mm1 \n\t" - "psllw $8, %%mm0 \n\t" - "pand "MANGLE(g16Mask)", %%mm1 \n\t" - "pand "MANGLE(r16Mask)", %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%REGa, 2)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; -#endif /* HAVE_MMX */ - case PIX_FMT_BGR32: -#ifndef HAVE_MMX - case PIX_FMT_RGB32: -#endif - if (dstFormat==PIX_FMT_RGB32) - { - int i; -#ifdef WORDS_BIGENDIAN - dest++; -#endif - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; - dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; - dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+= 4; - } - } - else if (dstFormat==PIX_FMT_BGR24) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; - dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; - dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+= 3; - } - } - else if (dstFormat==PIX_FMT_BGR565) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table16b[(Y + yuvtab_40cf[U]) >>13] | - clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table16r[(Y + yuvtab_3343[V]) >>13]; - } - } - else if (dstFormat==PIX_FMT_BGR555) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table15b[(Y + yuvtab_40cf[U]) >>13] | - clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table15r[(Y + yuvtab_3343[V]) >>13]; - } - } - }//FULL_UV_IPOL - else - { -#endif // if 0 #ifdef HAVE_MMX if(!(c->flags & SWS_BITEXACT)){ switch(c->dstFormat)