24bpp support (untested)
Originally committed as revision 2238 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
commit d9cf0d3368
parent fffd2e0ac2
@@ -31,6 +31,8 @@ static uint64_t ugCoeff= 0xE5E2E5E2E5E2E5E2LL;
 static uint64_t vgCoeff= 0xF36EF36EF36EF36ELL;
 static uint64_t w80= 0x0080008000800080LL;
 static uint64_t w10= 0x0010001000100010LL;
+static uint64_t bm00000111=0x0000000000FFFFFFLL;
+static uint64_t bm11111000=0xFFFFFFFFFF000000LL;
 
 static uint64_t b16Dither= 0x0004000400040004LL;
 static uint64_t b16Dither1=0x0004000400040004LL;
@@ -412,7 +414,6 @@ s_xinc&= -2; //clear last bit or uv and y might be shifted relative to each othe
 );
 
 #elif defined (ARCH_X86)
-//NO MMX just normal asm ... FIXME try/write funny MMX2 variant
 asm volatile(
 "xorl %%eax, %%eax \n\t" // i
 "xorl %%ebx, %%ebx \n\t" // xx
@@ -555,6 +556,55 @@ YSCALEYUV2RGB
 : "%eax"
 );
 }
+else if(dstbpp==24)
+{
+asm volatile(
+
+YSCALEYUV2RGB
+
+// lsb ... msb
+"punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
+"punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
+
+"movq %%mm3, %%mm1 \n\t"
+"punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
+"punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
+
+"movq %%mm3, %%mm2 \n\t" // BGR0BGR0
+"psrlq $8, %%mm3 \n\t" // GR0BGR00
+"pand bm00000111, %%mm2 \n\t" // BGR00000
+"pand bm11111000, %%mm3 \n\t" // 000BGR00
+"por %%mm2, %%mm3 \n\t" // BGRBGR00
+"movq %%mm1, %%mm2 \n\t"
+"psllq $48, %%mm1 \n\t" // 000000BG
+"por %%mm1, %%mm3 \n\t" // BGRBGRBG
+
+"movq %%mm2, %%mm1 \n\t" // BGR0BGR0
+"psrld $16, %%mm2 \n\t" // R000R000
+"psrlq $24, %%mm1 \n\t" // 0BGR0000
+"por %%mm2, %%mm1 \n\t" // RBGRR000
+
+"movl %4, %%ebx \n\t"
+"addl %%eax, %%ebx \n\t"
+#ifdef HAVE_MMX2
+//FIXME Alignment
+"movntq %%mm3, (%%ebx, %%eax, 2)\n\t"
+"movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t"
+#else
+"movd %%mm3, (%%ebx, %%eax, 2) \n\t"
+"psrlq $32, %%mm3 \n\t"
+"movd %%mm3, 4(%%ebx, %%eax, 2) \n\t"
+"movd %%mm1, 8(%%ebx, %%eax, 2) \n\t"
+#endif
+"addl $4, %%eax \n\t"
+"cmpl %5, %%eax \n\t"
+" jb 1b \n\t"
+
+:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw),
+"m" (yalpha1), "m" (uvalpha1)
+: "%eax", "%ebx"
+);
+}
 else if(dstbpp==16)
 {
 asm volatile(
@@ -603,7 +653,7 @@ YSCALEYUV2RGB
 dest+=dstbpp>>3;
 }
 }
-else if(dstbpp==16) //16bit
+else if(dstbpp==16)
 {
 for(i=0;i<dstw;i++){
 // vertical linear interpolation && yuv2rgb in a single step:
|
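Note on the new 24bpp path, for readers not fluent in MMX: YSCALEYUV2RGB leaves four pixels as two quadwords of BGR0 dwords, and the pand/psrlq/por sequence (using the new bm00000111/bm11111000 masks) squeezes those 16 bytes down to 12 bytes of packed 24-bit BGR. Below is a minimal scalar sketch of the same byte shuffle, assuming a little-endian host; the helper name and signature are illustrative, not part of the commit.

    #include <stdint.h>
    #include <string.h>

    /* Scalar equivalent of the MMX packing above (little-endian assumed).
     * q0 holds pixels 0,1 and q1 holds pixels 2,3, each as a BGR0 dword;
     * dst receives 12 bytes of packed 24-bit BGR. Illustrative only. */
    static void bgr0_to_bgr24(uint64_t q0, uint64_t q1, uint8_t dst[12])
    {
        uint64_t lo;
        uint32_t hi;

        lo  =  q0       & 0x0000000000FFFFFFULL; /* BGR00000 (bm00000111) */
        lo |= (q0 >> 8) & 0xFFFFFFFFFF000000ULL; /* 000BGR00 (bm11111000) */
        lo |=  q1 << 48;                         /* BGRBGRBG: B,G of pixel 2 */

        hi  = (uint32_t)((q1 >> 16) & 0xFF);          /* R of pixel 2 */
        hi |= (uint32_t)((q1 >> 32) & 0xFFFFFF) << 8; /* B,G,R of pixel 3 */

        memcpy(dst,     &lo, 8); /* bytes 0..7:  BGRBGRBG */
        memcpy(dst + 8, &hi, 4); /* bytes 8..11: RBGR */
    }

In the MMX2 branch the two movntq stores write 16 bytes per group of four pixels, deliberately overlapping the next group's first 4 bytes (hence the "FIXME Alignment"); the (%%ebx, %%eax, 2) addressing with %%ebx preloaded as dest + %%eax yields dest + 3*%%eax, i.e. 3 bytes per pixel.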