Merge commit '932806232108872655556100011fe369125805d3'
* commit '932806232108872655556100011fe369125805d3':
  x86: dsputil: Move avg_pixels16_mmx() out of rnd_template.c
  x86: dsputil: Move avg_pixels8_mmx() out of rnd_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
35ef98013d
@ -1086,7 +1086,7 @@ void ff_put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
|||||||
|
|
||||||
void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
||||||
{
|
{
|
||||||
avg_pixels8_mmx(dst, src, stride, 8);
|
ff_avg_pixels8_mmx(dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
||||||
@ -1096,7 +1096,7 @@ void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
|||||||
|
|
||||||
void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
||||||
{
|
{
|
||||||
avg_pixels16_mmx(dst, src, stride, 16);
|
ff_avg_pixels16_mmx(dst, src, stride, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* VC-1-specific */
|
/* VC-1-specific */
|
||||||
@ -1134,7 +1134,7 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[
|
|||||||
|
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
DIRAC_PIXOP(put, ff_put, mmx)
|
DIRAC_PIXOP(put, ff_put, mmx)
|
||||||
DIRAC_PIXOP(avg, avg, mmx)
|
DIRAC_PIXOP(avg, ff_avg, mmx)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
|
@ -156,6 +156,10 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_s
|
|||||||
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
|
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
|
||||||
|
|
||||||
|
|
||||||
|
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h);
|
||||||
|
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h);
|
||||||
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
|
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
|
@ -29,6 +29,51 @@
|
|||||||
|
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
|
|
||||||
|
// in case more speed is needed - unroling would certainly help
|
||||||
|
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h)
|
||||||
|
{
|
||||||
|
MOVQ_BFE(mm6);
|
||||||
|
JUMPALIGN();
|
||||||
|
do {
|
||||||
|
__asm__ volatile(
|
||||||
|
"movq %0, %%mm0 \n\t"
|
||||||
|
"movq %1, %%mm1 \n\t"
|
||||||
|
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
|
"movq %%mm2, %0 \n\t"
|
||||||
|
:"+m"(*block)
|
||||||
|
:"m"(*pixels)
|
||||||
|
:"memory");
|
||||||
|
pixels += line_size;
|
||||||
|
block += line_size;
|
||||||
|
}
|
||||||
|
while (--h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h)
|
||||||
|
{
|
||||||
|
MOVQ_BFE(mm6);
|
||||||
|
JUMPALIGN();
|
||||||
|
do {
|
||||||
|
__asm__ volatile(
|
||||||
|
"movq %0, %%mm0 \n\t"
|
||||||
|
"movq %1, %%mm1 \n\t"
|
||||||
|
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
|
"movq %%mm2, %0 \n\t"
|
||||||
|
"movq 8%0, %%mm0 \n\t"
|
||||||
|
"movq 8%1, %%mm1 \n\t"
|
||||||
|
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
|
"movq %%mm2, 8%0 \n\t"
|
||||||
|
:"+m"(*block)
|
||||||
|
:"m"(*pixels)
|
||||||
|
:"memory");
|
||||||
|
pixels += line_size;
|
||||||
|
block += line_size;
|
||||||
|
}
|
||||||
|
while (--h);
|
||||||
|
}
|
||||||
|
|
||||||
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h)
|
ptrdiff_t line_size, int h)
|
||||||
{
|
{
|
||||||
|
@ -74,8 +74,11 @@ void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
|
|||||||
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
|
|
||||||
|
#define avg_pixels8_mmx ff_avg_pixels8_mmx
|
||||||
|
#define avg_pixels16_mmx ff_avg_pixels16_mmx
|
||||||
#define put_pixels8_mmx ff_put_pixels8_mmx
|
#define put_pixels8_mmx ff_put_pixels8_mmx
|
||||||
#define put_pixels16_mmx ff_put_pixels16_mmx
|
#define put_pixels16_mmx ff_put_pixels16_mmx
|
||||||
|
#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
|
||||||
#define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx
|
#define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx
|
||||||
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
|
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
|
||||||
|
|
||||||
|
@ -92,51 +92,6 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff
|
|||||||
}
|
}
|
||||||
|
|
||||||
// avg_pixels
|
// avg_pixels
|
||||||
#ifndef NO_RND
|
|
||||||
// in case more speed is needed - unroling would certainly help
|
|
||||||
static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
|
||||||
{
|
|
||||||
MOVQ_BFE(mm6);
|
|
||||||
JUMPALIGN();
|
|
||||||
do {
|
|
||||||
__asm__ volatile(
|
|
||||||
"movq %0, %%mm0 \n\t"
|
|
||||||
"movq %1, %%mm1 \n\t"
|
|
||||||
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
|
|
||||||
"movq %%mm2, %0 \n\t"
|
|
||||||
:"+m"(*block)
|
|
||||||
:"m"(*pixels)
|
|
||||||
:"memory");
|
|
||||||
pixels += line_size;
|
|
||||||
block += line_size;
|
|
||||||
}
|
|
||||||
while (--h);
|
|
||||||
}
|
|
||||||
#endif /* NO_RND */
|
|
||||||
|
|
||||||
static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
|
||||||
{
|
|
||||||
MOVQ_BFE(mm6);
|
|
||||||
JUMPALIGN();
|
|
||||||
do {
|
|
||||||
__asm__ volatile(
|
|
||||||
"movq %0, %%mm0 \n\t"
|
|
||||||
"movq %1, %%mm1 \n\t"
|
|
||||||
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
|
|
||||||
"movq %%mm2, %0 \n\t"
|
|
||||||
"movq 8%0, %%mm0 \n\t"
|
|
||||||
"movq 8%1, %%mm1 \n\t"
|
|
||||||
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
|
|
||||||
"movq %%mm2, 8%0 \n\t"
|
|
||||||
:"+m"(*block)
|
|
||||||
:"m"(*pixels)
|
|
||||||
:"memory");
|
|
||||||
pixels += line_size;
|
|
||||||
block += line_size;
|
|
||||||
}
|
|
||||||
while (--h);
|
|
||||||
}
|
|
||||||
|
|
||||||
// this routine is 'slightly' suboptimal but mostly unused
|
// this routine is 'slightly' suboptimal but mostly unused
|
||||||
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user