dsputil_mmx: put optimized gmc code back and avoid a VLA without loosing features.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
@@ -1929,10 +1929,15 @@ static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
|
|||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src,
|
||||||
int stride, int h, int ox, int oy,
|
int linesize, int block_w, int block_h,
|
||||||
int dxx, int dxy, int dyx, int dyy,
|
int src_x, int src_y, int w, int h);
|
||||||
int shift, int r, int width, int height)
|
|
||||||
|
static av_always_inline void gmc(uint8_t *dst, uint8_t *src,
|
||||||
|
int stride, int h, int ox, int oy,
|
||||||
|
int dxx, int dxy, int dyx, int dyy,
|
||||||
|
int shift, int r, int width, int height,
|
||||||
|
emulated_edge_mc_func *emu_edge_fn)
|
||||||
{
|
{
|
||||||
const int w = 8;
|
const int w = 8;
|
||||||
const int ix = ox >> (16 + shift);
|
const int ix = ox >> (16 + shift);
|
||||||
@@ -1947,6 +1952,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
|||||||
const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
|
const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
|
||||||
const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
|
const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
|
||||||
const uint64_t shift2 = 2 * shift;
|
const uint64_t shift2 = 2 * shift;
|
||||||
|
#define MAX_STRIDE 4096U
|
||||||
|
#define MAX_H 8U
|
||||||
|
uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
|
||||||
int x, y;
|
int x, y;
|
||||||
|
|
||||||
const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
|
const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
|
||||||
@@ -1957,9 +1965,8 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
|||||||
((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
|
((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
|
||||||
(oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift)
|
(oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift)
|
||||||
// uses more than 16 bits of subpel mv (only at huge resolution)
|
// uses more than 16 bits of subpel mv (only at huge resolution)
|
||||||
|| (dxx | dxy | dyx | dyy) & 15 ||
|
|| (dxx | dxy | dyx | dyy) & 15
|
||||||
(unsigned)ix >= width - w ||
|
|| h > MAX_H || stride > MAX_STRIDE) {
|
||||||
(unsigned)iy >= height - h) {
|
|
||||||
// FIXME could still use mmx for some of the rows
|
// FIXME could still use mmx for some of the rows
|
||||||
ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
|
ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
|
||||||
shift, r, width, height);
|
shift, r, width, height);
|
||||||
@@ -1967,6 +1974,11 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
|||||||
}
|
}
|
||||||
|
|
||||||
src += ix + iy * stride;
|
src += ix + iy * stride;
|
||||||
|
if ((unsigned)ix >= width - w ||
|
||||||
|
(unsigned)iy >= height - h) {
|
||||||
|
emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height);
|
||||||
|
src = edge_buf;
|
||||||
|
}
|
||||||
|
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"movd %0, %%mm6 \n\t"
|
"movd %0, %%mm6 \n\t"
|
||||||
@@ -2045,6 +2057,36 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if HAVE_YASM
|
||||||
|
#if ARCH_X86_32
|
||||||
|
static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
||||||
|
int stride, int h, int ox, int oy,
|
||||||
|
int dxx, int dxy, int dyx, int dyy,
|
||||||
|
int shift, int r, int width, int height)
|
||||||
|
{
|
||||||
|
gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
|
||||||
|
width, height, &emulated_edge_mc_mmx);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
static void gmc_sse(uint8_t *dst, uint8_t *src,
|
||||||
|
int stride, int h, int ox, int oy,
|
||||||
|
int dxx, int dxy, int dyx, int dyy,
|
||||||
|
int shift, int r, int width, int height)
|
||||||
|
{
|
||||||
|
gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
|
||||||
|
width, height, &emulated_edge_mc_sse);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static void gmc_mmx(uint8_t *dst, uint8_t *src,
|
||||||
|
int stride, int h, int ox, int oy,
|
||||||
|
int dxx, int dxy, int dyx, int dyy,
|
||||||
|
int shift, int r, int width, int height)
|
||||||
|
{
|
||||||
|
gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
|
||||||
|
width, height, &ff_emulated_edge_mc_8);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#define PREFETCH(name, op) \
|
#define PREFETCH(name, op) \
|
||||||
static void name(void *mem, int stride, int h) \
|
static void name(void *mem, int stride, int h) \
|
||||||
{ \
|
{ \
|
||||||
@@ -2545,7 +2587,9 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
|
|||||||
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
|
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ARCH_X86_32 || !HAVE_YASM
|
||||||
c->gmc = gmc_mmx;
|
c->gmc = gmc_mmx;
|
||||||
|
#endif
|
||||||
|
|
||||||
c->add_bytes = add_bytes_mmx;
|
c->add_bytes = add_bytes_mmx;
|
||||||
|
|
||||||
@@ -2800,6 +2844,9 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
|
|||||||
|
|
||||||
if (!high_bit_depth)
|
if (!high_bit_depth)
|
||||||
c->emulated_edge_mc = emulated_edge_mc_sse;
|
c->emulated_edge_mc = emulated_edge_mc_sse;
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
c->gmc = gmc_sse;
|
||||||
|
#endif
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user