dsputil: use movups instead of movdqu in ff_emu_edge_core_sse()
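movdqu is an SSE2 instruction, whereas movups performs the same unaligned 128-bit load/store and only requires SSE. This allows emulated_edge_mc_sse() and gmc_sse() to be used under AV_CPU_FLAG_SSE.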
This commit is contained in:
parent
05d1e45d1f
commit
395f2e70dd
@ -2874,6 +2874,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
#if HAVE_YASM
|
||||
c->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||
c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
|
||||
|
||||
if (!high_bit_depth)
|
||||
c->emulated_edge_mc = emulated_edge_mc_sse;
|
||||
c->gmc = gmc_sse;
|
||||
#endif
|
||||
}
|
||||
if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW))
|
||||
@ -2894,10 +2898,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
c->apply_window_int16 = ff_apply_window_int16_sse2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!high_bit_depth)
|
||||
c->emulated_edge_mc = emulated_edge_mc_sse;
|
||||
c->gmc= gmc_sse;
|
||||
#endif
|
||||
}
|
||||
if (mm_flags & AV_CPU_FLAG_SSSE3) {
|
||||
|
@ -637,7 +637,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
|
||||
|
||||
%ifnidn %3, mmx
|
||||
%rep %2/16
|
||||
movdqu xmm %+ %%sxidx, [r1+%%src_off]
|
||||
movups xmm %+ %%sxidx, [r1+%%src_off]
|
||||
%assign %%src_off %%src_off+16
|
||||
%assign %%sxidx %%sxidx+1
|
||||
%endrep ; %2/16
|
||||
@ -686,7 +686,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
|
||||
|
||||
%ifnidn %3, mmx
|
||||
%rep %2/16
|
||||
movdqu [r0+%%dst_off], xmm %+ %%dxidx
|
||||
movups [r0+%%dst_off], xmm %+ %%dxidx
|
||||
%assign %%dst_off %%dst_off+16
|
||||
%assign %%dxidx %%dxidx+1
|
||||
%endrep ; %2/16
|
||||
@ -915,7 +915,7 @@ ALIGN 64
|
||||
%define linesize r2m
|
||||
V_COPY_NPX %1, mm0, movq, 8, 0xFFFFFFF8
|
||||
%else ; !mmx
|
||||
V_COPY_NPX %1, xmm0, movdqu, 16, 0xFFFFFFF0
|
||||
V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0
|
||||
%ifdef ARCH_X86_64
|
||||
%define linesize r2
|
||||
V_COPY_NPX %1, rax , mov, 8
|
||||
|
Loading…
Reference in New Issue
Block a user