x86/vf_maskedmerge: make ff_maskedmerge8_sse2 work on x86_32

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2015-12-23 20:37:37 -03:00
parent 470749703e
commit ce4c85de6a
2 changed files with 19 additions and 12 deletions

View File

@@ -22,7 +22,6 @@
%include "libavutil/x86/x86util.asm" %include "libavutil/x86/x86util.asm"
%if ARCH_X86_64
SECTION_RODATA SECTION_RODATA
pw_128: times 8 dw 128 pw_128: times 8 dw 128
@@ -31,24 +30,33 @@ pw_256: times 8 dw 256
SECTION .text SECTION .text
INIT_XMM sse2 INIT_XMM sse2
cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h %if ARCH_X86_64
cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
mov wd, dword wm
mov hd, dword hm
%else
cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
mov wd, r8m
%define olinesizeq r5mp
%define mlinesizeq r6mp
%define dlinesizeq r7mp
%define hd r9mp
%endif
mova m4, [pw_256] mova m4, [pw_256]
mova m5, [pw_128] mova m5, [pw_128]
pxor m6, m6 pxor m6, m6
movsxdifnidn wq, wd
add bsrcq, wq add bsrcq, wq
add osrcq, wq add osrcq, wq
add msrcq, wq add msrcq, wq
add dstq, wq add dstq, wq
neg wq neg wq
%define x r10q
.nextrow: .nextrow:
mov x, wq mov xq, wq
.loop: .loop:
movh m0, [bsrcq + x] movh m0, [bsrcq + xq]
movh m1, [osrcq + x] movh m1, [osrcq + xq]
movh m3, [msrcq + x] movh m3, [msrcq + xq]
mova m2, m4 mova m2, m4
punpcklbw m0, m6 punpcklbw m0, m6
punpcklbw m1, m6 punpcklbw m1, m6
@@ -60,8 +68,8 @@ cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize,
paddw m1, m5 paddw m1, m5
psrlw m1, 8 psrlw m1, 8
packuswb m1, m1 packuswb m1, m1
movh [dstq + x], m1 movh [dstq + xq], m1
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
add bsrcq, blinesizeq add bsrcq, blinesizeq
@@ -71,4 +79,3 @@ cglobal maskedmerge8, 10, 11, 7, 0, bsrc, osrc, msrc, dst, blinesize, olinesize,
sub hd, 1 sub hd, 1
jg .nextrow jg .nextrow
REP_RET REP_RET
%endif

View File

@@ -34,7 +34,7 @@ av_cold void ff_maskedmerge_init_x86(MaskedMergeContext *s)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && s->depth == 8) { if (EXTERNAL_SSE2(cpu_flags) && s->depth == 8) {
s->maskedmerge = ff_maskedmerge8_sse2; s->maskedmerge = ff_maskedmerge8_sse2;
} }
} }