x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high
This commit is contained in:
parent
08aec7c1bd
commit
53ada3af62
@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
|
|||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
|
|
||||||
cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
|
cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
|
||||||
|
%else
|
||||||
|
cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
|
||||||
|
%endif
|
||||||
movq m2, [coefq]
|
movq m2, [coefq]
|
||||||
|
%if ARCH_X86_64
|
||||||
DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
|
DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
|
||||||
|
xor offsetq, offsetq
|
||||||
|
%else
|
||||||
|
DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
|
||||||
|
%define linesized r4mp
|
||||||
|
%endif
|
||||||
|
|
||||||
pshufd m0, m2, q0000
|
pshufd m0, m2, q0000
|
||||||
SPLATW m2, m2, 2
|
SPLATW m2, m2, 2
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mov offsetq, 0
|
|
||||||
mov in_lines_cur2q, [in_lines_cur0q+gprsize*2]
|
mov in_lines_cur2q, [in_lines_cur0q+gprsize*2]
|
||||||
mov in_lines_cur1q, [in_lines_cur0q+gprsize]
|
mov in_lines_cur1q, [in_lines_cur0q+gprsize]
|
||||||
mov in_lines_cur0q, [in_lines_cur0q]
|
mov in_lines_cur0q, [in_lines_cur0q]
|
||||||
@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
|
|||||||
mov in_lines_adj1q, [in_lines_adj0q+gprsize]
|
mov in_lines_adj1q, [in_lines_adj0q+gprsize]
|
||||||
mov in_lines_adj0q, [in_lines_adj0q]
|
mov in_lines_adj0q, [in_lines_adj0q]
|
||||||
|
|
||||||
|
%if ARCH_X86_32
|
||||||
|
sub in_lines_cur1q, in_lines_cur0q
|
||||||
|
sub in_lines_cur2q, in_lines_cur0q
|
||||||
|
sub in_lines_adj0q, in_lines_cur0q
|
||||||
|
sub in_lines_adj1q, in_lines_cur0q
|
||||||
|
sub in_lines_adj2q, in_lines_cur0q
|
||||||
|
%define offsetq in_lines_cur0q
|
||||||
|
%endif
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
|
%if ARCH_X86_64
|
||||||
movh m3, [in_lines_cur0q+offsetq]
|
movh m3, [in_lines_cur0q+offsetq]
|
||||||
|
%else
|
||||||
|
movh m3, [in_lines_cur0q]
|
||||||
|
%endif
|
||||||
movh m4, [in_lines_cur1q+offsetq]
|
movh m4, [in_lines_cur1q+offsetq]
|
||||||
punpcklbw m3, m7
|
punpcklbw m3, m7
|
||||||
punpcklbw m4, m7
|
punpcklbw m4, m7
|
||||||
@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
|
|||||||
pmaddwd m6, m2
|
pmaddwd m6, m2
|
||||||
paddd m3, m5
|
paddd m3, m5
|
||||||
paddd m4, m6
|
paddd m4, m6
|
||||||
|
%if ARCH_X86_64
|
||||||
paddd m3, [work_lineq+offsetq*4]
|
paddd m3, [work_lineq+offsetq*4]
|
||||||
paddd m4, [work_lineq+offsetq*4+mmsize]
|
paddd m4, [work_lineq+offsetq*4+mmsize]
|
||||||
mova [work_lineq+offsetq*4], m3
|
mova [work_lineq+offsetq*4], m3
|
||||||
mova [work_lineq+offsetq*4+mmsize], m4
|
mova [work_lineq+offsetq*4+mmsize], m4
|
||||||
|
%else
|
||||||
|
paddd m3, [work_lineq]
|
||||||
|
paddd m4, [work_lineq+mmsize]
|
||||||
|
mova [work_lineq], m3
|
||||||
|
mova [work_lineq+mmsize], m4
|
||||||
|
add work_lineq, mmsize*2
|
||||||
|
%endif
|
||||||
add offsetq, mmsize/2
|
add offsetq, mmsize/2
|
||||||
sub linesized, mmsize/2
|
sub linesized, mmsize/2
|
||||||
jg .loop
|
jg .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
|
||||||
cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
|
cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
|
||||||
movq m0, [coefq+0]
|
movq m0, [coefq+0]
|
||||||
movd m4, [coefq+8]
|
movd m4, [coefq+8]
|
||||||
|
@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
|
|||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
dsp->filter_simple_low = ff_w3fdif_simple_low_sse2;
|
dsp->filter_simple_low = ff_w3fdif_simple_low_sse2;
|
||||||
|
dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
|
||||||
dsp->filter_complex_low = ff_w3fdif_complex_low_sse2;
|
dsp->filter_complex_low = ff_w3fdif_complex_low_sse2;
|
||||||
dsp->filter_scale = ff_w3fdif_scale_sse2;
|
dsp->filter_scale = ff_w3fdif_scale_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
|
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
|
||||||
dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
|
|
||||||
dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
|
dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user