Merge "convolve_copy_sse2: replace SSE w/SSE2 code"
This commit is contained in:
commit
e3efed7f4c
@ -13,15 +13,21 @@
|
||||
SECTION .text
|
||||
|
||||
%macro convolve_fn 1-2
|
||||
INIT_XMM sse2
|
||||
%ifidn %1, avg
|
||||
%define AUX_XMM_REGS 4
|
||||
%else
|
||||
%define AUX_XMM_REGS 0
|
||||
%endif
|
||||
%ifidn %2, highbd
|
||||
%define pavg pavgw
|
||||
cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
|
||||
fx, fxs, fy, fys, w, h, bd
|
||||
cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
|
||||
dst, dst_stride, \
|
||||
fx, fxs, fy, fys, w, h, bd
|
||||
%else
|
||||
%define pavg pavgb
|
||||
cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
|
||||
fx, fxs, fy, fys, w, h
|
||||
cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
|
||||
dst, dst_stride, \
|
||||
fx, fxs, fy, fys, w, h
|
||||
%endif
|
||||
mov r4d, dword wm
|
||||
%ifidn %2, highbd
|
||||
@ -152,38 +158,11 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
|
||||
jnz .loop16
|
||||
RET
|
||||
|
||||
INIT_MMX sse
|
||||
.w8:
|
||||
mov r4d, dword hm
|
||||
lea r5q, [src_strideq*3]
|
||||
lea r6q, [dst_strideq*3]
|
||||
.loop8:
|
||||
movu m0, [srcq]
|
||||
movu m1, [srcq+src_strideq]
|
||||
movu m2, [srcq+src_strideq*2]
|
||||
movu m3, [srcq+r5q]
|
||||
lea srcq, [srcq+src_strideq*4]
|
||||
%ifidn %1, avg
|
||||
pavg m0, [dstq]
|
||||
pavg m1, [dstq+dst_strideq]
|
||||
pavg m2, [dstq+dst_strideq*2]
|
||||
pavg m3, [dstq+r6q]
|
||||
%endif
|
||||
mova [dstq ], m0
|
||||
mova [dstq+dst_strideq ], m1
|
||||
mova [dstq+dst_strideq*2], m2
|
||||
mova [dstq+r6q ], m3
|
||||
lea dstq, [dstq+dst_strideq*4]
|
||||
sub r4d, 4
|
||||
jnz .loop8
|
||||
RET
|
||||
|
||||
%ifnidn %2, highbd
|
||||
.w4:
|
||||
mov r4d, dword hm
|
||||
lea r5q, [src_strideq*3]
|
||||
lea r6q, [dst_strideq*3]
|
||||
.loop4:
|
||||
movh m0, [srcq]
|
||||
movh m1, [srcq+src_strideq]
|
||||
movh m2, [srcq+src_strideq*2]
|
||||
@ -205,11 +184,42 @@ INIT_MMX sse
|
||||
movh [dstq+r6q ], m3
|
||||
lea dstq, [dstq+dst_strideq*4]
|
||||
sub r4d, 4
|
||||
jnz .loop8
|
||||
RET
|
||||
|
||||
%ifnidn %2, highbd
|
||||
.w4:
|
||||
mov r4d, dword hm
|
||||
lea r5q, [src_strideq*3]
|
||||
lea r6q, [dst_strideq*3]
|
||||
.loop4:
|
||||
movd m0, [srcq]
|
||||
movd m1, [srcq+src_strideq]
|
||||
movd m2, [srcq+src_strideq*2]
|
||||
movd m3, [srcq+r5q]
|
||||
lea srcq, [srcq+src_strideq*4]
|
||||
%ifidn %1, avg
|
||||
movd m4, [dstq]
|
||||
movd m5, [dstq+dst_strideq]
|
||||
movd m6, [dstq+dst_strideq*2]
|
||||
movd m7, [dstq+r6q]
|
||||
pavg m0, m4
|
||||
pavg m1, m5
|
||||
pavg m2, m6
|
||||
pavg m3, m7
|
||||
%endif
|
||||
movd [dstq ], m0
|
||||
movd [dstq+dst_strideq ], m1
|
||||
movd [dstq+dst_strideq*2], m2
|
||||
movd [dstq+r6q ], m3
|
||||
lea dstq, [dstq+dst_strideq*4]
|
||||
sub r4d, 4
|
||||
jnz .loop4
|
||||
RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
convolve_fn copy
|
||||
convolve_fn avg
|
||||
%if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
Loading…
x
Reference in New Issue
Block a user