x86: move horizontal add macros to x86util

Also port the relevant AVX2/XOP optimizations from x264, with permission
from the corresponding authors to relicense them under the LGPL.

Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Almer 2014-04-16 20:15:35 -03:00 committed by Michael Niedermayer
parent 443261cbbd
commit 76ed71a72b
2 changed files with 33 additions and 16 deletions

View File

@ -171,22 +171,6 @@ PRED4x4_HD
;-----------------------------------------------------------------------------
; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
; HADDD sum, junk
;   %1 (sum)  - register of packed dwords; its dword 0 receives the total.
;   %2 (junk) - scratch register, overwritten.
; The remaining lanes of %1 are left holding partial sums.
%macro HADDD 2 ; sum junk
%if mmsize == 16
    ; Fold the upper 64 bits of the register onto the lower 64 bits.
    movhlps %2, %1
    paddd   %1, %2
    pshuflw %2, %1, 0xE
%else
    pshufw  %2, %1, 0xE
%endif
    ; The 0xE shuffle placed dword 1 of %1 into dword 0 of %2, so this
    ; last add completes the sum in dword 0 of %1.
    paddd   %1, %2
%endmacro
; HADDW sum, junk
;   %1 (sum)  - register of packed words; ends up with their total in its
;               low dword.
;   %2 (junk) - scratch register, passed through to HADDD.
%macro HADDW 2
; pmaddwd multiplies signed words and adds adjacent products into dwords.
; pw_1 is defined outside this view - presumably packed words of 1, which
; would turn this into a pairwise word add (TODO confirm).
pmaddwd %1, [pw_1]
; Reduce the resulting dwords to a single sum.
HADDD %1, %2
%endmacro
INIT_MMX mmxext
cglobal pred4x4_dc_10, 3, 3

View File

@ -273,6 +273,39 @@
%endif
%endmacro
; HADDD sum, junk
;   %1 (sum)  - register of packed dwords (64-, 128- or 256-bit); its
;               dword 0 receives the total.
;   %2 (junk) - scratch register, overwritten.
; The remaining lanes of %1 are left holding partial sums.
; For a 256-bit %1 both operands are narrowed to their xmm halves for the
; rest of the macro.
%macro HADDD 2 ; sum junk
%if sizeof%1 == 32
    ; 256-bit input: add the upper 128-bit lane onto the lower one, then
    ; continue on the xmm views of both registers. The %define lines alias
    ; the register names to their xmm forms until the %undef at the end.
%define %2 xmm%2
    vextracti128 %2, %1, 1
%define %1 xmm%1
    paddd   %1, %2
%endif
%if mmsize >= 16
%if cpuflag(xop) && sizeof%1 == 16
    ; XOP: sum each pair of dwords into a qword, so the movhlps/paddd
    ; below already yields the full total and the final fold is skipped.
    vphadddq %1, %1
%endif
    ; Fold the upper 64 bits onto the lower 64 bits.
    movhlps %2, %1
    paddd   %1, %2
%endif
%if notcpuflag(xop) || sizeof%1 != 16
    ; Final fold: bring dword 1 down next to dword 0 and add.
%if cpuflag(mmxext)
    PSHUFLW %2, %1, q0032
%else ; mmx
    ; The word shuffle needs mmxext or later; on plain MMX copy the
    ; register and shift the upper dword down instead. Only the junk
    ; lanes differ from the shuffle path, dword 0 of the sum is the same.
    mova    %2, %1
    psrlq   %2, 32
%endif
    paddd   %1, %2
%endif
    ; Drop the xmm aliases; they are only defined in the 256-bit case.
%undef %1
%undef %2
%endmacro
; HADDW reg, tmp
;   %1 (reg) - register of packed words; ends up with their total in its
;              low dword.
;   %2 (tmp) - scratch register, overwritten.
%macro HADDW 2 ; reg, tmp
%if notcpuflag(xop) || sizeof%1 != 16
    ; Generic path: pmaddwd against pw_1 (defined outside this view,
    ; presumably packed words of 1) merges adjacent words into dwords,
    ; then HADDD reduces the dwords.
    pmaddwd  %1, [pw_1]
    HADDD    %1, %2
%else
    ; XOP on a 128-bit register: vphaddwq sums each group of four words
    ; into a qword, leaving just the two halves to add together.
    vphaddwq %1, %1
    movhlps  %2, %1
    paddd    %1, %2
%endif
%endmacro
%macro PALIGNR 4-5
%if cpuflag(ssse3)
%if %0==5