x86/vp9: add AVX and AVX2 MC
Roughly 25% faster MC than ssse3 for blocksizes 32 and 64. Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com> Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@@ -41,7 +41,8 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x004
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL };
|
||||
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL,
|
||||
0x0100010001000100ULL, 0x0100010001000100ULL };
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL };
|
||||
|
||||
Reference in New Issue
Block a user