vp9: 10/12bpp SIMD (sse2/ssse3/avx) for directional intra prediction.
This commit is contained in:
parent
26ece7a511
commit
061b67fb50
@ -85,3 +85,5 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x000
|
||||
0x0000001000000010ULL, 0x0000001000000010ULL };
|
||||
/* ff_pd_32: constant declared via DECLARE_ALIGNED with an alignment argument
 * of 32. It is initialised from four 64-bit literals, each packing the 32-bit
 * value 0x00000020 (decimal 32) twice, so every 32-bit lane holds 32. */
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL,
|
||||
0x0000002000000020ULL, 0x0000002000000020ULL };
|
||||
/* ff_pd_65535: constant declared via DECLARE_ALIGNED with an alignment
 * argument of 32. It is initialised from four 64-bit literals, each packing
 * the 32-bit value 0x0000ffff (decimal 65535) twice, so every 32-bit lane has
 * only its low 16 bits set. */
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
|
||||
0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL };
|
||||
|
@ -65,5 +65,6 @@ extern const xmm_reg ff_ps_neg;
|
||||
extern const ymm_reg ff_pd_1;
|
||||
extern const ymm_reg ff_pd_16;
|
||||
extern const ymm_reg ff_pd_32;
|
||||
extern const ymm_reg ff_pd_65535;
|
||||
|
||||
#endif /* AVCODEC_X86_CONSTANTS_H */
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
SECTION_RODATA 32
|
||||
|
||||
cextern pd_65535
|
||||
cextern pw_1023
|
||||
%define pw_pixel_max pw_1023
|
||||
cextern pw_16
|
||||
@ -42,7 +43,6 @@ unpad: times 8 dw 16*1022/32 ; needs to be mod 16
|
||||
tap1: times 4 dw 1, -5
|
||||
tap2: times 4 dw 20, 20
|
||||
tap3: times 4 dw -5, 1
|
||||
pd_0f: times 4 dd 0xffff
|
||||
|
||||
SECTION .text
|
||||
|
||||
@ -708,7 +708,7 @@ h%1_loop_op:
|
||||
psrad m1, 10
|
||||
psrad m2, 10
|
||||
pslld m2, 16
|
||||
pand m1, [pd_0f]
|
||||
pand m1, [pd_65535]
|
||||
por m1, m2
|
||||
%if num_mmregs <= 8
|
||||
pxor m0, m0
|
||||
|
@ -165,6 +165,10 @@ filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
|
||||
init_ipred_func(type, enum, 16, bpp, opt); \
|
||||
init_ipred_func(type, enum, 32, bpp, opt)
|
||||
|
||||
/* init_ipred_funcs(type, enum, bpp, opt): expands to one init_ipred_func()
 * invocation with the literal 4 as its third argument, followed by
 * init_8_16_32_ipred_funcs() with the same type/enum/bpp/opt.
 * The expansion has no trailing semicolon; the invocation site supplies it.
 * NOTE(review): 4 is presumably the block size, matching the 8/16/32 in the
 * sibling macro's name -- confirm against init_ipred_func's definition. */
#define init_ipred_funcs(type, enum, bpp, opt) \
|
||||
init_ipred_func(type, enum, 4, bpp, opt); \
|
||||
init_8_16_32_ipred_funcs(type, enum, bpp, opt)
|
||||
|
||||
void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp);
|
||||
void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp);
|
||||
void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp);
|
||||
|
@ -51,6 +51,18 @@ decl_ipred_fns(h, 16, mmxext, sse2);
|
||||
decl_ipred_fns(dc, 16, mmxext, sse2);
|
||||
decl_ipred_fns(dc_top, 16, mmxext, sse2);
|
||||
decl_ipred_fns(dc_left, 16, mmxext, sse2);
|
||||
|
||||
/* decl_ipred_dir_funcs(type): expands to three decl_ipred_fns() invocations
 * for the given type, each with 16 as the second argument and with the
 * (sse2, sse2), (ssse3, ssse3) and (avx, avx) argument pairs respectively.
 * The expansion has no trailing semicolon; the invocation site supplies it.
 * NOTE(review): the meaning of the two trailing arguments depends on
 * decl_ipred_fns, which is not visible here -- confirm before relying on it. */
#define decl_ipred_dir_funcs(type) \
|
||||
decl_ipred_fns(type, 16, sse2, sse2); \
|
||||
decl_ipred_fns(type, 16, ssse3, ssse3); \
|
||||
decl_ipred_fns(type, 16, avx, avx)
|
||||
|
||||
decl_ipred_dir_funcs(dl);
|
||||
decl_ipred_dir_funcs(dr);
|
||||
decl_ipred_dir_funcs(vl);
|
||||
decl_ipred_dir_funcs(vr);
|
||||
decl_ipred_dir_funcs(hu);
|
||||
decl_ipred_dir_funcs(hd);
|
||||
#endif /* HAVE_YASM */
|
||||
|
||||
av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
|
||||
@ -88,12 +100,33 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
|
||||
init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
|
||||
init_8_16_32_ipred_funcs(dc_top, TOP_DC, 16, sse2);
|
||||
init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
|
||||
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
|
||||
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
|
||||
init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
|
||||
init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
|
||||
init_ipred_funcs(hu, HOR_UP, 16, sse2);
|
||||
init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
|
||||
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
|
||||
init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
|
||||
init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
|
||||
init_ipred_funcs(hu, HOR_UP, 16, ssse3);
|
||||
init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
init_fpel_func(2, 0, 32, put, , avx);
|
||||
init_fpel_func(1, 0, 64, put, , avx);
|
||||
init_fpel_func(0, 0, 128, put, , avx);
|
||||
init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
|
||||
init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
|
||||
init_ipred_funcs(vl, VERT_LEFT, 16, avx);
|
||||
init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
|
||||
init_ipred_funcs(hu, HOR_UP, 16, avx);
|
||||
init_ipred_funcs(hd, HOR_DOWN, 16, avx);
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user