vp9/x86: 4x4 iadst SIMD (ssse3) variants.

Cycle measurements for intra itxfm_4x4_add on ped1080p.webm:
idct_idct:    66 -> 67 cycles (unchanged; the difference is measurement noise)
idct_iadst:  199 -> 79 cycles
iadst_idct:  165 -> 70 cycles
iadst_iadst: 183 -> 82 cycles
This commit is contained in:
Ronald S. Bultje
2014-01-20 15:30:22 -05:00
parent baf47020cd
commit d43efa68bd
2 changed files with 73 additions and 1 deletions

View File

@@ -166,7 +166,7 @@ itxfm_func(iadst, idct, size, opt); \
itxfm_func(idct, iadst, size, opt); \
itxfm_func(iadst, iadst, size, opt)
itxfm_func(idct, idct, 4, ssse3);
itxfm_funcs(4, ssse3);
itxfm_funcs(8, ssse3);
itxfm_funcs(8, avx);
itxfm_funcs(16, ssse3);
@@ -250,6 +250,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel3(0, put, ssse3);
init_subpel3(1, avg, ssse3);
dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3;