arm64: port synth_filter_float_neon from arm

~25% faster DTS decoding overall. The checkasm CPU cycle numbers are
not that useful since synth_filter_float() calls FFTContext.imdct_half().

                         cortex-a57   cortex-a53
synth_filter_float_c:    1866.2       3490.9
synth_filter_float_neon:  915.0       1531.5

With fftc.imdct_half forced to imdct_half_neon:
                         cortex-a57   cortex-a53
synth_filter_float_c:    1718.4       3025.3
synth_filter_float_neon:  926.2       1530.1
This commit is contained in:
Janne Grunau
2015-12-01 13:37:41 +01:00
parent c33c1fa8af
commit 705f5e5e15
6 changed files with 147 additions and 3 deletions

View File

@@ -60,6 +60,10 @@ av_cold void ff_synth_filter_init(SynthFilterContext *c)
{
c->synth_filter_float = synth_filter_float;
if (ARCH_ARM) ff_synth_filter_init_arm(c);
if (ARCH_X86) ff_synth_filter_init_x86(c);
if (ARCH_AARCH64)
ff_synth_filter_init_aarch64(c);
if (ARCH_ARM)
ff_synth_filter_init_arm(c);
if (ARCH_X86)
ff_synth_filter_init_x86(c);
}