x86/dcadec: add ff_lfe_fir1_float_{sse3,avx}
Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com> Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
37afeabd1b
commit
45d3af9059
@ -201,3 +201,82 @@ LFE_FIR0_FLOAT
|
||||
INIT_XMM fma3
|
||||
LFE_FIR0_FLOAT
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------
; LFE_FIR1_FLOAT
; Emits the function lfe_fir1_float for whatever instruction set was
; selected by the INIT_XMM that precedes the macro invocation.
;
; Named arguments (in order): samples, lfe, coeff, nblocks.
; cnt1 and cnt2 are two extra scratch registers used as byte offsets.
; Uses vector registers m0-m9 on x86_64 and m0-m7 otherwise.
;
; Per outer iteration the code:
;   - loads four 32-bit integers starting 3 elements before lfe and
;     converts them to float (m4), plus an element-reversed copy (m5);
;   - runs the inner loop, which on every pass reads four coefficient
;     vectors and stores 4 floats at an ascending offset (built from m5)
;     and 4 floats at a descending offset (built from m4);
;   - advances lfe by one element and samples by 128 elements.
; The outer loop runs (nblocks >> 2) times; it is tested at the bottom,
; so the body is executed at least once.
;
; NOTE(review): the byte arithmetic below (steps of 16, offsets +16/+32/+48)
; only lines up if sizeof_float, which is defined outside this view, is 4.
; NOTE(review): movaps is used on coeff and samples, so both are presumably
; 16-byte aligned by the caller - confirm.
;-----------------------------------------------------------------------
%macro LFE_FIR1_FLOAT 0
|
||||
cglobal lfe_fir1_float, 4, 6, 10, samples, lfe, coeff, nblocks, cnt1, cnt2
|
||||
; outer loop count = nblocks / 4
shr nblocksd, 2
|
||||
; step back 3 elements so the 4-element load at [lfeq] ends on the current one
sub lfeq, 3*sizeof_float
|
||||
; cnt1 = size in bytes of 64 floats (negated below to become the ascending offset)
mov cnt1d, 64*sizeof_float
|
||||
; cnt2 = byte offset of the last 16-byte slot of the second 64-float half
mov cnt2d, 64*sizeof_float-16
|
||||
; bias coeff by 4*cnt1 bytes so [coeffq+cnt1q*4] starts at the original coeff
lea coeffq, [coeffq+cnt1q*4]
|
||||
; bias samples by cnt1 bytes so [samplesq+cnt1q] starts at the original samples
add samplesq, cnt1q
|
||||
; cnt1 now runs from -64*sizeof_float up to 0
neg cnt1q
|
||||
|
||||
; ---- outer loop: one pass per 128 output floats ----
.loop:
|
||||
%if cpuflag(avx)
|
||||
; m4 = the four integers at [lfeq] converted to float (conversion reads memory directly)
cvtdq2ps m4, [lfeq]
|
||||
; m5 = m4 with its four elements in reversed order
shufps m5, m4, m4, q0123
|
||||
%elif cpuflag(sse2)
|
||||
; unaligned load first, then convert in-register
movu m4, [lfeq]
|
||||
cvtdq2ps m4, m4
|
||||
; m5 = m4 with its four elements in reversed order
pshufd m5, m4, q0123
|
||||
%endif
|
||||
|
||||
; ---- inner loop: 16 coefficients in, 4 + 4 output floats out ----
.inner_loop:
|
||||
; m6, m7 = first two coefficient vectors (kept for reuse with m4 below)
movaps m6, [coeffq+cnt1q*4 ]
|
||||
movaps m7, [coeffq+cnt1q*4+16]
|
||||
; element-wise products with the reversed input vector
mulps m0, m5, m6
|
||||
mulps m1, m5, m7
|
||||
%if ARCH_X86_64
|
||||
; x86_64: keep the third and fourth coefficient vectors in m8/m9 for reuse
movaps m8, [coeffq+cnt1q*4+32]
|
||||
movaps m9, [coeffq+cnt1q*4+48]
|
||||
mulps m2, m5, m8
|
||||
mulps m3, m5, m9
|
||||
%else
|
||||
; otherwise m8/m9 are not used: multiply straight from memory
mulps m2, m5, [coeffq+cnt1q*4+32]
|
||||
mulps m3, m5, [coeffq+cnt1q*4+48]
|
||||
%endif
|
||||
|
||||
; two rounds of horizontal adds: m0 = { sum(m0), sum(m1), sum(m2), sum(m3) }
haddps m0, m1
|
||||
haddps m2, m3
|
||||
haddps m0, m2
|
||||
; store 4 results at the ascending offset
movaps [samplesq+cnt1q], m0
|
||||
|
||||
; same coefficients again, this time against the non-reversed vector m4
mulps m6, m4
|
||||
mulps m7, m4
|
||||
%if ARCH_X86_64
|
||||
mulps m8, m4
|
||||
mulps m9, m4
|
||||
|
||||
; m6 = { sum(m6), sum(m7), sum(m8), sum(m9) }
haddps m6, m7
|
||||
haddps m8, m9
|
||||
haddps m6, m8
|
||||
%else
|
||||
; third and fourth coefficient vectors are re-read from memory here
mulps m2, m4, [coeffq+cnt1q*4+32]
|
||||
mulps m3, m4, [coeffq+cnt1q*4+48]
|
||||
|
||||
; m6 = { sum(m6), sum(m7), sum(m2), sum(m3) }
haddps m6, m7
|
||||
haddps m2, m3
|
||||
haddps m6, m2
|
||||
%endif
|
||||
; store 4 results at the descending offset
movaps [samplesq+cnt2q], m6
|
||||
|
||||
; descending offset moves back one vector, ascending offset forward one vector
sub cnt2d, 16
|
||||
add cnt1q, 16
|
||||
; flags come from the add above: keep looping while cnt1 is still negative
jl .inner_loop
|
||||
|
||||
; next input element, next 128 output floats
add lfeq, sizeof_float
|
||||
add samplesq, 128*sizeof_float
|
||||
; rewind both offsets to their starting values for the next pass
mov cnt1q, -64*sizeof_float
|
||||
mov cnt2d, 64*sizeof_float-16
|
||||
; flags come from the sub: repeat while the remaining count is > 0
sub nblocksd, 1
|
||||
jg .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
; Instantiate lfe_fir1_float for SSE3 (the macro body uses haddps, an SSE3 instruction).
INIT_XMM sse3
|
||||
LFE_FIR1_FLOAT
|
||||
; Second instance for AVX, still on XMM registers; only assembled when
; HAVE_AVX_EXTERNAL (a build-configuration symbol defined outside this view) is set.
%if HAVE_AVX_EXTERNAL
|
||||
INIT_XMM avx
|
||||
LFE_FIR1_FLOAT
|
||||
%endif
|
||||
|
@ -23,10 +23,13 @@
|
||||
|
||||
/*
 * Declares the prototypes of the two LFE FIR float routines for one
 * instruction-set suffix: ff_lfe_fir0_float_<opt> and ff_lfe_fir1_float_<opt>.
 * Both take the output sample buffer, the input LFE samples, the filter
 * coefficients and a block count, and return nothing.
 */
#define LFE_FIR_FLOAT_FUNC(opt) \
|
||||
void ff_lfe_fir0_float_##opt(float *pcm_samples, int32_t *lfe_samples, \
|
||||
const float *filter_coeff, ptrdiff_t npcmblocks); \
|
||||
void ff_lfe_fir1_float_##opt(float *pcm_samples, int32_t *lfe_samples, \
|
||||
const float *filter_coeff, ptrdiff_t npcmblocks);
|
||||
|
||||
/*
 * One declaration pair per suffix. NOTE(review): this declares a fir1
 * prototype for every suffix, while the assembly visible in this commit
 * only instantiates fir1 for sse3 and avx - the others are declarations only
 * unless defined elsewhere.
 */
LFE_FIR_FLOAT_FUNC(sse)
|
||||
LFE_FIR_FLOAT_FUNC(sse2)
|
||||
LFE_FIR_FLOAT_FUNC(sse3)
|
||||
LFE_FIR_FLOAT_FUNC(avx)
|
||||
LFE_FIR_FLOAT_FUNC(fma3)
|
||||
|
||||
@ -38,8 +41,12 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
|
||||
s->lfe_fir_float[0] = ff_lfe_fir0_float_sse;
|
||||
if (EXTERNAL_SSE2(cpu_flags))
|
||||
s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2;
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (EXTERNAL_SSE3(cpu_flags))
|
||||
s->lfe_fir_float[1] = ff_lfe_fir1_float_sse3;
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
s->lfe_fir_float[0] = ff_lfe_fir0_float_avx;
|
||||
s->lfe_fir_float[1] = ff_lfe_fir1_float_avx;
|
||||
}
|
||||
if (EXTERNAL_FMA3(cpu_flags))
|
||||
s->lfe_fir_float[0] = ff_lfe_fir0_float_fma3;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user