x86: float dsp: butterflies_float SSE
97c -> 49c. Some codecs could benefit from more unrolling, but AAC doesn't. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
295ce83e2f
commit
1a4007964c
@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
|||||||
%endif
|
%endif
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
|
||||||
|
; void ff_butterflies_float(float *src0, float *src1, int len);
;
; In-place butterfly over two float arrays. For every index i processed:
;     src0[i] <- src0[i] + src1[i]
;     src1[i] <- src0[i] - src1[i]      (using the original src0[i])
; Returns immediately when len is 0.
;
; NOTE(review): the loop uses mova (aligned vector move) and advances by
; mmsize bytes per iteration, so it presumably requires both buffers to be
; mmsize-aligned and len to be a multiple of the number of floats per vector
; -- confirm against the C-side contract for butterflies_float.
|
||||||
|
;-----------------------------------------------------------------------------
|
||||||
|
; x86inc setup macro; presumably selects XMM registers / the SSE instruction
; set for the function that follows -- confirm against x86inc.asm.
INIT_XMM sse
|
||||||
|
; Presumably "3,3,3" = 3 arguments, 3 general-purpose registers, 3 vector
; registers (x86inc cglobal convention -- confirm); the body below only uses
; src0q/src1q/lenq and m0..m2.
cglobal butterflies_float, 3,3,3, src0, src1, len
|
||||||
|
; Presumably widens the 32-bit int argument len to full register width where
; the ABI needs it (x86inc helper macro -- confirm).
movsxdifnidn lenq, lend
|
||||||
|
; Nothing to do for len == 0: skip straight to the return.
test lenq, lenq
|
||||||
|
jz .end
|
||||||
|
; len *= 4: turn the element count into a byte count (the prototype above
; takes float pointers).
shl lenq, 2
|
||||||
|
; Move both base pointers to the end of their buffers ...
lea src0q, [src0q + lenq]
|
||||||
|
lea src1q, [src1q + lenq]
|
||||||
|
; ... and index with a negative byte offset that counts up toward zero, so
; the add at the bottom of the loop also serves as the termination test.
neg lenq
|
||||||
|
.loop:
|
||||||
|
; Load one vector from each input.
mova m0, [src0q + lenq]
|
||||||
|
mova m1, [src1q + lenq]
|
||||||
|
; m2 = src0 - src1; computed before m0 is overwritten by the sum.
subps m2, m0, m1
|
||||||
|
; m0 = src0 + src1.
addps m0, m0, m1
|
||||||
|
; The difference goes back to src1, the sum back to src0.
mova [src1q + lenq], m2
|
||||||
|
mova [src0q + lenq], m0
|
||||||
|
; Advance by one vector; keep looping while the offset is still negative.
add lenq, mmsize
|
||||||
|
jl .loop
|
||||||
|
.end:
|
||||||
|
; x86inc return macro (presumably a branch-target-safe form of ret -- confirm).
REP_RET
|
||||||
|
@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
|||||||
|
|
||||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||||
|
|
||||||
|
/* SSE butterflies_float routine, installed as fdsp->butterflies_float when
 * SSE is available. Presumably the symbol emitted by the
 * `INIT_XMM sse` / `cglobal butterflies_float` assembly -- confirm. */
void ff_butterflies_float_sse(float *src0, float *src1, int len);
|
||||||
|
|
||||||
#if HAVE_6REGS && HAVE_INLINE_ASM
|
#if HAVE_6REGS && HAVE_INLINE_ASM
|
||||||
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
|
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
|
||||||
const float *src1, const float *win,
|
const float *src1, const float *win,
|
||||||
@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
||||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
||||||
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||||
|
fdsp->butterflies_float = ff_butterflies_float_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(mm_flags)) {
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user