x86: float_dsp: add SSE version of vector_fmul_scalar()
This commit is contained in:
@@ -85,3 +85,32 @@ INIT_XMM sse
|
|||||||
VECTOR_FMAC_SCALAR
|
VECTOR_FMAC_SCALAR
|
||||||
INIT_YMM avx
|
INIT_YMM avx
|
||||||
VECTOR_FMAC_SCALAR
|
VECTOR_FMAC_SCALAR
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
; VECTOR_FMUL_SCALAR: emits vector_fmul_scalar, which computes
; dst[i] = src[i] * mul, one full vector register (mmsize bytes) per iteration.
; The loop walks from the end of the buffers toward the start.
; NOTE(review): the loop body runs before the first bounds check and only whole
; vectors are processed, so len must be > 0 and a multiple of mmsize/4 floats.
; NOTE(review): mova is presumably x86inc's aligned move, so src and dst would
; need to be mmsize-aligned -- confirm against x86inc and the callers.
%macro VECTOR_FMUL_SCALAR 0
|
||||||
|
; On UNIX64 only dst, src and len are named arguments; mul is not loaded
; explicitly (presumably it already arrives in xmm0 under that ABI -- confirm).
%if UNIX64
|
||||||
|
cglobal vector_fmul_scalar, 3,3,2, dst, src, len
|
||||||
|
%else
|
||||||
|
; Everywhere else mul is a named argument, so len becomes the fourth one.
cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
|
||||||
|
%endif
|
||||||
|
%if ARCH_X86_32
|
||||||
|
; 32-bit x86: fetch the scalar from the mul argument into m0.
movss m0, mulm
|
||||||
|
%elif WIN64
|
||||||
|
; WIN64: exchange the names of registers 0 and 2 so that m0 refers to the
; register holding mul (presumably xmm2 as the third argument -- confirm).
SWAP 0, 2
|
||||||
|
%endif
|
||||||
|
; Broadcast the low float of m0 (the scalar) into every lane.
shufps m0, m0, 0
|
||||||
|
; lenq = len*4 - mmsize: byte offset of the last vector in the buffers.
lea lenq, [lend*4-mmsize]
|
||||||
|
.loop:
|
||||||
|
; Load one vector from src at the current offset.
mova m1, [srcq+lenq]
|
||||||
|
; Multiply every lane by the broadcast scalar.
mulps m1, m0
|
||||||
|
; Store the product to dst at the same offset.
mova [dstq+lenq], m1
|
||||||
|
; Step back one vector; keep looping while the offset is still >= 0.
sub lenq, mmsize
|
||||||
|
jge .loop
|
||||||
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Instantiate the macro once, using XMM registers and the sse suffix.
INIT_XMM sse
|
||||||
|
VECTOR_FMUL_SCALAR
|
||||||
|
@@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
|
|||||||
extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
||||||
int len);
|
int len);
|
||||||
|
|
||||||
|
/* SSE routine defined outside this file; ff_float_dsp_init_x86() installs it
 * as fdsp->vector_fmul_scalar when EXTERNAL_SSE(mm_flags) is true.
 * NOTE(review): presumably the symbol produced by the VECTOR_FMUL_SCALAR
 * assembly macro (dst[i] = src[i] * mul for len floats) -- confirm. */
extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
|
||||||
|
int len);
|
||||||
|
|
||||||
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int mm_flags = av_get_cpu_flags();
|
||||||
@@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(mm_flags)) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_sse;
|
fdsp->vector_fmul = ff_vector_fmul_sse;
|
||||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||||
|
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(mm_flags)) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||||
|
Reference in New Issue
Block a user