fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
This commit is contained in:
parent
4e8e262476
commit
aad3429d4e
@ -112,6 +112,58 @@ FLOAT_TO_INT16 3dnow, 0
|
|||||||
%undef cvtps2pi
|
%undef cvtps2pi
|
||||||
|
|
||||||
|
|
||||||
|
;-------------------------------------------------------------------------------
|
||||||
|
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
|
||||||
|
;-------------------------------------------------------------------------------
|
||||||
|
%macro FLOAT_TO_INT16_INTERLEAVE2 1
|
||||||
|
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
|
||||||
|
lea lenq, [4*r2q]
|
||||||
|
mov src1q, [src0q+gprsize]
|
||||||
|
mov src0q, [src0q]
|
||||||
|
add dstq, lenq
|
||||||
|
add src0q, lenq
|
||||||
|
add src1q, lenq
|
||||||
|
neg lenq
|
||||||
|
.loop:
|
||||||
|
%ifidn %1, sse2
|
||||||
|
cvtps2dq m0, [src0q+lenq]
|
||||||
|
cvtps2dq m1, [src1q+lenq]
|
||||||
|
packssdw m0, m1
|
||||||
|
movhlps m1, m0
|
||||||
|
punpcklwd m0, m1
|
||||||
|
mova [dstq+lenq], m0
|
||||||
|
%else
|
||||||
|
cvtps2pi m0, [src0q+lenq ]
|
||||||
|
cvtps2pi m1, [src0q+lenq+8]
|
||||||
|
cvtps2pi m2, [src1q+lenq ]
|
||||||
|
cvtps2pi m3, [src1q+lenq+8]
|
||||||
|
packssdw m0, m1
|
||||||
|
packssdw m2, m3
|
||||||
|
mova m1, m0
|
||||||
|
punpcklwd m0, m2
|
||||||
|
punpckhwd m1, m2
|
||||||
|
mova [dstq+lenq ], m0
|
||||||
|
mova [dstq+lenq+8], m1
|
||||||
|
%endif
|
||||||
|
add lenq, 16
|
||||||
|
js .loop
|
||||||
|
%ifnidn %1, sse2
|
||||||
|
emms
|
||||||
|
%endif
|
||||||
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
INIT_MMX
|
||||||
|
%define cvtps2pi pf2id
|
||||||
|
FLOAT_TO_INT16_INTERLEAVE2 3dnow
|
||||||
|
%undef cvtps2pi
|
||||||
|
%define movdqa movaps
|
||||||
|
FLOAT_TO_INT16_INTERLEAVE2 sse
|
||||||
|
%undef movdqa
|
||||||
|
INIT_XMM
|
||||||
|
FLOAT_TO_INT16_INTERLEAVE2 sse2
|
||||||
|
|
||||||
|
|
||||||
%macro PSWAPD_SSE 2
|
%macro PSWAPD_SSE 2
|
||||||
pshufw %1, %2, 0x4e
|
pshufw %1, %2, 0x4e
|
||||||
%endmacro
|
%endmacro
|
||||||
|
@ -35,13 +35,17 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
|
|||||||
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
|
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
|
||||||
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
|
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
|
||||||
|
|
||||||
|
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
|
||||||
|
void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
|
||||||
|
void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
|
||||||
|
|
||||||
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
|
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
|
||||||
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
|
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
|
||||||
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
|
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
|
||||||
|
|
||||||
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
|
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
|
||||||
|
|
||||||
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
|
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
|
||||||
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
|
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
|
||||||
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
|
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
|
||||||
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
|
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
|
||||||
@ -57,71 +61,16 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
|
|||||||
if(channels==1)\
|
if(channels==1)\
|
||||||
ff_float_to_int16_##cpu(dst, src[0], len);\
|
ff_float_to_int16_##cpu(dst, src[0], len);\
|
||||||
else if(channels==2){\
|
else if(channels==2){\
|
||||||
x86_reg reglen = len; \
|
ff_float_to_int16_interleave2_##cpu(dst, src, len);\
|
||||||
const float *src0 = src[0];\
|
|
||||||
const float *src1 = src[1];\
|
|
||||||
__asm__ volatile(\
|
|
||||||
"shl $2, %0 \n"\
|
|
||||||
"add %0, %1 \n"\
|
|
||||||
"add %0, %2 \n"\
|
|
||||||
"add %0, %3 \n"\
|
|
||||||
"neg %0 \n"\
|
|
||||||
body\
|
|
||||||
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
|
|
||||||
);\
|
|
||||||
}else if(channels==6){\
|
}else if(channels==6){\
|
||||||
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
|
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
|
||||||
}else\
|
}else\
|
||||||
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
|
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
|
||||||
}
|
}
|
||||||
|
|
||||||
FLOAT_TO_INT16_INTERLEAVE(3dnow,
|
FLOAT_TO_INT16_INTERLEAVE(3dnow)
|
||||||
"1: \n"
|
FLOAT_TO_INT16_INTERLEAVE(sse)
|
||||||
"pf2id (%2,%0), %%mm0 \n"
|
FLOAT_TO_INT16_INTERLEAVE(sse2)
|
||||||
"pf2id 8(%2,%0), %%mm1 \n"
|
|
||||||
"pf2id (%3,%0), %%mm2 \n"
|
|
||||||
"pf2id 8(%3,%0), %%mm3 \n"
|
|
||||||
"packssdw %%mm1, %%mm0 \n"
|
|
||||||
"packssdw %%mm3, %%mm2 \n"
|
|
||||||
"movq %%mm0, %%mm1 \n"
|
|
||||||
"punpcklwd %%mm2, %%mm0 \n"
|
|
||||||
"punpckhwd %%mm2, %%mm1 \n"
|
|
||||||
"movq %%mm0, (%1,%0)\n"
|
|
||||||
"movq %%mm1, 8(%1,%0)\n"
|
|
||||||
"add $16, %0 \n"
|
|
||||||
"js 1b \n"
|
|
||||||
"femms \n"
|
|
||||||
)
|
|
||||||
|
|
||||||
FLOAT_TO_INT16_INTERLEAVE(sse,
|
|
||||||
"1: \n"
|
|
||||||
"cvtps2pi (%2,%0), %%mm0 \n"
|
|
||||||
"cvtps2pi 8(%2,%0), %%mm1 \n"
|
|
||||||
"cvtps2pi (%3,%0), %%mm2 \n"
|
|
||||||
"cvtps2pi 8(%3,%0), %%mm3 \n"
|
|
||||||
"packssdw %%mm1, %%mm0 \n"
|
|
||||||
"packssdw %%mm3, %%mm2 \n"
|
|
||||||
"movq %%mm0, %%mm1 \n"
|
|
||||||
"punpcklwd %%mm2, %%mm0 \n"
|
|
||||||
"punpckhwd %%mm2, %%mm1 \n"
|
|
||||||
"movq %%mm0, (%1,%0)\n"
|
|
||||||
"movq %%mm1, 8(%1,%0)\n"
|
|
||||||
"add $16, %0 \n"
|
|
||||||
"js 1b \n"
|
|
||||||
"emms \n"
|
|
||||||
)
|
|
||||||
|
|
||||||
FLOAT_TO_INT16_INTERLEAVE(sse2,
|
|
||||||
"1: \n"
|
|
||||||
"cvtps2dq (%2,%0), %%xmm0 \n"
|
|
||||||
"cvtps2dq (%3,%0), %%xmm1 \n"
|
|
||||||
"packssdw %%xmm1, %%xmm0 \n"
|
|
||||||
"movhlps %%xmm0, %%xmm1 \n"
|
|
||||||
"punpcklwd %%xmm1, %%xmm0 \n"
|
|
||||||
"movdqa %%xmm0, (%1,%0) \n"
|
|
||||||
"add $16, %0 \n"
|
|
||||||
"js 1b \n"
|
|
||||||
)
|
|
||||||
|
|
||||||
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
|
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
|
||||||
if(channels==6)
|
if(channels==6)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user