x86/swr: replace sse4 instructions in pack_6ch with sse ones
There's no benefit from using blendps here except on CPUs with AVX, where it's faster than shufps according to Intel's documentation. As such, rename the sse4 functions to sse/sse2 and use shufps instead; blendps is kept only in the code assembled under cpuflag(avx).

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
93ab6693d8
commit
b385c4c6a3
@ -245,15 +245,27 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX
|
||||
mov%3 m4, [srcq+src4q]
|
||||
mov%3 m5, [srcq+src5q]
|
||||
%7 x,x,x,x,m7,x
|
||||
%if cpuflag(sse4)
|
||||
%if cpuflag(sse)
|
||||
SBUTTERFLYPS 0, 1, 6
|
||||
SBUTTERFLYPS 2, 3, 6
|
||||
SBUTTERFLYPS 4, 5, 6
|
||||
|
||||
%if cpuflag(avx)
|
||||
blendps m6, m4, m0, 1100b
|
||||
%else
|
||||
movaps m6, m4
|
||||
shufps m4, m0, q3210
|
||||
SWAP 4,6
|
||||
%endif
|
||||
movlhps m0, m2
|
||||
movhlps m4, m2
|
||||
%if cpuflag(avx)
|
||||
blendps m2, m5, m1, 1100b
|
||||
%else
|
||||
movaps m2, m5
|
||||
shufps m5, m1, q3210
|
||||
SWAP 2,5
|
||||
%endif
|
||||
movlhps m1, m3
|
||||
movhlps m5, m3
|
||||
|
||||
@ -380,6 +392,10 @@ CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
|
||||
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
|
||||
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
|
||||
|
||||
INIT_XMM sse
|
||||
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
|
||||
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
|
||||
|
||||
INIT_XMM sse2
|
||||
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
|
||||
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
|
||||
@ -431,6 +447,10 @@ UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
|
||||
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
|
||||
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
|
||||
|
||||
PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
|
||||
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
|
||||
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
|
||||
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
|
||||
|
||||
INIT_XMM ssse3
|
||||
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
|
||||
@ -440,15 +460,6 @@ UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
|
||||
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
|
||||
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
|
||||
|
||||
INIT_XMM sse4
|
||||
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
|
||||
PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
|
||||
|
||||
PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
|
||||
PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
|
||||
PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
|
||||
PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
|
||||
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_XMM avx
|
||||
PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
|
||||
|
@ -58,7 +58,12 @@ MULTI_CAPS_FUNC(SSE2, sse2)
|
||||
ac->simd_f = ff_pack_6ch_float_to_float_a_mmx;
|
||||
}
|
||||
}
|
||||
|
||||
if(EXTERNAL_SSE(mm_flags)) {
|
||||
if(channels == 6) {
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_pack_6ch_float_to_float_a_sse;
|
||||
}
|
||||
}
|
||||
if(EXTERNAL_SSE2(mm_flags)) {
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_int32_to_float_a_sse2;
|
||||
@ -105,6 +110,12 @@ MULTI_CAPS_FUNC(SSE2, sse2)
|
||||
if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT)
|
||||
ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2;
|
||||
}
|
||||
if(channels == 6) {
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_pack_6ch_int32_to_float_a_sse2;
|
||||
if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
|
||||
ac->simd_f = ff_pack_6ch_float_to_int32_a_sse2;
|
||||
}
|
||||
}
|
||||
if(EXTERNAL_SSSE3(mm_flags)) {
|
||||
if(channels == 2) {
|
||||
@ -116,16 +127,6 @@ MULTI_CAPS_FUNC(SSE2, sse2)
|
||||
ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
|
||||
}
|
||||
}
|
||||
if(EXTERNAL_SSE4(mm_flags)) {
|
||||
if(channels == 6) {
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_pack_6ch_float_to_float_a_sse4;
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_pack_6ch_int32_to_float_a_sse4;
|
||||
if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
|
||||
ac->simd_f = ff_pack_6ch_float_to_int32_a_sse4;
|
||||
}
|
||||
}
|
||||
if(EXTERNAL_AVX(mm_flags)) {
|
||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||
ac->simd_f = ff_int32_to_float_a_avx;
|
||||
|
Loading…
Reference in New Issue
Block a user