x86: Use consistent 3dnowext function and macro name suffixes

Currently the code uses a wild mix of 3dn2/3dnow2/3dnowext suffixes.
Switch everything to "3dnowext", the more common name of the CPU flag
(as reported e.g. by the Linux kernel), so that the naming is uniform.
This commit is contained in:
Diego Biurrun 2012-08-01 15:31:43 +02:00
parent d3e0766fc0
commit ca844b7be9
7 changed files with 46 additions and 44 deletions

View File

@ -2358,9 +2358,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2],
} }
#if HAVE_6REGS #if HAVE_6REGS
static void vector_fmul_window_3dnow2(float *dst, const float *src0, static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win, const float *src1, const float *win,
int len) int len)
{ {
x86_reg i = -len * 4; x86_reg i = -len * 4;
x86_reg j = len * 4 - 8; x86_reg j = len * 4 - 8;
@ -2809,11 +2809,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
#endif #endif
} }
static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx,
int mm_flags) int mm_flags)
{ {
#if HAVE_6REGS && HAVE_INLINE_ASM #if HAVE_6REGS && HAVE_INLINE_ASM
c->vector_fmul_window = vector_fmul_window_3dnow2; c->vector_fmul_window = vector_fmul_window_3dnowext;
#endif #endif
} }
@ -3051,7 +3051,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
dsputil_init_3dnow(c, avctx, mm_flags); dsputil_init_3dnow(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT)
dsputil_init_3dnow2(c, avctx, mm_flags); dsputil_init_3dnowext(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE)
dsputil_init_sse(c, avctx, mm_flags); dsputil_init_sse(c, avctx, mm_flags);

View File

@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
} }
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
/* 3DNowEx for K7 */ /* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dnow2; s->imdct_calc = ff_imdct_calc_3dnowext;
s->imdct_half = ff_imdct_half_3dnow2; s->imdct_half = ff_imdct_half_3dnowext;
s->fft_calc = ff_fft_calc_3dnow2; s->fft_calc = ff_fft_calc_3dnowext;
} }
#endif #endif
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {

View File

@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);

View File

@ -93,14 +93,14 @@ cextern cos_ %+ i
SECTION_TEXT SECTION_TEXT
%macro T2_3DN 4 ; z0, z1, mem0, mem1 %macro T2_3DNOW 4 ; z0, z1, mem0, mem1
mova %1, %3 mova %1, %3
mova %2, %1 mova %2, %1
pfadd %1, %4 pfadd %1, %4
pfsub %2, %4 pfsub %2, %4
%endmacro %endmacro
%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1 %macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1
mova %5, %3 mova %5, %3
pfsub %3, %4 pfsub %3, %4
pfadd %5, %4 ; {t6,t5} pfadd %5, %4 ; {t6,t5}
@ -444,13 +444,13 @@ fft16_sse:
ret ret
%macro FFT48_3DN 0 %macro FFT48_3DNOW 0
align 16 align 16
fft4 %+ SUFFIX: fft4 %+ SUFFIX:
T2_3DN m0, m1, Z(0), Z(1) T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2) mova m2, Z(2)
mova m3, Z(3) mova m3, Z(3)
T4_3DN m0, m1, m2, m3, m4, m5 T4_3DNOW m0, m1, m2, m3, m4, m5
PUNPCK m0, m1, m4 PUNPCK m0, m1, m4
PUNPCK m2, m3, m5 PUNPCK m2, m3, m5
mova Z(0), m0 mova Z(0), m0
@ -461,14 +461,14 @@ fft4 %+ SUFFIX:
align 16 align 16
fft8 %+ SUFFIX: fft8 %+ SUFFIX:
T2_3DN m0, m1, Z(0), Z(1) T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2) mova m2, Z(2)
mova m3, Z(3) mova m3, Z(3)
T4_3DN m0, m1, m2, m3, m4, m5 T4_3DNOW m0, m1, m2, m3, m4, m5
mova Z(0), m0 mova Z(0), m0
mova Z(2), m2 mova Z(2), m2
T2_3DN m4, m5, Z(4), Z(5) T2_3DNOW m4, m5, Z(4), Z(5)
T2_3DN m6, m7, Z2(6), Z2(7) T2_3DNOW m6, m7, Z2(6), Z2(7)
PSWAPD m0, m5 PSWAPD m0, m5
PSWAPD m2, m7 PSWAPD m2, m7
pxor m0, [ps_m1p1] pxor m0, [ps_m1p1]
@ -477,12 +477,12 @@ fft8 %+ SUFFIX:
pfadd m7, m2 pfadd m7, m2
pfmul m5, [ps_root2] pfmul m5, [ps_root2]
pfmul m7, [ps_root2] pfmul m7, [ps_root2]
T4_3DN m1, m3, m5, m7, m0, m2 T4_3DNOW m1, m3, m5, m7, m0, m2
mova Z(5), m5 mova Z(5), m5
mova Z2(7), m7 mova Z2(7), m7
mova m0, Z(0) mova m0, Z(0)
mova m2, Z(2) mova m2, Z(2)
T4_3DN m0, m2, m4, m6, m5, m7 T4_3DNOW m0, m2, m4, m6, m5, m7
PUNPCK m0, m1, m5 PUNPCK m0, m1, m5
PUNPCK m2, m3, m7 PUNPCK m2, m3, m7
mova Z(0), m0 mova Z(0), m0
@ -500,7 +500,7 @@ fft8 %+ SUFFIX:
%if ARCH_X86_32 %if ARCH_X86_32
%macro PSWAPD 2 %macro PSWAPD 2
%if cpuflag(3dnow2) %if cpuflag(3dnowext)
pswapd %1, %2 pswapd %1, %2
%elifidn %1, %2 %elifidn %1, %2
movd [r0+12], %1 movd [r0+12], %1
@ -512,11 +512,11 @@ fft8 %+ SUFFIX:
%endif %endif
%endmacro %endmacro
INIT_MMX 3dnow2 INIT_MMX 3dnowext
FFT48_3DN FFT48_3DNOW
INIT_MMX 3dnow INIT_MMX 3dnow
FFT48_3DN FFT48_3DNOW
%endif %endif
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
@ -633,7 +633,7 @@ cglobal fft_calc, 2,5,8
%if ARCH_X86_32 %if ARCH_X86_32
INIT_MMX 3dnow INIT_MMX 3dnow
FFT_CALC_FUNC FFT_CALC_FUNC
INIT_MMX 3dnow2 INIT_MMX 3dnowext
FFT_CALC_FUNC FFT_CALC_FUNC
%endif %endif
INIT_XMM sse INIT_XMM sse
@ -727,7 +727,7 @@ cglobal imdct_calc, 3,5,3
%if ARCH_X86_32 %if ARCH_X86_32
INIT_MMX 3dnow INIT_MMX 3dnow
IMDCT_CALC_FUNC IMDCT_CALC_FUNC
INIT_MMX 3dnow2 INIT_MMX 3dnowext
IMDCT_CALC_FUNC IMDCT_CALC_FUNC
%endif %endif
@ -743,8 +743,8 @@ INIT_MMX 3dnow
%define unpckhps punpckhdq %define unpckhps punpckhdq
DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q]
DECL_PASS pass_interleave_3dnow, PASS_BIG 0 DECL_PASS pass_interleave_3dnow, PASS_BIG 0
%define pass_3dnow2 pass_3dnow %define pass_3dnowext pass_3dnow
%define pass_interleave_3dnow2 pass_interleave_3dnow %define pass_interleave_3dnowext pass_interleave_3dnow
%endif %endif
%ifdef PIC %ifdef PIC
@ -813,7 +813,7 @@ DECL_FFT 5, _interleave
INIT_MMX 3dnow INIT_MMX 3dnow
DECL_FFT 4 DECL_FFT 4
DECL_FFT 4, _interleave DECL_FFT 4, _interleave
INIT_MMX 3dnow2 INIT_MMX 3dnowext
DECL_FFT 4 DECL_FFT 4
DECL_FFT 4, _interleave DECL_FFT 4, _interleave
%endif %endif
@ -845,7 +845,7 @@ INIT_XMM sse
PSWAPD m5, m3 PSWAPD m5, m3
pfmul m2, m3 pfmul m2, m3
pfmul m6, m5 pfmul m6, m5
%if cpuflag(3dnow2) %if cpuflag(3dnowext)
pfpnacc m0, m4 pfpnacc m0, m4
pfpnacc m2, m6 pfpnacc m2, m6
%else %else
@ -1018,7 +1018,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
xor r4, r4 xor r4, r4
sub r4, r3 sub r4, r3
%endif %endif
%if notcpuflag(3dnow2) && mmsize == 8 %if notcpuflag(3dnowext) && mmsize == 8
movd m7, [ps_m1m1m1m1] movd m7, [ps_m1m1m1m1]
%endif %endif
.pre: .pre:
@ -1102,7 +1102,7 @@ DECL_IMDCT POSROTATESHUF
INIT_MMX 3dnow INIT_MMX 3dnow
DECL_IMDCT POSROTATESHUF_3DNOW DECL_IMDCT POSROTATESHUF_3DNOW
INIT_MMX 3dnow2 INIT_MMX 3dnowext
DECL_IMDCT POSROTATESHUF_3DNOW DECL_IMDCT POSROTATESHUF_3DNOW
%endif %endif

View File

@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2 %macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e pshufw %1, %2, 0x4e
%endmacro %endmacro
%macro PSWAPD_3DN1 2 %macro PSWAPD_3DNOW 2
movq %1, %2 movq %1, %2
psrlq %1, 32 psrlq %1, 32
punpckldq %1, %2 punpckldq %1, %2
@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
%define pswapd PSWAPD_SSE %define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse FLOAT_TO_INT16_INTERLEAVE6 sse
%define cvtps2pi pf2id %define cvtps2pi pf2id
%define pswapd PSWAPD_3DN1 %define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 3dnow FLOAT_TO_INT16_INTERLEAVE6 3dnow
%undef pswapd %undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dn2 FLOAT_TO_INT16_INTERLEAVE6 3dnowext
%undef cvtps2pi %undef cvtps2pi
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------

View File

@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse) FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2) FLOAT_TO_INT16_INTERLEAVE(sse2)
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
long len, int channels)
{
if(channels==6) if(channels==6)
ff_float_to_int16_interleave6_3dn2(dst, src, len); ff_float_to_int16_interleave6_3dnowext(dst, src, len);
else else
float_to_int16_interleave_3dnow(dst, src, len, channels); float_to_int16_interleave_3dnow(dst, src, len, channels);
} }
@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
} }
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2; c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
} }
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {

View File

@ -557,7 +557,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_mmx (1<<0) %assign cpuflags_mmx (1<<0)
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx %assign cpuflags_3dnow (1<<2) | cpuflags_mmx
%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow %assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
%assign cpuflags_sse (1<<4) | cpuflags_mmx2 %assign cpuflags_sse (1<<4) | cpuflags_mmx2
%assign cpuflags_sse2 (1<<5) | cpuflags_sse %assign cpuflags_sse2 (1<<5) | cpuflags_sse
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2