From a9a07762b8c89c7a1136115fe5da4cb9cfc22356 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Wed, 21 May 2003 17:50:57 +0000 Subject: [PATCH] altivec gcc-3 fixes by (Magnus Damm ) Originally committed as revision 1896 to svn://svn.ffmpeg.org/ffmpeg/trunk --- configure | 31 +++++++++++ libavcodec/ppc/dsputil_altivec.c | 18 +++---- libavcodec/ppc/fft_altivec.c | 2 + libavcodec/ppc/gcc_fixes.h | 85 ++++++++++++++++++++++++++++++ libavcodec/ppc/gmc_altivec.c | 2 + libavcodec/ppc/idct_altivec.c | 24 +++------ libavcodec/ppc/mpegvideo_altivec.c | 5 +- 7 files changed, 140 insertions(+), 27 deletions(-) create mode 100644 libavcodec/ppc/gcc_fixes.h diff --git a/configure b/configure index c02d43e43d..67b9fce2f2 100755 --- a/configure +++ b/configure @@ -343,8 +343,29 @@ if test $altivec = "default"; then fi fi +# See if we have <altivec.h> +cat > $TMPC << EOF +#include <altivec.h> +int main( void ) { return 0; } +EOF + +_altivec_h="no" +if $cc -o $TMPE $TMPC 2> /dev/null ; then +_altivec_h="yes" +fi + # See does our compiler support Motorola AltiVec C API if test $altivec = "yes"; then +if test $_altivec_h = "yes"; then +cat > $TMPC << EOF +#include <altivec.h> +int main(void) { + vector signed int v1, v2, v3; + v1 = vec_add(v2,v3); + return 0; +} +EOF +else cat > $TMPC << EOF int main(void) { vector signed int v1, v2, v3; @@ -352,7 +373,12 @@ int main(void) { return 0; } EOF +fi +if test "$darwin" = "yes"; then $cc -o $TMPE $TMPC -faltivec 2> /dev/null || altivec="no" +else +$cc -o $TMPE $TMPC -maltivec -mabi=altivec 2> /dev/null || altivec="no" +fi fi # Can only do mmi on mips @@ -742,6 +768,11 @@ if test "$altivec" = "yes" ; then echo "#define HAVE_ALTIVEC 1" >> $TMPH echo "// Enable the next line to use the reference C code instead of AltiVec" >> $TMPH echo "// #define ALTIVEC_USE_REFERENCE_C_CODE 1" >> $TMPH + if test "$_altivec_h" = "yes" ; then + echo "#define HAVE_ALTIVEC_H 1" >> $TMPH + else + echo "#undef HAVE_ALTIVEC_H" >> $TMPH + fi fi if test "$gprof" = "yes" ; then echo
"TARGET_GPROF=yes" >> config.mak diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index d48332ba97..32e881b703 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -19,6 +19,9 @@ */ #include "../dsputil.h" + +#include "gcc_fixes.h" + #include "dsputil_altivec.h" #ifdef CONFIG_DARWIN @@ -303,11 +306,8 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) vector signed int sumdiffs; sad = (vector unsigned int)vec_splat_u32(0); -#ifdef CONFIG_DARWIN - permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); -#else - permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; -#endif + + permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); for(i=0;i<8;i++) { /* Read potentially unaligned pixels into t1 and t2 @@ -387,11 +387,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) vector signed int sumsqr; sum = (vector unsigned int)vec_splat_u32(0); -#ifdef CONFIG_DARWIN - permclear = (vector unsigned char)(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); -#else - permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; -#endif + + permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0); + for(i=0;i<8;i++) { /* Read potentially unaligned pixels into t1 and t2 diff --git a/libavcodec/ppc/fft_altivec.c b/libavcodec/ppc/fft_altivec.c index 992be5b8ed..75c95bb879 100644 --- a/libavcodec/ppc/fft_altivec.c +++ b/libavcodec/ppc/fft_altivec.c @@ -20,6 +20,8 @@ */ #include "../dsputil.h" +#include "gcc_fixes.h" + #include "dsputil_altivec.h" /* diff --git a/libavcodec/ppc/gcc_fixes.h b/libavcodec/ppc/gcc_fixes.h new file mode 100644 index 0000000000..855a5b4f7b --- /dev/null +++ b/libavcodec/ppc/gcc_fixes.h @@ -0,0 +1,85 @@ +/* + * gcc fixes for altivec. 
+ * Used to work around broken gcc (FSF gcc-3 pre gcc-3.3) + * and to stay somewhat compatible with Darwin. + */ + +#ifndef _GCC_FIXES_ +#define _GCC_FIXES_ + +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif + +#ifdef CONFIG_DARWIN +#define AVV(x...) (x) +#else +#define AVV(x...) {x} + +#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) + +/* This code was provided to me by Bartosch Pixa + * as a separate header file (broken_mergel.h). + * thanks to lu_zero for the workaround. + * + * See this mail for more information: + * http://gcc.gnu.org/ml/gcc/2003-04/msg00967.html + */ + +static inline vector signed char my_vmrglb (vector signed char const A, + vector signed char const B) +{ + static const vector unsigned char lowbyte = { + 0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b, + 0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f + }; + return vec_perm (A, B, lowbyte); +} + +static inline vector signed short my_vmrglh (vector signed short const A, + vector signed short const B) +{ + static const vector unsigned char lowhalf = { + 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, + 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f + }; + return vec_perm (A, B, lowhalf); +} + +static inline vector signed int my_vmrglw (vector signed int const A, + vector signed int const B) +{ + static const vector unsigned char lowword = { + 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f + }; + return vec_perm (A, B, lowword); +} +/*#define my_vmrglb my_vmrglb +#define my_vmrglh my_vmrglh +#define my_vmrglw my_vmrglw +*/ +#undef vec_mergel + +#define vec_mergel(a1, a2) \ +__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \ + ((vector signed char) my_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \ +__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \ + ((vector unsigned char) my_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \ +__ch (__bin_args_eq (vector signed short, (a1),
vector signed short, (a2)), \ + ((vector signed short) my_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \ +__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \ + ((vector unsigned short) my_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \ +__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \ + ((vector float) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ +__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \ + ((vector signed int) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ +__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \ + ((vector unsigned int) my_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \ + __altivec_link_error_invalid_argument ()))))))) + +#endif + +#endif /* CONFIG_DARWIN */ + +#endif /* _GCC_FIXES_ */ diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/gmc_altivec.c index 515a766d77..18d52bbc52 100644 --- a/libavcodec/ppc/gmc_altivec.c +++ b/libavcodec/ppc/gmc_altivec.c @@ -20,6 +20,8 @@ #include "../dsputil.h" +#include "gcc_fixes.h" + #include "dsputil_altivec.h" /* diff --git a/libavcodec/ppc/idct_altivec.c b/libavcodec/ppc/idct_altivec.c index 13df78f32d..f8a8aa6787 100644 --- a/libavcodec/ppc/idct_altivec.c +++ b/libavcodec/ppc/idct_altivec.c @@ -38,6 +38,9 @@ #include <stdlib.h> /* malloc(), free() */ #include <string.h> #include "../dsputil.h" + +#include "gcc_fixes.h" + #include "dsputil_altivec.h" #define vector_s16_t vector signed short @@ -152,24 +155,13 @@ vx7 = vec_sra (vy7, shift); -#ifdef CONFIG_DARWIN static const vector_s16_t constants[5] = { - (vector_s16_t)(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), - (vector_s16_t)(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), - (vector_s16_t)(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), - (vector_s16_t)(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), - (vector_s16_t)(19266, 26722, 25172,
22654, 19266, 22654, 25172, 26722) + (vector_s16_t) AVV(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), + (vector_s16_t) AVV(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), + (vector_s16_t) AVV(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), + (vector_s16_t) AVV(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), + (vector_s16_t) AVV(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722) }; -#else -// broken gcc -static const vector_s16_t constants[5] = { - (vector_s16_t){23170, 13573, 6518, 21895, -23170, -21895, 32, 31}, - (vector_s16_t){16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725}, - (vector_s16_t){22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521}, - (vector_s16_t){21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692}, - (vector_s16_t){19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} -}; -#endif void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) { diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c index 95558a6be4..bbf9c44335 100644 --- a/libavcodec/ppc/mpegvideo_altivec.c +++ b/libavcodec/ppc/mpegvideo_altivec.c @@ -15,11 +15,14 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + #include <stdlib.h> #include <stdio.h> #include "../dsputil.h" #include "../mpegvideo.h" + +#include "gcc_fixes.h" + #include "dsputil_altivec.h" // Swaps two variables (used for altivec registers)