diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c index d611debca2..d03493ca31 100644 --- a/postproc/rgb2rgb_template.c +++ b/postproc/rgb2rgb_template.c @@ -12,6 +12,8 @@ #include #include /* for __WORDSIZE */ +#include "asmalign.h" + #ifndef __WORDSIZE // #warning You have misconfigured system and probably will lose performance! #define __WORDSIZE MP_WORDSIZE @@ -40,9 +42,14 @@ #define PREFETCHW "prefetcht0" #define PAVGB "pavgb" #else +#ifdef __APPLE__ +#define PREFETCH "#" +#define PREFETCHW "#" +#elif #define PREFETCH "/nop" #define PREFETCHW "/nop" #endif +#endif #ifdef HAVE_3DNOW /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ @@ -56,8 +63,12 @@ #define SFENCE "sfence" #else #define MOVNTQ "movq" +#ifdef __APPLE__ +#define SFENCE "#" +#elif #define SFENCE "/nop" #endif +#endif static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) { @@ -332,7 +343,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1) \n\t" "movd (%1), %%mm0 \n\t" @@ -489,7 +500,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1) \n\t" "movd (%1), %%mm0 \n\t" @@ -1344,7 +1355,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s /* TODO: unroll this loop */ asm volatile ( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%0, %%"REG_a") \n\t" "movq (%0, %%"REG_a"), %%mm0 \n\t" @@ -1394,7 +1405,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a") \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG @@ -1464,7 +1475,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%2, %%"REG_a") \n\t" @@ -1617,7 +1628,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%2, %%"REG_a") \n\t" @@ -1741,7 +1752,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "xor %%"REG_a", %%"REG_a" \n\t" "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_a", 4) \n\t" "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -1794,7 +1805,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_a", 4) \n\t" "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -1979,7 +1990,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t "xorl %%eax, %%eax \n\t" "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%eax, 4) \n\t" "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) @@ -2032,7 +2043,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t asm volatile( "xorl %%eax, %%eax \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%eax, 4) \n\t" "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) @@ -2110,7 +2121,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "movq "MANGLE(w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" "movd (%0, %%"REG_b"), %%mm0 \n\t" @@ -2184,7 +2195,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" "add %%"REG_b", %%"REG_b" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" PREFETCH" 64(%1, %%"REG_b") \n\t" diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index 98828a9136..84af160903 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -16,6 +16,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "asmalign.h" + #undef REAL_MOVNTQ #undef MOVNTQ #undef PAVGB @@ -71,7 +73,7 @@ "movq %%mm3, %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".balign 16 \n\t" /* FIXME Unroll? */\ + ASMALIGN16 /* FIXME Unroll? */\ "1: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ @@ -98,7 +100,7 @@ #define YSCALEYUV2YV121 \ "mov %2, %%"REG_a" \n\t"\ - ".balign 16 \n\t" /* FIXME Unroll? */\ + ASMALIGN16 /* FIXME Unroll? */\ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ @@ -118,14 +120,14 @@ */ #define YSCALEYUV2PACKEDX \ "xor %%"REG_a", %%"REG_a" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "nop \n\t"\ "1: \n\t"\ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ "movq %%mm3, %%mm4 \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "2: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ @@ -143,7 +145,7 @@ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ "movq %%mm1, %%mm7 \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "2: \n\t"\ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ @@ -205,7 +207,7 @@ "punpcklwd %%mm5, %%mm5 \n\t"\ "punpcklwd %%mm5, %%mm5 \n\t"\ "xor %%"REG_a", %%"REG_a" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ @@ -258,7 +260,7 @@ "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -290,7 +292,7 @@ #define REAL_YSCALEYUV2RGB(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -356,7 +358,7 @@ #define REAL_YSCALEYUV2PACKED1(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ @@ -371,7 +373,7 @@ #define REAL_YSCALEYUV2RGB1(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ @@ -420,7 +422,7 @@ #define REAL_YSCALEYUV2PACKED1b(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -439,7 +441,7 @@ // do vertical chrominance interpolation #define REAL_YSCALEYUV2RGB1b(index, c) \ "xor "#index", "#index" \n\t"\ - ".balign 16 \n\t"\ + ASMALIGN16\ "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ @@ -1662,7 +1664,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) "movq "MANGLE(w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" "movd (%0, %%"REG_b"), %%mm0 \n\t" @@ -1748,7 +1750,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 "pxor %%mm7, %%mm7 \n\t" "lea (%%"REG_a", %%"REG_a", 2), %%"REG_b" \n\t" "add %%"REG_b", %%"REG_b" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" PREFETCH" 64(%0, %%"REG_b") \n\t" PREFETCH" 64(%1, %%"REG_b") \n\t" @@ -2055,7 +2057,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW "movq "MANGLE(w02)", %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... "mov %%"REG_a", %%"REG_BP" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movzwl (%2, %%"REG_BP"), %%eax \n\t" "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" @@ -2093,7 +2095,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW "movq "MANGLE(w02)", %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... "mov %%"REG_a", %%"REG_BP" \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movzwl (%2, %%"REG_BP"), %%eax \n\t" "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" @@ -2142,7 +2144,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW asm volatile( "pxor %%mm7, %%mm7 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t" - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "mov %2, %%"REG_c" \n\t" "movzwl (%%"REG_c", %0), %%eax \n\t" @@ -2326,7 +2328,7 @@ FUNNY_Y_CODE "xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"REG_b", %%"REG_b" \n\t" // xx "xorl %%ecx, %%ecx \n\t" // 2*xalpha - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "movzbl (%0, %%"REG_b"), %%edi \n\t" //src[xx] "movzbl 1(%0, %%"REG_b"), %%esi \n\t" //src[xx+1] @@ -2523,7 +2525,7 @@ FUNNY_UV_CODE "xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"REG_b", %%"REG_b" \n\t" // xx "xorl %%ecx, %%ecx \n\t" // 2*xalpha - ".balign 16 \n\t" + ASMALIGN16 "1: \n\t" "mov %0, %%"REG_S" \n\t" "movzbl (%%"REG_S", %%"REG_b"), %%edi \n\t" //src[xx]