x86: swscale: fix fragile memory accesses

To access data at multiple fixed offsets from a base address, this
code uses a single "m" operand and code of the form "32%0", relying on
the memory operand instantiation having no displacement, giving a final
result of the form "32(%rax)".  If the compiler uses a register and
displacement, e.g. "64(%rax)", the end result becomes "3264(%rax)",
which obviously does not work.

Replacing the "m" operands with "r" operands allows safe addition of a
displacement.  In theory, multiple memory operands could use a shared
base register with different index registers, "(%rax,%rbx)", potentially
making more efficient use of registers.  In the cases at hand, no such
sharing is possible since the addresses involved are entirely unrelated.

After this change, the code somewhat rudely accesses memory without
using a corresponding memory operand, which in some cases can lead to
unwanted "optimisations" of surrounding code.  However, the original
code also accesses memory not covered by a memory operand, so this is
not adding any defect not already present.  It is also hightly unlikely
that any such optimisations could be performed here since the memory
locations in questions are not accessed elsewhere in the same functions.

This fixes crashes with suncc.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
Mans Rullgard 2012-08-13 00:53:05 +01:00
parent 10b83cb653
commit 90540c2d5a

View File

@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"punpckldq 3%1, %%mm0 \n\t"
"movd 6%1, %%mm1 \n\t"
"punpckldq 9%1, %%mm1 \n\t"
"movd 12%1, %%mm2 \n\t"
"punpckldq 15%1, %%mm2 \n\t"
"movd 18%1, %%mm3 \n\t"
"punpckldq 21%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"punpckldq 3(%1), %%mm0 \n\t"
"movd 6(%1), %%mm1 \n\t"
"punpckldq 9(%1), %%mm1 \n\t"
"movd 12(%1), %%mm2 \n\t"
"punpckldq 15(%1), %%mm2 \n\t"
"movd 18(%1), %%mm3 \n\t"
"punpckldq 21(%1), %%mm3 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm2 \n\t"
"por %%mm7, %%mm3 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm1, 8%0 \n\t"
MOVNTQ" %%mm2, 16%0 \n\t"
MOVNTQ" %%mm3, 24%0"
:"=m"(*dest)
:"m"(*s)
MOVNTQ" %%mm0, (%0) \n\t"
MOVNTQ" %%mm1, 8(%0) \n\t"
MOVNTQ" %%mm2, 16(%0) \n\t"
MOVNTQ" %%mm3, 24(%0)"
:: "r"(dest), "r"(s)
:"memory");
dest += 32;
s += 24;
@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm1, 8%0 \n\t" \
MOVNTQ" %%mm4, 16%0"
MOVNTQ" %%mm0, (%0) \n\t" \
MOVNTQ" %%mm1, 8(%0) \n\t" \
MOVNTQ" %%mm4, 16(%0)"
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 31;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
"movq 16%1, %%mm4 \n\t"
"movq 24%1, %%mm5 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm1 \n\t"
"movq 16(%1), %%mm4 \n\t"
"movq 24(%1), %%mm5 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"movq %%mm4, %%mm6 \n\t"
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
:"=m"(*dest)
:"m"(*s)
:: "r"(dest), "r"(s)
:"memory");
dest += 24;
s += 32;
@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq 8%1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm2, 8%0"
:"=m"(*d)
:"m"(*s)
MOVNTQ" %%mm0, (%0) \n\t"
MOVNTQ" %%mm2, 8(%0)"
:: "r"(d), "r"(s)
);
d+=16;
s+=16;
@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s<mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq 8%1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"psrlq $1, %%mm0 \n\t"
@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
"pand %%mm6, %%mm3 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm3, %%mm2 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm2, 8%0"
:"=m"(*d)
:"m"(*s)
MOVNTQ" %%mm0, (%0) \n\t"
MOVNTQ" %%mm2, 8(%0)"
:: "r"(d), "r"(s)
);
d+=16;
s+=16;
@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 4%1, %%mm3 \n\t"
"punpckldq 8%1, %%mm0 \n\t"
"punpckldq 12%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 4(%1), %%mm3 \n\t"
"punpckldq 8(%1), %%mm0 \n\t"
"punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
:: "r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 16;
}
@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 4%1, %%mm3 \n\t"
"punpckldq 8%1, %%mm0 \n\t"
"punpckldq 12%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 4(%1), %%mm3 \n\t"
"punpckldq 8(%1), %%mm0 \n\t"
"punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 3%1, %%mm3 \n\t"
"punpckldq 6%1, %%mm0 \n\t"
"punpckldq 9%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 3(%1), %%mm3 \n\t"
"punpckldq 6(%1), %%mm0 \n\t"
"punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 3%1, %%mm3 \n\t"
"punpckldq 6%1, %%mm0 \n\t"
"punpckldq 9%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 3(%1), %%mm3 \n\t"
"punpckldq 6(%1), %%mm0 \n\t"
"punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
::"r"(d),"r"(s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 3%1, %%mm3 \n\t"
"punpckldq 6%1, %%mm0 \n\t"
"punpckldq 9%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 3(%1), %%mm3 \n\t"
"punpckldq 6(%1), %%mm0 \n\t"
"punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t"
"movd 3%1, %%mm3 \n\t"
"punpckldq 6%1, %%mm0 \n\t"
"punpckldq 9%1, %%mm3 \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 3(%1), %%mm3 \n\t"
"punpckldq 6(%1), %%mm0 \n\t"
"punpckldq 9(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm3, %%mm4 \n\t"
@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
"por %%mm5, %%mm3 \n\t"
"psllq $16, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
MOVNTQ" %%mm0, (%0) \n\t"
::"r"(d),"r"(s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
"movq %1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
"movq 8%1, %%mm2 \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 8(%1), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:"=m"(*d)
:"m"(*s)
:: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
"movq %1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
"movq 8%1, %%mm2 \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 8(%1), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:"=m"(*d)
:"m"(*s)
:: "r"(d), "m"(*s)
:"memory");
d += 24;
s += 8;
@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm3, 8%0 \n\t" \
MOVNTQ" %%mm0, (%0) \n\t" \
MOVNTQ" %%mm3, 8(%0) \n\t" \
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
"movq %1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $2, %%mm1 \n\t"
"psrlq $7, %%mm2 \n\t"
PACK_RGB32
:"=m"(*d)
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
:"memory");
d += 16;
s += 4;
@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3;
while (s < mm_end) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
"movq %1, %%mm2 \n\t"
PREFETCH" 32(%1) \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1), %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"psrlq $3, %%mm1 \n\t"
"psrlq $8, %%mm2 \n\t"
PACK_RGB32
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
:"memory");
d += 16;
s += 4;
@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
int srcStride1, int srcStride2,
int dstStride1, int dstStride2)
{
x86_reg y;
int x,w,h;
x86_reg x, y;
int w,h;
w=width/2; h=height/2;
__asm__ volatile(
PREFETCH" %0 \n\t"
@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq 8%1, %%mm2 \n\t"
"movq 16%1, %%mm4 \n\t"
"movq 24%1, %%mm6 \n\t"
PREFETCH" 32(%1,%2) \n\t"
"movq (%1,%2), %%mm0 \n\t"
"movq 8(%1,%2), %%mm2 \n\t"
"movq 16(%1,%2), %%mm4 \n\t"
"movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm1, 8%0 \n\t"
MOVNTQ" %%mm2, 16%0 \n\t"
MOVNTQ" %%mm3, 24%0 \n\t"
MOVNTQ" %%mm4, 32%0 \n\t"
MOVNTQ" %%mm5, 40%0 \n\t"
MOVNTQ" %%mm6, 48%0 \n\t"
MOVNTQ" %%mm7, 56%0"
:"=m"(d[2*x])
:"m"(s1[x])
MOVNTQ" %%mm0, (%0,%2,2) \n\t"
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
MOVNTQ" %%mm7, 56(%0,%2,2)"
:: "r"(d), "r"(s1), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x=0;
for (;x<w-31;x+=32) {
__asm__ volatile(
PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t"
"movq 8%1, %%mm2 \n\t"
"movq 16%1, %%mm4 \n\t"
"movq 24%1, %%mm6 \n\t"
PREFETCH" 32(%1,%2) \n\t"
"movq (%1,%2), %%mm0 \n\t"
"movq 8(%1,%2), %%mm2 \n\t"
"movq 16(%1,%2), %%mm4 \n\t"
"movq 24(%1,%2), %%mm6 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
"punpckhbw %%mm5, %%mm5 \n\t"
"punpcklbw %%mm6, %%mm6 \n\t"
"punpckhbw %%mm7, %%mm7 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm1, 8%0 \n\t"
MOVNTQ" %%mm2, 16%0 \n\t"
MOVNTQ" %%mm3, 24%0 \n\t"
MOVNTQ" %%mm4, 32%0 \n\t"
MOVNTQ" %%mm5, 40%0 \n\t"
MOVNTQ" %%mm6, 48%0 \n\t"
MOVNTQ" %%mm7, 56%0"
:"=m"(d[2*x])
:"m"(s2[x])
MOVNTQ" %%mm0, (%0,%2,2) \n\t"
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
MOVNTQ" %%mm7, 56(%0,%2,2)"
:: "r"(d), "r"(s2), "r"(x)
:"memory");
}
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];