x86: cavs: Remove an unneeded scratch buffer

Simplifies the code and makes it build on certain compilers
running out of registers on x86.

CC: libav-stable@libav.org
Reported-By: mudler
(cherry picked from commit e4610300de)
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
Michael Niedermayer
2015-05-28 12:38:35 +02:00
committed by Luca Barbato
parent b37bfbfbe5
commit 4dc0fbb13c

View File

@@ -142,8 +142,6 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
     DECLARE_ALIGNED(8, int16_t, b2)[64];
     for(i=0; i<2; i++){
-        DECLARE_ALIGNED(8, uint64_t, tmp);
-
         cavs_idct8_1d(block + 4 * i, ff_pw_4.a);
         __asm__ volatile(
@@ -155,19 +153,19 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         "psraw $3, %%mm2 \n\t"
         "psraw $3, %%mm1 \n\t"
         "psraw $3, %%mm0 \n\t"
-        "movq %%mm7, %0 \n\t"
+        "movq %%mm7, (%0) \n\t"
         TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
-        "movq %%mm0, 8(%1) \n\t"
-        "movq %%mm6, 24(%1) \n\t"
-        "movq %%mm7, 40(%1) \n\t"
-        "movq %%mm4, 56(%1) \n\t"
-        "movq %0, %%mm7 \n\t"
+        "movq %%mm0, 8(%0) \n\t"
+        "movq %%mm6, 24(%0) \n\t"
+        "movq %%mm7, 40(%0) \n\t"
+        "movq %%mm4, 56(%0) \n\t"
+        "movq (%0), %%mm7 \n\t"
         TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
-        "movq %%mm7, (%1) \n\t"
-        "movq %%mm1, 16(%1) \n\t"
-        "movq %%mm0, 32(%1) \n\t"
-        "movq %%mm3, 48(%1) \n\t"
+        "movq %%mm7, (%0) \n\t"
+        "movq %%mm1, 16(%0) \n\t"
+        "movq %%mm0, 32(%0) \n\t"
+        "movq %%mm3, 48(%0) \n\t"
-        : "=m"(tmp)
+        :
         : "r"(b2 + 32 * i)
         : "memory"
         );