Merge commit 'be923ed659016350592acb9b3346f706f8170ac5'

* commit 'be923ed659016350592acb9b3346f706f8170ac5':
  x86: fmtconvert: port to cpuflags
  x86: MMX2 ---> MMXEXT in macro names

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
28c0678eb7
@ -156,7 +156,7 @@ INIT_MMX mmx
|
|||||||
%define ABS2 ABS2_MMX
|
%define ABS2 ABS2_MMX
|
||||||
AC3_MAX_MSB_ABS_INT16 or_abs
|
AC3_MAX_MSB_ABS_INT16 or_abs
|
||||||
INIT_MMX mmx2
|
INIT_MMX mmx2
|
||||||
%define ABS2 ABS2_MMX2
|
%define ABS2 ABS2_MMXEXT
|
||||||
AC3_MAX_MSB_ABS_INT16 min_max
|
AC3_MAX_MSB_ABS_INT16 min_max
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
AC3_MAX_MSB_ABS_INT16 min_max
|
AC3_MAX_MSB_ABS_INT16 min_max
|
||||||
|
@ -430,7 +430,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
|
|||||||
"mov" #size " " #b ", " #temp " \n\t"\
|
"mov" #size " " #b ", " #temp " \n\t"\
|
||||||
"pavgusb " #temp ", " #a " \n\t"\
|
"pavgusb " #temp ", " #a " \n\t"\
|
||||||
"mov" #size " " #a ", " #b " \n\t"
|
"mov" #size " " #a ", " #b " \n\t"
|
||||||
#define AVG_MMX2_OP(a,b,temp, size) \
|
#define AVG_MMXEXT_OP(a, b, temp, size) \
|
||||||
"mov" #size " " #b ", " #temp " \n\t"\
|
"mov" #size " " #b ", " #temp " \n\t"\
|
||||||
"pavgb " #temp ", " #a " \n\t"\
|
"pavgb " #temp ", " #a " \n\t"\
|
||||||
"mov" #size " " #a ", " #b " \n\t"
|
"mov" #size " " #a ", " #b " \n\t"
|
||||||
@ -439,7 +439,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
|
|||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
QPEL_CAVS(put_, PUT_OP, mmx2)
|
QPEL_CAVS(put_, PUT_OP, mmx2)
|
||||||
QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
|
QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2)
|
||||||
|
|
||||||
CAVS_MC(put_, 8, mmx2)
|
CAVS_MC(put_, 8, mmx2)
|
||||||
CAVS_MC(put_, 16,mmx2)
|
CAVS_MC(put_, 16,mmx2)
|
||||||
|
@ -943,7 +943,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
|
|||||||
"packuswb %%mm5, %%mm5 \n\t" \
|
"packuswb %%mm5, %%mm5 \n\t" \
|
||||||
OP(%%mm5, out, %%mm7, d)
|
OP(%%mm5, out, %%mm7, d)
|
||||||
|
|
||||||
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \
|
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
|
||||||
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
||||||
uint8_t *src, \
|
uint8_t *src, \
|
||||||
int dstStride, \
|
int dstStride, \
|
||||||
@ -1011,7 +1011,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"psraw $5, %%mm3 \n\t" \
|
"psraw $5, %%mm3 \n\t" \
|
||||||
"movq %5, %%mm1 \n\t" \
|
"movq %5, %%mm1 \n\t" \
|
||||||
"packuswb %%mm3, %%mm1 \n\t" \
|
"packuswb %%mm3, %%mm1 \n\t" \
|
||||||
OP_MMX2(%%mm1, (%1), %%mm4, q) \
|
OP_MMXEXT(%%mm1, (%1), %%mm4, q) \
|
||||||
/* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \
|
/* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \
|
||||||
\
|
\
|
||||||
"movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \
|
"movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \
|
||||||
@ -1058,7 +1058,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \
|
"paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \
|
||||||
"psraw $5, %%mm4 \n\t" \
|
"psraw $5, %%mm4 \n\t" \
|
||||||
"packuswb %%mm4, %%mm0 \n\t" \
|
"packuswb %%mm4, %%mm0 \n\t" \
|
||||||
OP_MMX2(%%mm0, 8(%1), %%mm4, q) \
|
OP_MMXEXT(%%mm0, 8(%1), %%mm4, q) \
|
||||||
\
|
\
|
||||||
"add %3, %0 \n\t" \
|
"add %3, %0 \n\t" \
|
||||||
"add %4, %1 \n\t" \
|
"add %4, %1 \n\t" \
|
||||||
@ -1195,7 +1195,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \
|
"paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \
|
||||||
"psraw $5, %%mm3 \n\t" \
|
"psraw $5, %%mm3 \n\t" \
|
||||||
"packuswb %%mm3, %%mm0 \n\t" \
|
"packuswb %%mm3, %%mm0 \n\t" \
|
||||||
OP_MMX2(%%mm0, (%1), %%mm4, q) \
|
OP_MMXEXT(%%mm0, (%1), %%mm4, q) \
|
||||||
\
|
\
|
||||||
"add %3, %0 \n\t" \
|
"add %3, %0 \n\t" \
|
||||||
"add %4, %1 \n\t" \
|
"add %4, %1 \n\t" \
|
||||||
@ -1764,19 +1764,19 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
|
|||||||
"pavgusb "#temp", "#a" \n\t" \
|
"pavgusb "#temp", "#a" \n\t" \
|
||||||
"mov"#size" "#a", "#b" \n\t"
|
"mov"#size" "#a", "#b" \n\t"
|
||||||
|
|
||||||
#define AVG_MMX2_OP(a, b, temp, size) \
|
#define AVG_MMXEXT_OP(a, b, temp, size) \
|
||||||
"mov"#size" "#b", "#temp" \n\t" \
|
"mov"#size" "#b", "#temp" \n\t" \
|
||||||
"pavgb "#temp", "#a" \n\t" \
|
"pavgb "#temp", "#a" \n\t" \
|
||||||
"mov"#size" "#a", "#b" \n\t"
|
"mov"#size" "#a", "#b" \n\t"
|
||||||
|
|
||||||
QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP)
|
QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP)
|
||||||
QPEL_BASE(avg_, ff_pw_16, _, AVG_MMX2_OP, AVG_3DNOW_OP)
|
QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP, AVG_3DNOW_OP)
|
||||||
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
|
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
|
||||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
|
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
|
||||||
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
|
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
|
||||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
|
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
|
||||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2)
|
QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2)
|
||||||
QPEL_OP(avg_, ff_pw_16, _, AVG_MMX2_OP, mmx2)
|
QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2)
|
||||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
|
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
|
||||||
|
|
||||||
/***********************************/
|
/***********************************/
|
||||||
|
@ -112,7 +112,7 @@ SECTION .text
|
|||||||
movd %3, %1
|
movd %3, %1
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro HSUM_MMX2 3
|
%macro HSUM_MMXEXT 3
|
||||||
pshufw %2, %1, 0xE
|
pshufw %2, %1, 0xE
|
||||||
paddusw %1, %2
|
paddusw %1, %2
|
||||||
pshufw %2, %1, 0x1
|
pshufw %2, %1, 0x1
|
||||||
@ -263,12 +263,12 @@ INIT_MMX
|
|||||||
%define HSUM HSUM_MMX
|
%define HSUM HSUM_MMX
|
||||||
HADAMARD8_DIFF_MMX mmx
|
HADAMARD8_DIFF_MMX mmx
|
||||||
|
|
||||||
%define ABS1 ABS1_MMX2
|
%define ABS1 ABS1_MMXEXT
|
||||||
%define HSUM HSUM_MMX2
|
%define HSUM HSUM_MMXEXT
|
||||||
HADAMARD8_DIFF_MMX mmx2
|
HADAMARD8_DIFF_MMX mmx2
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
%define ABS2 ABS2_MMX2
|
%define ABS2 ABS2_MMXEXT
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%define ABS_SUM_8x8 ABS_SUM_8x8_64
|
%define ABS_SUM_8x8 ABS_SUM_8x8_64
|
||||||
%else
|
%else
|
||||||
|
@ -889,7 +889,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c
|
|||||||
"pxor " #z ", " #a " \n\t"\
|
"pxor " #z ", " #a " \n\t"\
|
||||||
"psubw " #z ", " #a " \n\t"
|
"psubw " #z ", " #a " \n\t"
|
||||||
|
|
||||||
#define MMABS_MMX2(a,z)\
|
#define MMABS_MMXEXT(a, z) \
|
||||||
"pxor " #z ", " #z " \n\t"\
|
"pxor " #z ", " #z " \n\t"\
|
||||||
"psubw " #a ", " #z " \n\t"\
|
"psubw " #a ", " #z " \n\t"\
|
||||||
"pmaxsw " #z ", " #a " \n\t"
|
"pmaxsw " #z ", " #a " \n\t"
|
||||||
@ -913,7 +913,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c
|
|||||||
"paddusw "#t", "#a" \n\t"\
|
"paddusw "#t", "#a" \n\t"\
|
||||||
"movd "#a", "#dst" \n\t"\
|
"movd "#a", "#dst" \n\t"\
|
||||||
|
|
||||||
#define HSUM_MMX2(a, t, dst)\
|
#define HSUM_MMXEXT(a, t, dst) \
|
||||||
"pshufw $0x0E, "#a", "#t" \n\t"\
|
"pshufw $0x0E, "#a", "#t" \n\t"\
|
||||||
"paddusw "#t", "#a" \n\t"\
|
"paddusw "#t", "#a" \n\t"\
|
||||||
"pshufw $0x01, "#a", "#t" \n\t"\
|
"pshufw $0x01, "#a", "#t" \n\t"\
|
||||||
@ -975,8 +975,8 @@ DCT_SAD_FUNC(mmx)
|
|||||||
#undef MMABS
|
#undef MMABS
|
||||||
#undef HSUM
|
#undef HSUM
|
||||||
|
|
||||||
#define HSUM(a,t,dst) HSUM_MMX2(a,t,dst)
|
#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
|
||||||
#define MMABS(a,z) MMABS_MMX2(a,z)
|
#define MMABS(a,z) MMABS_MMXEXT(a,z)
|
||||||
DCT_SAD_FUNC(mmx2)
|
DCT_SAD_FUNC(mmx2)
|
||||||
#undef HSUM
|
#undef HSUM
|
||||||
#undef DCT_SAD
|
#undef DCT_SAD
|
||||||
|
@ -26,11 +26,11 @@ SECTION_TEXT
|
|||||||
;---------------------------------------------------------------------------------
|
;---------------------------------------------------------------------------------
|
||||||
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
|
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
|
||||||
;---------------------------------------------------------------------------------
|
;---------------------------------------------------------------------------------
|
||||||
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
|
%macro INT32_TO_FLOAT_FMUL_SCALAR 1
|
||||||
%if UNIX64
|
%if UNIX64
|
||||||
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
|
cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
|
||||||
%else
|
%else
|
||||||
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
|
cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
|
||||||
%endif
|
%endif
|
||||||
%if WIN64
|
%if WIN64
|
||||||
SWAP 0, 2
|
SWAP 0, 2
|
||||||
@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
|
|||||||
add dstq, lenq
|
add dstq, lenq
|
||||||
neg lenq
|
neg lenq
|
||||||
.loop:
|
.loop:
|
||||||
%ifidn %1, sse2
|
%if cpuflag(sse2)
|
||||||
cvtdq2ps m1, [srcq+lenq ]
|
cvtdq2ps m1, [srcq+lenq ]
|
||||||
cvtdq2ps m2, [srcq+lenq+16]
|
cvtdq2ps m2, [srcq+lenq+16]
|
||||||
%else
|
%else
|
||||||
@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
|
|||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse
|
||||||
%define SPLATD SPLATD_SSE
|
%define SPLATD SPLATD_SSE
|
||||||
%define movdqa movaps
|
INT32_TO_FLOAT_FMUL_SCALAR 5
|
||||||
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
|
INIT_XMM sse2
|
||||||
%undef movdqa
|
|
||||||
%define SPLATD SPLATD_SSE2
|
%define SPLATD SPLATD_SSE2
|
||||||
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
|
INT32_TO_FLOAT_FMUL_SCALAR 3
|
||||||
%undef SPLATD
|
%undef SPLATD
|
||||||
|
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
|
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
%macro FLOAT_TO_INT16 2
|
%macro FLOAT_TO_INT16 1
|
||||||
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
|
cglobal float_to_int16, 3, 3, %1, dst, src, len
|
||||||
add lenq, lenq
|
add lenq, lenq
|
||||||
lea srcq, [srcq+2*lenq]
|
lea srcq, [srcq+2*lenq]
|
||||||
add dstq, lenq
|
add dstq, lenq
|
||||||
neg lenq
|
neg lenq
|
||||||
.loop:
|
.loop:
|
||||||
%ifidn %1, sse2
|
%if cpuflag(sse2)
|
||||||
cvtps2dq m0, [srcq+2*lenq ]
|
cvtps2dq m0, [srcq+2*lenq ]
|
||||||
cvtps2dq m1, [srcq+2*lenq+16]
|
cvtps2dq m1, [srcq+2*lenq+16]
|
||||||
packssdw m0, m1
|
packssdw m0, m1
|
||||||
@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
|
|||||||
%endif
|
%endif
|
||||||
add lenq, 16
|
add lenq, 16
|
||||||
js .loop
|
js .loop
|
||||||
%ifnidn %1, sse2
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
%endif
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
FLOAT_TO_INT16 sse2, 2
|
FLOAT_TO_INT16 2
|
||||||
INIT_MMX
|
INIT_MMX sse
|
||||||
FLOAT_TO_INT16 sse, 0
|
FLOAT_TO_INT16 0
|
||||||
%define cvtps2pi pf2id
|
%define cvtps2pi pf2id
|
||||||
FLOAT_TO_INT16 3dnow, 0
|
INIT_MMX 3dnow
|
||||||
|
FLOAT_TO_INT16 0
|
||||||
%undef cvtps2pi
|
%undef cvtps2pi
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
|
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
%macro FLOAT_TO_INT16_STEP 2
|
%macro FLOAT_TO_INT16_STEP 1
|
||||||
cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
|
cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2
|
||||||
add lenq, lenq
|
add lenq, lenq
|
||||||
lea srcq, [srcq+2*lenq]
|
lea srcq, [srcq+2*lenq]
|
||||||
lea step3q, [stepq*3]
|
lea step3q, [stepq*3]
|
||||||
neg lenq
|
neg lenq
|
||||||
.loop:
|
.loop:
|
||||||
%ifidn %1, sse2
|
%if cpuflag(sse2)
|
||||||
cvtps2dq m0, [srcq+2*lenq ]
|
cvtps2dq m0, [srcq+2*lenq ]
|
||||||
cvtps2dq m1, [srcq+2*lenq+16]
|
cvtps2dq m1, [srcq+2*lenq+16]
|
||||||
packssdw m0, m1
|
packssdw m0, m1
|
||||||
@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
|
|||||||
%endif
|
%endif
|
||||||
add lenq, 16
|
add lenq, 16
|
||||||
js .loop
|
js .loop
|
||||||
%ifnidn %1, sse2
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
%endif
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
FLOAT_TO_INT16_STEP sse2, 2
|
FLOAT_TO_INT16_STEP 2
|
||||||
INIT_MMX
|
INIT_MMX sse
|
||||||
FLOAT_TO_INT16_STEP sse, 0
|
FLOAT_TO_INT16_STEP 0
|
||||||
%define cvtps2pi pf2id
|
%define cvtps2pi pf2id
|
||||||
FLOAT_TO_INT16_STEP 3dnow, 0
|
INIT_MMX 3dnow
|
||||||
|
FLOAT_TO_INT16_STEP 0
|
||||||
%undef cvtps2pi
|
%undef cvtps2pi
|
||||||
|
|
||||||
;-------------------------------------------------------------------------------
|
;-------------------------------------------------------------------------------
|
||||||
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
|
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
|
||||||
;-------------------------------------------------------------------------------
|
;-------------------------------------------------------------------------------
|
||||||
%macro FLOAT_TO_INT16_INTERLEAVE2 1
|
%macro FLOAT_TO_INT16_INTERLEAVE2 0
|
||||||
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
|
cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
|
||||||
lea lenq, [4*r2q]
|
lea lenq, [4*r2q]
|
||||||
mov src1q, [src0q+gprsize]
|
mov src1q, [src0q+gprsize]
|
||||||
mov src0q, [src0q]
|
mov src0q, [src0q]
|
||||||
@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
|
|||||||
add src1q, lenq
|
add src1q, lenq
|
||||||
neg lenq
|
neg lenq
|
||||||
.loop:
|
.loop:
|
||||||
%ifidn %1, sse2
|
%if cpuflag(sse2)
|
||||||
cvtps2dq m0, [src0q+lenq]
|
cvtps2dq m0, [src0q+lenq]
|
||||||
cvtps2dq m1, [src1q+lenq]
|
cvtps2dq m1, [src1q+lenq]
|
||||||
packssdw m0, m1
|
packssdw m0, m1
|
||||||
@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
|
|||||||
%endif
|
%endif
|
||||||
add lenq, 16
|
add lenq, 16
|
||||||
js .loop
|
js .loop
|
||||||
%ifnidn %1, sse2
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
%endif
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX 3dnow
|
||||||
%define cvtps2pi pf2id
|
%define cvtps2pi pf2id
|
||||||
FLOAT_TO_INT16_INTERLEAVE2 3dnow
|
FLOAT_TO_INT16_INTERLEAVE2
|
||||||
%undef cvtps2pi
|
%undef cvtps2pi
|
||||||
%define movdqa movaps
|
INIT_MMX sse
|
||||||
FLOAT_TO_INT16_INTERLEAVE2 sse
|
FLOAT_TO_INT16_INTERLEAVE2
|
||||||
%undef movdqa
|
INIT_XMM sse2
|
||||||
INIT_XMM
|
FLOAT_TO_INT16_INTERLEAVE2
|
||||||
FLOAT_TO_INT16_INTERLEAVE2 sse2
|
|
||||||
|
|
||||||
|
|
||||||
%macro PSWAPD_SSE 2
|
%macro PSWAPD_SSE 2
|
||||||
@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
|
|||||||
punpckldq %1, %2
|
punpckldq %1, %2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro FLOAT_TO_INT16_INTERLEAVE6 1
|
%macro FLOAT_TO_INT16_INTERLEAVE6 0
|
||||||
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
|
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
|
||||||
cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len
|
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov lend, r2d
|
mov lend, r2d
|
||||||
%else
|
%else
|
||||||
@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
|
|||||||
RET
|
RET
|
||||||
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
|
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
|
||||||
|
|
||||||
|
INIT_MMX sse
|
||||||
%define pswapd PSWAPD_SSE
|
%define pswapd PSWAPD_SSE
|
||||||
FLOAT_TO_INT16_INTERLEAVE6 sse
|
FLOAT_TO_INT16_INTERLEAVE6
|
||||||
|
INIT_MMX 3dnow
|
||||||
%define cvtps2pi pf2id
|
%define cvtps2pi pf2id
|
||||||
%define pswapd PSWAPD_3DNOW
|
%define pswapd PSWAPD_3DNOW
|
||||||
FLOAT_TO_INT16_INTERLEAVE6 3dnow
|
FLOAT_TO_INT16_INTERLEAVE6
|
||||||
%undef pswapd
|
%undef pswapd
|
||||||
FLOAT_TO_INT16_INTERLEAVE6 3dnowext
|
INIT_MMX 3dnowext
|
||||||
|
FLOAT_TO_INT16_INTERLEAVE6
|
||||||
%undef cvtps2pi
|
%undef cvtps2pi
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
|
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro FLOAT_INTERLEAVE6 2
|
%macro FLOAT_INTERLEAVE6 1
|
||||||
cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len
|
cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov lend, r2d
|
mov lend, r2d
|
||||||
%else
|
%else
|
||||||
@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
|
|||||||
sub src4q, srcq
|
sub src4q, srcq
|
||||||
sub src5q, srcq
|
sub src5q, srcq
|
||||||
.loop:
|
.loop:
|
||||||
%ifidn %1, sse
|
%if cpuflag(sse)
|
||||||
movaps m0, [srcq]
|
movaps m0, [srcq]
|
||||||
movaps m1, [srcq+src1q]
|
movaps m1, [srcq+src1q]
|
||||||
movaps m2, [srcq+src2q]
|
movaps m2, [srcq+src2q]
|
||||||
@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
|
|||||||
add dstq, mmsize*6
|
add dstq, mmsize*6
|
||||||
sub lend, mmsize/4
|
sub lend, mmsize/4
|
||||||
jg .loop
|
jg .loop
|
||||||
%ifidn %1, mmx
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
%endif
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX mmx
|
||||||
FLOAT_INTERLEAVE6 mmx, 0
|
FLOAT_INTERLEAVE6 0
|
||||||
INIT_XMM
|
INIT_XMM sse
|
||||||
FLOAT_INTERLEAVE6 sse, 7
|
FLOAT_INTERLEAVE6 7
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
|
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro FLOAT_INTERLEAVE2 2
|
%macro FLOAT_INTERLEAVE2 1
|
||||||
cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
|
cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
|
||||||
mov src1q, [srcq+gprsize]
|
mov src1q, [srcq+gprsize]
|
||||||
mov srcq, [srcq ]
|
mov srcq, [srcq ]
|
||||||
sub src1q, srcq
|
sub src1q, srcq
|
||||||
.loop:
|
.loop:
|
||||||
MOVPS m0, [srcq ]
|
mova m0, [srcq ]
|
||||||
MOVPS m1, [srcq+src1q ]
|
mova m1, [srcq+src1q ]
|
||||||
MOVPS m3, [srcq +mmsize]
|
mova m3, [srcq +mmsize]
|
||||||
MOVPS m4, [srcq+src1q+mmsize]
|
mova m4, [srcq+src1q+mmsize]
|
||||||
|
|
||||||
MOVPS m2, m0
|
mova m2, m0
|
||||||
PUNPCKLDQ m0, m1
|
PUNPCKLDQ m0, m1
|
||||||
PUNPCKHDQ m2, m1
|
PUNPCKHDQ m2, m1
|
||||||
|
|
||||||
MOVPS m1, m3
|
mova m1, m3
|
||||||
PUNPCKLDQ m3, m4
|
PUNPCKLDQ m3, m4
|
||||||
PUNPCKHDQ m1, m4
|
PUNPCKHDQ m1, m4
|
||||||
|
|
||||||
MOVPS [dstq ], m0
|
mova [dstq ], m0
|
||||||
MOVPS [dstq+1*mmsize], m2
|
mova [dstq+1*mmsize], m2
|
||||||
MOVPS [dstq+2*mmsize], m3
|
mova [dstq+2*mmsize], m3
|
||||||
MOVPS [dstq+3*mmsize], m1
|
mova [dstq+3*mmsize], m1
|
||||||
|
|
||||||
add srcq, mmsize*2
|
add srcq, mmsize*2
|
||||||
add dstq, mmsize*4
|
add dstq, mmsize*4
|
||||||
sub lend, mmsize/2
|
sub lend, mmsize/2
|
||||||
jg .loop
|
jg .loop
|
||||||
%ifidn %1, mmx
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
%endif
|
%endif
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX mmx
|
||||||
%define MOVPS movq
|
|
||||||
%define PUNPCKLDQ punpckldq
|
%define PUNPCKLDQ punpckldq
|
||||||
%define PUNPCKHDQ punpckhdq
|
%define PUNPCKHDQ punpckhdq
|
||||||
FLOAT_INTERLEAVE2 mmx, 0
|
FLOAT_INTERLEAVE2 0
|
||||||
INIT_XMM
|
INIT_XMM sse
|
||||||
%define MOVPS movaps
|
|
||||||
%define PUNPCKLDQ unpcklps
|
%define PUNPCKLDQ unpcklps
|
||||||
%define PUNPCKHDQ unpckhps
|
%define PUNPCKHDQ unpckhps
|
||||||
FLOAT_INTERLEAVE2 sse, 5
|
FLOAT_INTERLEAVE2 5
|
||||||
|
@ -246,7 +246,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
|
|||||||
IDCT8_ADD_SSE r0, r1, r2, r3
|
IDCT8_ADD_SSE r0, r1, r2, r3
|
||||||
RET
|
RET
|
||||||
|
|
||||||
%macro DC_ADD_MMX2_INIT 2-3
|
%macro DC_ADD_MMXEXT_INIT 2-3
|
||||||
%if %0 == 2
|
%if %0 == 2
|
||||||
movsx %1, word [%1]
|
movsx %1, word [%1]
|
||||||
add %1, 32
|
add %1, 32
|
||||||
@ -266,7 +266,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
|
|||||||
packuswb m1, m1
|
packuswb m1, m1
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro DC_ADD_MMX2_OP 4
|
%macro DC_ADD_MMXEXT_OP 4
|
||||||
%1 m2, [%2 ]
|
%1 m2, [%2 ]
|
||||||
%1 m3, [%2+%3 ]
|
%1 m3, [%2+%3 ]
|
||||||
%1 m4, [%2+%3*2]
|
%1 m4, [%2+%3*2]
|
||||||
@ -288,16 +288,16 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
|
|||||||
INIT_MMX
|
INIT_MMX
|
||||||
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
||||||
cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
|
cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
|
||||||
DC_ADD_MMX2_INIT r1, r2
|
DC_ADD_MMXEXT_INIT r1, r2
|
||||||
DC_ADD_MMX2_OP movh, r0, r2, r1
|
DC_ADD_MMXEXT_OP movh, r0, r2, r1
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
|
||||||
cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
|
cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
|
||||||
DC_ADD_MMX2_INIT r1, r2
|
DC_ADD_MMXEXT_INIT r1, r2
|
||||||
DC_ADD_MMX2_OP mova, r0, r2, r1
|
DC_ADD_MMXEXT_OP mova, r0, r2, r1
|
||||||
lea r0, [r0+r2*4]
|
lea r0, [r0+r2*4]
|
||||||
DC_ADD_MMX2_OP mova, r0, r2, r1
|
DC_ADD_MMXEXT_OP mova, r0, r2, r1
|
||||||
RET
|
RET
|
||||||
|
|
||||||
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
|
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
|
||||||
@ -371,14 +371,14 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
|
|||||||
movsx r6, word [r2]
|
movsx r6, word [r2]
|
||||||
test r6, r6
|
test r6, r6
|
||||||
jz .no_dc
|
jz .no_dc
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMXEXT_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
%define dst2q r1
|
%define dst2q r1
|
||||||
%define dst2d r1d
|
%define dst2d r1d
|
||||||
%endif
|
%endif
|
||||||
mov dst2d, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
lea dst2q, [r0+dst2q]
|
lea dst2q, [r0+dst2q]
|
||||||
DC_ADD_MMX2_OP movh, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP movh, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -445,14 +445,14 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
|
|||||||
movsx r6, word [r2]
|
movsx r6, word [r2]
|
||||||
test r6, r6
|
test r6, r6
|
||||||
jz .skipblock
|
jz .skipblock
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMXEXT_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
%define dst2q r1
|
%define dst2q r1
|
||||||
%define dst2d r1d
|
%define dst2d r1d
|
||||||
%endif
|
%endif
|
||||||
mov dst2d, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
add dst2q, r0
|
add dst2q, r0
|
||||||
DC_ADD_MMX2_OP movh, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP movh, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -483,16 +483,16 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
|
|||||||
movsx r6, word [r2]
|
movsx r6, word [r2]
|
||||||
test r6, r6
|
test r6, r6
|
||||||
jz .no_dc
|
jz .no_dc
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMXEXT_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
%define dst2q r1
|
%define dst2q r1
|
||||||
%define dst2d r1d
|
%define dst2d r1d
|
||||||
%endif
|
%endif
|
||||||
mov dst2d, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
lea dst2q, [r0+dst2q]
|
lea dst2q, [r0+dst2q]
|
||||||
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6
|
||||||
lea dst2q, [dst2q+r3*4]
|
lea dst2q, [dst2q+r3*4]
|
||||||
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -541,16 +541,16 @@ cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block,
|
|||||||
test r6, r6
|
test r6, r6
|
||||||
jz .no_dc
|
jz .no_dc
|
||||||
INIT_MMX
|
INIT_MMX
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMXEXT_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
%define dst2q r1
|
%define dst2q r1
|
||||||
%define dst2d r1d
|
%define dst2d r1d
|
||||||
%endif
|
%endif
|
||||||
mov dst2d, dword [r1+r5*4]
|
mov dst2d, dword [r1+r5*4]
|
||||||
add dst2q, r0
|
add dst2q, r0
|
||||||
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6
|
||||||
lea dst2q, [dst2q+r3*4]
|
lea dst2q, [dst2q+r3*4]
|
||||||
DC_ADD_MMX2_OP mova, dst2q, r3, r6
|
DC_ADD_MMXEXT_OP mova, dst2q, r3, r6
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
mov r1, r1m
|
mov r1, r1m
|
||||||
%endif
|
%endif
|
||||||
@ -644,7 +644,7 @@ h264_idct_add8_mmx2_plane:
|
|||||||
movsx r6, word [r2]
|
movsx r6, word [r2]
|
||||||
test r6, r6
|
test r6, r6
|
||||||
jz .skipblock
|
jz .skipblock
|
||||||
DC_ADD_MMX2_INIT r2, r3, r6
|
DC_ADD_MMXEXT_INIT r2, r3, r6
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
mov r0d, dword [r1+r5*4]
|
mov r0d, dword [r1+r5*4]
|
||||||
add r0, [dst2q]
|
add r0, [dst2q]
|
||||||
@ -653,7 +653,7 @@ h264_idct_add8_mmx2_plane:
|
|||||||
mov r0, [r0]
|
mov r0, [r0]
|
||||||
add r0, dword [r1+r5*4]
|
add r0, dword [r1+r5*4]
|
||||||
%endif
|
%endif
|
||||||
DC_ADD_MMX2_OP movh, r0, r3, r6
|
DC_ADD_MMXEXT_OP movh, r0, r3, r6
|
||||||
.skipblock:
|
.skipblock:
|
||||||
inc r5
|
inc r5
|
||||||
add r2, 32
|
add r2, 32
|
||||||
@ -697,7 +697,7 @@ h264_idct_dc_add8_mmx2:
|
|||||||
pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D
|
pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D
|
||||||
punpcklwd m0, m0 ; d d d d D D D D
|
punpcklwd m0, m0 ; d d d d D D D D
|
||||||
lea r6, [r3*3]
|
lea r6, [r3*3]
|
||||||
DC_ADD_MMX2_OP movq, r0, r3, r6
|
DC_ADD_MMXEXT_OP movq, r0, r3, r6
|
||||||
ret
|
ret
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
|
@ -1169,18 +1169,18 @@ QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
|||||||
#undef PAVGB
|
#undef PAVGB
|
||||||
#define PAVGB "pavgb"
|
#define PAVGB "pavgb"
|
||||||
QPEL_H264(put_, PUT_OP, mmx2)
|
QPEL_H264(put_, PUT_OP, mmx2)
|
||||||
QPEL_H264(avg_, AVG_MMX2_OP, mmx2)
|
QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2)
|
||||||
QPEL_H264_V_XMM(put_, PUT_OP, sse2)
|
QPEL_H264_V_XMM(put_, PUT_OP, sse2)
|
||||||
QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2)
|
QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
|
||||||
QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
|
QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
|
||||||
QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2)
|
QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
|
QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
|
||||||
QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3)
|
QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||||
QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
|
QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
|
||||||
QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3)
|
QPEL_H264_HV2_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||||
QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
|
QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
|
||||||
QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
|
QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||||
#endif
|
#endif
|
||||||
#undef PAVGB
|
#undef PAVGB
|
||||||
|
|
||||||
|
@ -268,7 +268,7 @@ cglobal vc1_h_loop_filter8_%1, 3,5,0
|
|||||||
RET
|
RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%define PABSW PABSW_MMX2
|
%define PABSW PABSW_MMXEXT
|
||||||
VC1_LF_MMX mmx2
|
VC1_LF_MMX mmx2
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
|
@ -157,7 +157,7 @@
|
|||||||
psubw %1, %2
|
psubw %1, %2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro PABSW_MMX2 2
|
%macro PABSW_MMXEXT 2
|
||||||
pxor %1, %1
|
pxor %1, %1
|
||||||
psubw %1, %2
|
psubw %1, %2
|
||||||
pmaxsw %1, %2
|
pmaxsw %1, %2
|
||||||
@ -189,13 +189,13 @@
|
|||||||
psubw %2, %4
|
psubw %2, %4
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro ABS1_MMX2 2 ; a, tmp
|
%macro ABS1_MMXEXT 2 ; a, tmp
|
||||||
pxor %2, %2
|
pxor %2, %2
|
||||||
psubw %2, %1
|
psubw %2, %1
|
||||||
pmaxsw %1, %2
|
pmaxsw %1, %2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro ABS2_MMX2 4 ; a, b, tmp0, tmp1
|
%macro ABS2_MMXEXT 4 ; a, b, tmp0, tmp1
|
||||||
pxor %3, %3
|
pxor %3, %3
|
||||||
pxor %4, %4
|
pxor %4, %4
|
||||||
psubw %3, %1
|
psubw %3, %1
|
||||||
|
@ -593,7 +593,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
|
|||||||
"cmp "#dstw", "#index" \n\t"\
|
"cmp "#dstw", "#index" \n\t"\
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
|
|
||||||
#define WRITEBGR24MMX2(dst, dstw, index) \
|
#define WRITEBGR24MMXEXT(dst, dstw, index) \
|
||||||
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
|
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
|
||||||
"movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
|
"movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
|
||||||
"movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
|
"movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
|
||||||
@ -643,7 +643,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
|
|||||||
|
|
||||||
#if COMPILE_TEMPLATE_MMXEXT
|
#if COMPILE_TEMPLATE_MMXEXT
|
||||||
#undef WRITEBGR24
|
#undef WRITEBGR24
|
||||||
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
|
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
|
||||||
#else
|
#else
|
||||||
#undef WRITEBGR24
|
#undef WRITEBGR24
|
||||||
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
|
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
|
||||||
@ -1485,7 +1485,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
|||||||
PREFETCH" 64(%%"REG_c") \n\t"
|
PREFETCH" 64(%%"REG_c") \n\t"
|
||||||
|
|
||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
#define CALL_MMX2_FILTER_CODE \
|
#define CALL_MMXEXT_FILTER_CODE \
|
||||||
"movl (%%"REG_b"), %%esi \n\t"\
|
"movl (%%"REG_b"), %%esi \n\t"\
|
||||||
"call *%4 \n\t"\
|
"call *%4 \n\t"\
|
||||||
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
|
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
|
||||||
@ -1494,7 +1494,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
|||||||
"xor %%"REG_a", %%"REG_a" \n\t"\
|
"xor %%"REG_a", %%"REG_a" \n\t"\
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define CALL_MMX2_FILTER_CODE \
|
#define CALL_MMXEXT_FILTER_CODE \
|
||||||
"movl (%%"REG_b"), %%esi \n\t"\
|
"movl (%%"REG_b"), %%esi \n\t"\
|
||||||
"call *%4 \n\t"\
|
"call *%4 \n\t"\
|
||||||
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
|
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
|
||||||
@ -1503,14 +1503,14 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
|
|||||||
|
|
||||||
#endif /* ARCH_X86_64 */
|
#endif /* ARCH_X86_64 */
|
||||||
|
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
|
|
||||||
#if defined(PIC)
|
#if defined(PIC)
|
||||||
"mov %5, %%"REG_b" \n\t"
|
"mov %5, %%"REG_b" \n\t"
|
||||||
@ -1580,10 +1580,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
|||||||
PREFETCH" 32(%%"REG_c") \n\t"
|
PREFETCH" 32(%%"REG_c") \n\t"
|
||||||
PREFETCH" 64(%%"REG_c") \n\t"
|
PREFETCH" 64(%%"REG_c") \n\t"
|
||||||
|
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
"xor %%"REG_a", %%"REG_a" \n\t" // i
|
||||||
"mov %5, %%"REG_c" \n\t" // src
|
"mov %5, %%"REG_c" \n\t" // src
|
||||||
"mov %6, %%"REG_D" \n\t" // buf2
|
"mov %6, %%"REG_D" \n\t" // buf2
|
||||||
@ -1591,10 +1591,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
|
|||||||
PREFETCH" 32(%%"REG_c") \n\t"
|
PREFETCH" 32(%%"REG_c") \n\t"
|
||||||
PREFETCH" 64(%%"REG_c") \n\t"
|
PREFETCH" 64(%%"REG_c") \n\t"
|
||||||
|
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
CALL_MMX2_FILTER_CODE
|
CALL_MMXEXT_FILTER_CODE
|
||||||
|
|
||||||
#if defined(PIC)
|
#if defined(PIC)
|
||||||
"mov %7, %%"REG_b" \n\t"
|
"mov %7, %%"REG_b" \n\t"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user