Convert deinterlacing MMX code to YASM
Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
c2eae137e9
commit
de4bc44abb
@ -39,7 +39,6 @@
|
||||
#include "libavcore/imgutils.h"
|
||||
|
||||
#if HAVE_MMX
|
||||
#include "x86/mmx.h"
|
||||
#include "x86/dsputil_mmx.h"
|
||||
#endif
|
||||
|
||||
@ -55,6 +54,14 @@
|
||||
#define FF_PIXEL_PACKED 1 /**< only one component containing all the channels */
|
||||
#define FF_PIXEL_PALETTE 2 /**< one component containing indexes for a palette */
|
||||
|
||||
#if HAVE_MMX
|
||||
#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
|
||||
#define deinterlace_line ff_deinterlace_line_mmx
|
||||
#else
|
||||
#define deinterlace_line_inplace deinterlace_line_inplace_c
|
||||
#define deinterlace_line deinterlace_line_c
|
||||
#endif
|
||||
|
||||
typedef struct PixFmtInfo {
|
||||
uint8_t nb_channels; /**< number of channels (including alpha) */
|
||||
uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
|
||||
@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if HAVE_MMX
|
||||
#define DEINT_INPLACE_LINE_LUM \
|
||||
movd_m2r(lum_m4[0],mm0);\
|
||||
movd_m2r(lum_m3[0],mm1);\
|
||||
movd_m2r(lum_m2[0],mm2);\
|
||||
movd_m2r(lum_m1[0],mm3);\
|
||||
movd_m2r(lum[0],mm4);\
|
||||
punpcklbw_r2r(mm7,mm0);\
|
||||
movd_r2m(mm2,lum_m4[0]);\
|
||||
punpcklbw_r2r(mm7,mm1);\
|
||||
punpcklbw_r2r(mm7,mm2);\
|
||||
punpcklbw_r2r(mm7,mm3);\
|
||||
punpcklbw_r2r(mm7,mm4);\
|
||||
paddw_r2r(mm3,mm1);\
|
||||
psllw_i2r(1,mm2);\
|
||||
paddw_r2r(mm4,mm0);\
|
||||
psllw_i2r(2,mm1);\
|
||||
paddw_r2r(mm6,mm2);\
|
||||
paddw_r2r(mm2,mm1);\
|
||||
psubusw_r2r(mm0,mm1);\
|
||||
psrlw_i2r(3,mm1);\
|
||||
packuswb_r2r(mm7,mm1);\
|
||||
movd_r2m(mm1,lum_m2[0]);
|
||||
|
||||
#define DEINT_LINE_LUM \
|
||||
movd_m2r(lum_m4[0],mm0);\
|
||||
movd_m2r(lum_m3[0],mm1);\
|
||||
movd_m2r(lum_m2[0],mm2);\
|
||||
movd_m2r(lum_m1[0],mm3);\
|
||||
movd_m2r(lum[0],mm4);\
|
||||
punpcklbw_r2r(mm7,mm0);\
|
||||
punpcklbw_r2r(mm7,mm1);\
|
||||
punpcklbw_r2r(mm7,mm2);\
|
||||
punpcklbw_r2r(mm7,mm3);\
|
||||
punpcklbw_r2r(mm7,mm4);\
|
||||
paddw_r2r(mm3,mm1);\
|
||||
psllw_i2r(1,mm2);\
|
||||
paddw_r2r(mm4,mm0);\
|
||||
psllw_i2r(2,mm1);\
|
||||
paddw_r2r(mm6,mm2);\
|
||||
paddw_r2r(mm2,mm1);\
|
||||
psubusw_r2r(mm0,mm1);\
|
||||
psrlw_i2r(3,mm1);\
|
||||
packuswb_r2r(mm7,mm1);\
|
||||
movd_r2m(mm1,dst[0]);
|
||||
#endif
|
||||
|
||||
#if !HAVE_MMX
|
||||
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
||||
static void deinterlace_line(uint8_t *dst,
|
||||
static void deinterlace_line_c(uint8_t *dst,
|
||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||
const uint8_t *lum,
|
||||
int size)
|
||||
{
|
||||
#if !HAVE_MMX
|
||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
||||
int sum;
|
||||
|
||||
@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst,
|
||||
lum++;
|
||||
dst++;
|
||||
}
|
||||
#else
|
||||
|
||||
{
|
||||
pxor_r2r(mm7,mm7);
|
||||
movq_m2r(ff_pw_4,mm6);
|
||||
}
|
||||
for (;size > 3; size-=4) {
|
||||
DEINT_LINE_LUM
|
||||
lum_m4+=4;
|
||||
lum_m3+=4;
|
||||
lum_m2+=4;
|
||||
lum_m1+=4;
|
||||
lum+=4;
|
||||
dst+=4;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
|
||||
int size)
|
||||
|
||||
static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
|
||||
uint8_t *lum_m2, uint8_t *lum_m1,
|
||||
uint8_t *lum, int size)
|
||||
{
|
||||
#if !HAVE_MMX
|
||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
||||
int sum;
|
||||
|
||||
@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
|
||||
lum_m1++;
|
||||
lum++;
|
||||
}
|
||||
#else
|
||||
|
||||
{
|
||||
pxor_r2r(mm7,mm7);
|
||||
movq_m2r(ff_pw_4,mm6);
|
||||
}
|
||||
for (;size > 3; size-=4) {
|
||||
DEINT_INPLACE_LINE_LUM
|
||||
lum_m4+=4;
|
||||
lum_m3+=4;
|
||||
lum_m2+=4;
|
||||
lum_m1+=4;
|
||||
lum+=4;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
|
||||
top field is copied as is, but the bottom field is deinterlaced
|
||||
|
@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o \
|
||||
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
|
||||
MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
|
||||
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
|
||||
x86/deinterlace.o \
|
||||
$(YASM-OBJS-yes)
|
||||
|
||||
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
|
||||
|
81
libavcodec/x86/deinterlace.asm
Normal file
81
libavcodec/x86/deinterlace.asm
Normal file
@ -0,0 +1,81 @@
|
||||
;******************************************************************************
|
||||
;* MMX optimized deinterlacing functions
|
||||
;* Copyright (c) 2010 Vitor Sessak
|
||||
;* Copyright (c) 2002 Michael Niedermayer
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86inc.asm"
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
cextern pw_4
|
||||
|
||||
%macro DEINTERLACE 1
|
||||
%ifidn %1, inplace
|
||||
;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
|
||||
cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
|
||||
%else
|
||||
;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
|
||||
cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
|
||||
%endif
|
||||
pxor mm7, mm7
|
||||
movq mm6, [pw_4]
|
||||
.nextrow
|
||||
movd mm0, [lum_m4q]
|
||||
movd mm1, [lum_m3q]
|
||||
movd mm2, [lum_m2q]
|
||||
%ifidn %1, inplace
|
||||
movd [lum_m4q], mm2
|
||||
%endif
|
||||
movd mm3, [lum_m1q]
|
||||
movd mm4, [lumq]
|
||||
punpcklbw mm0, mm7
|
||||
punpcklbw mm1, mm7
|
||||
punpcklbw mm2, mm7
|
||||
punpcklbw mm3, mm7
|
||||
punpcklbw mm4, mm7
|
||||
paddw mm1, mm3
|
||||
psllw mm2, 1
|
||||
paddw mm0, mm4
|
||||
psllw mm1, 2
|
||||
paddw mm2, mm6
|
||||
paddw mm1, mm2
|
||||
psubusw mm1, mm0
|
||||
psrlw mm1, 3
|
||||
packuswb mm1, mm7
|
||||
%ifidn %1, inplace
|
||||
movd [lum_m2q], mm1
|
||||
%else
|
||||
movd [dstq], mm1
|
||||
add dstq, 4
|
||||
%endif
|
||||
add lum_m4q, 4
|
||||
add lum_m3q, 4
|
||||
add lum_m2q, 4
|
||||
add lum_m1q, 4
|
||||
add lumq, 4
|
||||
sub sized, 4
|
||||
jg .nextrow
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
DEINTERLACE ""
|
||||
|
||||
DEINTERLACE inplace
|
@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
|
||||
void ff_mmx_idct(DCTELEM *block);
|
||||
void ff_mmxext_idct(DCTELEM *block);
|
||||
|
||||
|
||||
void ff_deinterlace_line_mmx(uint8_t *dst,
|
||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||
const uint8_t *lum,
|
||||
int size);
|
||||
|
||||
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
|
||||
const uint8_t *lum_m3,
|
||||
const uint8_t *lum_m2,
|
||||
const uint8_t *lum_m1,
|
||||
const uint8_t *lum, int size);
|
||||
|
||||
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
|
||||
|
Loading…
Reference in New Issue
Block a user