vp3: move idct and loop filter pointers to new vp3dsp context
This moves all VP3-specific function pointers from dsputil to a new vp3dsp context. There is no reason to ever use the VP3 IDCT where an MPEG2 IDCT is expected or vice versa.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
		@@ -11,6 +11,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
 | 
			
		||||
OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
 | 
			
		||||
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
 | 
			
		||||
 | 
			
		||||
OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
 | 
			
		||||
OBJS-$(CONFIG_VP5_DECODER)             += arm/vp56dsp_init_arm.o
 | 
			
		||||
OBJS-$(CONFIG_VP6_DECODER)             += arm/vp56dsp_init_arm.o
 | 
			
		||||
OBJS-$(CONFIG_VP8_DECODER)             += arm/vp8dsp_init_arm.o
 | 
			
		||||
 
 | 
			
		||||
@@ -29,11 +29,6 @@ void ff_simple_idct_neon(DCTELEM *data);
 | 
			
		||||
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_neon(DCTELEM *data);
 | 
			
		||||
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data);
 | 
			
		||||
 | 
			
		||||
void ff_clear_block_neon(DCTELEM *block);
 | 
			
		||||
void ff_clear_blocks_neon(DCTELEM *blocks);
 | 
			
		||||
 | 
			
		||||
@@ -147,9 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
 | 
			
		||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
 | 
			
		||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
 | 
			
		||||
void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
 | 
			
		||||
 | 
			
		||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
 | 
			
		||||
                                const float *src1, const float *win, int len);
 | 
			
		||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 | 
			
		||||
@@ -186,13 +178,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 | 
			
		||||
            c->idct_add              = ff_simple_idct_add_neon;
 | 
			
		||||
            c->idct                  = ff_simple_idct_neon;
 | 
			
		||||
            c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
 | 
			
		||||
        } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
 | 
			
		||||
                    CONFIG_VP6_DECODER) &&
 | 
			
		||||
                   avctx->idct_algo == FF_IDCT_VP3) {
 | 
			
		||||
            c->idct_put              = ff_vp3_idct_put_neon;
 | 
			
		||||
            c->idct_add              = ff_vp3_idct_add_neon;
 | 
			
		||||
            c->idct                  = ff_vp3_idct_neon;
 | 
			
		||||
            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -319,12 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 | 
			
		||||
        c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (CONFIG_VP3_DECODER) {
 | 
			
		||||
        c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon;
 | 
			
		||||
        c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon;
 | 
			
		||||
        c->vp3_idct_dc_add   = ff_vp3_idct_dc_add_neon;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    c->vector_fmul_window         = ff_vector_fmul_window_neon;
 | 
			
		||||
    c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
 | 
			
		||||
    c->butterflies_float          = ff_butterflies_float_neon;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										45
									
								
								libavcodec/arm/vp3dsp_init_arm.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								libavcodec/arm/vp3dsp_init_arm.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
			
		||||
/*
 | 
			
		||||
 * This file is part of Libav.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is free software; you can redistribute it and/or
 | 
			
		||||
 * modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
 * License as published by the Free Software Foundation; either
 | 
			
		||||
 * version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
 * Lesser General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
 * License along with Libav; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
 | 
			
		||||
#include "libavutil/attributes.h"
 | 
			
		||||
#include "libavutil/cpu.h"
 | 
			
		||||
#include "libavutil/arm/cpu.h"
 | 
			
		||||
#include "libavcodec/vp3dsp.h"
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
 | 
			
		||||
void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
 | 
			
		||||
void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
 | 
			
		||||
 | 
			
		||||
/**
 * Install the ARM implementations of the VP3 DSP routines into @p c.
 *
 * If have_neon() accepts the flags returned by av_get_cpu_flags(), all five
 * function pointers of the context are pointed at their NEON versions and
 * the permutation type is set to FF_TRANSPOSE_IDCT_PERM. Otherwise the
 * context is returned untouched, keeping whatever the caller stored in it.
 *
 * @param c     context to update
 * @param flags not used by this function
 */
av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags)
{
    int cpu_caps = av_get_cpu_flags();

    /* nothing to override without NEON */
    if (!have_neon(cpu_caps))
        return;

    /* the NEON IDCTs are registered together with the transposed permutation */
    c->idct_perm     = FF_TRANSPOSE_IDCT_PERM;

    c->idct_put      = ff_vp3_idct_put_neon;
    c->idct_add      = ff_vp3_idct_add_neon;
    c->idct_dc_add   = ff_vp3_idct_dc_add_neon;

    c->v_loop_filter = ff_vp3_v_loop_filter_neon;
    c->h_loop_filter = ff_vp3_h_loop_filter_neon;
}
 | 
			
		||||
@@ -260,32 +260,6 @@ endfunc
 | 
			
		||||
VP3_IDCT_END row
 | 
			
		||||
VP3_IDCT_END col
 | 
			
		||||
 | 
			
		||||
function ff_vp3_idct_neon, export=1
 | 
			
		||||
    mov             ip,  lr
 | 
			
		||||
    mov             r2,  r0
 | 
			
		||||
    bl              vp3_idct_start_neon
 | 
			
		||||
    bl              vp3_idct_end_row_neon
 | 
			
		||||
    mov             r3,  #8
 | 
			
		||||
    bl              vp3_idct_core_neon
 | 
			
		||||
    bl              vp3_idct_end_col_neon
 | 
			
		||||
    mov             lr,  ip
 | 
			
		||||
    vpop            {d8-d15}
 | 
			
		||||
 | 
			
		||||
    vshr.s16        q8,  q8,  #4
 | 
			
		||||
    vshr.s16        q9,  q9,  #4
 | 
			
		||||
    vshr.s16        q10, q10, #4
 | 
			
		||||
    vshr.s16        q11, q11, #4
 | 
			
		||||
    vshr.s16        q12, q12, #4
 | 
			
		||||
    vst1.64         {d16-d19}, [r0,:128]!
 | 
			
		||||
    vshr.s16        q13, q13, #4
 | 
			
		||||
    vshr.s16        q14, q14, #4
 | 
			
		||||
    vst1.64         {d20-d23}, [r0,:128]!
 | 
			
		||||
    vshr.s16        q15, q15, #4
 | 
			
		||||
    vst1.64         {d24-d27}, [r0,:128]!
 | 
			
		||||
    vst1.64         {d28-d31}, [r0,:128]!
 | 
			
		||||
    bx              lr
 | 
			
		||||
endfunc
 | 
			
		||||
 | 
			
		||||
function ff_vp3_idct_put_neon, export=1
 | 
			
		||||
    mov             ip,  lr
 | 
			
		||||
    bl              vp3_idct_start_neon
 | 
			
		||||
 
 | 
			
		||||
@@ -2701,12 +2701,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
            c->idct_add= ff_jref_idct_add;
 | 
			
		||||
            c->idct    = ff_j_rev_dct;
 | 
			
		||||
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
 | 
			
		||||
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
 | 
			
		||||
                avctx->idct_algo==FF_IDCT_VP3){
 | 
			
		||||
            c->idct_put= ff_vp3_idct_put_c;
 | 
			
		||||
            c->idct_add= ff_vp3_idct_add_c;
 | 
			
		||||
            c->idct    = ff_vp3_idct_c;
 | 
			
		||||
            c->idct_permutation_type= FF_NO_IDCT_PERM;
 | 
			
		||||
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
 | 
			
		||||
            c->idct_put= ff_wmv2_idct_put_c;
 | 
			
		||||
            c->idct_add= ff_wmv2_idct_add_c;
 | 
			
		||||
@@ -2867,12 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
        c->h263_v_loop_filter= h263_v_loop_filter_c;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (CONFIG_VP3_DECODER) {
 | 
			
		||||
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
 | 
			
		||||
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
 | 
			
		||||
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    c->h261_loop_filter= h261_loop_filter_c;
 | 
			
		||||
 | 
			
		||||
    c->try_8x8basis= try_8x8basis_c;
 | 
			
		||||
 
 | 
			
		||||
@@ -101,15 +101,6 @@ PUTAVG_PIXELS(10)
 | 
			
		||||
#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
 | 
			
		||||
#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
 | 
			
		||||
 | 
			
		||||
/* VP3 DSP functions */
 | 
			
		||||
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
 | 
			
		||||
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 | 
			
		||||
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 | 
			
		||||
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
 | 
			
		||||
/* EA functions */
 | 
			
		||||
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
@@ -391,10 +382,6 @@ typedef struct DSPContext {
 | 
			
		||||
    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
 | 
			
		||||
    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
 | 
			
		||||
 | 
			
		||||
    void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
 | 
			
		||||
    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
 | 
			
		||||
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
 | 
			
		||||
    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
 | 
			
		||||
    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
 | 
			
		||||
 
 | 
			
		||||
@@ -1,14 +1,13 @@
 | 
			
		||||
OBJS                                   += ppc/dsputil_ppc.o             \
 | 
			
		||||
 | 
			
		||||
OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o
 | 
			
		||||
 | 
			
		||||
FFT-OBJS-$(HAVE_GNU_AS)                += ppc/fft_altivec_s.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_FFT)             += ppc/fft_altivec.o             \
 | 
			
		||||
                                          $(FFT-OBJS-yes)
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_H264DSP)         += ppc/h264_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP)    += ppc/mpegaudiodec_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_VC1_DECODER)     += ppc/vc1dsp_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_VP3_DECODER)     += ppc/vp3dsp_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_VP5_DECODER)     += ppc/vp3dsp_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_VP6_DECODER)     += ppc/vp3dsp_altivec.o
 | 
			
		||||
ALTIVEC-OBJS-$(CONFIG_VP8_DECODER)     += ppc/vp8dsp_altivec.o
 | 
			
		||||
 | 
			
		||||
ALTIVEC-OBJS                           += ppc/dsputil_altivec.o         \
 | 
			
		||||
 
 | 
			
		||||
@@ -36,10 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
 | 
			
		||||
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
 | 
			
		||||
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_altivec(DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
 | 
			
		||||
 | 
			
		||||
void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
 | 
			
		||||
 
 | 
			
		||||
@@ -185,12 +185,6 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 | 
			
		||||
                c->idct_put = ff_idct_put_altivec;
 | 
			
		||||
                c->idct_add = ff_idct_add_altivec;
 | 
			
		||||
                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
 | 
			
		||||
            }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
 | 
			
		||||
                     avctx->idct_algo==FF_IDCT_VP3){
 | 
			
		||||
                c->idct_put = ff_vp3_idct_put_altivec;
 | 
			
		||||
                c->idct_add = ff_vp3_idct_add_altivec;
 | 
			
		||||
                c->idct     = ff_vp3_idct_altivec;
 | 
			
		||||
                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -18,6 +18,13 @@
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "config.h"
 | 
			
		||||
#include "libavutil/attributes.h"
 | 
			
		||||
#include "libavutil/cpu.h"
 | 
			
		||||
#include "libavcodec/vp3dsp.h"
 | 
			
		||||
 | 
			
		||||
#if HAVE_ALTIVEC
 | 
			
		||||
 | 
			
		||||
#include "libavutil/ppc/types_altivec.h"
 | 
			
		||||
#include "libavutil/ppc/util_altivec.h"
 | 
			
		||||
#include "libavcodec/dsputil.h"
 | 
			
		||||
@@ -107,25 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C)
 | 
			
		||||
#define ADD8(a) vec_add(a, eight)
 | 
			
		||||
#define SHIFT4(a) vec_sra(a, four)
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_altivec(DCTELEM block[64])
 | 
			
		||||
{
 | 
			
		||||
    IDCT_START
 | 
			
		||||
 | 
			
		||||
    IDCT_1D(NOP, NOP)
 | 
			
		||||
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
 | 
			
		||||
    IDCT_1D(ADD8, SHIFT4)
 | 
			
		||||
 | 
			
		||||
    vec_st(b0, 0x00, block);
 | 
			
		||||
    vec_st(b1, 0x10, block);
 | 
			
		||||
    vec_st(b2, 0x20, block);
 | 
			
		||||
    vec_st(b3, 0x30, block);
 | 
			
		||||
    vec_st(b4, 0x40, block);
 | 
			
		||||
    vec_st(b5, 0x50, block);
 | 
			
		||||
    vec_st(b6, 0x60, block);
 | 
			
		||||
    vec_st(b7, 0x70, block);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
{
 | 
			
		||||
    vec_u8 t;
 | 
			
		||||
    IDCT_START
 | 
			
		||||
@@ -153,7 +142,7 @@ void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
    PUT(b7)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
{
 | 
			
		||||
    LOAD_ZERO;
 | 
			
		||||
    vec_u8 t, vdst;
 | 
			
		||||
@@ -183,3 +172,14 @@ void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
 | 
			
		||||
    ADD(b6)     dst += stride;
 | 
			
		||||
    ADD(b7)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif /* HAVE_ALTIVEC */
 | 
			
		||||
 | 
			
		||||
/**
 * Install the PowerPC implementations of the VP3 IDCT routines into @p c.
 *
 * When the build has AltiVec support and av_get_cpu_flags() reports
 * AV_CPU_FLAG_ALTIVEC, idct_put and idct_add are pointed at the AltiVec
 * versions and the permutation type is set to FF_TRANSPOSE_IDCT_PERM.
 * In every other case the context is left unchanged.
 *
 * @param c     context to update
 * @param flags not used by this function
 */
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
{
#if HAVE_ALTIVEC
    /* vp3_idct_put_altivec/vp3_idct_add_altivec are static and exist only
     * inside the HAVE_ALTIVEC section of this file. A plain
     * "if (HAVE_ALTIVEC && ...)" would still have to resolve those names
     * when HAVE_ALTIVEC is 0 and would break the build, so the references
     * are removed by the preprocessor instead. */
    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
        c->idct_put  = vp3_idct_put_altivec;
        c->idct_add  = vp3_idct_add_altivec;
        c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
    }
#endif
}
 | 
			
		||||
 
 | 
			
		||||
@@ -40,6 +40,7 @@
 | 
			
		||||
#include "get_bits.h"
 | 
			
		||||
 | 
			
		||||
#include "vp3data.h"
 | 
			
		||||
#include "vp3dsp.h"
 | 
			
		||||
#include "xiph.h"
 | 
			
		||||
#include "thread.h"
 | 
			
		||||
 | 
			
		||||
@@ -135,6 +136,7 @@ typedef struct Vp3DecodeContext {
 | 
			
		||||
    AVFrame current_frame;
 | 
			
		||||
    int keyframe;
 | 
			
		||||
    DSPContext dsp;
 | 
			
		||||
    VP3DSPContext vp3dsp;
 | 
			
		||||
    int flipped_image;
 | 
			
		||||
    int last_slice_end;
 | 
			
		||||
    int skip_loop_filter;
 | 
			
		||||
@@ -1302,14 +1304,14 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
 | 
			
		||||
            {
 | 
			
		||||
                /* do not perform left edge filter for left columns frags */
 | 
			
		||||
                if (x > 0) {
 | 
			
		||||
                    s->dsp.vp3_h_loop_filter(
 | 
			
		||||
                    s->vp3dsp.h_loop_filter(
 | 
			
		||||
                        plane_data + 8*x,
 | 
			
		||||
                        stride, bounding_values);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                /* do not perform top edge filter for top row fragments */
 | 
			
		||||
                if (y > 0) {
 | 
			
		||||
                    s->dsp.vp3_v_loop_filter(
 | 
			
		||||
                    s->vp3dsp.v_loop_filter(
 | 
			
		||||
                        plane_data + 8*x,
 | 
			
		||||
                        stride, bounding_values);
 | 
			
		||||
                }
 | 
			
		||||
@@ -1319,7 +1321,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
 | 
			
		||||
                 * in this frame (it will be filtered in next iteration) */
 | 
			
		||||
                if ((x < width - 1) &&
 | 
			
		||||
                    (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
 | 
			
		||||
                    s->dsp.vp3_h_loop_filter(
 | 
			
		||||
                    s->vp3dsp.h_loop_filter(
 | 
			
		||||
                        plane_data + 8*x + 8,
 | 
			
		||||
                        stride, bounding_values);
 | 
			
		||||
                }
 | 
			
		||||
@@ -1329,7 +1331,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
 | 
			
		||||
                 * in this frame (it will be filtered in the next row) */
 | 
			
		||||
                if ((y < height - 1) &&
 | 
			
		||||
                    (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
 | 
			
		||||
                    s->dsp.vp3_v_loop_filter(
 | 
			
		||||
                    s->vp3dsp.v_loop_filter(
 | 
			
		||||
                        plane_data + 8*x + 8*stride,
 | 
			
		||||
                        stride, bounding_values);
 | 
			
		||||
                }
 | 
			
		||||
@@ -1577,9 +1579,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
 | 
			
		||||
                        index = vp3_dequant(s, s->all_fragments + i, plane, 0, block);
 | 
			
		||||
                        if (index > 63)
 | 
			
		||||
                            continue;
 | 
			
		||||
                        if(s->avctx->idct_algo!=FF_IDCT_VP3)
 | 
			
		||||
                            block[0] += 128<<3;
 | 
			
		||||
                        s->dsp.idct_put(
 | 
			
		||||
                        s->vp3dsp.idct_put(
 | 
			
		||||
                            output_plane + first_pixel,
 | 
			
		||||
                            stride,
 | 
			
		||||
                            block);
 | 
			
		||||
@@ -1588,12 +1588,12 @@ static void render_slice(Vp3DecodeContext *s, int slice)
 | 
			
		||||
                        if (index > 63)
 | 
			
		||||
                            continue;
 | 
			
		||||
                        if (index > 0) {
 | 
			
		||||
                        s->dsp.idct_add(
 | 
			
		||||
                        s->vp3dsp.idct_add(
 | 
			
		||||
                            output_plane + first_pixel,
 | 
			
		||||
                            stride,
 | 
			
		||||
                            block);
 | 
			
		||||
                        } else {
 | 
			
		||||
                            s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block);
 | 
			
		||||
                            s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                } else {
 | 
			
		||||
@@ -1676,10 +1676,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
 | 
			
		||||
    if (avctx->pix_fmt == PIX_FMT_NONE)
 | 
			
		||||
        avctx->pix_fmt = PIX_FMT_YUV420P;
 | 
			
		||||
    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
 | 
			
		||||
    if(avctx->idct_algo==FF_IDCT_AUTO)
 | 
			
		||||
        avctx->idct_algo=FF_IDCT_VP3;
 | 
			
		||||
    ff_dsputil_init(&s->dsp, avctx);
 | 
			
		||||
    ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
 | 
			
		||||
 | 
			
		||||
    ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
 | 
			
		||||
    ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
 | 
			
		||||
 | 
			
		||||
    /* initialize to an impossible value which will force a recalculation
 | 
			
		||||
 
 | 
			
		||||
@@ -24,8 +24,10 @@
 | 
			
		||||
 * source code.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "libavutil/attributes.h"
 | 
			
		||||
#include "avcodec.h"
 | 
			
		||||
#include "dsputil.h"
 | 
			
		||||
#include "vp3dsp.h"
 | 
			
		||||
 | 
			
		||||
#define IdctAdjustBeforeShift 8
 | 
			
		||||
#define xC1S7 64277
 | 
			
		||||
@@ -210,19 +212,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
 | 
			
		||||
    idct(NULL, 0, block, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 | 
			
		||||
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 | 
			
		||||
    idct(dest, line_size, block, 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 | 
			
		||||
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 | 
			
		||||
    idct(dest, line_size, block, 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
 | 
			
		||||
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
 | 
			
		||||
                              const DCTELEM *block/*align 16*/){
 | 
			
		||||
    int i, dc = (block[0] + 15) >> 5;
 | 
			
		||||
 | 
			
		||||
    for(i = 0; i < 8; i++){
 | 
			
		||||
@@ -238,7 +237,8 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
 | 
			
		||||
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
 | 
			
		||||
                                int *bounding_values)
 | 
			
		||||
{
 | 
			
		||||
    unsigned char *end;
 | 
			
		||||
    int filter_value;
 | 
			
		||||
@@ -254,7 +254,8 @@ void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
 | 
			
		||||
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
 | 
			
		||||
                                int *bounding_values)
 | 
			
		||||
{
 | 
			
		||||
    unsigned char *end;
 | 
			
		||||
    int filter_value;
 | 
			
		||||
@@ -268,3 +269,21 @@ void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu
 | 
			
		||||
        first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Initialise a VP3 DSP context.
 *
 * The context is first filled with the C implementations from this file
 * together with FF_NO_IDCT_PERM. The initialiser of the architecture the
 * build targets is then called with the same arguments so that it can
 * replace any of those entries.
 *
 * @param c     context to fill in
 * @param flags forwarded unchanged to the architecture-specific initialiser
 */
av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
{
    /* portable defaults */
    c->idct_perm     = FF_NO_IDCT_PERM;

    c->h_loop_filter = vp3_h_loop_filter_c;
    c->v_loop_filter = vp3_v_loop_filter_c;

    c->idct_dc_add   = vp3_idct_dc_add_c;
    c->idct_add      = vp3_idct_add_c;
    c->idct_put      = vp3_idct_put_c;

    /* ARCH_* are build-time constants; the checks stay in C so that each
     * call is still parsed on every target */
    if (ARCH_ARM) {
        ff_vp3dsp_init_arm(c, flags);
    }
    if (ARCH_PPC) {
        ff_vp3dsp_init_ppc(c, flags);
    }
    if (ARCH_X86) {
        ff_vp3dsp_init_x86(c, flags);
    }
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										40
									
								
								libavcodec/vp3dsp.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								libavcodec/vp3dsp.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
			
		||||
/*
 | 
			
		||||
 * This file is part of Libav.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is free software; you can redistribute it and/or
 | 
			
		||||
 * modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
 * License as published by the Free Software Foundation; either
 | 
			
		||||
 * version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
 * Lesser General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
 * License along with Libav; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef AVCODEC_VP3DSP_H
 | 
			
		||||
#define AVCODEC_VP3DSP_H
 | 
			
		||||
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include "dsputil.h"
 | 
			
		||||
 | 
			
		||||
typedef struct VP3DSPContext {
 | 
			
		||||
    void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
    void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
    void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block);
 | 
			
		||||
    void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
    void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
 | 
			
		||||
    int idct_perm;
 | 
			
		||||
} VP3DSPContext;
 | 
			
		||||
 | 
			
		||||
void ff_vp3dsp_init(VP3DSPContext *c, int flags);
 | 
			
		||||
void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags);
 | 
			
		||||
void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags);
 | 
			
		||||
void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags);
 | 
			
		||||
 | 
			
		||||
#endif /* AVCODEC_VP3DSP_H */
 | 
			
		||||
@@ -411,7 +411,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
 | 
			
		||||
        case VP56_MB_INTRA:
 | 
			
		||||
            for (b=0; b<b_max; b++) {
 | 
			
		||||
                plane = ff_vp56_b2p[b+ab];
 | 
			
		||||
                s->dsp.idct_put(frame_current->data[plane] + s->block_offset[b],
 | 
			
		||||
                s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b],
 | 
			
		||||
                                s->stride[plane], s->block_coeff[b]);
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
@@ -424,7 +424,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
 | 
			
		||||
                s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off,
 | 
			
		||||
                                            frame_ref->data[plane] + off,
 | 
			
		||||
                                            s->stride[plane], 8);
 | 
			
		||||
                s->dsp.idct_add(frame_current->data[plane] + off,
 | 
			
		||||
                s->vp3dsp.idct_add(frame_current->data[plane] + off,
 | 
			
		||||
                                s->stride[plane], s->block_coeff[b]);
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
@@ -442,7 +442,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
 | 
			
		||||
                plane = ff_vp56_b2p[b+ab];
 | 
			
		||||
                vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane],
 | 
			
		||||
                        16*col+x_off, 16*row+y_off);
 | 
			
		||||
                s->dsp.idct_add(frame_current->data[plane] + s->block_offset[b],
 | 
			
		||||
                s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b],
 | 
			
		||||
                                s->stride[plane], s->block_coeff[b]);
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
@@ -666,10 +666,10 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
 | 
			
		||||
    s->avctx = avctx;
 | 
			
		||||
    avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P;
 | 
			
		||||
 | 
			
		||||
    if (avctx->idct_algo == FF_IDCT_AUTO)
 | 
			
		||||
        avctx->idct_algo = FF_IDCT_VP3;
 | 
			
		||||
    ff_dsputil_init(&s->dsp, avctx);
 | 
			
		||||
    ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
 | 
			
		||||
    ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
 | 
			
		||||
    ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
 | 
			
		||||
    ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
 | 
			
		||||
 | 
			
		||||
    for (i=0; i<4; i++)
 | 
			
		||||
 
 | 
			
		||||
@@ -30,6 +30,7 @@
 | 
			
		||||
#include "dsputil.h"
 | 
			
		||||
#include "get_bits.h"
 | 
			
		||||
#include "bytestream.h"
 | 
			
		||||
#include "vp3dsp.h"
 | 
			
		||||
#include "vp56dsp.h"
 | 
			
		||||
 | 
			
		||||
typedef struct vp56_context VP56Context;
 | 
			
		||||
@@ -91,6 +92,7 @@ typedef struct {
 | 
			
		||||
struct vp56_context {
 | 
			
		||||
    AVCodecContext *avctx;
 | 
			
		||||
    DSPContext dsp;
 | 
			
		||||
    VP3DSPContext vp3dsp;
 | 
			
		||||
    VP56DSPContext vp56dsp;
 | 
			
		||||
    ScanTable scantable;
 | 
			
		||||
    AVFrame frames[4];
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,6 @@
 | 
			
		||||
OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
 | 
			
		||||
OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 | 
			
		||||
OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
 | 
			
		||||
OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
 | 
			
		||||
 | 
			
		||||
MMX-OBJS                               += x86/dsputil_mmx.o             \
 | 
			
		||||
 
 | 
			
		||||
@@ -2476,20 +2476,6 @@ static void vector_clipf_sse(float *dst, const float *src,
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_mmx(int16_t *input_data);
 | 
			
		||||
void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size,
 | 
			
		||||
                             const DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
 | 
			
		||||
void ff_vp3_idct_sse2(int16_t *input_data);
 | 
			
		||||
void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2,
 | 
			
		||||
                                    int order);
 | 
			
		||||
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
 | 
			
		||||
@@ -2681,14 +2667,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
 | 
			
		||||
            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
 | 
			
		||||
            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (CONFIG_VP3_DECODER && HAVE_YASM) {
 | 
			
		||||
            c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2;
 | 
			
		||||
            c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    if (CONFIG_VP3_DECODER && HAVE_YASM)
 | 
			
		||||
        c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
 | 
			
		||||
 | 
			
		||||
    if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 ||
 | 
			
		||||
                               avctx->codec_id == CODEC_ID_THEORA)) {
 | 
			
		||||
@@ -3064,20 +3043,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
 | 
			
		||||
                }
 | 
			
		||||
                c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
 | 
			
		||||
#endif
 | 
			
		||||
            } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
 | 
			
		||||
                        CONFIG_VP6_DECODER) &&
 | 
			
		||||
                       idct_algo == FF_IDCT_VP3 && HAVE_YASM) {
 | 
			
		||||
                if (mm_flags & AV_CPU_FLAG_SSE2) {
 | 
			
		||||
                    c->idct_put              = ff_vp3_idct_put_sse2;
 | 
			
		||||
                    c->idct_add              = ff_vp3_idct_add_sse2;
 | 
			
		||||
                    c->idct                  = ff_vp3_idct_sse2;
 | 
			
		||||
                    c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
 | 
			
		||||
                } else {
 | 
			
		||||
                    c->idct_put              = ff_vp3_idct_put_mmx;
 | 
			
		||||
                    c->idct_add              = ff_vp3_idct_add_mmx;
 | 
			
		||||
                    c->idct                  = ff_vp3_idct_mmx;
 | 
			
		||||
                    c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
 | 
			
		||||
                }
 | 
			
		||||
            } else if (idct_algo == FF_IDCT_CAVS) {
 | 
			
		||||
                    c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
 | 
			
		||||
            } else if (idct_algo == FF_IDCT_XVIDMMX) {
 | 
			
		||||
 
 | 
			
		||||
@@ -524,10 +524,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
 | 
			
		||||
%endmacro
 | 
			
		||||
 | 
			
		||||
%macro vp3_idct_funcs 3
 | 
			
		||||
cglobal vp3_idct_%1, 1, 1, %2
 | 
			
		||||
    VP3_IDCT_%1   r0
 | 
			
		||||
    RET
 | 
			
		||||
 | 
			
		||||
cglobal vp3_idct_put_%1, 3, %3, %2
 | 
			
		||||
    VP3_IDCT_%1   r2
 | 
			
		||||
%if ARCH_X86_64
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										65
									
								
								libavcodec/x86/vp3dsp_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								libavcodec/x86/vp3dsp_init.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,65 @@
 | 
			
		||||
/*
 | 
			
		||||
 * This file is part of Libav.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is free software; you can redistribute it and/or
 | 
			
		||||
 * modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
 * License as published by the Free Software Foundation; either
 | 
			
		||||
 * version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * Libav is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
 * Lesser General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
 * License along with Libav; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
 | 
			
		||||
#include "libavutil/attributes.h"
 | 
			
		||||
#include "libavutil/cpu.h"
 | 
			
		||||
#include "libavcodec/avcodec.h"
 | 
			
		||||
#include "libavcodec/vp3dsp.h"
 | 
			
		||||
#include "config.h"
 | 
			
		||||
 | 
			
		||||
/*
 * Prototypes for the x86 VP3 routines that ff_vp3dsp_init_x86() stores into
 * a VP3DSPContext. Only declarations appear in this file; the definitions
 * are presumably provided by the yasm sources -- TODO confirm.
 */
/* MMX variants assigned to the idct_put / idct_add pointers. */
void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
/* SSE2 variants assigned to the idct_put / idct_add pointers. */
void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
/* MMX2 variant assigned to the idct_dc_add pointer; takes a const block. */
void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size,
 | 
			
		||||
                             const DCTELEM *block);
 | 
			
		||||
 | 
			
		||||
/* MMX2 variants assigned to the v_loop_filter / h_loop_filter pointers. */
void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
 | 
			
		||||
 | 
			
		||||
/**
 * Install the x86-specific VP3 DSP routines into a VP3DSPContext.
 *
 * The body is compiled only when HAVE_YASM is set; otherwise the function
 * leaves the context untouched. The CPU flags reported by
 * av_get_cpu_flags() decide which pointers are overridden, and any field
 * whose CPU feature is absent keeps its previous value.
 *
 * @param c     context whose function pointers and idct_perm are updated
 * @param flags codec flags; when CODEC_FLAG_BITEXACT is set the MMX2 loop
 *              filters are not installed
 */
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{
#if HAVE_YASM
    int cpu_caps = av_get_cpu_flags();

    /* IDCT put/add and coefficient permutation: SSE2 wins over MMX. */
    if (cpu_caps & AV_CPU_FLAG_SSE2) {
        c->idct_put  = ff_vp3_idct_put_sse2;
        c->idct_add  = ff_vp3_idct_add_sse2;
        c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
    } else if (HAVE_MMX && (cpu_caps & AV_CPU_FLAG_MMX)) {
        c->idct_put  = ff_vp3_idct_put_mmx;
        c->idct_add  = ff_vp3_idct_add_mmx;
        c->idct_perm = FF_PARTTRANS_IDCT_PERM;
    }

    /* DC-only add and the loop filters exist only as MMX2 versions. */
    if (HAVE_MMX2 && (cpu_caps & AV_CPU_FLAG_MMX2)) {
        c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

        /* The loop filters are skipped when bit-exact output is requested. */
        if ((flags & CODEC_FLAG_BITEXACT) == 0) {
            c->v_loop_filter = ff_vp3_v_loop_filter_mmx2;
            c->h_loop_filter = ff_vp3_h_loop_filter_mmx2;
        }
    }
#endif
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user