Move add/diff_int16 to lossless_videodsp
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		
							
								
								
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								configure
									
									
									
									
										vendored
									
									
								
							@@ -1683,6 +1683,7 @@ CONFIG_EXTRA="
 | 
				
			|||||||
    huffman
 | 
					    huffman
 | 
				
			||||||
    intrax8
 | 
					    intrax8
 | 
				
			||||||
    lgplv3
 | 
					    lgplv3
 | 
				
			||||||
 | 
					    llviddsp
 | 
				
			||||||
    lpc
 | 
					    lpc
 | 
				
			||||||
    mpegaudio
 | 
					    mpegaudio
 | 
				
			||||||
    mpegaudiodsp
 | 
					    mpegaudiodsp
 | 
				
			||||||
@@ -1904,8 +1905,8 @@ eatqi_decoder_select="aandcttables error_resilience mpegvideo"
 | 
				
			|||||||
exr_decoder_select="zlib"
 | 
					exr_decoder_select="zlib"
 | 
				
			||||||
ffv1_decoder_select="dsputil golomb rangecoder"
 | 
					ffv1_decoder_select="dsputil golomb rangecoder"
 | 
				
			||||||
ffv1_encoder_select="dsputil rangecoder"
 | 
					ffv1_encoder_select="dsputil rangecoder"
 | 
				
			||||||
ffvhuff_decoder_select="dsputil"
 | 
					ffvhuff_decoder_select="dsputil llviddsp"
 | 
				
			||||||
ffvhuff_encoder_select="dsputil huffman"
 | 
					ffvhuff_encoder_select="dsputil huffman llviddsp"
 | 
				
			||||||
flac_decoder_select="golomb"
 | 
					flac_decoder_select="golomb"
 | 
				
			||||||
flac_encoder_select="dsputil golomb lpc"
 | 
					flac_encoder_select="dsputil golomb lpc"
 | 
				
			||||||
flashsv_decoder_select="zlib"
 | 
					flashsv_decoder_select="zlib"
 | 
				
			||||||
@@ -1927,8 +1928,8 @@ h263p_encoder_select="h263_encoder"
 | 
				
			|||||||
h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel videodsp"
 | 
					h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel videodsp"
 | 
				
			||||||
h264_decoder_suggest="error_resilience"
 | 
					h264_decoder_suggest="error_resilience"
 | 
				
			||||||
hevc_decoder_select="dsputil golomb videodsp"
 | 
					hevc_decoder_select="dsputil golomb videodsp"
 | 
				
			||||||
huffyuv_decoder_select="dsputil"
 | 
					huffyuv_decoder_select="dsputil llviddsp"
 | 
				
			||||||
huffyuv_encoder_select="dsputil huffman"
 | 
					huffyuv_encoder_select="dsputil huffman llviddsp"
 | 
				
			||||||
iac_decoder_select="dsputil fft mdct sinewin"
 | 
					iac_decoder_select="dsputil fft mdct sinewin"
 | 
				
			||||||
imc_decoder_select="dsputil fft mdct sinewin"
 | 
					imc_decoder_select="dsputil fft mdct sinewin"
 | 
				
			||||||
indeo3_decoder_select="hpeldsp"
 | 
					indeo3_decoder_select="hpeldsp"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -57,6 +57,7 @@ OBJS-$(CONFIG_HPELDSP)                 += hpeldsp.o
 | 
				
			|||||||
OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
 | 
					OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
 | 
				
			||||||
OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
 | 
					OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
 | 
				
			||||||
OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
 | 
					OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
 | 
				
			||||||
 | 
					OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o
 | 
				
			||||||
OBJS-$(CONFIG_LPC)                     += lpc.o
 | 
					OBJS-$(CONFIG_LPC)                     += lpc.o
 | 
				
			||||||
OBJS-$(CONFIG_LSP)                     += lsp.o
 | 
					OBJS-$(CONFIG_LSP)                     += lsp.o
 | 
				
			||||||
OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o
 | 
					OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1888,45 +1888,6 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 | 
				
			|||||||
        dst[i+0] = src1[i+0]-src2[i+0];
 | 
					        dst[i+0] = src1[i+0]-src2[i+0];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){
 | 
					 | 
				
			||||||
    long i;
 | 
					 | 
				
			||||||
    unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
 | 
					 | 
				
			||||||
    unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;
 | 
					 | 
				
			||||||
    for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
 | 
					 | 
				
			||||||
        long a = *(long*)(src+i);
 | 
					 | 
				
			||||||
        long b = *(long*)(dst+i);
 | 
					 | 
				
			||||||
        *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for(; i<w; i++)
 | 
					 | 
				
			||||||
        dst[i] = (dst[i] + src[i]) & mask;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
 | 
					 | 
				
			||||||
    long i;
 | 
					 | 
				
			||||||
#if !HAVE_FAST_UNALIGNED
 | 
					 | 
				
			||||||
    if((long)src2 & (sizeof(long)-1)){
 | 
					 | 
				
			||||||
        for(i=0; i+7<w; i+=8){
 | 
					 | 
				
			||||||
            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
 | 
					 | 
				
			||||||
            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
 | 
					 | 
				
			||||||
            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
 | 
					 | 
				
			||||||
            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }else
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
 | 
					 | 
				
			||||||
        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
 | 
					 | 
				
			||||||
            long a = *(long*)(src1+i);
 | 
					 | 
				
			||||||
            long b = *(long*)(src2+i);
 | 
					 | 
				
			||||||
            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for (; i<w; i++)
 | 
					 | 
				
			||||||
        dst[i] = (src1[i] - src2[i]) & mask;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
 | 
					static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint8_t l, lt;
 | 
					    uint8_t l, lt;
 | 
				
			||||||
@@ -2812,8 +2773,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    c->add_bytes= add_bytes_c;
 | 
					    c->add_bytes= add_bytes_c;
 | 
				
			||||||
    c->diff_bytes= diff_bytes_c;
 | 
					    c->diff_bytes= diff_bytes_c;
 | 
				
			||||||
    c->add_int16 = add_int16_c;
 | 
					 | 
				
			||||||
    c->diff_int16= diff_int16_c;
 | 
					 | 
				
			||||||
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
 | 
					    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
 | 
				
			||||||
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
 | 
					    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
 | 
				
			||||||
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
 | 
					    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -193,8 +193,6 @@ typedef struct DSPContext {
 | 
				
			|||||||
    /* huffyuv specific */
 | 
					    /* huffyuv specific */
 | 
				
			||||||
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
 | 
					    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
 | 
				
			||||||
    void (*diff_bytes)(uint8_t *dst/*align 16*/, const uint8_t *src1/*align 16*/, const uint8_t *src2/*align 1*/,int w);
 | 
					    void (*diff_bytes)(uint8_t *dst/*align 16*/, const uint8_t *src1/*align 16*/, const uint8_t *src2/*align 1*/,int w);
 | 
				
			||||||
    void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
 | 
					 | 
				
			||||||
    void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
 | 
					 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * subtract huffyuv's variant of median prediction
 | 
					     * subtract huffyuv's variant of median prediction
 | 
				
			||||||
     * note, this might read from src1[-1], src2[-1]
 | 
					     * note, this might read from src1[-1], src2[-1]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -81,6 +81,7 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
 | 
				
			|||||||
    s->flags = avctx->flags;
 | 
					    s->flags = avctx->flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ff_dsputil_init(&s->dsp, avctx);
 | 
					    ff_dsputil_init(&s->dsp, avctx);
 | 
				
			||||||
 | 
					    ff_llviddsp_init(&s->llviddsp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    s->width = avctx->width;
 | 
					    s->width = avctx->width;
 | 
				
			||||||
    s->height = avctx->height;
 | 
					    s->height = avctx->height;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,7 @@
 | 
				
			|||||||
#include "dsputil.h"
 | 
					#include "dsputil.h"
 | 
				
			||||||
#include "get_bits.h"
 | 
					#include "get_bits.h"
 | 
				
			||||||
#include "put_bits.h"
 | 
					#include "put_bits.h"
 | 
				
			||||||
 | 
					#include "lossless_videodsp.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define VLC_BITS 11
 | 
					#define VLC_BITS 11
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -92,6 +93,7 @@ typedef struct HYuvContext {
 | 
				
			|||||||
    uint8_t *bitstream_buffer;
 | 
					    uint8_t *bitstream_buffer;
 | 
				
			||||||
    unsigned int bitstream_buffer_size;
 | 
					    unsigned int bitstream_buffer_size;
 | 
				
			||||||
    DSPContext dsp;
 | 
					    DSPContext dsp;
 | 
				
			||||||
 | 
					    LLVidDSPContext llviddsp;
 | 
				
			||||||
} HYuvContext;
 | 
					} HYuvContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_huffyuv_common_init(AVCodecContext *s);
 | 
					void ff_huffyuv_common_init(AVCodecContext *s);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -708,7 +708,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
 | 
				
			|||||||
    if (s->bps <= 8) {
 | 
					    if (s->bps <= 8) {
 | 
				
			||||||
        s->dsp.add_bytes(dst, src, w);
 | 
					        s->dsp.add_bytes(dst, src, w);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        s->dsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w);
 | 
					        s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -41,7 +41,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
 | 
				
			|||||||
    if (s->bps <= 8) {
 | 
					    if (s->bps <= 8) {
 | 
				
			||||||
        s->dsp.diff_bytes(dst, src0, src1, w);
 | 
					        s->dsp.diff_bytes(dst, src0, src1, w);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        s->dsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
 | 
					        s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										69
									
								
								libavcodec/lossless_videodsp.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								libavcodec/lossless_videodsp.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,69 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Lossless video DSP utils
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This file is part of FFmpeg.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					 * modify it under the terms of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License as published by the Free Software Foundation; either
 | 
				
			||||||
 | 
					 * version 2.1 of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
				
			||||||
 | 
					 * Lesser General Public License for more details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * You should have received a copy of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License along with FFmpeg; if not, write to the Free Software
 | 
				
			||||||
 | 
					 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include "avcodec.h"
 | 
				
			||||||
 | 
					#include "lossless_videodsp.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * In-place masked addition of 16-bit samples:
					 * dst[k] = (dst[k] + src[k]) & mask for every k in [0, w).
					 *
					 * As many samples as possible are processed one long at a time
					 * (sizeof(long)/2 samples per step); whatever is left over is handled
					 * sample by sample.
					 *
					 * @param dst  samples updated in place; read and written through long-sized
					 *             accesses in the word loop
					 * @param src  samples to add; read through long-sized accesses in the word loop
					 * @param mask bit mask applied to every sum
					 * @param w    number of samples
					 */
					static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){
					    /* Number of 16-bit samples held by one long. */
					    const int lanes = (int)sizeof(long)/2;
					    /* mask without its top bit, replicated into every 16-bit lane. */
					    const unsigned long low_bits = (mask >> 1) * 0x0001000100010001ULL;
					    /* low_bits + 1 in every lane; for a mask of the form 2^n-1 this is the
					     * mask's top bit. */
					    const unsigned long top_bit  = low_bits + 0x0001000100010001ULL;
					    long pos = 0;

					    while (pos <= w - lanes) {
					        long *out = (long*)(dst+pos);
					        long  x   = *(long*)(src+pos);
					        long  y   = *out;

					        /* Sum the low bits of each lane (assuming mask fits in 16 bits the
					         * sum cannot carry into the neighbouring lane), then derive the top
					         * bit of each lane with an XOR of the operands' top bits. */
					        *out = ((x&low_bits) + (y&low_bits)) ^ ((x^y)&top_bit);
					        pos += lanes;
					    }

					    /* Leftover samples that do not fill a whole long. */
					    while (pos < w) {
					        dst[pos] = (dst[pos] + src[pos]) & mask;
					        pos++;
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Masked difference of 16-bit samples:
					 * dst[k] = (src1[k] - src2[k]) & mask for every k in [0, w).
					 *
					 * When HAVE_FAST_UNALIGNED is zero and src2 is not aligned to sizeof(long),
					 * the row is processed with per-sample accesses (unrolled by four).
					 * Otherwise sizeof(long)/2 samples are processed per step through
					 * long-sized loads and stores. In both cases the remaining samples are
					 * finished by a scalar loop.
					 *
					 * @param dst  output samples; written through long-sized stores in the word loop
					 * @param src1 minuend samples; read through long-sized loads in the word loop
					 * @param src2 subtrahend samples; only this pointer's alignment is checked
					 * @param mask bit mask applied to every difference
					 * @param w    number of samples
					 */
					static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
					    long i;
					#if !HAVE_FAST_UNALIGNED
					    if((long)src2 & (sizeof(long)-1)){
					        /* The body handles four samples, so the loop must advance by four.
					         * Advancing by eight (with an i+7 bound) left dst[i+4..i+7] of every
					         * group unwritten. */
					        for(i=0; i+3<w; i+=4){
					            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
					            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
					            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
					            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
					        }
					    }else
					#endif
					    {
					        /* mask without its top bit, replicated into every 16-bit lane. */
					        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
					        /* pw_lsb + 1 in every lane; for a mask of the form 2^n-1 this is the
					         * mask's top bit. */
					        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;

					        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
					            long a = *(const long*)(src1+i);
					            long b = *(const long*)(src2+i);
					            /* Forcing the top bit of each lane of a before subtracting the low
					             * bits of b keeps borrows from crossing lanes; the XOR term then
					             * restores the correct top bit of each lane.
					             * NOTE(review): bits of src1 above the mask are not cleared on this
					             * path, unlike the scalar form; presumably callers only pass
					             * in-range samples - confirm. */
					            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
					        }
					    }
					    /* Samples not covered by the loops above. */
					    for (; i<w; i++)
					        dst[i] = (src1[i] - src2[i]) & mask;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Fill in a lossless-video DSP function table.
					 *
					 * The C implementations are installed first. When ARCH_X86 is nonzero the
					 * table is then passed to ff_llviddsp_init_x86() (its body is not part of
					 * this file; presumably it installs x86-specific versions).
					 *
					 * @param c table to initialize
					 */
					void ff_llviddsp_init(LLVidDSPContext *c)
					{
					    c->diff_int16 = diff_int16_c;
					    c->add_int16  = add_int16_c;

					    if (ARCH_X86) {
					        ff_llviddsp_init_x86(c);
					    }
					}
 | 
				
			||||||
							
								
								
									
										36
									
								
								libavcodec/lossless_videodsp.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								libavcodec/lossless_videodsp.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Lossless video DSP utils
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This file is part of FFmpeg.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					 * modify it under the terms of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License as published by the Free Software Foundation; either
 | 
				
			||||||
 | 
					 * version 2.1 of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
				
			||||||
 | 
					 * Lesser General Public License for more details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * You should have received a copy of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License along with FFmpeg; if not, write to the Free Software
 | 
				
			||||||
 | 
					 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef AVCODEC_LOSSLESS_VIDEODSP_H
 | 
				
			||||||
 | 
					#define AVCODEC_LOSSLESS_VIDEODSP_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "avcodec.h"
 | 
				
			||||||
 | 
					#include "libavutil/cpu.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Function table for DSP routines working on rows of 16-bit samples.
					 * Filled in by ff_llviddsp_init().
					 */
					typedef struct LLVidDSPContext {
					    /**
					     * In-place masked addition: dst[k] = (dst[k] + src[k]) & mask
					     * for k in [0, w). dst and src are annotated as 16-byte aligned.
					     */
					    void (*add_int16)(uint16_t *dst /* align 16 */,
					                      const uint16_t *src /* align 16 */,
					                      unsigned mask, int w);

					    /**
					     * Masked difference: dst[k] = (src1[k] - src2[k]) & mask
					     * for k in [0, w). dst and src1 are annotated as 16-byte aligned,
					     * src2 as having no alignment requirement (align 1).
					     */
					    void (*diff_int16)(uint16_t *dst /* align 16 */,
					                       const uint16_t *src1 /* align 16 */,
					                       const uint16_t *src2 /* align 1 */,
					                       unsigned mask, int w);
					} LLVidDSPContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ff_llviddsp_init(LLVidDSPContext *llviddsp);
 | 
				
			||||||
 | 
					void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif //AVCODEC_LOSSLESS_VIDEODSP_H
 | 
				
			||||||
@@ -18,6 +18,7 @@ OBJS-$(CONFIG_H264DSP)                 += x86/h264dsp_init.o
 | 
				
			|||||||
OBJS-$(CONFIG_H264PRED)                += x86/h264_intrapred_init.o
 | 
					OBJS-$(CONFIG_H264PRED)                += x86/h264_intrapred_init.o
 | 
				
			||||||
OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o
 | 
					OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o
 | 
				
			||||||
OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o
 | 
					OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o
 | 
				
			||||||
 | 
					OBJS-$(CONFIG_LLVIDDSP)                += x86/lossless_videodsp_init.o
 | 
				
			||||||
OBJS-$(CONFIG_LPC)                     += x86/lpc.o
 | 
					OBJS-$(CONFIG_LPC)                     += x86/lpc.o
 | 
				
			||||||
OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
 | 
					OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
 | 
				
			||||||
OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o
 | 
					OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o
 | 
				
			||||||
@@ -86,6 +87,7 @@ YASM-OBJS-$(CONFIG_H264QPEL)           += x86/h264_qpel_8bit.o          \
 | 
				
			|||||||
                                          x86/qpel.o
 | 
					                                          x86/qpel.o
 | 
				
			||||||
YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
 | 
					YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
 | 
				
			||||||
                                          x86/hpeldsp.o
 | 
					                                          x86/hpeldsp.o
 | 
				
			||||||
 | 
					YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o
 | 
				
			||||||
YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
 | 
					YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
 | 
				
			||||||
YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
 | 
					YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
 | 
				
			||||||
YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 | 
					YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -466,70 +466,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
 | 
				
			|||||||
    ADD_HFYU_LEFT_LOOP 0, 0
 | 
					    ADD_HFYU_LEFT_LOOP 0, 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
 | 
					 | 
				
			||||||
    movd      m4, maskq
 | 
					 | 
				
			||||||
    punpcklwd m4, m4
 | 
					 | 
				
			||||||
    punpcklwd m4, m4
 | 
					 | 
				
			||||||
    punpcklwd m4, m4
 | 
					 | 
				
			||||||
    add     wq, wq
 | 
					 | 
				
			||||||
    test    wq, 2*mmsize - 1
 | 
					 | 
				
			||||||
    jz %%.tomainloop
 | 
					 | 
				
			||||||
%%.wordloop:
 | 
					 | 
				
			||||||
    sub     wq, 2
 | 
					 | 
				
			||||||
    mov     ax, [srcq+wq]
 | 
					 | 
				
			||||||
    add     ax, [dstq+wq]
 | 
					 | 
				
			||||||
    and     ax, maskw
 | 
					 | 
				
			||||||
    mov     [dstq+wq], ax
 | 
					 | 
				
			||||||
    test    wq, 2*mmsize - 1
 | 
					 | 
				
			||||||
    jnz %%.wordloop
 | 
					 | 
				
			||||||
%%.tomainloop:
 | 
					 | 
				
			||||||
    add     srcq, wq
 | 
					 | 
				
			||||||
    add     dstq, wq
 | 
					 | 
				
			||||||
    neg     wq
 | 
					 | 
				
			||||||
    jz      %%.end
 | 
					 | 
				
			||||||
%%.loop:
 | 
					 | 
				
			||||||
%if %1
 | 
					 | 
				
			||||||
    mova    m0, [srcq+wq]
 | 
					 | 
				
			||||||
    mova    m1, [dstq+wq]
 | 
					 | 
				
			||||||
    mova    m2, [srcq+wq+mmsize]
 | 
					 | 
				
			||||||
    mova    m3, [dstq+wq+mmsize]
 | 
					 | 
				
			||||||
%else
 | 
					 | 
				
			||||||
    movu    m0, [srcq+wq]
 | 
					 | 
				
			||||||
    movu    m1, [dstq+wq]
 | 
					 | 
				
			||||||
    movu    m2, [srcq+wq+mmsize]
 | 
					 | 
				
			||||||
    movu    m3, [dstq+wq+mmsize]
 | 
					 | 
				
			||||||
%endif
 | 
					 | 
				
			||||||
    paddw   m0, m1
 | 
					 | 
				
			||||||
    paddw   m2, m3
 | 
					 | 
				
			||||||
    pand    m0, m4
 | 
					 | 
				
			||||||
    pand    m2, m4
 | 
					 | 
				
			||||||
%if %1
 | 
					 | 
				
			||||||
    mova    [dstq+wq]       , m0
 | 
					 | 
				
			||||||
    mova    [dstq+wq+mmsize], m2
 | 
					 | 
				
			||||||
%else
 | 
					 | 
				
			||||||
    movu    [dstq+wq]       , m0
 | 
					 | 
				
			||||||
    movu    [dstq+wq+mmsize], m2
 | 
					 | 
				
			||||||
%endif
 | 
					 | 
				
			||||||
    add     wq, 2*mmsize
 | 
					 | 
				
			||||||
    jl %%.loop
 | 
					 | 
				
			||||||
%%.end:
 | 
					 | 
				
			||||||
    RET
 | 
					 | 
				
			||||||
%endmacro
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
INIT_MMX mmx
 | 
					 | 
				
			||||||
cglobal add_int16, 4,4,5, dst, src, mask, w
 | 
					 | 
				
			||||||
    ADD_INT16_LOOP 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
INIT_XMM sse2
 | 
					 | 
				
			||||||
cglobal add_int16, 4,4,5, dst, src, mask, w
 | 
					 | 
				
			||||||
    test srcq, mmsize-1
 | 
					 | 
				
			||||||
    jnz .unaligned
 | 
					 | 
				
			||||||
    test dstq, mmsize-1
 | 
					 | 
				
			||||||
    jnz .unaligned
 | 
					 | 
				
			||||||
    ADD_INT16_LOOP 1
 | 
					 | 
				
			||||||
.unaligned:
 | 
					 | 
				
			||||||
    ADD_INT16_LOOP 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
;-----------------------------------------------------------------------------
 | 
					;-----------------------------------------------------------------------------
 | 
				
			||||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
 | 
					; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
 | 
				
			||||||
;                           int32_t max, unsigned int len)
 | 
					;                           int32_t max, unsigned int len)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -542,7 +542,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
 | 
				
			|||||||
#endif /* HAVE_MMX_INLINE */
 | 
					#endif /* HAVE_MMX_INLINE */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if HAVE_MMX_EXTERNAL
 | 
					#if HAVE_MMX_EXTERNAL
 | 
				
			||||||
    c->add_int16 = ff_add_int16_mmx;
 | 
					 | 
				
			||||||
    c->vector_clip_int32 = ff_vector_clip_int32_mmx;
 | 
					    c->vector_clip_int32 = ff_vector_clip_int32_mmx;
 | 
				
			||||||
#endif /* HAVE_MMX_EXTERNAL */
 | 
					#endif /* HAVE_MMX_EXTERNAL */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -626,8 +625,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
 | 
				
			|||||||
        c->vector_clip_int32 = ff_vector_clip_int32_sse2;
 | 
					        c->vector_clip_int32 = ff_vector_clip_int32_sse2;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    c->bswap_buf = ff_bswap32_buf_sse2;
 | 
					    c->bswap_buf = ff_bswap32_buf_sse2;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    c->add_int16 = ff_add_int16_sse2;
 | 
					 | 
				
			||||||
#endif /* HAVE_SSE2_EXTERNAL */
 | 
					#endif /* HAVE_SSE2_EXTERNAL */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -116,8 +116,6 @@ void ff_clear_blocks_mmx(int16_t *blocks);
 | 
				
			|||||||
void ff_clear_blocks_sse(int16_t *blocks);
 | 
					void ff_clear_blocks_sse(int16_t *blocks);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
 | 
					void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
 | 
				
			||||||
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 | 
					 | 
				
			||||||
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
 | 
					void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
 | 
				
			||||||
                                        const uint8_t *diff, int w,
 | 
					                                        const uint8_t *diff, int w,
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										88
									
								
								libavcodec/x86/lossless_videodsp.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								libavcodec/x86/lossless_videodsp.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,88 @@
 | 
				
			|||||||
 | 
					;******************************************************************************
 | 
				
			||||||
 | 
					;* SIMD lossless video DSP utils
 | 
				
			||||||
 | 
					;* Copyright (c) 2014 Michael Niedermayer
 | 
				
			||||||
 | 
					;*
 | 
				
			||||||
 | 
					;* This file is part of FFmpeg.
 | 
				
			||||||
 | 
					;*
 | 
				
			||||||
 | 
					;* FFmpeg is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					;* modify it under the terms of the GNU Lesser General Public
 | 
				
			||||||
 | 
					;* License as published by the Free Software Foundation; either
 | 
				
			||||||
 | 
					;* version 2.1 of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					;*
 | 
				
			||||||
 | 
					;* FFmpeg is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
				
			||||||
 | 
					;* Lesser General Public License for more details.
 | 
				
			||||||
 | 
					;*
 | 
				
			||||||
 | 
					;* You should have received a copy of the GNU Lesser General Public
 | 
				
			||||||
 | 
					;* License along with FFmpeg; if not, write to the Free Software
 | 
				
			||||||
 | 
					;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
				
			||||||
 | 
					;******************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					%include "libavutil/x86/x86util.asm"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SECTION_TEXT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; ADD_INT16_LOOP is_aligned
					;
					; Body shared by every add_int16 variant:
					;     dst[i] = (dst[i] + src[i]) & mask      for each 16-bit element
					;
					; %1 = 1 -> main loop uses aligned vector loads/stores (mova)
					; %1 = 0 -> main loop uses unaligned vector loads/stores (movu)
					;
					; Expects the enclosing cglobal to name its registers
					; dst, src, mask, w and a scratch GPR tmp.
					%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
					    ; Broadcast the low 16 bits of mask to every word lane of m4.
					    ; mask is a 32-bit argument, so only maskd is read.
					    movd      m4, maskd
					    punpcklwd m4, m4
					    punpcklwd m4, m4
					    punpcklwd m4, m4
					    ; Turn the element count into a byte count. w is a C int, so operate
					    ; on the 32-bit register: on x86-64 this zero-extends into wq and
					    ; discards whatever the caller left in the upper half.
					    add     wd, wd
					    test    wq, 2*mmsize - 1
					    jz %%.tomainloop
					    ; Scalar tail: walk backwards from the end, one word at a time, until
					    ; the remaining byte count is a multiple of the main loop's stride.
					    ; tmpw is used as scratch rather than ax, because on x86-32 x86inc
					    ; places the first argument (dst) in eax.
					%%.wordloop:
					    sub     wq, 2
					    mov     tmpw, [srcq+wq]
					    add     tmpw, [dstq+wq]
					    and     tmpw, maskw
					    mov     [dstq+wq], tmpw
					    test    wq, 2*mmsize - 1
					    jnz %%.wordloop
					%%.tomainloop:
					    ; Point src/dst at the end of the vectorised region and count a
					    ; negative offset up towards zero.
					    add     srcq, wq
					    add     dstq, wq
					    neg     wq
					    jz      %%.end
					%%.loop:
					    ; Two vectors (2*mmsize bytes) per iteration.
					%if %1
					    mova    m0, [srcq+wq]
					    mova    m1, [dstq+wq]
					    mova    m2, [srcq+wq+mmsize]
					    mova    m3, [dstq+wq+mmsize]
					%else
					    movu    m0, [srcq+wq]
					    movu    m1, [dstq+wq]
					    movu    m2, [srcq+wq+mmsize]
					    movu    m3, [dstq+wq+mmsize]
					%endif
					    paddw   m0, m1
					    paddw   m2, m3
					    pand    m0, m4
					    pand    m2, m4
					%if %1
					    mova    [dstq+wq]       , m0
					    mova    [dstq+wq+mmsize], m2
					%else
					    movu    [dstq+wq]       , m0
					    movu    [dstq+wq+mmsize], m2
					%endif
					    add     wq, 2*mmsize
					    jl %%.loop
					%%.end:
					    RET
					%endmacro

					; void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
					; Five GPRs are requested so that tmp is saved/restored by the prologue
					; wherever the calling convention requires it.
					INIT_MMX mmx
					cglobal add_int16, 4,5,5, dst, src, mask, w, tmp
					    ADD_INT16_LOOP 1

					; void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
					; Uses the aligned main loop only when both pointers are mmsize-aligned;
					; otherwise falls through to the unaligned variant. Each macro expansion
					; ends in RET, so the aligned path never runs into .unaligned.
					INIT_XMM sse2
					cglobal add_int16, 4,5,5, dst, src, mask, w, tmp
					    test srcq, mmsize-1
					    jnz .unaligned
					    test dstq, mmsize-1
					    jnz .unaligned
					    ADD_INT16_LOOP 1
					.unaligned:
					    ADD_INT16_LOOP 0
 | 
				
			||||||
							
								
								
									
										38
									
								
								libavcodec/x86/lossless_videodsp_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								libavcodec/x86/lossless_videodsp_init.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Lossless video DSP utils
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This file is part of FFmpeg.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					 * modify it under the terms of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License as published by the Free Software Foundation; either
 | 
				
			||||||
 | 
					 * version 2.1 of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * FFmpeg is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
				
			||||||
 | 
					 * Lesser General Public License for more details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * You should have received a copy of the GNU Lesser General Public
 | 
				
			||||||
 | 
					 * License along with FFmpeg; if not, write to the Free Software
 | 
				
			||||||
 | 
					 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "../lossless_videodsp.h"
 | 
				
			||||||
 | 
					#include "libavutil/x86/cpu.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 | 
				
			||||||
 | 
					void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Install the x86 implementations of the lossless video DSP routines
					 * that the running CPU supports.
					 *
					 * @param c context whose add_int16 pointer is overridden when an
					 *          MMX or SSE2 external-assembly version is available;
					 *          left untouched otherwise
					 */
					void ff_llviddsp_init_x86(LLVidDSPContext *c)
					{
					    const int flags = av_get_cpu_flags();

					    /* Prefer the widest available instruction set. */
					    if (EXTERNAL_SSE2(flags))
					        c->add_int16 = ff_add_int16_sse2;
					    else if (EXTERNAL_MMX(flags))
					        c->add_int16 = ff_add_int16_mmx;
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user