swr: SIMD rematrixing and SSE/AVX mix_1_1 float
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		@@ -340,6 +340,9 @@ int swri_rematrix_init(SwrContext *s){
 | 
			
		||||
        }
 | 
			
		||||
        s->matrix_ch[i][0]= ch_in;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(HAVE_YASM && HAVE_MMX) swri_rematrix_init_x86(s);
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -351,12 +354,19 @@ void swri_rematrix_free(SwrContext *s){
 | 
			
		||||
 | 
			
		||||
int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){
 | 
			
		||||
    int out_i, in_i, i, j;
 | 
			
		||||
    int len1 = 0;
 | 
			
		||||
    int off = 0;
 | 
			
		||||
 | 
			
		||||
    if(s->mix_any_f) {
 | 
			
		||||
        s->mix_any_f(out->ch, in->ch, s->native_matrix, len);
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(s->mix_2_1_simd || s->mix_1_1_simd){
 | 
			
		||||
        len1= len&~15;
 | 
			
		||||
        off = len1 * out->bps;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    av_assert0(out->ch_count == av_get_channel_layout_nb_channels(s->out_ch_layout));
 | 
			
		||||
    av_assert0(in ->ch_count == av_get_channel_layout_nb_channels(s-> in_ch_layout));
 | 
			
		||||
 | 
			
		||||
@@ -369,7 +379,10 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
 | 
			
		||||
        case 1:
 | 
			
		||||
            in_i= s->matrix_ch[out_i][1];
 | 
			
		||||
            if(s->matrix[out_i][in_i]!=1.0){
 | 
			
		||||
                s->mix_1_1_f(out->ch[out_i], in->ch[in_i], s->native_matrix, in->ch_count*out_i + in_i, len);
 | 
			
		||||
                if(s->mix_1_1_simd && len1)
 | 
			
		||||
                    s->mix_1_1_simd(out->ch[out_i]    , in->ch[in_i]    , s->native_matrix, in->ch_count*out_i + in_i, len1);
 | 
			
		||||
                if(len != len1)
 | 
			
		||||
                    s->mix_1_1_f   (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1);
 | 
			
		||||
            }else if(mustcopy){
 | 
			
		||||
                memcpy(out->ch[out_i], in->ch[in_i], len*out->bps);
 | 
			
		||||
            }else{
 | 
			
		||||
@@ -379,7 +392,12 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
 | 
			
		||||
        case 2: {
 | 
			
		||||
            int in_i1 = s->matrix_ch[out_i][1];
 | 
			
		||||
            int in_i2 = s->matrix_ch[out_i][2];
 | 
			
		||||
            s->mix_2_1_f(out->ch[out_i], in->ch[in_i1], in->ch[in_i2], s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len);
 | 
			
		||||
            if(s->mix_2_1_simd && len1)
 | 
			
		||||
                s->mix_2_1_simd(out->ch[out_i]    , in->ch[in_i1]    , in->ch[in_i2]    , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
 | 
			
		||||
            else
 | 
			
		||||
                s->mix_2_1_f   (out->ch[out_i]    , in->ch[in_i1]    , in->ch[in_i2]    , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
 | 
			
		||||
            if(len != len1)
 | 
			
		||||
                s->mix_2_1_f   (out->ch[out_i]+off, in->ch[in_i1]+off, in->ch[in_i2]+off, s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len-len1);
 | 
			
		||||
            break;}
 | 
			
		||||
        default:
 | 
			
		||||
            if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){
 | 
			
		||||
 
 | 
			
		||||
@@ -121,6 +121,7 @@ int swri_resample_double(struct ResampleContext *c,double  *dst, const double  *
 | 
			
		||||
int swri_rematrix_init(SwrContext *s);
 | 
			
		||||
void swri_rematrix_free(SwrContext *s);
 | 
			
		||||
int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy);
 | 
			
		||||
void swri_rematrix_init_x86(struct SwrContext *s);
 | 
			
		||||
 | 
			
		||||
void swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,2 +1,3 @@
 | 
			
		||||
YASM-OBJS                       += x86/swresample_x86.o\
 | 
			
		||||
                                   x86/audio_convert.o\
 | 
			
		||||
                                   x86/rematrix.o\
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										66
									
								
								libswresample/x86/rematrix.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								libswresample/x86/rematrix.asm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,66 @@
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
;* Copyright (c) 2012 Michael Niedermayer
 | 
			
		||||
;*
 | 
			
		||||
;* This file is part of FFmpeg.
 | 
			
		||||
;*
 | 
			
		||||
;* FFmpeg is free software; you can redistribute it and/or
 | 
			
		||||
;* modify it under the terms of the GNU Lesser General Public
 | 
			
		||||
;* License as published by the Free Software Foundation; either
 | 
			
		||||
;* version 2.1 of the License, or (at your option) any later version.
 | 
			
		||||
;*
 | 
			
		||||
;* FFmpeg is distributed in the hope that it will be useful,
 | 
			
		||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
			
		||||
;* Lesser General Public License for more details.
 | 
			
		||||
;*
 | 
			
		||||
;* You should have received a copy of the GNU Lesser General Public
 | 
			
		||||
;* License along with FFmpeg; if not, write to the Free Software
 | 
			
		||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
;******************************************************************************
 | 
			
		||||
 | 
			
		||||
%include "libavutil/x86/x86inc.asm"
 | 
			
		||||
%include "libavutil/x86/x86util.asm"
 | 
			
		||||
 | 
			
		||||
SECTION .text
 | 
			
		||||
 | 
			
		||||
;------------------------------------------------------------------------------
; MIX1_FLT <a|u>
;
; Emits mix_1_1_<a|u>_float for the current INIT_XMM/INIT_YMM instruction set:
;     out[i] = in[i] * coeffp[index]      for i in [0, len)
;
; Arguments (in order): out, in, coeffp, index, len.
;   %1 = a : aligned stores / memory-operand multiplies; when either in or out
;            is not mmsize-aligned, control jumps into the body of the
;            unaligned variant instead.
;   %1 = u : unaligned loads and stores.
;
; The loop is bottom-tested and consumes 2*mmsize bytes per pass, so len
; (counted in 4-byte floats) must be non-zero and a multiple of 2*mmsize/4.
;
; The "u" variant must be instantiated before the "a" variant for a given
; instruction set, because "a" jumps to the label defined by "u".
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    ; Any low address bit set in in/out means the buffer is not mmsize-aligned:
    ; take the unaligned code path.
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
; Shared entry label for the aligned variant's fallback. The trailing colon
; makes this an explicit label definition instead of an orphan label.
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]   ; m2 = coeffp[index] in every lane
    shl lenq    , 2                         ; float count -> byte count
    add inq     , lenq                      ; point in/out at the end of the data ...
    add outq    , lenq
    neg lenq                                ; ... and walk a negative offset up to 0
.next:
%ifidn %1, a
    mulps        m0, m2, [inq + lenq         ]
    mulps        m1, m2, [inq + lenq + mmsize]
%else
    movu         m0, [inq + lenq         ]
    movu         m1, [inq + lenq + mmsize]
    mulps        m0, m0, m2
    mulps        m1, m1, m2
%endif
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m1
    add        lenq, mmsize*2               ; two vectors handled per pass
        jl .next                            ; loop while the offset is still negative
    REP_RET
%endmacro
 | 
			
		||||
 | 
			
		||||
INIT_XMM sse
 | 
			
		||||
MIX1_FLT u
 | 
			
		||||
MIX1_FLT a
 | 
			
		||||
 | 
			
		||||
%if HAVE_AVX
 | 
			
		||||
INIT_YMM avx
 | 
			
		||||
MIX1_FLT u
 | 
			
		||||
MIX1_FLT a
 | 
			
		||||
%endif
 | 
			
		||||
@@ -142,3 +142,35 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define D(type, simd) \
 | 
			
		||||
mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
 | 
			
		||||
mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
 | 
			
		||||
 | 
			
		||||
D(float, sse)
 | 
			
		||||
D(float, avx)
 | 
			
		||||
D(int16, mmx)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void swri_rematrix_init_x86(struct SwrContext *s){
 | 
			
		||||
    int mm_flags = av_get_cpu_flags();
 | 
			
		||||
    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
 | 
			
		||||
    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
 | 
			
		||||
    int num    = nb_in * nb_out;
 | 
			
		||||
    int i,j;
 | 
			
		||||
 | 
			
		||||
    s->mix_1_1_simd = NULL;
 | 
			
		||||
    s->mix_2_1_simd = NULL;
 | 
			
		||||
 | 
			
		||||
    if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
 | 
			
		||||
    } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
 | 
			
		||||
        if(mm_flags & AV_CPU_FLAG_SSE) {
 | 
			
		||||
            s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
 | 
			
		||||
        }
 | 
			
		||||
        if(HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) {
 | 
			
		||||
            s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
 | 
			
		||||
        }
 | 
			
		||||
        s->native_simd_matrix = av_mallocz(num * sizeof(float));
 | 
			
		||||
        memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user