x86: check for AV_CPU_FLAG_AVXSLOW where useful
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent 16c430e8ef
commit c16e99e3b3
@@ -103,10 +103,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
|
|||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
s->synth_filter_float = synth_filter_sse2;
|
s->synth_filter_float = synth_filter_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
s->synth_filter_float = synth_filter_avx;
|
s->synth_filter_float = synth_filter_avx;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_FMA3(cpu_flags)) {
|
if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
|
||||||
s->synth_filter_float = synth_filter_fma3;
|
s->synth_filter_float = synth_filter_fma3;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
@@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s)
|
|||||||
s->dct32 = ff_dct32_float_sse;
|
s->dct32 = ff_dct32_float_sse;
|
||||||
if (EXTERNAL_SSE2(cpu_flags))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
s->dct32 = ff_dct32_float_sse2;
|
s->dct32 = ff_dct32_float_sse2;
|
||||||
if (EXTERNAL_AVX(cpu_flags))
|
if (EXTERNAL_AVX_FAST(cpu_flags))
|
||||||
s->dct32 = ff_dct32_float_avx;
|
s->dct32 = ff_dct32_float_avx;
|
||||||
}
|
}
|
||||||
|
@@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s)
|
|||||||
s->fft_calc = ff_fft_calc_sse;
|
s->fft_calc = ff_fft_calc_sse;
|
||||||
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
|
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) {
|
if (EXTERNAL_AVX_FAST(cpu_flags) && s->nbits >= 5) {
|
||||||
/* AVX for SB */
|
/* AVX for SB */
|
||||||
s->imdct_half = ff_imdct_half_avx;
|
s->imdct_half = ff_imdct_half_avx;
|
||||||
s->fft_calc = ff_fft_calc_avx;
|
s->fft_calc = ff_fft_calc_avx;
|
||||||
|
@@ -483,12 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
|
|||||||
dsp->itxfm_add[TX_32X32][ADST_DCT] =
|
dsp->itxfm_add[TX_32X32][ADST_DCT] =
|
||||||
dsp->itxfm_add[TX_32X32][DCT_ADST] =
|
dsp->itxfm_add[TX_32X32][DCT_ADST] =
|
||||||
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
|
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
|
||||||
init_fpel(1, 0, 32, put, avx);
|
|
||||||
init_fpel(0, 0, 64, put, avx);
|
|
||||||
init_lpf(avx);
|
init_lpf(avx);
|
||||||
init_dir_tm_h_ipred(8, avx);
|
init_dir_tm_h_ipred(8, avx);
|
||||||
init_dir_tm_h_ipred(16, avx);
|
init_dir_tm_h_ipred(16, avx);
|
||||||
init_dir_tm_h_ipred(32, avx);
|
init_dir_tm_h_ipred(32, avx);
|
||||||
|
}
|
||||||
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
|
init_fpel(1, 0, 32, put, avx);
|
||||||
|
init_fpel(0, 0, 64, put, avx);
|
||||||
init_ipred(32, avx, v, VERT);
|
init_ipred(32, avx, v, VERT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol)
|
|||||||
vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
|
vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
|
||||||
vol->samples_align = 4;
|
vol->samples_align = 4;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
vol->scale_samples = ff_scale_samples_s32_avx;
|
vol->scale_samples = ff_scale_samples_s32_avx;
|
||||||
vol->samples_align = 8;
|
vol->samples_align = 8;
|
||||||
}
|
}
|
||||||
|
@@ -85,14 +85,14 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
|
||||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
|
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_FMA3(cpu_flags)) {
|
if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
|
||||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
|
||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
|
||||||
}
|
}
|
||||||
|
@@ -35,7 +35,7 @@ av_cold void ff_init_lls_x86(LLSModel *m)
|
|||||||
if (m->indep_count >= 4)
|
if (m->indep_count >= 4)
|
||||||
m->evaluate_lls = ff_evaluate_lls_sse2;
|
m->evaluate_lls = ff_evaluate_lls_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
m->update_lls = ff_update_lls_avx;
|
m->update_lls = ff_update_lls_avx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -145,9 +145,11 @@ MULTI_CAPS_FUNC(SSE2, sse2)
|
|||||||
ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
|
ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(EXTERNAL_AVX(mm_flags)) {
|
if(EXTERNAL_AVX_FAST(mm_flags)) {
|
||||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
|
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||||
ac->simd_f = ff_int32_to_float_a_avx;
|
ac->simd_f = ff_int32_to_float_a_avx;
|
||||||
|
}
|
||||||
|
if(EXTERNAL_AVX(mm_flags)) {
|
||||||
if(channels == 6) {
|
if(channels == 6) {
|
||||||
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
|
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
|
||||||
ac->simd_f = ff_pack_6ch_float_to_float_a_avx;
|
ac->simd_f = ff_pack_6ch_float_to_float_a_avx;
|
||||||
|
@@ -73,7 +73,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
|
|||||||
s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
|
s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
|
||||||
s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
|
s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
|
||||||
}
|
}
|
||||||
if(EXTERNAL_AVX(mm_flags)) {
|
if(EXTERNAL_AVX_FAST(mm_flags)) {
|
||||||
s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
|
s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
|
||||||
s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
|
s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
|
||||||
}
|
}
|
||||||
|
@@ -67,11 +67,11 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
|
|||||||
c->dsp.resample = c->linear ? ff_resample_linear_float_sse
|
c->dsp.resample = c->linear ? ff_resample_linear_float_sse
|
||||||
: ff_resample_common_float_sse;
|
: ff_resample_common_float_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX_FAST(mm_flags)) {
|
||||||
c->dsp.resample = c->linear ? ff_resample_linear_float_avx
|
c->dsp.resample = c->linear ? ff_resample_linear_float_avx
|
||||||
: ff_resample_common_float_avx;
|
: ff_resample_common_float_avx;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_FMA3(mm_flags)) {
|
if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
|
||||||
c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
|
c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
|
||||||
: ff_resample_common_float_fma3;
|
: ff_resample_common_float_fma3;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user