diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 0c9b0ad7ff..fba7de501d 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1729,9 +1729,9 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) if (cpe->ms_mask[idx] && cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { for (group = 0; group < ics->group_len[g]; group++) { - ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i], - ch1 + group * 128 + offsets[i], - offsets[i+1] - offsets[i]); + ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], + ch1 + group * 128 + offsets[i], + offsets[i+1] - offsets[i]); } } } diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 35b3a53ce8..c67bbed681 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -142,7 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); -void ff_butterflies_float_neon(float *v1, float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, @@ -294,7 +293,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; } - c->butterflies_float = ff_butterflies_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon; c->vector_clipf = ff_vector_clipf_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 4cdbb1771d..31ad72ff89 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -531,18 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 bx lr endfunc -function ff_butterflies_float_neon, export=1 -1: vld1.32 {q0},[r0,:128] - vld1.32 {q1},[r1,:128] - vsub.f32 q2, q0, q1 - vadd.f32 q1, q0, q1 - vst1.32 {q2},[r1,:128]! - vst1.32 {q1},[r0,:128]! - subs r2, r2, #4 - bgt 1b - bx lr -endfunc - function ff_scalarproduct_float_neon, export=1 vmov.f32 q2, #0.0 1: vld1.32 {q0},[r0,:128]! diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 01232bf001..49e56cc0d4 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2483,17 +2483,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2, - int len) -{ - int i; - for (i = 0; i < len; i++) { - float t = v1[i] - v2[i]; - v1[i] += v2[i]; - v2[i] = t; - } -} - float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) { float p = 0.0; @@ -2887,7 +2876,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->apply_window_int16 = apply_window_int16_c; c->vector_clip_int32 = vector_clip_int32_c; c->scalarproduct_float = ff_scalarproduct_float_c; - c->butterflies_float = butterflies_float_c; c->shrink[0]= av_image_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 3329ff816e..8953d972bd 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -367,13 +367,6 @@ typedef struct DSPContext { * @param len length of vectors, multiple of 4 */ float (*scalarproduct_float)(const float *v1, const float *v2, int len); - /** - * Calculate the sum and difference of two vectors of floats. - * @param v1 first input vector, sum output, 16-byte aligned - * @param v2 second input vector, difference output, 16-byte aligned - * @param len length of vectors, multiple of 4 - */ - void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len); /* (I)DCT */ void (*fdct)(DCTELEM *block/* align 16*/); diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 1295c8ec6c..a09e3c5d9f 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -36,6 +36,7 @@ #include #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "libavutil/libm.h" #include "avcodec.h" #include "get_bits.h" @@ -96,6 +97,7 @@ typedef struct { GetBitContext gb; DSPContext dsp; + AVFloatDSPContext fdsp; FFTContext fft; DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2]; float *out_samples; @@ -245,6 +247,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) return ret; } ff_dsputil_init(&q->dsp, avctx); + avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO; @@ -967,8 +970,8 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, } if (avctx->channels == 2) { - q->dsp.butterflies_float((float *)q->frame.extended_data[0], - (float *)q->frame.extended_data[1], COEFFS); + q->fdsp.butterflies_float((float *)q->frame.extended_data[0], + (float *)q->frame.extended_data[1], COEFFS); } *got_frame_ptr = 1; diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index beef38dde3..995738aae7 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -26,6 +26,7 @@ #include "libavutil/avassert.h" #include "libavutil/channel_layout.h" +#include "libavutil/float_dsp.h" #include "libavutil/libm.h" #include "avcodec.h" #include "get_bits.h" @@ -84,7 +85,7 @@ typedef struct MPADecodeContext { int err_recognition; AVCodecContext* avctx; MPADSPContext mpadsp; - DSPContext dsp; + AVFloatDSPContext fdsp; AVFrame frame; } MPADecodeContext; @@ -441,8 +442,8 @@ static av_cold int decode_init(AVCodecContext * avctx) s->avctx = avctx; + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_mpadsp_init(&s->mpadsp); - ff_dsputil_init(&s->dsp, avctx); if (avctx->request_sample_fmt == OUT_FMT && avctx->codec_id != AV_CODEC_ID_MP3ON4) @@ -1164,7 +1165,7 @@ found2: /* NOTE: the 1/sqrt(2) normalization factor is included in the global gain */ #if CONFIG_FLOAT - s-> dsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); + s->fdsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); #else tab0 = g0->sb_hybrid; tab1 = g1->sb_hybrid; diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c index 062bad4fd4..8bb44cce29 100644 --- a/libavcodec/twinvq.c +++ b/libavcodec/twinvq.c @@ -178,7 +178,6 @@ static const ModeTab mode_44_48 = { typedef struct TwinContext { AVCodecContext *avctx; AVFrame frame; - DSPContext dsp; AVFloatDSPContext fdsp; FFTContext mdct_ctx[3]; @@ -693,7 +692,7 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype, if (tctx->avctx->channels == 2) { memcpy(&out[1][0], &prev_buf[2*mtab->size], size1 * sizeof(out[1][0])); memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0])); - tctx->dsp.butterflies_float(out[0], out[1], mtab->size); + tctx->fdsp.butterflies_float(out[0], out[1], mtab->size); } } @@ -1162,7 +1161,6 @@ static av_cold int twin_decode_init(AVCodecContext *avctx) return -1; } - ff_dsputil_init(&tctx->dsp, avctx); avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); if ((ret = init_mdct_win(tctx))) { av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); diff --git a/libavcodec/wma.c b/libavcodec/wma.c index d0c0b34868..5af20739a5 100644 --- a/libavcodec/wma.c +++ b/libavcodec/wma.c @@ -82,7 +82,6 @@ int ff_wma_init(AVCodecContext *avctx, int flags2) || avctx->bit_rate <= 0) return -1; - ff_dsputil_init(&s->dsp, avctx); ff_fmt_convert_init(&s->fmt_conv, avctx); avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); diff --git a/libavcodec/wma.h b/libavcodec/wma.h index 4db4faa54a..36c6e55e46 100644 --- a/libavcodec/wma.h +++ b/libavcodec/wma.h @@ -132,7 +132,6 @@ typedef struct WMACodecContext { float lsp_pow_e_table[256]; float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; - DSPContext dsp; FmtConvertContext fmt_conv; AVFloatDSPContext fdsp; diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c index 525df6d353..cf8331b613 100644 --- a/libavcodec/wmadec.c +++ b/libavcodec/wmadec.c @@ -731,7 +731,7 @@ static int wma_decode_block(WMACodecContext *s) s->channel_coded[0] = 1; } - s->dsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); + s->fdsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); } next: diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index c6f02bd2c5..b3644e82a2 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -41,6 +41,8 @@ void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, void ff_vector_fmul_reverse_neon(float *dst, const float *src0, const float *src1, int len); +void ff_butterflies_float_neon(float *v1, float *v2, int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; @@ -49,4 +51,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) fdsp->vector_fmul_window = ff_vector_fmul_window_neon; fdsp->vector_fmul_add = ff_vector_fmul_add_neon; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; + fdsp->butterflies_float = ff_butterflies_float_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index d00e59de8f..4acc406d33 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -244,3 +244,15 @@ function ff_vector_fmul_reverse_neon, export=1 2: vst1.32 {q8-q9}, [r0,:128]! bx lr endfunc + +function ff_butterflies_float_neon, export=1 +1: vld1.32 {q0},[r0,:128] + vld1.32 {q1},[r1,:128] + vsub.f32 q2, q0, q1 + vadd.f32 q1, q0, q1 + vst1.32 {q2},[r1,:128]! + vst1.32 {q1},[r0,:128]! + subs r2, r2, #4 + bgt 1b + bx lr +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 810acfe26d..05688e4c3f 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -92,6 +92,18 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, dst[i] = src0[i] * src1[-i]; } +static void butterflies_float_c(float *restrict v1, float *restrict v2, + int len) +{ + int i; + + for (i = 0; i < len; i++) { + float t = v1[i] - v2[i]; + v1[i] += v2[i]; + v2[i] = t; + } +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; @@ -101,6 +113,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) fdsp->vector_fmul_window = vector_fmul_window_c; fdsp->vector_fmul_add = vector_fmul_add_c; fdsp->vector_fmul_reverse = vector_fmul_reverse_c; + fdsp->butterflies_float = butterflies_float_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 3ee4ca269c..ff83beddbe 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -137,6 +137,15 @@ typedef struct AVFloatDSPContext { */ void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); + + /** + * Calculate the sum and difference of two vectors of floats. + * + * @param v1 first input vector, sum output, 16-byte aligned + * @param v2 second input vector, difference output, 16-byte aligned + * @param len length of vectors, multiple of 4 + */ + void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); } AVFloatDSPContext; /**