Merge commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b'
* commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b':
  dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
  aacenc: use the correct output buffer
  aacdec: fix signed overflows in lcg_random()
  base64: fix signed overflow in shift

Conflicts:
  libavcodec/dsputil.c
  libavutil/base64.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		@@ -819,7 +819,8 @@ static int decode_audio_specific_config(AACContext *ac,
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
static av_always_inline int lcg_random(unsigned previous_val)
 | 
					static av_always_inline int lcg_random(unsigned previous_val)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    return previous_val * 1664525 + 1013904223;
 | 
					    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
 | 
				
			||||||
 | 
					    return v.s;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static av_always_inline void reset_predict_state(PredictorState *ps)
 | 
					static av_always_inline void reset_predict_state(PredictorState *ps)
 | 
				
			||||||
@@ -1394,7 +1395,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
 | 
					                    band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
 | 
				
			||||||
                    scale = sf[idx] / sqrtf(band_energy);
 | 
					                    scale = sf[idx] / sqrtf(band_energy);
 | 
				
			||||||
                    ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
 | 
					                    ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
 | 
					                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
 | 
				
			||||||
@@ -1540,7 +1541,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
 | 
				
			|||||||
                            }
 | 
					                            }
 | 
				
			||||||
                        } while (len -= 2);
 | 
					                        } while (len -= 2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
 | 
					                        ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1764,10 +1765,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
 | 
				
			|||||||
                        c *= 1 - 2 * cpe->ms_mask[idx];
 | 
					                        c *= 1 - 2 * cpe->ms_mask[idx];
 | 
				
			||||||
                    scale = c * sce1->sf[idx];
 | 
					                    scale = c * sce1->sf[idx];
 | 
				
			||||||
                    for (group = 0; group < ics->group_len[g]; group++)
 | 
					                    for (group = 0; group < ics->group_len[g]; group++)
 | 
				
			||||||
                        ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
 | 
					                        ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
 | 
				
			||||||
                                                   coef0 + group * 128 + offsets[i],
 | 
					                                                    coef0 + group * 128 + offsets[i],
 | 
				
			||||||
                                                   scale,
 | 
					                                                    scale,
 | 
				
			||||||
                                                   offsets[i + 1] - offsets[i]);
 | 
					                                                    offsets[i + 1] - offsets[i]);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                int bt_run_end = sce1->band_type_run_end[idx];
 | 
					                int bt_run_end = sce1->band_type_run_end[idx];
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
 | 
					void ff_vector_fmul_window_neon(float *dst, const float *src0,
 | 
				
			||||||
                                const float *src1, const float *win, int len);
 | 
					                                const float *src1, const float *win, int len);
 | 
				
			||||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 | 
					 | 
				
			||||||
                                int len);
 | 
					 | 
				
			||||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
 | 
					void ff_butterflies_float_neon(float *v1, float *v2, int len);
 | 
				
			||||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
 | 
					float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
 | 
				
			||||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
 | 
					void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
 | 
				
			||||||
@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    c->vector_fmul_window         = ff_vector_fmul_window_neon;
 | 
					    c->vector_fmul_window         = ff_vector_fmul_window_neon;
 | 
				
			||||||
    c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
 | 
					 | 
				
			||||||
    c->butterflies_float          = ff_butterflies_float_neon;
 | 
					    c->butterflies_float          = ff_butterflies_float_neon;
 | 
				
			||||||
    c->scalarproduct_float        = ff_scalarproduct_float_neon;
 | 
					    c->scalarproduct_float        = ff_scalarproduct_float_neon;
 | 
				
			||||||
    c->vector_fmul_reverse        = ff_vector_fmul_reverse_neon;
 | 
					    c->vector_fmul_reverse        = ff_vector_fmul_reverse_neon;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
 | 
				
			|||||||
endfunc
 | 
					endfunc
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function ff_vector_fmul_scalar_neon, export=1
 | 
					 | 
				
			||||||
VFP     len .req r2
 | 
					 | 
				
			||||||
NOVFP   len .req r3
 | 
					 | 
				
			||||||
VFP     vdup.32         q8,  d0[0]
 | 
					 | 
				
			||||||
NOVFP   vdup.32         q8,  r2
 | 
					 | 
				
			||||||
        bics            r12, len, #15
 | 
					 | 
				
			||||||
        beq             3f
 | 
					 | 
				
			||||||
        vld1.32         {q0},[r1,:128]!
 | 
					 | 
				
			||||||
        vld1.32         {q1},[r1,:128]!
 | 
					 | 
				
			||||||
1:      vmul.f32        q0,  q0,  q8
 | 
					 | 
				
			||||||
        vld1.32         {q2},[r1,:128]!
 | 
					 | 
				
			||||||
        vmul.f32        q1,  q1,  q8
 | 
					 | 
				
			||||||
        vld1.32         {q3},[r1,:128]!
 | 
					 | 
				
			||||||
        vmul.f32        q2,  q2,  q8
 | 
					 | 
				
			||||||
        vst1.32         {q0},[r0,:128]!
 | 
					 | 
				
			||||||
        vmul.f32        q3,  q3,  q8
 | 
					 | 
				
			||||||
        vst1.32         {q1},[r0,:128]!
 | 
					 | 
				
			||||||
        subs            r12, r12, #16
 | 
					 | 
				
			||||||
        beq             2f
 | 
					 | 
				
			||||||
        vld1.32         {q0},[r1,:128]!
 | 
					 | 
				
			||||||
        vst1.32         {q2},[r0,:128]!
 | 
					 | 
				
			||||||
        vld1.32         {q1},[r1,:128]!
 | 
					 | 
				
			||||||
        vst1.32         {q3},[r0,:128]!
 | 
					 | 
				
			||||||
        b               1b
 | 
					 | 
				
			||||||
2:      vst1.32         {q2},[r0,:128]!
 | 
					 | 
				
			||||||
        vst1.32         {q3},[r0,:128]!
 | 
					 | 
				
			||||||
        ands            len, len, #15
 | 
					 | 
				
			||||||
        it              eq
 | 
					 | 
				
			||||||
        bxeq            lr
 | 
					 | 
				
			||||||
3:      vld1.32         {q0},[r1,:128]!
 | 
					 | 
				
			||||||
        vmul.f32        q0,  q0,  q8
 | 
					 | 
				
			||||||
        vst1.32         {q0},[r0,:128]!
 | 
					 | 
				
			||||||
        subs            len, len, #4
 | 
					 | 
				
			||||||
        bgt             3b
 | 
					 | 
				
			||||||
        bx              lr
 | 
					 | 
				
			||||||
        .unreq          len
 | 
					 | 
				
			||||||
endfunc
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function ff_butterflies_float_neon, export=1
 | 
					function ff_butterflies_float_neon, export=1
 | 
				
			||||||
1:      vld1.32         {q0},[r0,:128]
 | 
					1:      vld1.32         {q0},[r0,:128]
 | 
				
			||||||
        vld1.32         {q1},[r1,:128]
 | 
					        vld1.32         {q1},[r1,:128]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2514,14 +2514,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
 | 
					 | 
				
			||||||
                                 int len)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int i;
 | 
					 | 
				
			||||||
    for (i = 0; i < len; i++)
 | 
					 | 
				
			||||||
        dst[i] = src[i] * mul;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
 | 
					static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
 | 
				
			||||||
                                int len)
 | 
					                                int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -3039,7 +3031,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
 | 
				
			|||||||
    c->scalarproduct_float = ff_scalarproduct_float_c;
 | 
					    c->scalarproduct_float = ff_scalarproduct_float_c;
 | 
				
			||||||
    c->butterflies_float = butterflies_float_c;
 | 
					    c->butterflies_float = butterflies_float_c;
 | 
				
			||||||
    c->butterflies_float_interleave = butterflies_float_interleave_c;
 | 
					    c->butterflies_float_interleave = butterflies_float_interleave_c;
 | 
				
			||||||
    c->vector_fmul_scalar = vector_fmul_scalar_c;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    c->shrink[0]= av_image_copy_plane;
 | 
					    c->shrink[0]= av_image_copy_plane;
 | 
				
			||||||
    c->shrink[1]= ff_shrink22;
 | 
					    c->shrink[1]= ff_shrink22;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -392,16 +392,6 @@ typedef struct DSPContext {
 | 
				
			|||||||
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
 | 
					    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
 | 
				
			||||||
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
 | 
					    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
 | 
				
			||||||
    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
 | 
					    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
 | 
				
			||||||
    /**
 | 
					 | 
				
			||||||
     * Multiply a vector of floats by a scalar float.  Source and
 | 
					 | 
				
			||||||
     * destination vectors must overlap exactly or not at all.
 | 
					 | 
				
			||||||
     * @param dst result vector, 16-byte aligned
 | 
					 | 
				
			||||||
     * @param src input vector, 16-byte aligned
 | 
					 | 
				
			||||||
     * @param mul scalar value
 | 
					 | 
				
			||||||
     * @param len length of vector, multiple of 4
 | 
					 | 
				
			||||||
     */
 | 
					 | 
				
			||||||
    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
 | 
					 | 
				
			||||||
                               int len);
 | 
					 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Calculate the scalar product of two vectors of floats.
 | 
					     * Calculate the scalar product of two vectors of floats.
 | 
				
			||||||
     * @param v1  first vector, 16-byte aligned
 | 
					     * @param v1  first vector, 16-byte aligned
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,12 +28,12 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include "libavutil/channel_layout.h"
 | 
					#include "libavutil/channel_layout.h"
 | 
				
			||||||
#include "libavutil/common.h"
 | 
					#include "libavutil/common.h"
 | 
				
			||||||
 | 
					#include "libavutil/float_dsp.h"
 | 
				
			||||||
#include "libavutil/intreadwrite.h"
 | 
					#include "libavutil/intreadwrite.h"
 | 
				
			||||||
#include "libavutil/log.h"
 | 
					#include "libavutil/log.h"
 | 
				
			||||||
#include "libavutil/opt.h"
 | 
					#include "libavutil/opt.h"
 | 
				
			||||||
#include "avcodec.h"
 | 
					#include "avcodec.h"
 | 
				
			||||||
#include "audio_frame_queue.h"
 | 
					#include "audio_frame_queue.h"
 | 
				
			||||||
#include "dsputil.h"
 | 
					 | 
				
			||||||
#include "internal.h"
 | 
					#include "internal.h"
 | 
				
			||||||
#include "mpegaudio.h"
 | 
					#include "mpegaudio.h"
 | 
				
			||||||
#include "mpegaudiodecheader.h"
 | 
					#include "mpegaudiodecheader.h"
 | 
				
			||||||
@@ -50,7 +50,7 @@ typedef struct LAMEContext {
 | 
				
			|||||||
    int reservoir;
 | 
					    int reservoir;
 | 
				
			||||||
    float *samples_flt[2];
 | 
					    float *samples_flt[2];
 | 
				
			||||||
    AudioFrameQueue afq;
 | 
					    AudioFrameQueue afq;
 | 
				
			||||||
    DSPContext dsp;
 | 
					    AVFloatDSPContext fdsp;
 | 
				
			||||||
} LAMEContext;
 | 
					} LAMEContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -168,7 +168,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
 | 
				
			|||||||
    if (ret < 0)
 | 
					    if (ret < 0)
 | 
				
			||||||
        goto error;
 | 
					        goto error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ff_dsputil_init(&s->dsp, avctx);
 | 
					    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
error:
 | 
					error:
 | 
				
			||||||
@@ -206,10 +206,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 | 
				
			|||||||
                return AVERROR(EINVAL);
 | 
					                return AVERROR(EINVAL);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            for (ch = 0; ch < avctx->channels; ch++) {
 | 
					            for (ch = 0; ch < avctx->channels; ch++) {
 | 
				
			||||||
                s->dsp.vector_fmul_scalar(s->samples_flt[ch],
 | 
					                s->fdsp.vector_fmul_scalar(s->samples_flt[ch],
 | 
				
			||||||
                                          (const float *)frame->data[ch],
 | 
					                                           (const float *)frame->data[ch],
 | 
				
			||||||
                                          32768.0f,
 | 
					                                           32768.0f,
 | 
				
			||||||
                                          FFALIGN(frame->nb_samples, 8));
 | 
					                                           FFALIGN(frame->nb_samples, 8));
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt);
 | 
					            ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt);
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -108,7 +108,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for (ch = 0; ch < avctx->channels; ch++) {
 | 
					    for (ch = 0; ch < avctx->channels; ch++) {
 | 
				
			||||||
        memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
 | 
					        memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
 | 
				
			||||||
        s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
 | 
					        s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
 | 
				
			||||||
        s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len);
 | 
					        s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len);
 | 
				
			||||||
        s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
 | 
					        s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
 | 
				
			||||||
        mdct->mdct_calc(mdct, s->coefs[ch], s->output);
 | 
					        mdct->mdct_calc(mdct, s->coefs[ch], s->output);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -86,6 +86,7 @@
 | 
				
			|||||||
 * subframe in order to reconstruct the output samples.
 | 
					 * subframe in order to reconstruct the output samples.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "libavutil/float_dsp.h"
 | 
				
			||||||
#include "libavutil/intfloat.h"
 | 
					#include "libavutil/intfloat.h"
 | 
				
			||||||
#include "libavutil/intreadwrite.h"
 | 
					#include "libavutil/intreadwrite.h"
 | 
				
			||||||
#include "avcodec.h"
 | 
					#include "avcodec.h"
 | 
				
			||||||
@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
 | 
				
			|||||||
    AVCodecContext*  avctx;                         ///< codec context for av_log
 | 
					    AVCodecContext*  avctx;                         ///< codec context for av_log
 | 
				
			||||||
    AVFrame          frame;                         ///< AVFrame for decoded output
 | 
					    AVFrame          frame;                         ///< AVFrame for decoded output
 | 
				
			||||||
    DSPContext       dsp;                           ///< accelerated DSP functions
 | 
					    DSPContext       dsp;                           ///< accelerated DSP functions
 | 
				
			||||||
 | 
					    AVFloatDSPContext fdsp;
 | 
				
			||||||
    uint8_t          frame_data[MAX_FRAMESIZE +
 | 
					    uint8_t          frame_data[MAX_FRAMESIZE +
 | 
				
			||||||
                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 | 
					                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 | 
				
			||||||
    PutBitContext    pb;                            ///< context for filling the frame_data buffer
 | 
					    PutBitContext    pb;                            ///< context for filling the frame_data buffer
 | 
				
			||||||
@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    s->avctx = avctx;
 | 
					    s->avctx = avctx;
 | 
				
			||||||
    ff_dsputil_init(&s->dsp, avctx);
 | 
					    ff_dsputil_init(&s->dsp, avctx);
 | 
				
			||||||
 | 
					    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 | 
					    init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 | 
					    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 | 
				
			||||||
@@ -1018,12 +1022,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
 | 
				
			|||||||
                    }
 | 
					                    }
 | 
				
			||||||
                } else if (s->avctx->channels == 2) {
 | 
					                } else if (s->avctx->channels == 2) {
 | 
				
			||||||
                    int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
 | 
					                    int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
 | 
				
			||||||
                    s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0],
 | 
					                    s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0],
 | 
				
			||||||
                                              ch_data[0] + sfb[0],
 | 
					                                               ch_data[0] + sfb[0],
 | 
				
			||||||
                                              181.0 / 128, len);
 | 
					                                               181.0 / 128, len);
 | 
				
			||||||
                    s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0],
 | 
					                    s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0],
 | 
				
			||||||
                                              ch_data[1] + sfb[0],
 | 
					                                               ch_data[1] + sfb[0],
 | 
				
			||||||
                                              181.0 / 128, len);
 | 
					                                               181.0 / 128, len);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -1270,9 +1274,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
 | 
				
			|||||||
                            s->channel[c].scale_factor_step;
 | 
					                            s->channel[c].scale_factor_step;
 | 
				
			||||||
                const float quant = pow(10.0, exp / 20.0);
 | 
					                const float quant = pow(10.0, exp / 20.0);
 | 
				
			||||||
                int start = s->cur_sfb_offsets[b];
 | 
					                int start = s->cur_sfb_offsets[b];
 | 
				
			||||||
                s->dsp.vector_fmul_scalar(s->tmp + start,
 | 
					                s->fdsp.vector_fmul_scalar(s->tmp + start,
 | 
				
			||||||
                                          s->channel[c].coeffs + start,
 | 
					                                           s->channel[c].coeffs + start,
 | 
				
			||||||
                                          quant, end - start);
 | 
					                                           quant, end - start);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            /** apply imdct (imdct_half == DCTIV with reverse) */
 | 
					            /** apply imdct (imdct_half == DCTIV with reverse) */
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
 | 
				
			|||||||
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
 | 
					void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
 | 
				
			||||||
                                int len);
 | 
					                                int len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
 | 
				
			||||||
 | 
					                                int len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
 | 
					void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    fdsp->vector_fmul = ff_vector_fmul_neon;
 | 
					    fdsp->vector_fmul = ff_vector_fmul_neon;
 | 
				
			||||||
    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
 | 
					    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
 | 
				
			||||||
 | 
					    fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -108,3 +108,41 @@ NOVFP   vdup.32         q15, r2
 | 
				
			|||||||
        bx              lr
 | 
					        bx              lr
 | 
				
			||||||
        .unreq          len
 | 
					        .unreq          len
 | 
				
			||||||
endfunc
 | 
					endfunc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function ff_vector_fmul_scalar_neon, export=1
 | 
				
			||||||
 | 
					VFP     len .req r2
 | 
				
			||||||
 | 
					NOVFP   len .req r3
 | 
				
			||||||
 | 
					VFP     vdup.32         q8,  d0[0]
 | 
				
			||||||
 | 
					NOVFP   vdup.32         q8,  r2
 | 
				
			||||||
 | 
					        bics            r12, len, #15
 | 
				
			||||||
 | 
					        beq             3f
 | 
				
			||||||
 | 
					        vld1.32         {q0},[r1,:128]!
 | 
				
			||||||
 | 
					        vld1.32         {q1},[r1,:128]!
 | 
				
			||||||
 | 
					1:      vmul.f32        q0,  q0,  q8
 | 
				
			||||||
 | 
					        vld1.32         {q2},[r1,:128]!
 | 
				
			||||||
 | 
					        vmul.f32        q1,  q1,  q8
 | 
				
			||||||
 | 
					        vld1.32         {q3},[r1,:128]!
 | 
				
			||||||
 | 
					        vmul.f32        q2,  q2,  q8
 | 
				
			||||||
 | 
					        vst1.32         {q0},[r0,:128]!
 | 
				
			||||||
 | 
					        vmul.f32        q3,  q3,  q8
 | 
				
			||||||
 | 
					        vst1.32         {q1},[r0,:128]!
 | 
				
			||||||
 | 
					        subs            r12, r12, #16
 | 
				
			||||||
 | 
					        beq             2f
 | 
				
			||||||
 | 
					        vld1.32         {q0},[r1,:128]!
 | 
				
			||||||
 | 
					        vst1.32         {q2},[r0,:128]!
 | 
				
			||||||
 | 
					        vld1.32         {q1},[r1,:128]!
 | 
				
			||||||
 | 
					        vst1.32         {q3},[r0,:128]!
 | 
				
			||||||
 | 
					        b               1b
 | 
				
			||||||
 | 
					2:      vst1.32         {q2},[r0,:128]!
 | 
				
			||||||
 | 
					        vst1.32         {q3},[r0,:128]!
 | 
				
			||||||
 | 
					        ands            len, len, #15
 | 
				
			||||||
 | 
					        it              eq
 | 
				
			||||||
 | 
					        bxeq            lr
 | 
				
			||||||
 | 
					3:      vld1.32         {q0},[r1,:128]!
 | 
				
			||||||
 | 
					        vmul.f32        q0,  q0,  q8
 | 
				
			||||||
 | 
					        vst1.32         {q0},[r0,:128]!
 | 
				
			||||||
 | 
					        subs            len, len, #4
 | 
				
			||||||
 | 
					        bgt             3b
 | 
				
			||||||
 | 
					        bx              lr
 | 
				
			||||||
 | 
					        .unreq          len
 | 
				
			||||||
 | 
					endfunc
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -39,10 +39,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
 | 
				
			|||||||
        dst[i] += src[i] * mul;
 | 
					        dst[i] += src[i] * mul;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
 | 
				
			||||||
 | 
					                                 int len)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int i;
 | 
				
			||||||
 | 
					    for (i = 0; i < len; i++)
 | 
				
			||||||
 | 
					        dst[i] = src[i] * mul;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 | 
					void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    fdsp->vector_fmul = vector_fmul_c;
 | 
					    fdsp->vector_fmul = vector_fmul_c;
 | 
				
			||||||
    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 | 
					    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 | 
				
			||||||
 | 
					    fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if ARCH_ARM
 | 
					#if ARCH_ARM
 | 
				
			||||||
    ff_float_dsp_init_arm(fdsp);
 | 
					    ff_float_dsp_init_arm(fdsp);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
 | 
					    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
 | 
				
			||||||
                               int len);
 | 
					                               int len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
 | 
				
			||||||
 | 
					     * Multiply a vector of floats by a scalar float.  Source and
 | 
				
			||||||
 | 
					     * destination vectors must overlap exactly or not at all.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * @param dst result vector
 | 
				
			||||||
 | 
					     *            constraints: 16-byte aligned
 | 
				
			||||||
 | 
					     * @param src input vector
 | 
				
			||||||
 | 
					     *            constraints: 16-byte aligned
 | 
				
			||||||
 | 
					     * @param mul scalar value
 | 
				
			||||||
 | 
					     * @param len length of vector
 | 
				
			||||||
 | 
					     *            constraints: multiple of 4
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
 | 
				
			||||||
 | 
					                               int len);
 | 
				
			||||||
} AVFloatDSPContext;
 | 
					} AVFloatDSPContext;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user