Merge commit '5959bfaca396ecaf63a8123055f499688b79cae3'
* commit '5959bfaca396ecaf63a8123055f499688b79cae3':
  floatdsp: move butterflies_float from dsputil to avfloatdsp.

Conflicts:
	libavcodec/dsputil.c
	libavcodec/dsputil.h
	libavcodec/imc.c
	libavcodec/mpegaudiodec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
24604ebaf8
@ -1729,7 +1729,7 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
|
|||||||
if (cpe->ms_mask[idx] &&
|
if (cpe->ms_mask[idx] &&
|
||||||
cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
|
cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
|
||||||
for (group = 0; group < ics->group_len[g]; group++) {
|
for (group = 0; group < ics->group_len[g]; group++) {
|
||||||
ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i],
|
ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i],
|
||||||
ch1 + group * 128 + offsets[i],
|
ch1 + group * 128 + offsets[i],
|
||||||
offsets[i+1] - offsets[i]);
|
offsets[i+1] - offsets[i]);
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
|||||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
|
||||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
|
||||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||||
|
|
||||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
||||||
@ -294,7 +293,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
|
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->butterflies_float = ff_butterflies_float_neon;
|
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
||||||
c->vector_clipf = ff_vector_clipf_neon;
|
c->vector_clipf = ff_vector_clipf_neon;
|
||||||
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
||||||
|
@ -531,18 +531,6 @@ function ff_add_pixels_clamped_neon, export=1
|
|||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_butterflies_float_neon, export=1
|
|
||||||
1: vld1.32 {q0},[r0,:128]
|
|
||||||
vld1.32 {q1},[r1,:128]
|
|
||||||
vsub.f32 q2, q0, q1
|
|
||||||
vadd.f32 q1, q0, q1
|
|
||||||
vst1.32 {q2},[r1,:128]!
|
|
||||||
vst1.32 {q1},[r0,:128]!
|
|
||||||
subs r2, r2, #4
|
|
||||||
bgt 1b
|
|
||||||
bx lr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_scalarproduct_float_neon, export=1
|
function ff_scalarproduct_float_neon, export=1
|
||||||
vmov.f32 q2, #0.0
|
vmov.f32 q2, #0.0
|
||||||
1: vld1.32 {q0},[r0,:128]!
|
1: vld1.32 {q0},[r0,:128]!
|
||||||
|
@ -2483,17 +2483,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
|
|||||||
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
|
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
|
||||||
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
||||||
|
|
||||||
static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
|
|
||||||
int len)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < len; i++) {
|
|
||||||
float t = v1[i] - v2[i];
|
|
||||||
v1[i] += v2[i];
|
|
||||||
v2[i] = t;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
|
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
|
||||||
{
|
{
|
||||||
float p = 0.0;
|
float p = 0.0;
|
||||||
@ -2887,7 +2876,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->apply_window_int16 = apply_window_int16_c;
|
c->apply_window_int16 = apply_window_int16_c;
|
||||||
c->vector_clip_int32 = vector_clip_int32_c;
|
c->vector_clip_int32 = vector_clip_int32_c;
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_c;
|
c->scalarproduct_float = ff_scalarproduct_float_c;
|
||||||
c->butterflies_float = butterflies_float_c;
|
|
||||||
|
|
||||||
c->shrink[0]= av_image_copy_plane;
|
c->shrink[0]= av_image_copy_plane;
|
||||||
c->shrink[1]= ff_shrink22;
|
c->shrink[1]= ff_shrink22;
|
||||||
|
@ -367,13 +367,6 @@ typedef struct DSPContext {
|
|||||||
* @param len length of vectors, multiple of 4
|
* @param len length of vectors, multiple of 4
|
||||||
*/
|
*/
|
||||||
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
|
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
|
||||||
/**
|
|
||||||
* Calculate the sum and difference of two vectors of floats.
|
|
||||||
* @param v1 first input vector, sum output, 16-byte aligned
|
|
||||||
* @param v2 second input vector, difference output, 16-byte aligned
|
|
||||||
* @param len length of vectors, multiple of 4
|
|
||||||
*/
|
|
||||||
void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len);
|
|
||||||
|
|
||||||
/* (I)DCT */
|
/* (I)DCT */
|
||||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||||
|
@ -36,6 +36,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/libm.h"
|
#include "libavutil/libm.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
@ -96,6 +97,7 @@ typedef struct {
|
|||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
|
|
||||||
DSPContext dsp;
|
DSPContext dsp;
|
||||||
|
AVFloatDSPContext fdsp;
|
||||||
FFTContext fft;
|
FFTContext fft;
|
||||||
DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2];
|
DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2];
|
||||||
float *out_samples;
|
float *out_samples;
|
||||||
@ -245,6 +247,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
ff_dsputil_init(&q->dsp, avctx);
|
ff_dsputil_init(&q->dsp, avctx);
|
||||||
|
avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
|
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
|
||||||
avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO
|
avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO
|
||||||
: AV_CH_LAYOUT_STEREO;
|
: AV_CH_LAYOUT_STEREO;
|
||||||
@ -967,7 +970,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (avctx->channels == 2) {
|
if (avctx->channels == 2) {
|
||||||
q->dsp.butterflies_float((float *)q->frame.extended_data[0],
|
q->fdsp.butterflies_float((float *)q->frame.extended_data[0],
|
||||||
(float *)q->frame.extended_data[1], COEFFS);
|
(float *)q->frame.extended_data[1], COEFFS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/libm.h"
|
#include "libavutil/libm.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
@ -84,7 +85,7 @@ typedef struct MPADecodeContext {
|
|||||||
int err_recognition;
|
int err_recognition;
|
||||||
AVCodecContext* avctx;
|
AVCodecContext* avctx;
|
||||||
MPADSPContext mpadsp;
|
MPADSPContext mpadsp;
|
||||||
DSPContext dsp;
|
AVFloatDSPContext fdsp;
|
||||||
AVFrame frame;
|
AVFrame frame;
|
||||||
} MPADecodeContext;
|
} MPADecodeContext;
|
||||||
|
|
||||||
@ -441,8 +442,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
|
|||||||
|
|
||||||
s->avctx = avctx;
|
s->avctx = avctx;
|
||||||
|
|
||||||
|
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
ff_mpadsp_init(&s->mpadsp);
|
ff_mpadsp_init(&s->mpadsp);
|
||||||
ff_dsputil_init(&s->dsp, avctx);
|
|
||||||
|
|
||||||
if (avctx->request_sample_fmt == OUT_FMT &&
|
if (avctx->request_sample_fmt == OUT_FMT &&
|
||||||
avctx->codec_id != AV_CODEC_ID_MP3ON4)
|
avctx->codec_id != AV_CODEC_ID_MP3ON4)
|
||||||
@ -1164,7 +1165,7 @@ found2:
|
|||||||
/* NOTE: the 1/sqrt(2) normalization factor is included in the
|
/* NOTE: the 1/sqrt(2) normalization factor is included in the
|
||||||
global gain */
|
global gain */
|
||||||
#if CONFIG_FLOAT
|
#if CONFIG_FLOAT
|
||||||
s-> dsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576);
|
s->fdsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576);
|
||||||
#else
|
#else
|
||||||
tab0 = g0->sb_hybrid;
|
tab0 = g0->sb_hybrid;
|
||||||
tab1 = g1->sb_hybrid;
|
tab1 = g1->sb_hybrid;
|
||||||
|
@ -178,7 +178,6 @@ static const ModeTab mode_44_48 = {
|
|||||||
typedef struct TwinContext {
|
typedef struct TwinContext {
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
AVFrame frame;
|
AVFrame frame;
|
||||||
DSPContext dsp;
|
|
||||||
AVFloatDSPContext fdsp;
|
AVFloatDSPContext fdsp;
|
||||||
FFTContext mdct_ctx[3];
|
FFTContext mdct_ctx[3];
|
||||||
|
|
||||||
@ -693,7 +692,7 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
|
|||||||
if (tctx->avctx->channels == 2) {
|
if (tctx->avctx->channels == 2) {
|
||||||
memcpy(&out[1][0], &prev_buf[2*mtab->size], size1 * sizeof(out[1][0]));
|
memcpy(&out[1][0], &prev_buf[2*mtab->size], size1 * sizeof(out[1][0]));
|
||||||
memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0]));
|
memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0]));
|
||||||
tctx->dsp.butterflies_float(out[0], out[1], mtab->size);
|
tctx->fdsp.butterflies_float(out[0], out[1], mtab->size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1162,7 +1161,6 @@ static av_cold int twin_decode_init(AVCodecContext *avctx)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ff_dsputil_init(&tctx->dsp, avctx);
|
|
||||||
avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
if ((ret = init_mdct_win(tctx))) {
|
if ((ret = init_mdct_win(tctx))) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
|
av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
|
||||||
|
@ -82,7 +82,6 @@ int ff_wma_init(AVCodecContext *avctx, int flags2)
|
|||||||
|| avctx->bit_rate <= 0)
|
|| avctx->bit_rate <= 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
ff_dsputil_init(&s->dsp, avctx);
|
|
||||||
ff_fmt_convert_init(&s->fmt_conv, avctx);
|
ff_fmt_convert_init(&s->fmt_conv, avctx);
|
||||||
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
|
|
||||||
|
@ -132,7 +132,6 @@ typedef struct WMACodecContext {
|
|||||||
float lsp_pow_e_table[256];
|
float lsp_pow_e_table[256];
|
||||||
float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
|
float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
|
||||||
float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
|
float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
|
||||||
DSPContext dsp;
|
|
||||||
FmtConvertContext fmt_conv;
|
FmtConvertContext fmt_conv;
|
||||||
AVFloatDSPContext fdsp;
|
AVFloatDSPContext fdsp;
|
||||||
|
|
||||||
|
@ -731,7 +731,7 @@ static int wma_decode_block(WMACodecContext *s)
|
|||||||
s->channel_coded[0] = 1;
|
s->channel_coded[0] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len);
|
s->fdsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
next:
|
next:
|
||||||
|
@ -41,6 +41,8 @@ void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
|
|||||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
||||||
const float *src1, int len);
|
const float *src1, int len);
|
||||||
|
|
||||||
|
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||||
|
|
||||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = ff_vector_fmul_neon;
|
fdsp->vector_fmul = ff_vector_fmul_neon;
|
||||||
@ -49,4 +51,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
|||||||
fdsp->vector_fmul_window = ff_vector_fmul_window_neon;
|
fdsp->vector_fmul_window = ff_vector_fmul_window_neon;
|
||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
|
||||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
||||||
|
fdsp->butterflies_float = ff_butterflies_float_neon;
|
||||||
}
|
}
|
||||||
|
@ -244,3 +244,15 @@ function ff_vector_fmul_reverse_neon, export=1
|
|||||||
2: vst1.32 {q8-q9}, [r0,:128]!
|
2: vst1.32 {q8-q9}, [r0,:128]!
|
||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
function ff_butterflies_float_neon, export=1
|
||||||
|
1: vld1.32 {q0},[r0,:128]
|
||||||
|
vld1.32 {q1},[r1,:128]
|
||||||
|
vsub.f32 q2, q0, q1
|
||||||
|
vadd.f32 q1, q0, q1
|
||||||
|
vst1.32 {q2},[r1,:128]!
|
||||||
|
vst1.32 {q1},[r0,:128]!
|
||||||
|
subs r2, r2, #4
|
||||||
|
bgt 1b
|
||||||
|
bx lr
|
||||||
|
endfunc
|
||||||
|
@ -92,6 +92,18 @@ static void vector_fmul_reverse_c(float *dst, const float *src0,
|
|||||||
dst[i] = src0[i] * src1[-i];
|
dst[i] = src0[i] * src1[-i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void butterflies_float_c(float *restrict v1, float *restrict v2,
|
||||||
|
int len)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
float t = v1[i] - v2[i];
|
||||||
|
v1[i] += v2[i];
|
||||||
|
v2[i] = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = vector_fmul_c;
|
fdsp->vector_fmul = vector_fmul_c;
|
||||||
@ -101,6 +113,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
|||||||
fdsp->vector_fmul_window = vector_fmul_window_c;
|
fdsp->vector_fmul_window = vector_fmul_window_c;
|
||||||
fdsp->vector_fmul_add = vector_fmul_add_c;
|
fdsp->vector_fmul_add = vector_fmul_add_c;
|
||||||
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
|
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
|
||||||
|
fdsp->butterflies_float = butterflies_float_c;
|
||||||
|
|
||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
ff_float_dsp_init_arm(fdsp);
|
ff_float_dsp_init_arm(fdsp);
|
||||||
|
@ -137,6 +137,15 @@ typedef struct AVFloatDSPContext {
|
|||||||
*/
|
*/
|
||||||
void (*vector_fmul_reverse)(float *dst, const float *src0,
|
void (*vector_fmul_reverse)(float *dst, const float *src0,
|
||||||
const float *src1, int len);
|
const float *src1, int len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the sum and difference of two vectors of floats.
|
||||||
|
*
|
||||||
|
* @param v1 first input vector, sum output, 16-byte aligned
|
||||||
|
* @param v2 second input vector, difference output, 16-byte aligned
|
||||||
|
* @param len length of vectors, multiple of 4
|
||||||
|
*/
|
||||||
|
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
|
||||||
} AVFloatDSPContext;
|
} AVFloatDSPContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user