dsputil: Split audio operations off into a separate context
This commit is contained in:
parent
ca1e36a8e4
commit
9a9e2f1c8a
9
configure
vendored
9
configure
vendored
@ -1529,6 +1529,7 @@ CONFIG_EXTRA="
|
||||
aandcttables
|
||||
ac3dsp
|
||||
audio_frame_queue
|
||||
audiodsp
|
||||
blockdsp
|
||||
cabac
|
||||
dsputil
|
||||
@ -1713,8 +1714,8 @@ aac_decoder_select="mdct sinewin"
|
||||
aac_encoder_select="audio_frame_queue mdct sinewin"
|
||||
aac_latm_decoder_select="aac_decoder aac_latm_parser"
|
||||
ac3_decoder_select="mdct ac3dsp ac3_parser dsputil"
|
||||
ac3_encoder_select="mdct ac3dsp dsputil"
|
||||
ac3_fixed_encoder_select="mdct ac3dsp dsputil"
|
||||
ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
|
||||
ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
|
||||
aic_decoder_select="dsputil golomb"
|
||||
alac_encoder_select="lpc"
|
||||
als_decoder_select="dsputil"
|
||||
@ -1735,7 +1736,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin"
|
||||
cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
|
||||
cllc_decoder_select="dsputil"
|
||||
comfortnoise_encoder_select="lpc"
|
||||
cook_decoder_select="dsputil mdct sinewin"
|
||||
cook_decoder_select="audiodsp mdct sinewin"
|
||||
cscd_decoder_select="lzo"
|
||||
cscd_decoder_suggest="zlib"
|
||||
dca_decoder_select="mdct"
|
||||
@ -1849,7 +1850,7 @@ svq1_decoder_select="hpeldsp"
|
||||
svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
|
||||
svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
|
||||
svq3_decoder_suggest="zlib"
|
||||
tak_decoder_select="dsputil"
|
||||
tak_decoder_select="audiodsp"
|
||||
theora_decoder_select="vp3_decoder"
|
||||
thp_decoder_select="mjpeg_decoder"
|
||||
tiff_decoder_suggest="zlib"
|
||||
|
@ -28,6 +28,7 @@ OBJS = allcodecs.o \
|
||||
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
|
||||
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
|
||||
OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o
|
||||
OBJS-$(CONFIG_AUDIODSP) += audiodsp.o
|
||||
OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
|
||||
OBJS-$(CONFIG_CABAC) += cabac.o
|
||||
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "libavutil/opt.h"
|
||||
#include "avcodec.h"
|
||||
#include "put_bits.h"
|
||||
#include "audiodsp.h"
|
||||
#include "ac3dsp.h"
|
||||
#include "ac3.h"
|
||||
#include "fft.h"
|
||||
@ -2480,6 +2481,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
|
||||
if (ret)
|
||||
goto init_fail;
|
||||
|
||||
ff_audiodsp_init(&s->adsp);
|
||||
ff_dsputil_init(&s->dsp, avctx);
|
||||
ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "fft.h"
|
||||
#include "mathops.h"
|
||||
#include "put_bits.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
#ifndef CONFIG_AC3ENC_FLOAT
|
||||
#define CONFIG_AC3ENC_FLOAT 0
|
||||
@ -162,6 +163,7 @@ typedef struct AC3EncodeContext {
|
||||
AVCodecContext *avctx; ///< parent AVCodecContext
|
||||
PutBitContext pb; ///< bitstream writer context
|
||||
DSPContext dsp;
|
||||
AudioDSPContext adsp;
|
||||
AVFloatDSPContext fdsp;
|
||||
AC3DSPContext ac3dsp; ///< AC-3 optimized functions
|
||||
FFTContext mdct; ///< FFT context for MDCT calculation
|
||||
|
@ -29,6 +29,7 @@
|
||||
#define FFT_FLOAT 0
|
||||
#undef CONFIG_AC3ENC_FLOAT
|
||||
#include "internal.h"
|
||||
#include "audiodsp.h"
|
||||
#include "ac3enc.h"
|
||||
#include "eac3enc.h"
|
||||
|
||||
@ -100,9 +101,10 @@ static void scale_coefficients(AC3EncodeContext *s)
|
||||
/*
|
||||
* Clip MDCT coefficients to allowable range.
|
||||
*/
|
||||
static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
|
||||
static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef,
|
||||
unsigned int len)
|
||||
{
|
||||
dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#define CONFIG_AC3ENC_FLOAT 1
|
||||
#include "internal.h"
|
||||
#include "audiodsp.h"
|
||||
#include "ac3enc.h"
|
||||
#include "eac3enc.h"
|
||||
#include "kbdwin.h"
|
||||
@ -107,9 +108,10 @@ static void scale_coefficients(AC3EncodeContext *s)
|
||||
/*
|
||||
* Clip MDCT coefficients to allowable range.
|
||||
*/
|
||||
static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
|
||||
static void clip_coefficients(AudioDSPContext *adsp, float *coef,
|
||||
unsigned int len)
|
||||
{
|
||||
dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,6 +30,8 @@
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/internal.h"
|
||||
|
||||
#include "audiodsp.h"
|
||||
#include "internal.h"
|
||||
#include "ac3enc.h"
|
||||
#include "eac3enc.h"
|
||||
@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s);
|
||||
|
||||
static int normalize_samples(AC3EncodeContext *s);
|
||||
|
||||
static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
|
||||
static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef,
|
||||
unsigned int len);
|
||||
|
||||
static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl);
|
||||
|
||||
@ -161,7 +164,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
|
||||
}
|
||||
|
||||
/* coefficients must be clipped in order to be encoded */
|
||||
clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
|
||||
clip_coefficients(&s->adsp, cpl_coef, num_cpl_coefs);
|
||||
}
|
||||
|
||||
/* calculate energy in each band in coupling channel and each fbw channel */
|
||||
@ -412,7 +415,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
|
||||
if (s->fixed_point)
|
||||
scale_coefficients(s);
|
||||
|
||||
clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
|
||||
clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1],
|
||||
AC3_MAX_COEFS * s->num_blocks * s->channels);
|
||||
|
||||
s->cpl_on = s->cpl_enabled;
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "avcodec.h"
|
||||
#include "acelp_pitch_delay.h"
|
||||
#include "celp_math.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
int ff_acelp_decode_8bit_to_1st_delay3(int ac_index)
|
||||
{
|
||||
@ -90,7 +91,7 @@ void ff_acelp_update_past_gain(
|
||||
}
|
||||
|
||||
int16_t ff_acelp_decode_gain_code(
|
||||
DSPContext *dsp,
|
||||
AudioDSPContext *adsp,
|
||||
int gain_corr_factor,
|
||||
const int16_t* fc_v,
|
||||
int mr_energy,
|
||||
@ -107,7 +108,7 @@ int16_t ff_acelp_decode_gain_code(
|
||||
mr_energy += quant_energy[i] * ma_prediction_coeff[i];
|
||||
|
||||
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
|
||||
sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
|
||||
sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
|
||||
return mr_energy >> 12;
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,8 @@
|
||||
#define AVCODEC_ACELP_PITCH_DELAY_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dsputil.h"
|
||||
|
||||
#include "audiodsp.h"
|
||||
|
||||
#define PITCH_DELAY_MIN 20
|
||||
#define PITCH_DELAY_MAX 143
|
||||
@ -139,7 +140,7 @@ void ff_acelp_update_past_gain(
|
||||
/**
|
||||
* @brief Decode the adaptive codebook gain and add
|
||||
* correction (4.1.5 and 3.9.1 of G.729).
|
||||
* @param dsp initialized dsputil context
|
||||
* @param adsp initialized audio DSP context
|
||||
* @param gain_corr_factor gain correction factor (2.13)
|
||||
* @param fc_v fixed-codebook vector (2.13)
|
||||
* @param mr_energy mean innovation energy and fixed-point correction (7.13)
|
||||
@ -208,7 +209,7 @@ void ff_acelp_update_past_gain(
|
||||
* @remark The routine is used in G.729 and AMR (all modes).
|
||||
*/
|
||||
int16_t ff_acelp_decode_gain_code(
|
||||
DSPContext *dsp,
|
||||
AudioDSPContext *adsp,
|
||||
int gain_corr_factor,
|
||||
const int16_t* fc_v,
|
||||
int mr_energy,
|
||||
|
@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o
|
||||
|
||||
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
|
||||
arm/ac3dsp_arm.o
|
||||
OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o
|
||||
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
|
||||
OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
|
||||
arm/dsputil_arm.o \
|
||||
@ -77,11 +78,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
|
||||
NEON-OBJS += arm/fmtconvert_neon.o
|
||||
|
||||
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
|
||||
NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \
|
||||
arm/audiodsp_neon.o \
|
||||
arm/int_neon.o
|
||||
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
|
||||
arm/blockdsp_neon.o
|
||||
NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
|
||||
arm/dsputil_neon.o \
|
||||
arm/int_neon.o \
|
||||
arm/simple_idct_neon.o
|
||||
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
|
||||
arm/fft_fixed_neon.o
|
||||
|
26
libavcodec/arm/audiodsp_arm.h
Normal file
26
libavcodec/arm/audiodsp_arm.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_ARM_AUDIODSP_ARM_H
|
||||
#define AVCODEC_ARM_AUDIODSP_ARM_H
|
||||
|
||||
#include "libavcodec/audiodsp.h"
|
||||
|
||||
void ff_audiodsp_init_neon(AudioDSPContext *c);
|
||||
|
||||
#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */
|
33
libavcodec/arm/audiodsp_init_arm.c
Normal file
33
libavcodec/arm/audiodsp_init_arm.c
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* ARM optimized audio functions
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/arm/cpu.h"
|
||||
#include "libavcodec/audiodsp.h"
|
||||
#include "audiodsp_arm.h"
|
||||
|
||||
/**
 * Install ARM-specific implementations into an audio DSP context.
 *
 * The CPU flags are queried at run time; the NEON function pointers are
 * installed only when have_neon() reports support. Otherwise the context
 * is left untouched.
 *
 * @param c audio DSP context to update
 */
av_cold void ff_audiodsp_init_arm(AudioDSPContext *c)
{
    const int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags))
        ff_audiodsp_init_neon(c);
}
|
41
libavcodec/arm/audiodsp_init_neon.c
Normal file
41
libavcodec/arm/audiodsp_init_neon.c
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* ARM NEON optimised audio functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavcodec/audiodsp.h"
|
||||
#include "audiodsp_arm.h"
|
||||
|
||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
||||
int len);
|
||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
|
||||
|
||||
av_cold void ff_audiodsp_init_neon(AudioDSPContext *c)
|
||||
{
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
||||
c->vector_clipf = ff_vector_clipf_neon;
|
||||
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
|
||||
}
|
64
libavcodec/arm/audiodsp_neon.S
Normal file
64
libavcodec/arm/audiodsp_neon.S
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* ARM NEON optimised audio functions
|
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/arm/asm.S"
|
||||
|
||||
@ void ff_vector_clipf_neon(float *dst, const float *src,
@                           float min, float max, int len)
@
@ Clip len floats from src into dst, eight per iteration.
@ r0 = dst, r1 = src; both are accessed with a 128-bit alignment hint.
@ VFP/NOVFP come from asm.S (not shown here); presumably they select the
@ lines for hard-float and soft-float argument passing respectively:
@   VFP:   min/max arrive in d0[0]/d0[1], len in r2
@   NOVFP: min/max arrive in r2/r3, len is loaded from the stack
@ After setup q0 holds min and q1 holds max in every lane.
@ The loop only terminates when the counter reaches exactly zero (beq),
@ so len has to be a non-zero multiple of 8.
function ff_vector_clipf_neon, export=1
VFP     vdup.32         q1,  d0[1]
VFP     vdup.32         q0,  d0[0]
NOVFP   vdup.32         q0,  r2
NOVFP   vdup.32         q1,  r3
NOVFP   ldr             r2,  [sp]
        @ prologue: load the first eight values and apply the upper bound
        vld1.f32        {q2},[r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3},[r1,:128]!
        vmin.f32        q11, q3,  q1
        @ loop: finish the pending group with the lower bound, then
        @ start the next group before storing the finished one
1:      vmax.f32        q8,  q10, q0
        vmax.f32        q9,  q11, q0
        subs            r2,  r2,  #8
        beq             2f
        vld1.f32        {q2},[r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3},[r1,:128]!
        vmin.f32        q11, q3,  q1
        vst1.f32        {q8},[r0,:128]!
        vst1.f32        {q9},[r0,:128]!
        b               1b
        @ epilogue: store the last finished group
2:      vst1.f32        {q8},[r0,:128]!
        vst1.f32        {q9},[r0,:128]!
        bx              lr
endfunc
|
||||
|
||||
@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src,
@                                int32_t min, int32_t max, unsigned int len)
@
@ Clip len 32-bit integers from src into dst, eight per iteration.
@ r0 = dst, r1 = src (both accessed with a 128-bit alignment hint),
@ r2 = min, r3 = max; len is the fifth argument and is read from the stack.
@ The body runs before the counter is tested, so at least eight elements
@ are always processed.
function ff_vector_clip_int32_neon, export=1
        vdup.32         q0,  r2                 @ q0 = min in every lane
        vdup.32         q1,  r3                 @ q1 = max in every lane
        ldr             r2,  [sp]               @ r2 = len
1:
        vld1.32         {q2-q3},  [r1,:128]!
        vmin.s32        q2,  q2,  q1
        vmin.s32        q3,  q3,  q1
        vmax.s32        q2,  q2,  q0
        vmax.s32        q3,  q3,  q0
        vst1.32         {q2-q3},  [r0,:128]!
        subs            r2,  r2,  #8
        bgt             1b
        bx              lr
endfunc
|
@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||
|
||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
||||
int len);
|
||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
|
||||
|
||||
av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
|
||||
unsigned high_bit_depth)
|
||||
{
|
||||
@ -57,9 +50,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
|
||||
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
|
||||
c->put_pixels_clamped = ff_put_pixels_clamped_neon;
|
||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
|
||||
|
||||
c->vector_clipf = ff_vector_clipf_neon;
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
||||
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
|
||||
}
|
||||
|
@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1
|
||||
vst1.8 {d6}, [r3,:64], r2
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
function ff_vector_clipf_neon, export=1
|
||||
VFP vdup.32 q1, d0[1]
|
||||
VFP vdup.32 q0, d0[0]
|
||||
NOVFP vdup.32 q0, r2
|
||||
NOVFP vdup.32 q1, r3
|
||||
NOVFP ldr r2, [sp]
|
||||
vld1.f32 {q2},[r1,:128]!
|
||||
vmin.f32 q10, q2, q1
|
||||
vld1.f32 {q3},[r1,:128]!
|
||||
vmin.f32 q11, q3, q1
|
||||
1: vmax.f32 q8, q10, q0
|
||||
vmax.f32 q9, q11, q0
|
||||
subs r2, r2, #8
|
||||
beq 2f
|
||||
vld1.f32 {q2},[r1,:128]!
|
||||
vmin.f32 q10, q2, q1
|
||||
vld1.f32 {q3},[r1,:128]!
|
||||
vmin.f32 q11, q3, q1
|
||||
vst1.f32 {q8},[r0,:128]!
|
||||
vst1.f32 {q9},[r0,:128]!
|
||||
b 1b
|
||||
2: vst1.f32 {q8},[r0,:128]!
|
||||
vst1.f32 {q9},[r0,:128]!
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
function ff_vector_clip_int32_neon, export=1
|
||||
vdup.32 q0, r2
|
||||
vdup.32 q1, r3
|
||||
ldr r2, [sp]
|
||||
1:
|
||||
vld1.32 {q2-q3}, [r1,:128]!
|
||||
vmin.s32 q2, q2, q1
|
||||
vmin.s32 q3, q3, q1
|
||||
vmax.s32 q2, q2, q0
|
||||
vmax.s32 q3, q3, q0
|
||||
vst1.32 {q2-q3}, [r0,:128]!
|
||||
subs r2, r2, #8
|
||||
bgt 1b
|
||||
bx lr
|
||||
endfunc
|
||||
|
118
libavcodec/audiodsp.c
Normal file
118
libavcodec/audiodsp.c
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
/**
 * Return the bit pattern of a float as an unsigned 32-bit integer.
 * The conversion goes through a union so that no float object is ever
 * accessed through a uint32_t pointer.
 */
static inline uint32_t clipf_float_to_bits(float value)
{
    union { float f; uint32_t u; } pun;

    pun.f = value;
    return pun.u;
}

/**
 * Return the float whose bit pattern is the given 32-bit integer.
 */
static inline float clipf_bits_to_float(uint32_t bits)
{
    union { float f; uint32_t u; } pun;

    pun.u = bits;
    return pun.f;
}

/**
 * Clip one float, given as its raw bit pattern, using integer compares.
 *
 * Only valid when the lower bound is negative and the upper bound is
 * positive: negative floats then compare as unsigned integers in order of
 * growing magnitude, and flipping the sign bit lets the same trick detect
 * values above the positive bound.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with the sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] where *min < 0 and *max > 0.
 *
 * Elements are handled in groups of eight, so len is expected to be a
 * multiple of 8; a partial trailing group is still processed in full.
 *
 * @param dst destination array (may be the same array as src)
 * @param src source array
 * @param min pointer to the lower bound, must be negative
 * @param max pointer to the upper bound, must be positive
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    const uint32_t mini     = clipf_float_to_bits(*min);
    const uint32_t maxi     = clipf_float_to_bits(*max);
    const uint32_t maxisign = maxi ^ (1U << 31);
    int i, j;

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            const uint32_t in = clipf_float_to_bits(src[i + j]);

            dst[i + j] = clipf_bits_to_float(clipf_c_one(in, mini, maxi,
                                                         maxisign));
        }
    }
}
|
||||
|
||||
/**
 * Clip every element of a float vector to [min, max].
 *
 * When the bounds have opposite signs the integer-compare variant is used;
 * otherwise each element goes through av_clipf(). Elements are handled in
 * groups of eight, so len is expected to be a multiple of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8)
        for (j = 0; j < 8; j++)
            dst[i + j] = av_clipf(src[i + j], min, max);
}
|
||||
|
||||
/**
 * Compute the scalar product of two int16_t vectors.
 *
 * The sum is accumulated in an unsigned variable so that it wraps modulo
 * 2^32 instead of triggering signed-overflow undefined behaviour on large
 * inputs; the wrapped sum is then returned as int32_t.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements to multiply and accumulate
 * @return sum of v1[i] * v2[i] over all elements
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
|
||||
|
||||
/**
 * Clip every element of an int32_t vector to [min, max].
 *
 * Elements are processed eight at a time and the count is only checked
 * after each group. len is unsigned, so it must be a non-zero multiple of
 * 8: any other value makes the counter wrap around instead of reaching
 * zero.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    int k;

    do {
        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);
        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
|
||||
|
||||
/**
 * Initialize an audio DSP context.
 *
 * The portable C implementations are installed first. The init function of
 * the architecture being built for then gets a chance to override them;
 * the ARCH_* symbols are build-time constants, so at most one branch is
 * live.
 *
 * @param c audio DSP context to fill in
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->vector_clip_int32   = vector_clip_int32_c;
    c->vector_clipf        = vector_clipf_c;

    if (ARCH_ARM)
        ff_audiodsp_init_arm(c);
    if (ARCH_PPC)
        ff_audiodsp_init_ppc(c);
    if (ARCH_X86)
        ff_audiodsp_init_x86(c);
}
|
59
libavcodec/audiodsp.h
Normal file
59
libavcodec/audiodsp.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_AUDIODSP_H
|
||||
#define AVCODEC_AUDIODSP_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Table of audio helper routines. Each entry is a function pointer that
 * ff_audiodsp_init() sets to a C implementation and that an
 * architecture-specific init function may replace.
 */
typedef struct AudioDSPContext {
    /**
     * Calculate scalar product of two vectors.
     * @param v1  first vector
     * @param v2  second vector
     *            constraints: 16-byte aligned
     * @param len length of vectors, should be multiple of 16
     * @return sum of the element-wise products
     */
    int32_t (*scalarproduct_int16)(const int16_t *v1,
                                   const int16_t *v2 /* align 16 */, int len);

    /**
     * Clip each element in an array of int32_t to a given minimum and
     * maximum value.
     * @param dst  destination array
     *             constraints: 16-byte aligned
     * @param src  source array
     *             constraints: 16-byte aligned
     * @param min  minimum value
     *             constraints: must be in the range [-(1 << 24), 1 << 24]
     * @param max  maximum value
     *             constraints: must be in the range [-(1 << 24), 1 << 24]
     * @param len  number of elements in the array
     *             constraints: multiple of 32 greater than zero
     */
    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
                              int32_t max, unsigned int len);

    /**
     * Clip each element in an array of float to a given minimum and
     * maximum value.
     * @param dst  destination array
     *             constraints: 16-byte aligned
     * @param src  source array
     *             constraints: 16-byte aligned
     * @param min  minimum value
     * @param max  maximum value
     * @param len  number of elements in the array
     *             constraints: multiple of 8
     */
    void (*vector_clipf)(float *dst /* align 16 */,
                         const float *src /* align 16 */,
                         float min, float max, int len /* align 16 */);
} AudioDSPContext;

/** Fill in the context with the C routines, then apply per-architecture overrides. */
void ff_audiodsp_init(AudioDSPContext *c);
/** Architecture-specific overrides, called from ff_audiodsp_init(). */
void ff_audiodsp_init_arm(AudioDSPContext *c);
void ff_audiodsp_init_ppc(AudioDSPContext *c);
void ff_audiodsp_init_x86(AudioDSPContext *c);
|
||||
|
||||
#endif /* AVCODEC_AUDIODSP_H */
|
@ -44,9 +44,10 @@
|
||||
|
||||
#include "libavutil/channel_layout.h"
|
||||
#include "libavutil/lfg.h"
|
||||
|
||||
#include "audiodsp.h"
|
||||
#include "avcodec.h"
|
||||
#include "get_bits.h"
|
||||
#include "dsputil.h"
|
||||
#include "bytestream.h"
|
||||
#include "fft.h"
|
||||
#include "internal.h"
|
||||
@ -122,7 +123,7 @@ typedef struct cook {
|
||||
void (*saturate_output)(struct cook *q, float *out);
|
||||
|
||||
AVCodecContext* avctx;
|
||||
DSPContext dsp;
|
||||
AudioDSPContext adsp;
|
||||
GetBitContext gb;
|
||||
/* stream data */
|
||||
int num_vectors;
|
||||
@ -865,8 +866,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
|
||||
*/
|
||||
static void saturate_output_float(COOKContext *q, float *out)
{
    /* Clip to [-1.0, 1.0], reading from the second part of the MDCT output
     * buffer. The element count goes through FFALIGN(..., 8), presumably to
     * satisfy the multiple-of-8 length that vector_clipf expects. */
    q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
                         -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
}
|
||||
|
||||
|
||||
@ -1065,7 +1066,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
|
||||
/* Initialize RNG. */
|
||||
av_lfg_init(&q->random_state, 0);
|
||||
|
||||
ff_dsputil_init(&q->dsp, avctx);
|
||||
ff_audiodsp_init(&q->adsp);
|
||||
|
||||
while (edata_ptr < edata_ptr_end) {
|
||||
/* 8 for mono, 16 for stereo, ? for multichannel
|
||||
|
@ -1267,87 +1267,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
|
||||
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
|
||||
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
||||
|
||||
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
|
||||
uint32_t maxi, uint32_t maxisign)
|
||||
{
|
||||
if (a > mini)
|
||||
return mini;
|
||||
else if ((a ^ (1U << 31)) > maxisign)
|
||||
return maxi;
|
||||
else
|
||||
return a;
|
||||
}
|
||||
|
||||
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
|
||||
float *min, float *max, int len)
|
||||
{
|
||||
int i;
|
||||
uint32_t mini = *(uint32_t *) min;
|
||||
uint32_t maxi = *(uint32_t *) max;
|
||||
uint32_t maxisign = maxi ^ (1U << 31);
|
||||
uint32_t *dsti = (uint32_t *) dst;
|
||||
const uint32_t *srci = (const uint32_t *) src;
|
||||
|
||||
for (i = 0; i < len; i += 8) {
|
||||
dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
|
||||
dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
|
||||
dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
|
||||
dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
|
||||
dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
|
||||
dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
|
||||
dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
|
||||
dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_clipf_c(float *dst, const float *src,
|
||||
float min, float max, int len)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (min < 0 && max > 0) {
|
||||
vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
|
||||
} else {
|
||||
for (i = 0; i < len; i += 8) {
|
||||
dst[i] = av_clipf(src[i], min, max);
|
||||
dst[i + 1] = av_clipf(src[i + 1], min, max);
|
||||
dst[i + 2] = av_clipf(src[i + 2], min, max);
|
||||
dst[i + 3] = av_clipf(src[i + 3], min, max);
|
||||
dst[i + 4] = av_clipf(src[i + 4], min, max);
|
||||
dst[i + 5] = av_clipf(src[i + 5], min, max);
|
||||
dst[i + 6] = av_clipf(src[i + 6], min, max);
|
||||
dst[i + 7] = av_clipf(src[i + 7], min, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
|
||||
int order)
|
||||
{
|
||||
int res = 0;
|
||||
|
||||
while (order--)
|
||||
res += *v1++ **v2++;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len)
|
||||
{
|
||||
do {
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
*dst++ = av_clip(*src++, min, max);
|
||||
len -= 8;
|
||||
} while (len > 0);
|
||||
}
|
||||
|
||||
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
|
||||
{
|
||||
ff_j_rev_dct(block);
|
||||
@ -1502,10 +1421,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
||||
c->try_8x8basis = try_8x8basis_c;
|
||||
c->add_8x8basis = add_8x8basis_c;
|
||||
|
||||
c->scalarproduct_int16 = scalarproduct_int16_c;
|
||||
c->vector_clip_int32 = vector_clip_int32_c;
|
||||
c->vector_clipf = vector_clipf_c;
|
||||
|
||||
c->shrink[0] = av_image_copy_plane;
|
||||
c->shrink[1] = ff_shrink22;
|
||||
c->shrink[2] = ff_shrink44;
|
||||
|
@ -125,11 +125,6 @@ typedef struct DSPContext {
|
||||
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
|
||||
void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
|
||||
|
||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
||||
void (*vector_clipf)(float *dst /* align 16 */,
|
||||
const float *src /* align 16 */,
|
||||
float min, float max, int len /* align 16 */);
|
||||
|
||||
/* (I)DCT */
|
||||
void (*fdct)(int16_t *block /* align 16 */);
|
||||
void (*fdct248)(int16_t *block /* align 16 */);
|
||||
@ -189,30 +184,6 @@ typedef struct DSPContext {
|
||||
|
||||
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
|
||||
int src_wrap, int width, int height);
|
||||
|
||||
/**
|
||||
* Calculate scalar product of two vectors.
|
||||
* @param len length of vectors, should be multiple of 16
|
||||
*/
|
||||
int32_t (*scalarproduct_int16)(const int16_t *v1,
|
||||
const int16_t *v2 /* align 16 */, int len);
|
||||
|
||||
/**
|
||||
* Clip each element in an array of int32_t to a given minimum and
|
||||
* maximum value.
|
||||
* @param dst destination array
|
||||
* constraints: 16-byte aligned
|
||||
* @param src source array
|
||||
* constraints: 16-byte aligned
|
||||
* @param min minimum value
|
||||
* constraints: must be in the range [-(1 << 24), 1 << 24]
|
||||
* @param max maximum value
|
||||
* constraints: must be in the range [-(1 << 24), 1 << 24]
|
||||
* @param len number of elements in the array
|
||||
* constraints: multiple of 32 greater than zero
|
||||
*/
|
||||
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
} DSPContext;
|
||||
|
||||
void ff_dsputil_static_init(void);
|
||||
|
@ -1,5 +1,6 @@
|
||||
OBJS += ppc/fmtconvert_altivec.o \
|
||||
|
||||
OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o
|
||||
OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o
|
||||
OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o
|
||||
OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o
|
||||
@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \
|
||||
ppc/fdct_altivec.o \
|
||||
ppc/gmc_altivec.o \
|
||||
ppc/idct_altivec.o \
|
||||
ppc/int_altivec.o \
|
||||
|
||||
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o
|
||||
ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes)
|
||||
|
@ -20,7 +20,7 @@
|
||||
|
||||
/**
|
||||
* @file
|
||||
* miscellaneous integer operations
|
||||
* miscellaneous audio operations
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
@ -29,10 +29,13 @@
|
||||
#endif
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/ppc/cpu.h"
|
||||
#include "libavutil/ppc/types_altivec.h"
|
||||
#include "libavutil/ppc/util_altivec.h"
|
||||
#include "libavcodec/dsputil.h"
|
||||
#include "dsputil_altivec.h"
|
||||
#include "libavcodec/audiodsp.h"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
|
||||
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
|
||||
int order)
|
||||
@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
|
||||
return ires;
|
||||
}
|
||||
|
||||
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
/**
 * Hook the PowerPC-specific routines into an AudioDSPContext.
 *
 * Does nothing unless the build has AltiVec support (HAVE_ALTIVEC) and the
 * runtime CPU flags report AltiVec; in that case scalarproduct_int16 is
 * pointed at the AltiVec implementation defined in this file.
 *
 * @param c context whose function pointers are updated in place
 */
av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c)
{
#if HAVE_ALTIVEC
    const int cpu_flags = av_get_cpu_flags();

    if (PPC_ALTIVEC(cpu_flags))
        c->scalarproduct_int16 = scalarproduct_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
|
@ -35,6 +35,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
|
||||
|
||||
void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
|
||||
unsigned high_bit_depth);
|
||||
void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx);
|
||||
|
||||
#endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
|
||||
|
@ -34,7 +34,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
|
||||
{
|
||||
if (PPC_ALTIVEC(av_get_cpu_flags())) {
|
||||
ff_dsputil_init_altivec(c, avctx, high_bit_depth);
|
||||
ff_int_init_altivec(c, avctx);
|
||||
|
||||
c->gmc1 = ff_gmc1_altivec;
|
||||
|
||||
if (!high_bit_depth) {
|
||||
|
@ -28,8 +28,8 @@
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/samplefmt.h"
|
||||
#include "tak.h"
|
||||
#include "audiodsp.h"
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
#include "internal.h"
|
||||
#include "unary.h"
|
||||
|
||||
@ -45,7 +45,7 @@ typedef struct MCDParam {
|
||||
|
||||
typedef struct TAKDecContext {
|
||||
AVCodecContext *avctx; // parent AVCodecContext
|
||||
DSPContext dsp;
|
||||
AudioDSPContext adsp;
|
||||
TAKStreamInfo ti;
|
||||
GetBitContext gb; // bitstream reader initialized to start at the current frame
|
||||
|
||||
@ -172,7 +172,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
|
||||
{
|
||||
TAKDecContext *s = avctx->priv_data;
|
||||
|
||||
ff_dsputil_init(&s->dsp, avctx);
|
||||
ff_audiodsp_init(&s->adsp);
|
||||
|
||||
s->avctx = avctx;
|
||||
|
||||
@ -484,8 +484,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded,
|
||||
for (i = 0; i < subframe_size - filter_order; i++) {
|
||||
int v = 1 << (filter_quant - 1);
|
||||
|
||||
v += s->dsp.scalarproduct_int16(&s->residues[i], filter,
|
||||
FFALIGN(filter_order, 16));
|
||||
v += s->adsp.scalarproduct_int16(&s->residues[i], filter,
|
||||
FFALIGN(filter_order, 16));
|
||||
|
||||
v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded;
|
||||
*decoded++ = v;
|
||||
@ -654,8 +654,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
|
||||
for (i = 0; i < length2; i++) {
|
||||
int v = 1 << 9;
|
||||
|
||||
v += s->dsp.scalarproduct_int16(&s->residues[i], filter,
|
||||
FFALIGN(filter_order, 16));
|
||||
v += s->adsp.scalarproduct_int16(&s->residues[i], filter,
|
||||
FFALIGN(filter_order, 16));
|
||||
|
||||
p1[i] = (av_clip(v >> 10, -8192, 8191) << dshift) - p1[i];
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ OBJS += x86/constants.o \
|
||||
x86/fmtconvert_init.o \
|
||||
|
||||
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o
|
||||
OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o
|
||||
OBJS-$(CONFIG_DCT) += x86/dct_init.o
|
||||
OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o
|
||||
OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \
|
||||
@ -44,6 +45,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o
|
||||
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
|
||||
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
|
||||
|
||||
MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
|
||||
MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
|
||||
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
|
||||
x86/idct_mmx_xvid.o \
|
||||
@ -61,6 +63,7 @@ YASM-OBJS += x86/deinterlace.o \
|
||||
x86/fmtconvert.o \
|
||||
|
||||
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
|
||||
YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o
|
||||
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
|
||||
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
|
||||
YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o
|
||||
|
137
libavcodec/x86/audiodsp.asm
Normal file
137
libavcodec/x86/audiodsp.asm
Normal file
@ -0,0 +1,137 @@
|
||||
;******************************************************************************
|
||||
;* optimized audio functions
|
||||
;* Copyright (c) 2008 Loren Merritt
|
||||
;*
|
||||
;* This file is part of Libav.
|
||||
;*
|
||||
;* Libav is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* Libav is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with Libav; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_TEXT
|
||||
|
||||
%macro SCALARPRODUCT 0
; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
;
; Returns the sum of v1[i] * v2[i] over 'order' 16-bit elements; products are
; pair-summed by pmaddwd and accumulated in 32-bit lanes of m2.
; Each loop iteration consumes 2*mmsize bytes of both vectors. v1 is read
; with movu, v2 is used directly as a pmaddwd memory operand.
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    ; 'order' is a 32-bit int: double it through the 32-bit register so the
    ; upper half of orderq is zeroed on x86-64 before it is used as a 64-bit
    ; byte offset (the caller is not required to clear those bits).
    add     orderd, orderd
    ; Point both vectors at their end and walk a negative offset up to zero.
    add     v1q, orderq
    add     v2q, orderq
    neg     orderq
    pxor    m2, m2
.loop:
    movu    m0, [v1q + orderq]
    movu    m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd   m2, m0
    paddd   m2, m1
    add     orderq, mmsize*2
    jl .loop
    ; Horizontal add of the 32-bit partial sums into the low dword of m2.
%if mmsize == 16
    movhlps m0, m2              ; fold the high 64 bits onto the low 64 bits
    paddd   m2, m0
    pshuflw m0, m2, 0x4e        ; swap the two low dwords
%else
    pshufw  m0, m2, 0x4e        ; swap the two dwords
%endif
    paddd   m2, m0
    movd   eax, m2
    RET
%endmacro
|
||||
|
||||
INIT_MMX mmxext
|
||||
SCALARPRODUCT
|
||||
INIT_XMM sse2
|
||||
SCALARPRODUCT
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
||||
; int32_t max, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; min/max are broadcast into m4/m5 once; m6 is passed to CLIPD as its fourth
; (scratch) argument. Every unrolled pass handles 4*(1+%3) consecutive
; vectors, and %%i is the vector index at which the current pass starts, so
; successive passes cover adjacent, non-overlapping ranges.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4
    SPLATD    m5
.loop:
%assign %%i 0
%rep %2
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %3
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD  m0,  m4, m5, m6
    CLIPD  m1,  m4, m5, m6
    CLIPD  m2,  m4, m5, m6
    CLIPD  m3,  m4, m5, m6
%if %3
    CLIPD  m7,  m4, m5, m6
    CLIPD  m8,  m4, m5, m6
    CLIPD  m9,  m4, m5, m6
    CLIPD  m10, m4, m5, m6
%endif
    mova  [dstq+mmsize*(0+%%i)], m0
    mova  [dstq+mmsize*(1+%%i)], m1
    mova  [dstq+mmsize*(2+%%i)], m2
    mova  [dstq+mmsize*(3+%%i)], m3
%if %3
    mova  [dstq+mmsize*(4+%%i)], m7
    mova  [dstq+mmsize*(5+%%i)], m8
    mova  [dstq+mmsize*(6+%%i)], m9
    mova  [dstq+mmsize*(7+%%i)], m10
%endif
    ; advance to the first vector of the next unrolled pass
%assign %%i (%%i + 4 * (1 + %3))
%endrep
    ; One asm loop covers %2 passes of 4*(1+%3) vectors, i.e.
    ; mmsize*4*%2*(1+%3) bytes or mmsize*%2*(1+%3) int32 elements.
    add     srcq, mmsize*4*%2*(1+%3)
    add     dstq, mmsize*4*%2*(1+%3)
    sub     lend, mmsize*%2*(1+%3)
    jg .loop
    REP_RET
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
%define CLIPD CLIPD_MMX
|
||||
VECTOR_CLIP_INT32 0, 1, 0, 0
|
||||
INIT_XMM sse2
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
|
||||
%define CLIPD CLIPD_SSE2
|
||||
VECTOR_CLIP_INT32 6, 2, 0, 1
|
||||
INIT_XMM sse4
|
||||
%define CLIPD CLIPD_SSE41
|
||||
%ifdef m8
|
||||
VECTOR_CLIP_INT32 11, 1, 1, 0
|
||||
%else
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0
|
||||
%endif
|
25
libavcodec/x86/audiodsp.h
Normal file
25
libavcodec/x86/audiodsp.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_X86_AUDIODSP_H
|
||||
#define AVCODEC_X86_AUDIODSP_H
|
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
float min, float max, int len);
|
||||
|
||||
#endif /* AVCODEC_X86_AUDIODSP_H */
|
66
libavcodec/x86/audiodsp_init.c
Normal file
66
libavcodec/x86/audiodsp_init.c
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavcodec/audiodsp.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
|
||||
int order);
|
||||
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
|
||||
int order);
|
||||
|
||||
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
|
||||
/**
 * Select the x86 implementations for an AudioDSPContext based on the
 * CPU flags reported at runtime.
 *
 * For each function pointer the most capable available variant wins;
 * pointers for which no variant is available are left untouched.
 *
 * @param c context whose function pointers are updated in place
 */
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    /* scalarproduct_int16: SSE2 takes precedence over MMXEXT. */
    if (EXTERNAL_SSE2(flags))
        c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
    else if (EXTERNAL_MMXEXT(flags))
        c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;

    /* vector_clipf: only an inline-asm SSE variant exists. */
    if (INLINE_SSE(flags))
        c->vector_clipf = ff_vector_clipf_sse;

    /* vector_clip_int32: SSE4 over SSE2 over MMX. When the Atom flag is
     * set, the SSE2 tier uses the "_int" variant instead. */
    if (EXTERNAL_SSE4(flags))
        c->vector_clip_int32 = ff_vector_clip_int32_sse4;
    else if (EXTERNAL_SSE2(flags))
        c->vector_clip_int32 = (flags & AV_CPU_FLAG_ATOM)
                               ? ff_vector_clip_int32_int_sse2
                               : ff_vector_clip_int32_sse2;
    else if (EXTERNAL_MMX(flags))
        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
}
|
58
libavcodec/x86/audiodsp_mmx.c
Normal file
58
libavcodec/x86/audiodsp_mmx.c
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
float min, float max, int len)
|
||||
{
|
||||
x86_reg i = (len - 16) * 4;
|
||||
__asm__ volatile (
|
||||
"movss %3, %%xmm4 \n\t"
|
||||
"movss %4, %%xmm5 \n\t"
|
||||
"shufps $0, %%xmm4, %%xmm4 \n\t"
|
||||
"shufps $0, %%xmm5, %%xmm5 \n\t"
|
||||
"1: \n\t"
|
||||
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||
"movaps 16(%2, %0), %%xmm1 \n\t"
|
||||
"movaps 32(%2, %0), %%xmm2 \n\t"
|
||||
"movaps 48(%2, %0), %%xmm3 \n\t"
|
||||
"maxps %%xmm4, %%xmm0 \n\t"
|
||||
"maxps %%xmm4, %%xmm1 \n\t"
|
||||
"maxps %%xmm4, %%xmm2 \n\t"
|
||||
"maxps %%xmm4, %%xmm3 \n\t"
|
||||
"minps %%xmm5, %%xmm0 \n\t"
|
||||
"minps %%xmm5, %%xmm1 \n\t"
|
||||
"minps %%xmm5, %%xmm2 \n\t"
|
||||
"minps %%xmm5, %%xmm3 \n\t"
|
||||
"movaps %%xmm0, (%1, %0) \n\t"
|
||||
"movaps %%xmm1, 16(%1, %0) \n\t"
|
||||
"movaps %%xmm2, 32(%1, %0) \n\t"
|
||||
"movaps %%xmm3, 48(%1, %0) \n\t"
|
||||
"sub $64, %0 \n\t"
|
||||
"jge 1b \n\t"
|
||||
: "+&r" (i)
|
||||
: "r" (dst), "r" (src), "m" (min), "m" (max)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
@ -26,119 +26,6 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
|
||||
SECTION_TEXT
|
||||
|
||||
%macro SCALARPRODUCT 0
|
||||
; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
|
||||
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
|
||||
shl orderq, 1
|
||||
add v1q, orderq
|
||||
add v2q, orderq
|
||||
neg orderq
|
||||
pxor m2, m2
|
||||
.loop:
|
||||
movu m0, [v1q + orderq]
|
||||
movu m1, [v1q + orderq + mmsize]
|
||||
pmaddwd m0, [v2q + orderq]
|
||||
pmaddwd m1, [v2q + orderq + mmsize]
|
||||
paddd m2, m0
|
||||
paddd m2, m1
|
||||
add orderq, mmsize*2
|
||||
jl .loop
|
||||
%if mmsize == 16
|
||||
movhlps m0, m2
|
||||
paddd m2, m0
|
||||
pshuflw m0, m2, 0x4e
|
||||
%else
|
||||
pshufw m0, m2, 0x4e
|
||||
%endif
|
||||
paddd m2, m0
|
||||
movd eax, m2
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmxext
|
||||
SCALARPRODUCT
|
||||
INIT_XMM sse2
|
||||
SCALARPRODUCT
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
||||
; int32_t max, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; %1 = number of xmm registers used
|
||||
; %2 = number of inline load/process/store loops per asm loop
|
||||
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
|
||||
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
|
||||
; %5 = suffix
|
||||
%macro VECTOR_CLIP_INT32 4-5
|
||||
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
|
||||
%if %4
|
||||
cvtsi2ss m4, minm
|
||||
cvtsi2ss m5, maxm
|
||||
%else
|
||||
movd m4, minm
|
||||
movd m5, maxm
|
||||
%endif
|
||||
SPLATD m4
|
||||
SPLATD m5
|
||||
.loop:
|
||||
%assign %%i 1
|
||||
%rep %2
|
||||
mova m0, [srcq+mmsize*0*%%i]
|
||||
mova m1, [srcq+mmsize*1*%%i]
|
||||
mova m2, [srcq+mmsize*2*%%i]
|
||||
mova m3, [srcq+mmsize*3*%%i]
|
||||
%if %3
|
||||
mova m7, [srcq+mmsize*4*%%i]
|
||||
mova m8, [srcq+mmsize*5*%%i]
|
||||
mova m9, [srcq+mmsize*6*%%i]
|
||||
mova m10, [srcq+mmsize*7*%%i]
|
||||
%endif
|
||||
CLIPD m0, m4, m5, m6
|
||||
CLIPD m1, m4, m5, m6
|
||||
CLIPD m2, m4, m5, m6
|
||||
CLIPD m3, m4, m5, m6
|
||||
%if %3
|
||||
CLIPD m7, m4, m5, m6
|
||||
CLIPD m8, m4, m5, m6
|
||||
CLIPD m9, m4, m5, m6
|
||||
CLIPD m10, m4, m5, m6
|
||||
%endif
|
||||
mova [dstq+mmsize*0*%%i], m0
|
||||
mova [dstq+mmsize*1*%%i], m1
|
||||
mova [dstq+mmsize*2*%%i], m2
|
||||
mova [dstq+mmsize*3*%%i], m3
|
||||
%if %3
|
||||
mova [dstq+mmsize*4*%%i], m7
|
||||
mova [dstq+mmsize*5*%%i], m8
|
||||
mova [dstq+mmsize*6*%%i], m9
|
||||
mova [dstq+mmsize*7*%%i], m10
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
add srcq, mmsize*4*(%2+%3)
|
||||
add dstq, mmsize*4*(%2+%3)
|
||||
sub lend, mmsize*(%2+%3)
|
||||
jg .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
%define CLIPD CLIPD_MMX
|
||||
VECTOR_CLIP_INT32 0, 1, 0, 0
|
||||
INIT_XMM sse2
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
|
||||
%define CLIPD CLIPD_SSE2
|
||||
VECTOR_CLIP_INT32 6, 2, 0, 1
|
||||
INIT_XMM sse4
|
||||
%define CLIPD CLIPD_SSE41
|
||||
%ifdef m8
|
||||
VECTOR_CLIP_INT32 11, 1, 1, 0
|
||||
%else
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0
|
||||
%endif
|
||||
|
||||
; %1 = aligned/unaligned
|
||||
%macro BSWAP_LOOPS 1
|
||||
mov r3, r2
|
||||
|
@ -26,23 +26,9 @@
|
||||
#include "dsputil_x86.h"
|
||||
#include "idct_xvid.h"
|
||||
|
||||
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
|
||||
int order);
|
||||
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
|
||||
int order);
|
||||
|
||||
void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
|
||||
void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
|
||||
|
||||
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
|
||||
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
||||
int cpu_flags, unsigned high_bit_depth)
|
||||
{
|
||||
@ -72,10 +58,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
||||
|
||||
c->gmc = ff_gmc_mmx;
|
||||
#endif /* HAVE_MMX_INLINE */
|
||||
|
||||
#if HAVE_MMX_EXTERNAL
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
|
||||
#endif /* HAVE_MMX_EXTERNAL */
|
||||
}
|
||||
|
||||
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
||||
@ -88,18 +70,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
||||
c->idct = ff_idct_xvid_mmxext;
|
||||
}
|
||||
#endif /* HAVE_MMXEXT_INLINE */
|
||||
|
||||
#if HAVE_MMXEXT_EXTERNAL
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
|
||||
#endif /* HAVE_MMXEXT_EXTERNAL */
|
||||
}
|
||||
|
||||
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
||||
int cpu_flags, unsigned high_bit_depth)
|
||||
{
|
||||
#if HAVE_SSE_INLINE
|
||||
c->vector_clipf = ff_vector_clipf_sse;
|
||||
#endif /* HAVE_SSE_INLINE */
|
||||
}
|
||||
|
||||
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||
@ -115,12 +85,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||
#endif /* HAVE_SSE2_INLINE */
|
||||
|
||||
#if HAVE_SSE2_EXTERNAL
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
|
||||
if (cpu_flags & AV_CPU_FLAG_ATOM) {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
|
||||
} else {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
|
||||
}
|
||||
c->bswap_buf = ff_bswap32_buf_sse2;
|
||||
#endif /* HAVE_SSE2_EXTERNAL */
|
||||
}
|
||||
@ -133,14 +97,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
|
||||
#endif /* HAVE_SSSE3_EXTERNAL */
|
||||
}
|
||||
|
||||
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
|
||||
int cpu_flags, unsigned high_bit_depth)
|
||||
{
|
||||
#if HAVE_SSE4_EXTERNAL
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
|
||||
#endif /* HAVE_SSE4_EXTERNAL */
|
||||
}
|
||||
|
||||
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
|
||||
unsigned high_bit_depth)
|
||||
{
|
||||
@ -152,18 +108,12 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
|
||||
if (X86_MMXEXT(cpu_flags))
|
||||
dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
|
||||
|
||||
if (X86_SSE(cpu_flags))
|
||||
dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth);
|
||||
|
||||
if (X86_SSE2(cpu_flags))
|
||||
dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags))
|
||||
dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);
|
||||
|
||||
if (EXTERNAL_SSE4(cpu_flags))
|
||||
dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);
|
||||
|
||||
if (CONFIG_ENCODERS)
|
||||
ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
|
||||
}
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include "config.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "constants.h"
|
||||
#include "dsputil_x86.h"
|
||||
#include "inline_asm.h"
|
||||
|
||||
@ -375,37 +374,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
|
||||
}
|
||||
}
|
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
float min, float max, int len)
|
||||
{
|
||||
x86_reg i = (len - 16) * 4;
|
||||
__asm__ volatile (
|
||||
"movss %3, %%xmm4 \n\t"
|
||||
"movss %4, %%xmm5 \n\t"
|
||||
"shufps $0, %%xmm4, %%xmm4 \n\t"
|
||||
"shufps $0, %%xmm5, %%xmm5 \n\t"
|
||||
"1: \n\t"
|
||||
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||
"movaps 16(%2, %0), %%xmm1 \n\t"
|
||||
"movaps 32(%2, %0), %%xmm2 \n\t"
|
||||
"movaps 48(%2, %0), %%xmm3 \n\t"
|
||||
"maxps %%xmm4, %%xmm0 \n\t"
|
||||
"maxps %%xmm4, %%xmm1 \n\t"
|
||||
"maxps %%xmm4, %%xmm2 \n\t"
|
||||
"maxps %%xmm4, %%xmm3 \n\t"
|
||||
"minps %%xmm5, %%xmm0 \n\t"
|
||||
"minps %%xmm5, %%xmm1 \n\t"
|
||||
"minps %%xmm5, %%xmm2 \n\t"
|
||||
"minps %%xmm5, %%xmm3 \n\t"
|
||||
"movaps %%xmm0, (%1, %0) \n\t"
|
||||
"movaps %%xmm1, 16(%1, %0) \n\t"
|
||||
"movaps %%xmm2, 32(%1, %0) \n\t"
|
||||
"movaps %%xmm3, 48(%1, %0) \n\t"
|
||||
"sub $64, %0 \n\t"
|
||||
"jge 1b \n\t"
|
||||
: "+&r" (i)
|
||||
: "r" (dst), "r" (src), "m" (min), "m" (max)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
@ -46,7 +46,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
|
||||
int dxx, int dxy, int dyx, int dyy,
|
||||
int shift, int r, int width, int height);
|
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
float min, float max, int len);
|
||||
|
||||
#endif /* AVCODEC_X86_DSPUTIL_X86_H */
|
||||
|
Loading…
Reference in New Issue
Block a user