Merge remote-tracking branch 'qatar/master'

* qatar/master:
  dcadsp: split lfe_dir cases

Conflicts:
	libavcodec/arm/dcadsp_init_arm.c

See: 45854df9a5220bdde400a447f63f61618b89dde2
Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-02-08 02:16:47 +01:00
commit 5794e9fce2
3 changed files with 38 additions and 58 deletions

View File

@ -24,16 +24,22 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavcodec/dcadsp.h" #include "libavcodec/dcadsp.h"
void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale); float scale);
void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs,
float scale);
void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs,
float scale);
void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs,
float scale);
void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
SynthFilterContext *synth, FFTContext *imdct, SynthFilterContext *synth, FFTContext *imdct,
float synth_buf_ptr[512], float synth_buf_ptr[512],
int *synth_buf_offset, float synth_buf2[32], int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out, const float window[512], float *samples_out,
float raXin[32], float scale); float raXin[32], float scale);
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale);
void ff_synth_filter_float_vfp(FFTContext *imdct, void ff_synth_filter_float_vfp(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset, float *synth_buf_ptr, int *synth_buf_offset,
@ -47,42 +53,18 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32], float out[32], const float in[32],
float scale); float scale);
static void lfe_fir0_vfp(float *out, const float *in, const float *coefs,
float scale)
{
ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale);
}
static void lfe_fir1_vfp(float *out, const float *in, const float *coefs,
float scale)
{
ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale);
}
static void lfe_fir0_neon(float *out, const float *in, const float *coefs,
float scale)
{
ff_dca_lfe_fir_neon(out, in, coefs, 32, scale);
}
static void lfe_fir1_neon(float *out, const float *in, const float *coefs,
float scale)
{
ff_dca_lfe_fir_neon(out, in, coefs, 64, scale);
}
av_cold void ff_dcadsp_init_arm(DCADSPContext *s) av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
s->lfe_fir[0] = lfe_fir0_vfp; s->lfe_fir[0] = ff_dca_lfe_fir32_vfp;
s->lfe_fir[1] = lfe_fir1_vfp; s->lfe_fir[1] = ff_dca_lfe_fir64_vfp;
s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
} }
if (have_neon(cpu_flags)) { if (have_neon(cpu_flags)) {
s->lfe_fir[0] = lfe_fir0_neon; s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = lfe_fir1_neon; s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
} }
} }

View File

@ -20,17 +20,23 @@
#include "libavutil/arm/asm.S" #include "libavutil/arm/asm.S"
function ff_dca_lfe_fir_neon, export=1 function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr} push {r4-r6,lr}
NOVFP vmov s0, r3 @ scale
mov r3, #32 @ decifactor
mov r6, #256/32
b dca_lfe_fir
endfunc
function ff_dca_lfe_fir1_neon, export=1
push {r4-r6,lr}
NOVFP vmov s0, r3 @ scale
mov r3, #64 @ decifactor
mov r6, #256/64
dca_lfe_fir:
add r4, r0, r3, lsl #2 @ out2 add r4, r0, r3, lsl #2 @ out2
add r5, r2, #256*4-16 @ cf1 add r5, r2, #256*4-16 @ cf1
sub r1, r1, #12 sub r1, r1, #12
cmp r3, #32
ite eq
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr s0, [sp, #16] @ scale
mov lr, #-16 mov lr, #-16
1: 1:
vmov.f32 q2, #0.0 @ v0 vmov.f32 q2, #0.0 @ v0

View File

@ -24,7 +24,6 @@
POUT .req a1 POUT .req a1
PIN .req a2 PIN .req a2
PCOEF .req a3 PCOEF .req a3
DECIFACTOR .req a4
OLDFPSCR .req a4 OLDFPSCR .req a4
COUNTER .req ip COUNTER .req ip
@ -129,6 +128,15 @@ POST3 .req s27
.endm .endm
.macro dca_lfe_fir decifactor .macro dca_lfe_fir decifactor
function ff_dca_lfe_fir\decifactor\()_vfp, export=1
NOVFP vmov s0, r3
fmrx OLDFPSCR, FPSCR
ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
fmxr FPSCR, ip
vldr IN0, [PIN, #-0*4]
vldr IN1, [PIN, #-1*4]
vldr IN2, [PIN, #-2*4]
vldr IN3, [PIN, #-3*4]
.if \decifactor == 32 .if \decifactor == 32
.set JMAX, 8 .set JMAX, 8
vpush {s16-s31} vpush {s16-s31}
@ -165,32 +173,16 @@ POST3 .req s27
.endif .endif
fmxr FPSCR, OLDFPSCR fmxr FPSCR, OLDFPSCR
bx lr bx lr
endfunc
.endm .endm
dca_lfe_fir 64
/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
* int decifactor, float scale)
*/
function ff_dca_lfe_fir_vfp, export=1
teq DECIFACTOR, #32
fmrx OLDFPSCR, FPSCR
ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
fmxr FPSCR, ip
NOVFP vldr s0, [sp]
vldr IN0, [PIN, #-0*4]
vldr IN1, [PIN, #-1*4]
vldr IN2, [PIN, #-2*4]
vldr IN3, [PIN, #-3*4]
beq 32f
64: dca_lfe_fir 64
.ltorg .ltorg
32: dca_lfe_fir 32 dca_lfe_fir 32
endfunc
.unreq POUT .unreq POUT
.unreq PIN .unreq PIN
.unreq PCOEF .unreq PCOEF
.unreq DECIFACTOR
.unreq OLDFPSCR .unreq OLDFPSCR
.unreq COUNTER .unreq COUNTER