Remove the unneeded add_bias/bias parameter from 3 functions:

DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit 80ba1ddb58)
This commit is contained in:
Justin Ruggles 2011-01-31 19:26:02 +00:00 committed by Michael Niedermayer
parent 403fa3cf07
commit a8ae4e0e7b
21 changed files with 59 additions and 80 deletions

View File

@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 0, 512);
ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
} else {
memcpy( out, saved, 448 * sizeof(float));
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
} else {
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 0, 64);
ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
memcpy( out + 576, buf + 64, 448 * sizeof(float));
}
}
@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
// buffer update
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
memcpy( saved, temp + 64, 64 * sizeof(float));
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
memcpy( saved, buf + 512, 448 * sizeof(float));

View File

@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
for(i=0; i<128; i++)
x[i] = s->transform_coeffs[ch][2*i];
ff_imdct_half(&s->imdct_256, s->tmp_output, x);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
for(i=0; i<128; i++)
x[i] = s->transform_coeffs[ch][2*i+1];
ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
} else {
ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
}
}

View File

@ -23,7 +23,7 @@
#include "libavcodec/dcadsp.h"
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias);
int decifactor, float scale);
void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
{

View File

@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1
cmp r3, #32
moveq r6, #256/32
movne r6, #256/64
NOVFP vldr d0, [sp, #16] @ scale, bias
NOVFP vldr s0, [sp, #16] @ scale
mov lr, #-16
1:
vmov.f32 q2, #0.0 @ v0
@ -51,8 +51,7 @@ NOVFP vldr d0, [sp, #16] @ scale, bias
vadd.f32 d4, d4, d5
vadd.f32 d6, d6, d7
vpadd.f32 d4, d4, d6
vdup.32 d5, d0[1]
vmla.f32 d5, d4, d0[0]
vmul.f32 d5, d4, d0[0]
vst1.32 {d5[0]}, [r0,:32]!
vst1.32 {d5[1]}, [r4,:32]!
bne 1b

View File

@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win,
float add_bias, int len);
const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,

View File

@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1
endfunc
function ff_vector_fmul_window_neon, export=1
VFP vdup.32 q8, d0[0]
NOVFP vld1.32 {d16[],d17[]}, [sp,:32]
push {r4,r5,lr}
VFP ldr lr, [sp, #12]
NOVFP ldr lr, [sp, #16]
ldr lr, [sp, #12]
sub r2, r2, #8
sub r5, lr, #2
add r2, r2, r5, lsl #2
@ -793,14 +790,12 @@ NOVFP ldr lr, [sp, #16]
vld1.64 {d4,d5}, [r3,:128]!
vld1.64 {d6,d7}, [r4,:128], r5
1: subs lr, lr, #4
vmov q11, q8
vmla.f32 d22, d0, d4
vmov q10, q8
vmla.f32 d23, d1, d5
vmul.f32 d22, d0, d4
vrev64.32 q3, q3
vmla.f32 d20, d0, d7
vmul.f32 d23, d1, d5
vrev64.32 q1, q1
vmla.f32 d21, d1, d6
vmul.f32 d20, d0, d7
vmul.f32 d21, d1, d6
beq 2f
vmla.f32 d22, d3, d7
vld1.64 {d0,d1}, [r1,:128]!

View File

@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32],
float scale, float bias);
float scale);
av_cold void ff_fft_init_arm(FFTContext *s)
{

View File

@ -42,7 +42,7 @@ VFP vpop {d0}
ldr r5, [sp, #9*4] @ window
ldr r2, [sp, #10*4] @ out
NOVFP vldr d0, [sp, #12*4] @ scale, bias
NOVFP vldr s0, [sp, #12*4] @ scale
add r8, r9, #12*4
mov lr, #64*4
@ -90,10 +90,8 @@ NOVFP vldr d0, [sp, #12*4] @ scale, bias
sub r11, r11, #512*4
b 2b
3:
vdup.32 q8, d0[1]
vdup.32 q9, d0[1]
vmla.f32 q8, q10, d0[0]
vmla.f32 q9, q1, d0[0]
vmul.f32 q8, q10, d0[0]
vmul.f32 q9, q1, d0[0]
vst1.32 {q3}, [r3,:128]
sub r3, r3, #16*4
vst1.32 {q2}, [r3,:128]

View File

@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
/* overlap and window */
q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16);
&su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16);
prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
start_pos += block_size;

View File

@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
s->synth.synth_filter_float(&s->imdct,
s->subband_fir_hist[chans], &s->hist_index[chans],
s->subband_fir_noidea[chans], prCoeff,
samples_out, s->raXin, scale, 0);
samples_out, s->raXin, scale);
samples_out+= 32;
}
@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
/* Interpolation */
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
scale, 0);
scale);
samples_in++;
samples_out += 2 * decifactor;
}

View File

@ -23,7 +23,7 @@
#include "dcadsp.h"
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias)
int decifactor, float scale)
{
float *out2 = out + decifactor;
const float *cf0 = coefs;
@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
v0 += s * *cf0++;
v1 += s * *--cf1;
}
*out++ = (v0 * scale) + bias;
*out2++ = (v1 * scale) + bias;
*out++ = v0 * scale;
*out2++ = v1 * scale;
}
}

View File

@ -21,7 +21,7 @@
typedef struct DCADSPContext {
void (*lfe_fir)(float *out, const float *in, const float *coefs,
int decifactor, float scale, float bias);
int decifactor, float scale);
} DCADSPContext;
void ff_dcadsp_init(DCADSPContext *s);

View File

@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
dst[i] = src0[i] * src1[i] + src2[i];
}
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
static void vector_fmul_window_c(float *dst, const float *src0,
const float *src1, const float *win, int len)
{
int i,j;
dst += len;
win += len;
@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
float s1 = src1[j];
float wi = win[i];
float wj = win[j];
dst[i] = s0*wj - s1*wi + add_bias;
dst[j] = s0*wi + s1*wj + add_bias;
dst[i] = s0*wj - s1*wi;
dst[j] = s0*wi + s1*wj;
}
}
@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul = vector_fmul_c;
c->vector_fmul_reverse = vector_fmul_reverse_c;
c->vector_fmul_add = vector_fmul_add_c;
c->vector_fmul_window = ff_vector_fmul_window_c;
c->vector_fmul_window = vector_fmul_window_c;
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
c->vector_clipf = vector_clipf_c;
c->float_to_int16 = ff_float_to_int16_c;

View File

@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len);
/* encoding scans */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
@ -393,7 +390,7 @@ typedef struct DSPContext {
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);

View File

@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
}
}
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
{
union {
vector float v;
float s[4];
} vadd;
vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
vector float zero, t0, t1, s0, s1, wi, wj;
const vector unsigned char reverse = vcprm(3,2,1,0);
int i,j;
@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
win += len;
src0+= len;
vadd.s[0] = add_bias;
vadd_bias = vec_splat(vadd.v, 0);
zero = (vector float)vec_splat_u32(0);
for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
s1 = vec_perm(s1, s1, reverse);
wj = vec_perm(wj, wj, reverse);
t0 = vec_madd(s0, wj, vadd_bias);
t0 = vec_madd(s0, wj, zero);
t0 = vec_nmsub(s1, wi, t0);
t1 = vec_madd(s0, wi, vadd_bias);
t1 = vec_madd(s0, wi, zero);
t1 = vec_madd(s1, wj, t1);
t1 = vec_perm(t1, t1, reverse);

View File

@ -24,7 +24,7 @@
static void synth_filter_float(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32], float scale, float bias)
float out[32], const float in[32], float scale)
{
float *synth_buf= synth_buf_ptr + *synth_buf_offset;
int i, j;
@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct,
c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
}
out[i ] = a*scale + bias;
out[i + 16] = b*scale + bias;
out[i ] = a*scale;
out[i + 16] = b*scale;
synth_buf2[i ] = c;
synth_buf2[i + 16] = d;
}

View File

@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],
float out[32], const float in[32],
float scale, float bias);
float scale);
} SynthFilterContext;
void ff_synth_filter_init(SynthFilterContext *c);

View File

@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
prev_buf + (bsize-wsize)/2,
buf1 + bsize*j,
ff_sine_windows[av_log2(wsize)],
0.0,
wsize/2);
out2 += wsize;

View File

@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
const float *win = vc->win[blockflag & previous_window];
if (blockflag == previous_window) {
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4);
vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4);
} else if (blockflag > previous_window) {
vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4);
vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4);
memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
} else {
memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4);
vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
}
memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
}

View File

@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
winlen >>= 1;
s->dsp.vector_fmul_window(start, start, start + winlen,
window, 0, winlen);
window, winlen);
s->channel[c].prev_block_len = s->subframe_len;
}

View File

@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1
);
}
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len){
#if HAVE_6REGS
if(add_bias == 0){
static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
const float *win, int len){
x86_reg i = -len*4;
x86_reg j = len*4-8;
__asm__ volatile(
@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float
:"+r"(i), "+r"(j)
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
);
}else
#endif
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
}
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len){
#if HAVE_6REGS
if(add_bias == 0){
const float *win, int len){
x86_reg i = -len*4;
x86_reg j = len*4-16;
__asm__ volatile(
@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s
:"+r"(i), "+r"(j)
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
);
}else
#endif
ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
}
#endif /* HAVE_6REGS */
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
{
@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_3dnow2;
#endif
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
}
@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul = vector_fmul_sse;
c->vector_fmul_reverse = vector_fmul_reverse_sse;
c->vector_fmul_add = vector_fmul_add_sse;
#if HAVE_6REGS
c->vector_fmul_window = vector_fmul_window_sse;
#endif
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
c->vector_clipf = vector_clipf_sse;
c->float_to_int16 = float_to_int16_sse;