sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8). Originally committed as revision 5898 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
7bf0049623
commit
2dac4acfc0
@ -35,6 +35,9 @@
|
||||
/* snow.c */
|
||||
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
|
||||
|
||||
/* vorbis.c */
|
||||
/* Plain C Vorbis inverse channel coupling, defined in vorbis.c: rewrites
 * mag[0..blocksize-1] and ang[0..blocksize-1] in place. */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
|
||||
|
||||
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
|
||||
uint32_t squareTbl[512] = {0, };
|
||||
|
||||
@ -4090,6 +4093,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->inner_add_yblock = ff_snow_inner_add_yblock;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_VORBIS_DECODER
|
||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling;
|
||||
#endif
|
||||
|
||||
c->shrink[0]= ff_img_copy_plane;
|
||||
c->shrink[1]= ff_shrink22;
|
||||
c->shrink[2]= ff_shrink44;
|
||||
|
@ -307,6 +307,8 @@ typedef struct DSPContext {
|
||||
|
||||
void (*h261_loop_filter)(uint8_t *src, int stride);
|
||||
|
||||
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
|
||||
|
||||
/* (I)DCT */
|
||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||
void (*fdct248)(DCTELEM *block/* align 16*/);
|
||||
|
@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * SSE (SSE1-only) Vorbis inverse channel coupling, performed in place.
 *
 * For every index the result matches the plain C routine:
 *   m > 0, a > 0 :  ang = m - a            (mag unchanged)
 *   m > 0, else  :  ang = m,  mag = m + a
 *   else,  a > 0 :  ang = m + a            (mag unchanged)
 *   else,  else  :  ang = m,  mag = m - a
 *
 * Only SSE1 instructions are used, so the routine is safe on any CPU that
 * reports SSE (the previous body used 3DNow! pfcmpge/pfadd/pfsub, which
 * SSE-capable CPUs without 3DNow! do not implement).  The sign mask is
 * built from a strict m > 0 test so that mag == 0 is handled like the C
 * routine.
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of floats per vector; elements are handled four
 *                  at a time, then two at a time, so an odd count touches
 *                  one float past the end (as the old 2-wide loop did)
 *
 * Loads and stores are unaligned (movups/movlps), so no alignment is
 * required of mag or ang.
 *
 * NOTE(review): xmm0-xmm5 are used as scratch and are not listed as
 * clobbers; GCC is believed to reject xmm names in clobber lists when SSE
 * code generation is disabled -- confirm before adding them.
 */
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
{
    /* IEEE-754 single-precision sign bit, replicated into all four lanes. */
    static const uint32_t sign_mask[4] = {
        0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u
    };
    int i;

/* Shared arithmetic: expects m in xmm0, a in xmm1 and the sign mask as
 * operand %2; leaves the new ang in xmm3 and the new mag in xmm0. */
#define VORBIS_COUPLING_SSE_CORE                                              \
            "movups  %2,     %%xmm5 \n\t" /* per-lane sign-bit mask        */ \
            "xorps   %%xmm2, %%xmm2 \n\t"                                     \
            "xorps   %%xmm3, %%xmm3 \n\t"                                     \
            "cmpltps %%xmm0, %%xmm2 \n\t" /* mask: m > 0.0                 */ \
            "cmpltps %%xmm1, %%xmm3 \n\t" /* mask: a > 0.0                 */ \
            "andps   %%xmm5, %%xmm2 \n\t" /* keep only the sign bit        */ \
            "xorps   %%xmm2, %%xmm1 \n\t" /* a' = (m > 0) ? -a : a         */ \
            "movaps  %%xmm3, %%xmm4 \n\t"                                     \
            "andps   %%xmm1, %%xmm3 \n\t" /* (a > 0) ? a' : 0              */ \
            "andnps  %%xmm1, %%xmm4 \n\t" /* (a > 0) ? 0  : a'             */ \
            "addps   %%xmm0, %%xmm3 \n\t" /* ang = m + ((a > 0) ? a' : 0)  */ \
            "subps   %%xmm4, %%xmm0 \n\t" /* mag = m - ((a > 0) ? 0 : a')  */

    /* Bulk: four floats per iteration. */
    for (i = 0; blocksize - i >= 4; i += 4) {
        __asm__ volatile(
            "movups  %0,     %%xmm0 \n\t"
            "movups  %1,     %%xmm1 \n\t"
            VORBIS_COUPLING_SSE_CORE
            "movups  %%xmm3, %1     \n\t"
            "movups  %%xmm0, %0     \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            : "m"(sign_mask[0])
            : "memory"
        );
    }

    /* Tail: two floats per iteration in the low half of the registers; the
     * upper lanes are zeroed first and their results are never stored. */
    for (; i < blocksize; i += 2) {
        __asm__ volatile(
            "xorps   %%xmm0, %%xmm0 \n\t"
            "xorps   %%xmm1, %%xmm1 \n\t"
            "movlps  %0,     %%xmm0 \n\t"
            "movlps  %1,     %%xmm1 \n\t"
            VORBIS_COUPLING_SSE_CORE
            "movlps  %%xmm3, %1     \n\t"
            "movlps  %%xmm0, %0     \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            : "m"(sign_mask[0])
            : "memory"
        );
    }

#undef VORBIS_COUPLING_SSE_CORE
}
|
||||
/**
 * SSE2 Vorbis inverse channel coupling, performed in place, four floats
 * per iteration.
 *
 * For every index the result matches the plain C routine:
 *   m > 0, a > 0 :  ang = m - a            (mag unchanged)
 *   m > 0, else  :  ang = m,  mag = m + a
 *   else,  a > 0 :  ang = m + a            (mag unchanged)
 *   else,  else  :  ang = m,  mag = m - a
 *
 * The masks use a strict "greater than zero" compare (cmpltps against a
 * zeroed register).  A non-strict compare would classify mag == 0 as
 * positive and negate ang, which disagrees with the C routine whenever
 * mag == 0 and ang != 0.
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of floats per vector; processed in groups of
 *                  four, so a count that is not a multiple of 4 touches
 *                  memory past the end
 *
 * movaps is used for all memory accesses, so &mag[0] and &ang[0] must be
 * 16-byte aligned.
 */
static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
{
    int i;

    for (i = 0; i < blocksize; i += 4) {
        __asm__ volatile(
            "movaps  %0,     %%xmm0 \n\t" /* m                              */
            "movaps  %1,     %%xmm1 \n\t" /* a                              */
            "pxor    %%xmm2, %%xmm2 \n\t"
            "pxor    %%xmm3, %%xmm3 \n\t"
            "cmpltps %%xmm0, %%xmm2 \n\t" /* mask: m > 0.0                  */
            "cmpltps %%xmm1, %%xmm3 \n\t" /* mask: a > 0.0                  */
            "pslld   $31,    %%xmm2 \n\t" /* keep only the sign bit         */
            "pxor    %%xmm2, %%xmm1 \n\t" /* a' = (m > 0) ? -a : a          */
            "movaps  %%xmm3, %%xmm4 \n\t"
            "pand    %%xmm1, %%xmm3 \n\t" /* (a > 0) ? a' : 0               */
            "pandn   %%xmm1, %%xmm4 \n\t" /* (a > 0) ? 0  : a'              */
            "addps   %%xmm0, %%xmm3 \n\t" /* ang = m + ((a > 0) ? a' : 0)   */
            "subps   %%xmm4, %%xmm0 \n\t" /* mag = m - ((a > 0) ? 0 : a')   */
            "movaps  %%xmm3, %1     \n\t"
            "movaps  %%xmm0, %0     \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            :
            : "memory"
        );
    }
}
|
||||
|
||||
#ifdef CONFIG_SNOW_ENCODER
|
||||
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
|
||||
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
|
||||
@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(mm_flags & MM_SSE2)
|
||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
|
||||
else if(mm_flags & MM_SSE)
|
||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ENCODERS
|
||||
|
@ -929,6 +929,7 @@ static int vorbis_decode_init(AVCodecContext *avccontext) {
|
||||
int i, j, hdr_type;
|
||||
|
||||
vc->avccontext = avccontext;
|
||||
dsputil_init(&vc->dsp, avccontext);
|
||||
|
||||
if (!headers_len) {
|
||||
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
|
||||
@ -1443,6 +1444,31 @@ static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fa
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Vorbis inverse channel coupling, performed in place on two vectors.
 *
 * For each index, with m = mag[n] and a = ang[n] read before any store:
 *   a > 0 :  mag is left alone, ang becomes m - a when m > 0, else a + m
 *   else  :  ang becomes m,     mag becomes m + a when m > 0, else m - a
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of elements to process in each vector
 */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
{
    int n;

    for (n = 0; n < blocksize; n++) {
        const float m = mag[n];
        const float a = ang[n];

        if (a > 0.0) {
            /* Positive angle: only the angle channel changes. */
            if (m > 0.0)
                ang[n] = m - a;
            else
                ang[n] = a + m;
        } else {
            /* Non-positive angle: angle takes the old magnitude and the
             * magnitude absorbs the old angle, moving it toward zero. */
            ang[n] = m;
            if (m > 0.0)
                mag[n] = m + a;
            else
                mag[n] = m - a;
        }
    }
}
|
||||
|
||||
// Decode the audio packet using the functions above
|
||||
#define BIAS 385
|
||||
|
||||
@ -1541,26 +1567,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
|
||||
|
||||
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
|
||||
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
|
||||
for(j=0;j<blocksize/2;++j) {
|
||||
float temp;
|
||||
if (mag[j]>0.0) {
|
||||
if (ang[j]>0.0) {
|
||||
ang[j]=mag[j]-ang[j];
|
||||
} else {
|
||||
temp=ang[j];
|
||||
ang[j]=mag[j];
|
||||
mag[j]+=temp;
|
||||
}
|
||||
} else {
|
||||
if (ang[j]>0.0) {
|
||||
ang[j]+=mag[j];
|
||||
} else {
|
||||
temp=ang[j];
|
||||
ang[j]=mag[j];
|
||||
mag[j]-=temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
|
||||
}
|
||||
|
||||
// Dotproduct
|
||||
|
@ -87,6 +87,7 @@ typedef struct {
|
||||
typedef struct vorbis_context_s {
|
||||
AVCodecContext *avccontext;
|
||||
GetBitContext gb;
|
||||
DSPContext dsp;
|
||||
|
||||
MDCTContext mdct0;
|
||||
MDCTContext mdct1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user