sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8). Originally committed as revision 5898 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
7bf0049623
commit
2dac4acfc0
@ -35,6 +35,9 @@
|
|||||||
/* snow.c */
|
/* snow.c */
|
||||||
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
|
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
|
||||||
|
|
||||||
|
/* vorbis.c */
|
||||||
|
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
|
||||||
|
|
||||||
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
|
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
|
||||||
uint32_t squareTbl[512] = {0, };
|
uint32_t squareTbl[512] = {0, };
|
||||||
|
|
||||||
@ -4090,6 +4093,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->inner_add_yblock = ff_snow_inner_add_yblock;
|
c->inner_add_yblock = ff_snow_inner_add_yblock;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_VORBIS_DECODER
|
||||||
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling;
|
||||||
|
#endif
|
||||||
|
|
||||||
c->shrink[0]= ff_img_copy_plane;
|
c->shrink[0]= ff_img_copy_plane;
|
||||||
c->shrink[1]= ff_shrink22;
|
c->shrink[1]= ff_shrink22;
|
||||||
c->shrink[2]= ff_shrink44;
|
c->shrink[2]= ff_shrink44;
|
||||||
|
@ -307,6 +307,8 @@ typedef struct DSPContext {
|
|||||||
|
|
||||||
void (*h261_loop_filter)(uint8_t *src, int stride);
|
void (*h261_loop_filter)(uint8_t *src, int stride);
|
||||||
|
|
||||||
|
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
|
||||||
|
|
||||||
/* (I)DCT */
|
/* (I)DCT */
|
||||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||||
void (*fdct248)(DCTELEM *block/* align 16*/);
|
void (*fdct248)(DCTELEM *block/* align 16*/);
|
||||||
|
@ -2711,6 +2711,59 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
 * Vorbis inverse channel coupling, SSE version.
 *
 * Converts a magnitude/angle residue pair back in place, two floats per
 * iteration, producing the same results as the scalar rule:
 *
 *   t = (m > 0) ? -a : a
 *   if (a > 0)  ang = m + t            (mag unchanged)
 *   else        ang = m,  mag = m - t
 *
 * Only SSE1 instructions are used, so the routine is safe on every CPU
 * that reports SSE (no 3DNow!/MMX state is touched, hence no emms).
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of floats to process; must be even, since the
 *                  loop always reads and writes two floats at a time.
 *                  No particular alignment is required (64-bit movlps).
 */
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
{
    /* IEEE-754 single precision sign bit; SSE1 has no integer shift on
     * xmm registers, so the mask is loaded from memory and broadcast. */
    static const uint32_t sign_bit = 0x80000000u;
    int i;

    for (i = 0; i < blocksize; i += 2) {
        __asm__ volatile(
            "movss   %2,     %%xmm5 \n\t"
            "shufps  $0,     %%xmm5, %%xmm5 \n\t" // sign mask in every lane
            "xorps   %%xmm0, %%xmm0 \n\t" // clear upper lanes before the
            "xorps   %%xmm1, %%xmm1 \n\t" // partial (64-bit) loads
            "movlps  %0,     %%xmm0 \n\t" // xmm0 = m
            "movlps  %1,     %%xmm1 \n\t" // xmm1 = a
            "xorps   %%xmm2, %%xmm2 \n\t"
            "xorps   %%xmm3, %%xmm3 \n\t"
            "cmpltps %%xmm0, %%xmm2 \n\t" // mask: 0.0 < m
            "cmpltps %%xmm1, %%xmm3 \n\t" // mask: 0.0 < a
            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
            "xorps   %%xmm2, %%xmm1 \n\t" // t = (m > 0) ? -a : a
            "movaps  %%xmm3, %%xmm4 \n\t"
            "andps   %%xmm1, %%xmm3 \n\t" // (a > 0) ? t : 0
            "andnps  %%xmm1, %%xmm4 \n\t" // (a > 0) ? 0 : t
            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a > 0) & t)
            "subps   %%xmm4, %%xmm0 \n\t" // m = m - (~(a > 0) & t)
            "movlps  %%xmm3, %1     \n\t"
            "movlps  %%xmm0, %0     \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            : "m"(sign_bit)
            /* "memory": each operand names one float but two are accessed */
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"
        );
    }
}
|
||||||
|
/**
 * Vorbis inverse channel coupling, SSE2 version.
 *
 * Converts a magnitude/angle residue pair back in place, four floats per
 * iteration, producing the same results as the scalar rule:
 *
 *   t = (m > 0) ? -a : a
 *   if (a > 0)  ang = m + t            (mag unchanged)
 *   else        ang = m,  mag = m - t
 *
 * @param mag       magnitude vector, updated in place; must be 16-byte
 *                  aligned because it is accessed with movaps
 * @param ang       angle vector, updated in place; must be 16-byte
 *                  aligned because it is accessed with movaps
 * @param blocksize number of floats to process; must be a multiple of 4,
 *                  since the loop always reads and writes four floats
 */
static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
{
    int i;

    for (i = 0; i < blocksize; i += 4) {
        __asm__ volatile(
            "movaps  %0,     %%xmm0 \n\t" // xmm0 = m
            "movaps  %1,     %%xmm1 \n\t" // xmm1 = a
            "pxor    %%xmm2, %%xmm2 \n\t"
            "pxor    %%xmm3, %%xmm3 \n\t"
            "cmpltps %%xmm0, %%xmm2 \n\t" // mask: 0.0 < m
            "cmpltps %%xmm1, %%xmm3 \n\t" // mask: 0.0 < a
            "pslld   $31,    %%xmm2 \n\t" // keep only the sign bit
            "pxor    %%xmm2, %%xmm1 \n\t" // t = (m > 0) ? -a : a
            "movaps  %%xmm3, %%xmm4 \n\t"
            "pand    %%xmm1, %%xmm3 \n\t" // (a > 0) ? t : 0
            "pandn   %%xmm1, %%xmm4 \n\t" // (a > 0) ? 0 : t
            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a > 0) & t)
            "subps   %%xmm4, %%xmm0 \n\t" // m = m - (~(a > 0) & t)
            "movaps  %%xmm3, %1     \n\t"
            "movaps  %%xmm0, %0     \n\t"
            : "+m"(mag[i]), "+m"(ang[i])
            :
            /* "memory": each operand names one float but four are accessed */
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory"
        );
    }
}
|
||||||
|
|
||||||
#ifdef CONFIG_SNOW_ENCODER
|
#ifdef CONFIG_SNOW_ENCODER
|
||||||
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
|
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
|
||||||
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
|
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
|
||||||
@ -3137,6 +3190,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
|
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if(mm_flags & MM_SSE2)
|
||||||
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
|
||||||
|
else if(mm_flags & MM_SSE)
|
||||||
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
|
@ -929,6 +929,7 @@ static int vorbis_decode_init(AVCodecContext *avccontext) {
|
|||||||
int i, j, hdr_type;
|
int i, j, hdr_type;
|
||||||
|
|
||||||
vc->avccontext = avccontext;
|
vc->avccontext = avccontext;
|
||||||
|
dsputil_init(&vc->dsp, avccontext);
|
||||||
|
|
||||||
if (!headers_len) {
|
if (!headers_len) {
|
||||||
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
|
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
|
||||||
@ -1443,6 +1444,31 @@ static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fa
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Vorbis inverse channel coupling, portable C reference.
 *
 * For every index the magnitude/angle pair (m, a) is replaced in place:
 *
 *   m >  0, a >  0:  ang = m - a                (mag unchanged)
 *   m >  0, a <= 0:  ang = m,      mag = m + a
 *   m <= 0, a >  0:  ang = a + m                (mag unchanged)
 *   m <= 0, a <= 0:  ang = m,      mag = m - a
 *
 * @param mag       magnitude vector, updated in place
 * @param ang       angle vector, updated in place
 * @param blocksize number of floats to process in each vector
 */
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
{
    int i;

    for (i = 0; i < blocksize; i++) {
        /* Read both inputs up front: the branches below overwrite them. */
        const float m = mag[i];
        const float a = ang[i];

        if (a > 0.0f) {
            /* Only the angle changes; the sign of m picks subtract or add. */
            ang[i] = (m > 0.0f) ? m - a : a + m;
        } else {
            ang[i] = m;
            mag[i] = (m > 0.0f) ? m + a : m - a;
        }
    }
}
|
||||||
|
|
||||||
// Decode the audio packet using the functions above
|
// Decode the audio packet using the functions above
|
||||||
#define BIAS 385
|
#define BIAS 385
|
||||||
|
|
||||||
@ -1541,26 +1567,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
|
|||||||
|
|
||||||
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
|
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
|
||||||
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
|
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
|
||||||
for(j=0;j<blocksize/2;++j) {
|
vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
|
||||||
float temp;
|
|
||||||
if (mag[j]>0.0) {
|
|
||||||
if (ang[j]>0.0) {
|
|
||||||
ang[j]=mag[j]-ang[j];
|
|
||||||
} else {
|
|
||||||
temp=ang[j];
|
|
||||||
ang[j]=mag[j];
|
|
||||||
mag[j]+=temp;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (ang[j]>0.0) {
|
|
||||||
ang[j]+=mag[j];
|
|
||||||
} else {
|
|
||||||
temp=ang[j];
|
|
||||||
ang[j]=mag[j];
|
|
||||||
mag[j]-=temp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dotproduct
|
// Dotproduct
|
||||||
|
@ -87,6 +87,7 @@ typedef struct {
|
|||||||
typedef struct vorbis_context_s {
|
typedef struct vorbis_context_s {
|
||||||
AVCodecContext *avccontext;
|
AVCodecContext *avccontext;
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
|
DSPContext dsp;
|
||||||
|
|
||||||
MDCTContext mdct0;
|
MDCTContext mdct0;
|
||||||
MDCTContext mdct1;
|
MDCTContext mdct1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user