add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4)
Patch by David Conrad %lessen42 A gmail P com%. Originally committed as revision 16458 to svn://svn.ffmpeg.org/ffmpeg/trunk.
This commit is contained in:
parent
094d9df72e
commit
8b2bc85f29
@ -935,6 +935,50 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
|
|||||||
write16x4(pix-2, stride, line1, line2, line3, line4);
|
write16x4(pix-2, stride, line1, line2, line3, line4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static av_always_inline
|
||||||
|
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
|
||||||
|
{
|
||||||
|
int y, aligned;
|
||||||
|
vec_u8 vblock;
|
||||||
|
vec_s16 vtemp, vweight, voffset, v0, v1;
|
||||||
|
vec_u16 vlog2_denom;
|
||||||
|
DECLARE_ALIGNED_16(int32_t, temp[4]);
|
||||||
|
LOAD_ZERO;
|
||||||
|
|
||||||
|
offset <<= log2_denom;
|
||||||
|
if(log2_denom) offset += 1<<(log2_denom-1);
|
||||||
|
temp[0] = log2_denom;
|
||||||
|
temp[1] = weight;
|
||||||
|
temp[2] = offset;
|
||||||
|
|
||||||
|
vtemp = (vec_s16)vec_ld(0, temp);
|
||||||
|
vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
|
||||||
|
vweight = vec_splat(vtemp, 3);
|
||||||
|
voffset = vec_splat(vtemp, 5);
|
||||||
|
aligned = !((unsigned long)block & 0xf);
|
||||||
|
|
||||||
|
for (y=0; y<h; y++) {
|
||||||
|
vblock = vec_ld(0, block);
|
||||||
|
|
||||||
|
v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
|
||||||
|
v1 = (vec_s16)vec_mergel(zero_u8v, vblock);
|
||||||
|
|
||||||
|
if (w == 16 || aligned) {
|
||||||
|
v0 = vec_mladd(v0, vweight, zero_s16v);
|
||||||
|
v0 = vec_adds(v0, voffset);
|
||||||
|
v0 = vec_sra(v0, vlog2_denom);
|
||||||
|
}
|
||||||
|
if (w == 16 || !aligned) {
|
||||||
|
v1 = vec_mladd(v1, vweight, zero_s16v);
|
||||||
|
v1 = vec_adds(v1, voffset);
|
||||||
|
v1 = vec_sra(v1, vlog2_denom);
|
||||||
|
}
|
||||||
|
vblock = vec_packsu(v0, v1);
|
||||||
|
vec_st(vblock, 0, block);
|
||||||
|
|
||||||
|
block += stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
|
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
|
||||||
@ -1002,6 +1046,9 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Instantiate the fixed-size weight and biweight entry points for a W x H
 * block. Each generated function only forwards its arguments to the matching
 * always-inline worker, passing W and H as constants. */
#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec( \
    uint8_t *block, int stride, int log2_denom, int weight, int offset) \
{ \
    weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
} \
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec( \
    uint8_t *dst, uint8_t *src, int stride, int log2_denom, \
    int weightd, int weights, int offset) \
{ \
    biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
}
|
||||||
@ -1051,6 +1098,11 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
|
|||||||
dspfunc(avg_h264_qpel, 0, 16);
|
dspfunc(avg_h264_qpel, 0, 16);
|
||||||
#undef dspfunc
|
#undef dspfunc
|
||||||
|
|
||||||
|
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
|
||||||
|
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
|
||||||
|
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
|
||||||
|
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
|
||||||
|
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
|
||||||
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
|
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
|
||||||
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
|
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
|
||||||
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
|
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
|
||||||
|
Loading…
Reference in New Issue
Block a user