1) Remove TBL support from the PPC performance reporting. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. There is no reason to keep useless code around.
2) Make the PPC performance reporting a configure option.
3) Make put_pixels16_altivec a bit faster by unrolling the loop by 4.
Patch by Romain Dolbeau <dolbeau at irisa dot fr>.
Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
0273ceebbd
commit
e45a2872fa
13
configure
vendored
13
configure
vendored
@ -27,6 +27,7 @@ make="make"
|
||||
strip="strip"
|
||||
cpu=`uname -m`
|
||||
tune="generic"
|
||||
powerpc_perf="no"
|
||||
mmx="default"
|
||||
altivec="default"
|
||||
mmi="default"
|
||||
@ -275,6 +276,8 @@ for opt do
|
||||
;;
|
||||
--tune=*) tune=`echo $opt | cut -d '=' -f 2`
|
||||
;;
|
||||
--powerpc-perf-enable) powerpc_perf="yes"
|
||||
;;
|
||||
--disable-mmx) mmx="no"
|
||||
;;
|
||||
--disable-altivec) altivec="no"
|
||||
@ -398,7 +401,7 @@ if test $tune != "generic"; then
|
||||
if test $altivec = "no"; then
|
||||
echo "WARNING: tuning for PPC74xx but altivec disabled !";
|
||||
fi
|
||||
TUNECPU=ppc7450
|
||||
TUNECPU=ppc7400
|
||||
;;
|
||||
G5|970|ppc970|PowerPC970|power4*|Power4*)
|
||||
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc64 -force_cpusubtype_ALL "
|
||||
@ -749,6 +752,7 @@ echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]"
|
||||
echo " --extra-libs=ELIBS add ELIBS [$ELIBS]"
|
||||
echo " --cpu=CPU force cpu to CPU [$cpu]"
|
||||
echo " --tune=PROCESSOR tune code for a particular CPU (may fails or misperforms on other CPUs)"
|
||||
echo " --powerpc-perf-enable enable performance report on PPC (requires enabling PMC)"
|
||||
echo " --disable-mmx disable mmx usage"
|
||||
echo " --disable-altivec disable AltiVec usage"
|
||||
echo " --disable-audio-oss disable OSS audio support [default=no]"
|
||||
@ -847,10 +851,9 @@ elif test "$cpu" = "sparc64" ; then
|
||||
elif test "$cpu" = "powerpc" ; then
|
||||
echo "TARGET_ARCH_POWERPC=yes" >> config.mak
|
||||
echo "#define ARCH_POWERPC 1" >> $TMPH
|
||||
echo "// Enable the next line to get PowerPC performance report" >> $TMPH
|
||||
echo "// #define POWERPC_TBL_PERFORMANCE_REPORT 1" >> $TMPH
|
||||
echo "// Enable the next line to use PMC registers instead of TBL" >> $TMPH
|
||||
echo "// #define POWERPC_PERF_USE_PMC 1" >> $TMPH
|
||||
if test "$powerpc_perf" = "yes"; then
|
||||
echo "#define POWERPC_PERFORMANCE_REPORT 1" >> $TMPH
|
||||
fi
|
||||
elif test "$cpu" = "mips" ; then
|
||||
echo "TARGET_ARCH_MIPS=yes" >> config.mak
|
||||
echo "#define ARCH_MIPS 1" >> $TMPH
|
||||
|
4
ffmpeg.c
4
ffmpeg.c
@ -2757,10 +2757,10 @@ int main(int argc, char **argv)
|
||||
av_free_static();
|
||||
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
#ifdef POWERPC_PERFORMANCE_REPORT
|
||||
extern void powerpc_display_perf_report(void);
|
||||
powerpc_display_perf_report();
|
||||
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
|
||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||
|
||||
#ifndef CONFIG_WIN32
|
||||
if (received_sigterm) {
|
||||
|
@ -655,11 +655,11 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
|
||||
/* next one assumes that ((line_size % 16) == 0) */
|
||||
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
|
||||
for(i=0; i<h; i++) {
|
||||
*((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
|
||||
@ -670,15 +670,27 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
block +=line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register vector unsigned char pixelsv1, pixelsv2;
|
||||
register vector unsigned char pixelsv1B, pixelsv2B;
|
||||
register vector unsigned char pixelsv1C, pixelsv2C;
|
||||
register vector unsigned char pixelsv1D, pixelsv2D;
|
||||
|
||||
register vector unsigned char perm = vec_lvsl(0, pixels);
|
||||
int i;
|
||||
register int line_size_2 = line_size << 1;
|
||||
register int line_size_3 = line_size + line_size_2;
|
||||
register int line_size_4 = line_size << 2;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
// hand-unrolling the loop by 4 gains about 15%
|
||||
// minimum execution time goes from 74 to 60 cycles
|
||||
// it's faster than -funroll-loops, but using
|
||||
// -funroll-loops w/ this is bad - 74 cycles again.
|
||||
// all this is on a 7450, tuning for the 7450
|
||||
#if 0
|
||||
for(i=0; i<h; i++) {
|
||||
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
|
||||
pixelsv2 = vec_ld(16, (unsigned char*)pixels);
|
||||
@ -687,8 +699,29 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
|
||||
pixels+=line_size;
|
||||
block +=line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
|
||||
#else
|
||||
for(i=0; i<h; i+=4) {
|
||||
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
|
||||
pixelsv2 = vec_ld(16, (unsigned char*)pixels);
|
||||
pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
|
||||
pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels);
|
||||
pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
|
||||
pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels);
|
||||
pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
|
||||
pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels);
|
||||
vec_st(vec_perm(pixelsv1, pixelsv2, perm),
|
||||
0, (unsigned char*)block);
|
||||
vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
|
||||
line_size, (unsigned char*)block);
|
||||
vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
|
||||
line_size_2, (unsigned char*)block);
|
||||
vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
|
||||
line_size_3, (unsigned char*)block);
|
||||
pixels+=line_size_4;
|
||||
block +=line_size_4;
|
||||
}
|
||||
#endif
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
@ -697,11 +730,11 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
|
||||
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
|
||||
void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
|
||||
for(i=0; i<h; i++) {
|
||||
op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
|
||||
@ -712,14 +745,14 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
block +=line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
||||
register vector unsigned char perm = vec_lvsl(0, pixels);
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
|
||||
for(i=0; i<h; i++) {
|
||||
pixelsv1 = vec_ld(0, (unsigned char*)pixels);
|
||||
@ -732,7 +765,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
|
||||
block +=line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
@ -740,10 +773,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
|
||||
/* next one assumes that ((line_size % 8) == 0) */
|
||||
void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int i;
|
||||
POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
for (i = 0; i < h; i++) {
|
||||
*((uint32_t *) (block)) =
|
||||
(((*((uint32_t *) (block))) |
|
||||
@ -761,13 +794,13 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
pixels += line_size;
|
||||
block += line_size;
|
||||
}
|
||||
POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
|
||||
for (i = 0; i < h; i++) {
|
||||
/*
|
||||
@ -798,7 +831,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
|
||||
block += line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
@ -806,10 +839,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
|
||||
/* next one assumes that ((line_size % 8) == 0) */
|
||||
void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int j;
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
for (j = 0; j < 2; j++) {
|
||||
int i;
|
||||
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||
@ -842,7 +875,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
block += 4 - line_size * h;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register int i;
|
||||
@ -873,7 +906,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
(vector unsigned short)pixelsv2);
|
||||
pixelssum1 = vec_add(pixelssum1, vctwo);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
for (i = 0; i < h ; i++) {
|
||||
int rightside = ((unsigned long)block & 0x0000000F);
|
||||
blockv = vec_ld(0, block);
|
||||
@ -914,17 +947,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
pixels += line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
/* next one assumes that ((line_size % 8) == 0) */
|
||||
void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int j;
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
for (j = 0; j < 2; j++) {
|
||||
int i;
|
||||
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||
@ -957,7 +990,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
block += 4 - line_size * h;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register int i;
|
||||
@ -989,7 +1022,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
(vector unsigned short)pixelsv2);
|
||||
pixelssum1 = vec_add(pixelssum1, vcone);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
for (i = 0; i < h ; i++) {
|
||||
int rightside = ((unsigned long)block & 0x0000000F);
|
||||
blockv = vec_ld(0, block);
|
||||
@ -1030,17 +1063,17 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
pixels += line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
/* next one assumes that ((line_size % 16) == 0) */
|
||||
void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int j;
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
for (j = 0; j < 4; j++) {
|
||||
int i;
|
||||
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||
@ -1073,7 +1106,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
block += 4 - line_size * h;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register int i;
|
||||
@ -1087,7 +1120,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
|
||||
temp1 = vec_ld(0, pixels);
|
||||
temp2 = vec_ld(16, pixels);
|
||||
@ -1151,17 +1184,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
pixels += line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
/* next one assumes that ((line_size % 16) == 0) */
|
||||
void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int j;
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
for (j = 0; j < 4; j++) {
|
||||
int i;
|
||||
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||
@ -1194,7 +1227,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
block += 4 - line_size * h;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
register int i;
|
||||
@ -1209,7 +1242,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
||||
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
|
||||
temp1 = vec_ld(0, pixels);
|
||||
temp2 = vec_ld(16, pixels);
|
||||
@ -1273,7 +1306,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
pixels += line_size;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
|
@ -41,8 +41,8 @@ int mm_support(void)
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
|
||||
#ifdef POWERPC_PERFORMANCE_REPORT
|
||||
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
|
||||
/* list below must match enum in dsputil_ppc.h */
|
||||
static unsigned char* perfname[] = {
|
||||
"fft_calc_altivec",
|
||||
@ -60,53 +60,32 @@ static unsigned char* perfname[] = {
|
||||
"clear_blocks_dcbz32_ppc",
|
||||
"clear_blocks_dcbz128_ppc"
|
||||
};
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
|
||||
unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
#ifdef POWERPC_PERFORMANCE_REPORT
|
||||
void powerpc_display_perf_report(void)
|
||||
{
|
||||
int i;
|
||||
#ifndef POWERPC_PERF_USE_PMC
|
||||
fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
|
||||
#else /* POWERPC_PERF_USE_PMC */
|
||||
int i, j;
|
||||
fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
|
||||
#endif /* POWERPC_PERF_USE_PMC */
|
||||
for(i = 0 ; i < powerpc_perf_total ; i++)
|
||||
{
|
||||
if (perfdata[i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
perfdata[i][powerpc_data_min],
|
||||
perfdata[i][powerpc_data_max],
|
||||
(double)perfdata[i][powerpc_data_sum] /
|
||||
(double)perfdata[i][powerpc_data_num],
|
||||
perfdata[i][powerpc_data_num]);
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
if (perfdata_pmc2[i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
perfdata_pmc2[i][powerpc_data_min],
|
||||
perfdata_pmc2[i][powerpc_data_max],
|
||||
(double)perfdata_pmc2[i][powerpc_data_sum] /
|
||||
(double)perfdata_pmc2[i][powerpc_data_num],
|
||||
perfdata_pmc2[i][powerpc_data_num]);
|
||||
if (perfdata_pmc3[i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr, " Function \"%s\" (pmc3):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
perfdata_pmc3[i][powerpc_data_min],
|
||||
perfdata_pmc3[i][powerpc_data_max],
|
||||
(double)perfdata_pmc3[i][powerpc_data_sum] /
|
||||
(double)perfdata_pmc3[i][powerpc_data_num],
|
||||
perfdata_pmc3[i][powerpc_data_num]);
|
||||
#endif
|
||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||
{
|
||||
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
||||
fprintf(stderr,
|
||||
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
j+1,
|
||||
perfdata[j][i][powerpc_data_min],
|
||||
perfdata[j][i][powerpc_data_max],
|
||||
(double)perfdata[j][i][powerpc_data_sum] /
|
||||
(double)perfdata[j][i][powerpc_data_num],
|
||||
perfdata[j][i][powerpc_data_num]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
|
||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||
|
||||
/* ***** WARNING ***** WARNING ***** WARNING ***** */
|
||||
/*
|
||||
@ -135,10 +114,10 @@ void powerpc_display_perf_report(void)
|
||||
*/
|
||||
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1);
|
||||
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
|
||||
register int misal = ((unsigned long)blocks & 0x00000010);
|
||||
register int i = 0;
|
||||
POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
#if 1
|
||||
if (misal) {
|
||||
((unsigned long*)blocks)[0] = 0L;
|
||||
@ -160,7 +139,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
#else
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
#endif
|
||||
POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
}
|
||||
|
||||
/* same as above, when dcbzl clear a whole 128B cache line
|
||||
@ -168,10 +147,10 @@ POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
|
||||
#ifndef NO_DCBZL
|
||||
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz128, 1);
|
||||
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
|
||||
register int misal = ((unsigned long)blocks & 0x0000007f);
|
||||
register int i = 0;
|
||||
POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
#if 1
|
||||
if (misal) {
|
||||
// we could probably also optimize this case,
|
||||
@ -186,7 +165,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
#else
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
#endif
|
||||
POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
}
|
||||
#else
|
||||
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
|
||||
@ -277,6 +256,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
||||
c->add_bytes= add_bytes_altivec;
|
||||
#endif /* 0 */
|
||||
c->put_pixels_tab[0][0] = put_pixels16_altivec;
|
||||
/* the two functions do the same thing, so use the same code */
|
||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
||||
// next one disabled as it's untested.
|
||||
#if 0
|
||||
@ -301,28 +282,21 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
#ifdef POWERPC_PERFORMANCE_REPORT
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
for (i = 0 ; i < powerpc_perf_total ; i++)
|
||||
{
|
||||
perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata[i][powerpc_data_num] = 0x0000000000000000;
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
perfdata_pmc2[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata_pmc2[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata_pmc2[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata_pmc2[i][powerpc_data_num] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
|
||||
perfdata_pmc3[i][powerpc_data_max] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_sum] = 0x0000000000000000;
|
||||
perfdata_pmc3[i][powerpc_data_num] = 0x0000000000000000;
|
||||
#endif /* POWERPC_PERF_USE_PMC */
|
||||
}
|
||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||
{
|
||||
perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF;
|
||||
perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000;
|
||||
perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000;
|
||||
perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
|
||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||
} else
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
{
|
||||
|
@ -30,8 +30,10 @@
|
||||
#define NO_DCBZL
|
||||
#endif /* CONFIG_DARWIN */
|
||||
|
||||
#ifdef POWERPC_TBL_PERFORMANCE_REPORT
|
||||
#ifdef POWERPC_PERFORMANCE_REPORT
|
||||
void powerpc_display_perf_report(void);
|
||||
/* the 604* have 2, the G3* have 4, the G4s have 6 */
|
||||
#define POWERPC_NUM_PMC_ENABLED 4
|
||||
/* if you add to the enum below, also add to the perfname array
|
||||
in dsputil_ppc.c */
|
||||
enum powerpc_perf_index {
|
||||
@ -58,98 +60,65 @@ enum powerpc_data_index {
|
||||
powerpc_data_num,
|
||||
powerpc_data_total
|
||||
};
|
||||
extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
|
||||
#ifdef POWERPC_PERF_USE_PMC
|
||||
extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
|
||||
extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
|
||||
#endif
|
||||
extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
|
||||
|
||||
#ifndef POWERPC_PERF_USE_PMC
|
||||
#define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a))
|
||||
#define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop
|
||||
#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0)
|
||||
#define POWERPC_TBL_STOP_COUNT(a, cond) do { \
|
||||
POWERPC_GET_CYCLES(tbl_stop); \
|
||||
if (tbl_stop > tbl_start) \
|
||||
{ \
|
||||
unsigned long diff = tbl_stop - tbl_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata[a][powerpc_data_min]) \
|
||||
perfdata[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata[a][powerpc_data_max]) \
|
||||
perfdata[a][powerpc_data_max] = diff; \
|
||||
perfdata[a][powerpc_data_sum] += diff; \
|
||||
perfdata[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else /* POWERPC_PERF_USE_PMC */
|
||||
#define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
|
||||
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
|
||||
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
|
||||
#if (POWERPC_NUM_PMC_ENABLED > 2)
|
||||
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
|
||||
#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop
|
||||
#define POWERPC_TBL_START_COUNT(a, cond) do { \
|
||||
POWERPC_GET_PMC3(pmc3_start); \
|
||||
POWERPC_GET_PMC2(pmc2_start); \
|
||||
POWERPC_GET_CYCLES(cycles_start); } while (0)
|
||||
#define POWERPC_TBL_STOP_COUNT(a, cond) do { \
|
||||
POWERPC_GET_CYCLES(cycles_stop); \
|
||||
POWERPC_GET_PMC2(pmc2_stop); \
|
||||
POWERPC_GET_PMC3(pmc3_stop); \
|
||||
if (cycles_stop >= cycles_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
cycles_stop - cycles_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata[a][powerpc_data_min]) \
|
||||
perfdata[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata[a][powerpc_data_max]) \
|
||||
perfdata[a][powerpc_data_max] = diff; \
|
||||
perfdata[a][powerpc_data_sum] += diff; \
|
||||
perfdata[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
if (pmc2_stop >= pmc2_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
pmc2_stop - pmc2_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata_pmc2[a][powerpc_data_min]) \
|
||||
perfdata_pmc2[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata_pmc2[a][powerpc_data_max]) \
|
||||
perfdata_pmc2[a][powerpc_data_max] = diff; \
|
||||
perfdata_pmc2[a][powerpc_data_sum] += diff; \
|
||||
perfdata_pmc2[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
if (pmc3_stop >= pmc3_start) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
pmc3_stop - pmc3_start; \
|
||||
if (cond) \
|
||||
{ \
|
||||
if (diff < perfdata_pmc3[a][powerpc_data_min]) \
|
||||
perfdata_pmc3[a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata_pmc3[a][powerpc_data_max]) \
|
||||
perfdata_pmc3[a][powerpc_data_max] = diff; \
|
||||
perfdata_pmc3[a][powerpc_data_sum] += diff; \
|
||||
perfdata_pmc3[a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
|
||||
#else
|
||||
#define POWERPC_GET_PMC3(a) do {} while (0)
|
||||
#define POWERPC_GET_PMC4(a) do {} while (0)
|
||||
#endif
|
||||
#if (POWERPC_NUM_PMC_ENABLED > 4)
|
||||
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
|
||||
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
|
||||
#else
|
||||
#define POWERPC_GET_PMC5(a) do {} while (0)
|
||||
#define POWERPC_GET_PMC6(a) do {} while (0)
|
||||
#endif
|
||||
#define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index;
|
||||
#define POWERPC_PERF_START_COUNT(a, cond) do { \
|
||||
POWERPC_GET_PMC6(pmc_start[5]); \
|
||||
POWERPC_GET_PMC5(pmc_start[4]); \
|
||||
POWERPC_GET_PMC4(pmc_start[3]); \
|
||||
POWERPC_GET_PMC3(pmc_start[2]); \
|
||||
POWERPC_GET_PMC2(pmc_start[1]); \
|
||||
POWERPC_GET_PMC1(pmc_start[0]); \
|
||||
} while (0)
|
||||
#define POWERPC_PERF_STOP_COUNT(a, cond) do { \
|
||||
POWERPC_GET_PMC1(pmc_stop[0]); \
|
||||
POWERPC_GET_PMC2(pmc_stop[1]); \
|
||||
POWERPC_GET_PMC3(pmc_stop[2]); \
|
||||
POWERPC_GET_PMC4(pmc_stop[3]); \
|
||||
POWERPC_GET_PMC5(pmc_stop[4]); \
|
||||
POWERPC_GET_PMC6(pmc_stop[5]); \
|
||||
if (cond) \
|
||||
{ \
|
||||
for(pmc_loop_index = 0; \
|
||||
pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
|
||||
pmc_loop_index++) \
|
||||
{ \
|
||||
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
|
||||
{ \
|
||||
unsigned long diff = \
|
||||
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
|
||||
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
|
||||
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
|
||||
if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
|
||||
perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
|
||||
perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \
|
||||
perfdata[pmc_loop_index][a][powerpc_data_num] ++; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif /* POWERPC_PERF_USE_PMC */
|
||||
|
||||
|
||||
#else /* POWERPC_TBL_PERFORMANCE_REPORT */
|
||||
#else /* POWERPC_PERFORMANCE_REPORT */
|
||||
// those are needed to avoid empty statements.
|
||||
#define POWERPC_TBL_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
|
||||
#define POWERPC_TBL_START_COUNT(a, cond) do {} while (0)
|
||||
#define POWERPC_TBL_STOP_COUNT(a, cond) do {} while (0)
|
||||
#endif /* POWERPC_TBL_PERFORMANCE_REPORT */
|
||||
#define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused))
|
||||
#define POWERPC_PERF_START_COUNT(a, cond) do {} while (0)
|
||||
#define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0)
|
||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||
|
||||
#endif /* _DSPUTIL_PPC_ */
|
||||
|
@ -62,7 +62,7 @@
|
||||
*/
|
||||
void fft_calc_altivec(FFTContext *s, FFTComplex *z)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
|
||||
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int ln = s->nbits;
|
||||
int j, np, np2;
|
||||
@ -72,7 +72,7 @@ POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6);
|
||||
int l;
|
||||
FFTSample tmp_re, tmp_im;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
|
||||
np = 1 << ln;
|
||||
|
||||
@ -137,7 +137,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
nloops = nloops << 1;
|
||||
} while (nblocks != 0);
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
#ifdef CONFIG_DARWIN
|
||||
@ -153,7 +153,7 @@ POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
FFTComplex *cptr, *cptr1;
|
||||
int k;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
|
||||
np = 1 << ln;
|
||||
|
||||
@ -241,7 +241,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
nloops = nloops << 1;
|
||||
} while (nblocks != 0);
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
@ -31,7 +31,7 @@
|
||||
#define GMC1_PERF_COND (h==8)
|
||||
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
const int A=(16-x16)*(16-y16);
|
||||
const int B=( x16)*(16-y16);
|
||||
@ -39,7 +39,7 @@ POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
const int D=( x16)*( y16);
|
||||
int i;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
for(i=0; i<h; i++)
|
||||
{
|
||||
@ -55,7 +55,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
src+= stride;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
|
||||
@ -78,7 +78,7 @@ POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
|
||||
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
tempA = vec_ld(0, (unsigned short*)ABCD);
|
||||
Av = vec_splat(tempA, 0);
|
||||
@ -166,7 +166,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
src += stride;
|
||||
}
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
|
||||
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
@ -165,16 +165,16 @@ static const vector_s16_t constants[5] = {
|
||||
|
||||
void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_idct_put_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
||||
void simple_idct_put(uint8_t *dest, int line_size, int16_t *block);
|
||||
simple_idct_put(dest, stride, (int16_t*)block);
|
||||
POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
vector_u8_t tmp;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
||||
|
||||
IDCT
|
||||
|
||||
@ -192,18 +192,18 @@ POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1);
|
||||
COPY (dest, vx6) dest += stride;
|
||||
COPY (dest, vx7)
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_idct_add_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
||||
void simple_idct_add(uint8_t *dest, int line_size, int16_t *block);
|
||||
simple_idct_add(dest, stride, (int16_t*)block);
|
||||
POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
vector_u8_t tmp;
|
||||
vector_s16_t tmp2, tmp3;
|
||||
@ -211,7 +211,7 @@ POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
vector_u8_t perm1;
|
||||
vector_u8_t p0, p1, p;
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
||||
|
||||
IDCT
|
||||
|
||||
@ -239,7 +239,7 @@ POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1);
|
||||
ADD (dest, vx6, perm0) dest += stride;
|
||||
ADD (dest, vx7, perm1)
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
}
|
||||
|
||||
|
@ -522,13 +522,13 @@ int dct_quantize_altivec(MpegEncContext* s,
|
||||
void dct_unquantize_h263_altivec(MpegEncContext *s,
|
||||
DCTELEM *block, int n, int qscale)
|
||||
{
|
||||
POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1);
|
||||
POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
|
||||
int i, level, qmul, qadd;
|
||||
int nCoeffs;
|
||||
|
||||
assert(s->block_last_index[n]>=0);
|
||||
|
||||
POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
||||
POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
||||
|
||||
qadd = (qscale - 1) | 1;
|
||||
qmul = qscale << 1;
|
||||
@ -641,5 +641,5 @@ POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1);
|
||||
}
|
||||
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||
|
||||
POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
|
||||
POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user