COSMETICS: tabs --> spaces, some prettyprinting
Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
50827fcf44
commit
bb270c0896
@ -96,7 +96,7 @@ unknown_opt:
|
|||||||
if(po->u.func2_arg(opt+1, arg)<0)
|
if(po->u.func2_arg(opt+1, arg)<0)
|
||||||
goto unknown_opt;
|
goto unknown_opt;
|
||||||
} else {
|
} else {
|
||||||
po->u.func_arg(arg);
|
po->u.func_arg(arg);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parse_arg_file(opt);
|
parse_arg_file(opt);
|
||||||
@ -122,8 +122,8 @@ void print_error(const char *filename, int err)
|
|||||||
break;
|
break;
|
||||||
case AVERROR_IO:
|
case AVERROR_IO:
|
||||||
fprintf(stderr, "%s: I/O error occured\n"
|
fprintf(stderr, "%s: I/O error occured\n"
|
||||||
"Usually that means that input file is truncated and/or corrupted.\n",
|
"Usually that means that input file is truncated and/or corrupted.\n",
|
||||||
filename);
|
filename);
|
||||||
break;
|
break;
|
||||||
case AVERROR_NOMEM:
|
case AVERROR_NOMEM:
|
||||||
fprintf(stderr, "%s: memory allocation error occured\n", filename);
|
fprintf(stderr, "%s: memory allocation error occured\n", filename);
|
||||||
|
174
configure
vendored
174
configure
vendored
@ -688,26 +688,26 @@ fi
|
|||||||
needmdynamicnopic="no"
|
needmdynamicnopic="no"
|
||||||
if test $targetos = Darwin; then
|
if test $targetos = Darwin; then
|
||||||
if test -n "`$cc -v 2>&1 | grep xlc`"; then
|
if test -n "`$cc -v 2>&1 | grep xlc`"; then
|
||||||
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
|
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
|
||||||
else
|
else
|
||||||
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
|
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
|
||||||
case "$gcc_version" in
|
case "$gcc_version" in
|
||||||
*2.95*)
|
*2.95*)
|
||||||
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
|
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
|
||||||
;;
|
;;
|
||||||
*[34].*)
|
*[34].*)
|
||||||
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
|
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
|
||||||
if test "$lshared" = no; then
|
if test "$lshared" = no; then
|
||||||
needmdynamicnopic="yes"
|
needmdynamicnopic="yes"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
|
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
|
||||||
if test "$lshared" = no; then
|
if test "$lshared" = no; then
|
||||||
needmdynamicnopic="yes"
|
needmdynamicnopic="yes"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -725,62 +725,62 @@ TUNECPU="generic"
|
|||||||
POWERPCMODE="32bits"
|
POWERPCMODE="32bits"
|
||||||
if test $tune != "generic"; then
|
if test $tune != "generic"; then
|
||||||
case $tune in
|
case $tune in
|
||||||
601|ppc601|PowerPC601)
|
601|ppc601|PowerPC601)
|
||||||
CFLAGS="$CFLAGS -mcpu=601"
|
CFLAGS="$CFLAGS -mcpu=601"
|
||||||
if test $altivec = "yes"; then
|
if test $altivec = "yes"; then
|
||||||
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
|
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc601
|
TUNECPU=ppc601
|
||||||
;;
|
;;
|
||||||
603*|ppc603*|PowerPC603*)
|
603*|ppc603*|PowerPC603*)
|
||||||
CFLAGS="$CFLAGS -mcpu=603"
|
CFLAGS="$CFLAGS -mcpu=603"
|
||||||
if test $altivec = "yes"; then
|
if test $altivec = "yes"; then
|
||||||
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
|
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc603
|
TUNECPU=ppc603
|
||||||
;;
|
;;
|
||||||
604*|ppc604*|PowerPC604*)
|
604*|ppc604*|PowerPC604*)
|
||||||
CFLAGS="$CFLAGS -mcpu=604"
|
CFLAGS="$CFLAGS -mcpu=604"
|
||||||
if test $altivec = "yes"; then
|
if test $altivec = "yes"; then
|
||||||
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
|
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc604
|
TUNECPU=ppc604
|
||||||
;;
|
;;
|
||||||
G3|g3|75*|ppc75*|PowerPC75*)
|
G3|g3|75*|ppc75*|PowerPC75*)
|
||||||
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
|
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
|
||||||
if test $altivec = "yes"; then
|
if test $altivec = "yes"; then
|
||||||
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
|
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc750
|
TUNECPU=ppc750
|
||||||
;;
|
;;
|
||||||
G4|g4|745*|ppc745*|PowerPC745*)
|
G4|g4|745*|ppc745*|PowerPC745*)
|
||||||
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
|
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
|
||||||
if test $altivec = "no"; then
|
if test $altivec = "no"; then
|
||||||
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
|
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc7450
|
TUNECPU=ppc7450
|
||||||
;;
|
;;
|
||||||
74*|ppc74*|PowerPC74*)
|
74*|ppc74*|PowerPC74*)
|
||||||
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
|
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
|
||||||
if test $altivec = "no"; then
|
if test $altivec = "no"; then
|
||||||
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
|
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc7400
|
TUNECPU=ppc7400
|
||||||
;;
|
;;
|
||||||
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
|
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
|
||||||
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
|
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
|
||||||
if test $altivec = "no"; then
|
if test $altivec = "no"; then
|
||||||
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
|
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
|
||||||
fi
|
fi
|
||||||
TUNECPU=ppc970
|
TUNECPU=ppc970
|
||||||
POWERPCMODE="64bits"
|
POWERPCMODE="64bits"
|
||||||
;;
|
;;
|
||||||
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
|
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
|
||||||
CFLAGS="$CFLAGS -march=$tune"
|
CFLAGS="$CFLAGS -march=$tune"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "WARNING: Unknown CPU \"$tune\", ignored."
|
echo "WARNING: Unknown CPU \"$tune\", ignored."
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -876,8 +876,8 @@ if test -z "$cross_prefix" ; then
|
|||||||
cat > $TMPC << EOF
|
cat > $TMPC << EOF
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
int main(int argc, char ** argv){
|
int main(int argc, char ** argv){
|
||||||
volatile uint32_t i=0x01234567;
|
volatile uint32_t i=0x01234567;
|
||||||
return (*((uint8_t*)(&i))) == 0x67;
|
return (*((uint8_t*)(&i))) == 0x67;
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -912,8 +912,8 @@ $cc -o $TMPE $TMPC 2>/dev/null || inttypes="no"
|
|||||||
cat > $TMPC << EOF
|
cat > $TMPC << EOF
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
int main(int argc, char ** argv){
|
int main(int argc, char ** argv){
|
||||||
volatile uint_fast64_t i=0x01234567;
|
volatile uint_fast64_t i=0x01234567;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -1152,10 +1152,10 @@ fi
|
|||||||
|
|
||||||
case "`$cc -v 2>&1 | grep version`" in
|
case "`$cc -v 2>&1 | grep version`" in
|
||||||
*gcc*)
|
*gcc*)
|
||||||
CFLAGS="-Wall -Wno-switch $CFLAGS"
|
CFLAGS="-Wall -Wno-switch $CFLAGS"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
if test "$sdl" = "no" ; then
|
if test "$sdl" = "no" ; then
|
||||||
@ -1163,7 +1163,7 @@ if test "$sdl" = "no" ; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$debug" = "yes"; then
|
if test "$debug" = "yes"; then
|
||||||
CFLAGS="-g $CFLAGS"
|
CFLAGS="-g $CFLAGS"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$optimize" = "small"; then
|
if test "$optimize" = "small"; then
|
||||||
@ -1173,10 +1173,10 @@ fi
|
|||||||
|
|
||||||
if test "$optimize" = "yes"; then
|
if test "$optimize" = "yes"; then
|
||||||
if test -n "`$cc -v 2>&1 | grep xlc`"; then
|
if test -n "`$cc -v 2>&1 | grep xlc`"; then
|
||||||
CFLAGS="$CFLAGS -O5"
|
CFLAGS="$CFLAGS -O5"
|
||||||
LDFLAGS="$LDFLAGS -O5"
|
LDFLAGS="$LDFLAGS -O5"
|
||||||
else
|
else
|
||||||
CFLAGS="-O3 $CFLAGS"
|
CFLAGS="-O3 $CFLAGS"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -1793,9 +1793,9 @@ done
|
|||||||
|
|
||||||
diff $TMPH config.h >/dev/null 2>&1
|
diff $TMPH config.h >/dev/null 2>&1
|
||||||
if test $? -ne 0 ; then
|
if test $? -ne 0 ; then
|
||||||
mv -f $TMPH config.h
|
mv -f $TMPH config.h
|
||||||
else
|
else
|
||||||
echo "config.h is unchanged"
|
echo "config.h is unchanged"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
rm -f $TMPO $TMPC $TMPE $TMPS $TMPH
|
rm -f $TMPO $TMPC $TMPE $TMPS $TMPH
|
||||||
|
84
cws2fws.c
84
cws2fws.c
@ -25,37 +25,37 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
if (argc < 3)
|
if (argc < 3)
|
||||||
{
|
{
|
||||||
printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
|
printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_in = open(argv[1], O_RDONLY);
|
fd_in = open(argv[1], O_RDONLY);
|
||||||
if (fd_in < 0)
|
if (fd_in < 0)
|
||||||
{
|
{
|
||||||
perror("Error while opening: ");
|
perror("Error while opening: ");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644);
|
fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644);
|
||||||
if (fd_out < 0)
|
if (fd_out < 0)
|
||||||
{
|
{
|
||||||
perror("Error while opening: ");
|
perror("Error while opening: ");
|
||||||
close(fd_in);
|
close(fd_in);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (read(fd_in, &buf_in, 8) != 8)
|
if (read(fd_in, &buf_in, 8) != 8)
|
||||||
{
|
{
|
||||||
printf("Header error\n");
|
printf("Header error\n");
|
||||||
close(fd_in);
|
close(fd_in);
|
||||||
close(fd_out);
|
close(fd_out);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S')
|
if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S')
|
||||||
{
|
{
|
||||||
printf("Not a compressed flash file\n");
|
printf("Not a compressed flash file\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fstat(fd_in, &statbuf);
|
fstat(fd_in, &statbuf);
|
||||||
@ -75,48 +75,48 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
for (i = 0; i < comp_len-4;)
|
for (i = 0; i < comp_len-4;)
|
||||||
{
|
{
|
||||||
int ret, len = read(fd_in, &buf_in, 1024);
|
int ret, len = read(fd_in, &buf_in, 1024);
|
||||||
|
|
||||||
dbgprintf("read %d bytes\n", len);
|
dbgprintf("read %d bytes\n", len);
|
||||||
|
|
||||||
last_out = zstream.total_out;
|
last_out = zstream.total_out;
|
||||||
|
|
||||||
zstream.next_in = &buf_in[0];
|
zstream.next_in = &buf_in[0];
|
||||||
zstream.avail_in = len;
|
zstream.avail_in = len;
|
||||||
zstream.next_out = &buf_out[0];
|
zstream.next_out = &buf_out[0];
|
||||||
zstream.avail_out = 1024;
|
zstream.avail_out = 1024;
|
||||||
|
|
||||||
ret = inflate(&zstream, Z_SYNC_FLUSH);
|
ret = inflate(&zstream, Z_SYNC_FLUSH);
|
||||||
if (ret == Z_STREAM_END || ret == Z_BUF_ERROR)
|
if (ret == Z_STREAM_END || ret == Z_BUF_ERROR)
|
||||||
break;
|
break;
|
||||||
if (ret != Z_OK)
|
if (ret != Z_OK)
|
||||||
{
|
{
|
||||||
printf("Error while decompressing: %d\n", ret);
|
printf("Error while decompressing: %d\n", ret);
|
||||||
inflateEnd(&zstream);
|
inflateEnd(&zstream);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n",
|
dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n",
|
||||||
zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
|
zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
|
||||||
zstream.total_out-last_out);
|
zstream.total_out-last_out);
|
||||||
|
|
||||||
write(fd_out, &buf_out, zstream.total_out-last_out);
|
write(fd_out, &buf_out, zstream.total_out-last_out);
|
||||||
|
|
||||||
i += len;
|
i += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zstream.total_out != uncomp_len-8)
|
if (zstream.total_out != uncomp_len-8)
|
||||||
{
|
{
|
||||||
printf("Size mismatch (%d != %d), updating header...\n",
|
printf("Size mismatch (%d != %d), updating header...\n",
|
||||||
zstream.total_out, uncomp_len-8);
|
zstream.total_out, uncomp_len-8);
|
||||||
|
|
||||||
buf_in[0] = (zstream.total_out+8) & 0xff;
|
buf_in[0] = (zstream.total_out+8) & 0xff;
|
||||||
buf_in[1] = (zstream.total_out+8 >> 8) & 0xff;
|
buf_in[1] = (zstream.total_out+8 >> 8) & 0xff;
|
||||||
buf_in[2] = (zstream.total_out+8 >> 16) & 0xff;
|
buf_in[2] = (zstream.total_out+8 >> 16) & 0xff;
|
||||||
buf_in[3] = (zstream.total_out+8 >> 24) & 0xff;
|
buf_in[3] = (zstream.total_out+8 >> 24) & 0xff;
|
||||||
|
|
||||||
lseek(fd_out, 4, SEEK_SET);
|
lseek(fd_out, 4, SEEK_SET);
|
||||||
write(fd_out, &buf_in, 4);
|
write(fd_out, &buf_in, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
inflateEnd(&zstream);
|
inflateEnd(&zstream);
|
||||||
|
286
doc/texi2pod.pl
286
doc/texi2pod.pl
@ -39,24 +39,24 @@ $ibase = "";
|
|||||||
|
|
||||||
while ($_ = shift) {
|
while ($_ = shift) {
|
||||||
if (/^-D(.*)$/) {
|
if (/^-D(.*)$/) {
|
||||||
if ($1 ne "") {
|
if ($1 ne "") {
|
||||||
$flag = $1;
|
$flag = $1;
|
||||||
} else {
|
} else {
|
||||||
$flag = shift;
|
$flag = shift;
|
||||||
}
|
}
|
||||||
$value = "";
|
$value = "";
|
||||||
($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/);
|
($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/);
|
||||||
die "no flag specified for -D\n"
|
die "no flag specified for -D\n"
|
||||||
unless $flag ne "";
|
unless $flag ne "";
|
||||||
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
|
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
|
||||||
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
|
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
|
||||||
$defs{$flag} = $value;
|
$defs{$flag} = $value;
|
||||||
} elsif (/^-/) {
|
} elsif (/^-/) {
|
||||||
usage();
|
usage();
|
||||||
} else {
|
} else {
|
||||||
$in = $_, next unless defined $in;
|
$in = $_, next unless defined $in;
|
||||||
$out = $_, next unless defined $out;
|
$out = $_, next unless defined $out;
|
||||||
usage();
|
usage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,13 +76,13 @@ while(defined $inf) {
|
|||||||
while(<$inf>) {
|
while(<$inf>) {
|
||||||
# Certain commands are discarded without further processing.
|
# Certain commands are discarded without further processing.
|
||||||
/^\@(?:
|
/^\@(?:
|
||||||
[a-z]+index # @*index: useful only in complete manual
|
[a-z]+index # @*index: useful only in complete manual
|
||||||
|need # @need: useful only in printed manual
|
|need # @need: useful only in printed manual
|
||||||
|(?:end\s+)?group # @group .. @end group: ditto
|
|(?:end\s+)?group # @group .. @end group: ditto
|
||||||
|page # @page: ditto
|
|page # @page: ditto
|
||||||
|node # @node: useful only in .info file
|
|node # @node: useful only in .info file
|
||||||
|(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents
|
|(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents
|
||||||
)\b/x and next;
|
)\b/x and next;
|
||||||
|
|
||||||
chomp;
|
chomp;
|
||||||
|
|
||||||
@ -92,38 +92,38 @@ while(<$inf>) {
|
|||||||
|
|
||||||
# Identify a man title but keep only the one we are interested in.
|
# Identify a man title but keep only the one we are interested in.
|
||||||
/^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
|
/^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
|
||||||
if (exists $defs{$1}) {
|
if (exists $defs{$1}) {
|
||||||
$fn = $1;
|
$fn = $1;
|
||||||
$tl = postprocess($2);
|
$tl = postprocess($2);
|
||||||
}
|
}
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Look for blocks surrounded by @c man begin SECTION ... @c man end.
|
# Look for blocks surrounded by @c man begin SECTION ... @c man end.
|
||||||
# This really oughta be @ifman ... @end ifman and the like, but such
|
# This really oughta be @ifman ... @end ifman and the like, but such
|
||||||
# would require rev'ing all other Texinfo translators.
|
# would require rev'ing all other Texinfo translators.
|
||||||
/^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do {
|
/^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do {
|
||||||
$output = 1 if exists $defs{$2};
|
$output = 1 if exists $defs{$2};
|
||||||
$sect = $1;
|
$sect = $1;
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
/^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next;
|
/^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next;
|
||||||
/^\@c\s+man\s+end/ and do {
|
/^\@c\s+man\s+end/ and do {
|
||||||
$sects{$sect} = "" unless exists $sects{$sect};
|
$sects{$sect} = "" unless exists $sects{$sect};
|
||||||
$sects{$sect} .= postprocess($section);
|
$sects{$sect} .= postprocess($section);
|
||||||
$section = "";
|
$section = "";
|
||||||
$output = 0;
|
$output = 0;
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
# handle variables
|
# handle variables
|
||||||
/^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do {
|
/^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do {
|
||||||
$defs{$1} = $2;
|
$defs{$1} = $2;
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
/^\@clear\s+([a-zA-Z0-9_-]+)/ and do {
|
/^\@clear\s+([a-zA-Z0-9_-]+)/ and do {
|
||||||
delete $defs{$1};
|
delete $defs{$1};
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
next unless $output;
|
next unless $output;
|
||||||
@ -135,55 +135,55 @@ while(<$inf>) {
|
|||||||
# End-block handler goes up here because it needs to operate even
|
# End-block handler goes up here because it needs to operate even
|
||||||
# if we are skipping.
|
# if we are skipping.
|
||||||
/^\@end\s+([a-z]+)/ and do {
|
/^\@end\s+([a-z]+)/ and do {
|
||||||
# Ignore @end foo, where foo is not an operation which may
|
# Ignore @end foo, where foo is not an operation which may
|
||||||
# cause us to skip, if we are presently skipping.
|
# cause us to skip, if we are presently skipping.
|
||||||
my $ended = $1;
|
my $ended = $1;
|
||||||
next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/;
|
next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/;
|
||||||
|
|
||||||
die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
|
die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
|
||||||
die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;
|
die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;
|
||||||
|
|
||||||
$endw = pop @endwstack;
|
$endw = pop @endwstack;
|
||||||
|
|
||||||
if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) {
|
if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) {
|
||||||
$skipping = pop @skstack;
|
$skipping = pop @skstack;
|
||||||
next;
|
next;
|
||||||
} elsif ($ended =~ /^(?:example|smallexample|display)$/) {
|
} elsif ($ended =~ /^(?:example|smallexample|display)$/) {
|
||||||
$shift = "";
|
$shift = "";
|
||||||
$_ = ""; # need a paragraph break
|
$_ = ""; # need a paragraph break
|
||||||
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
|
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
|
||||||
$_ = "\n=back\n";
|
$_ = "\n=back\n";
|
||||||
$ic = pop @icstack;
|
$ic = pop @icstack;
|
||||||
} else {
|
} else {
|
||||||
die "unknown command \@end $ended at line $.\n";
|
die "unknown command \@end $ended at line $.\n";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# We must handle commands which can cause skipping even while we
|
# We must handle commands which can cause skipping even while we
|
||||||
# are skipping, otherwise we will not process nested conditionals
|
# are skipping, otherwise we will not process nested conditionals
|
||||||
# correctly.
|
# correctly.
|
||||||
/^\@ifset\s+([a-zA-Z0-9_-]+)/ and do {
|
/^\@ifset\s+([a-zA-Z0-9_-]+)/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @skstack, $skipping;
|
push @skstack, $skipping;
|
||||||
$endw = "ifset";
|
$endw = "ifset";
|
||||||
$skipping = 1 unless exists $defs{$1};
|
$skipping = 1 unless exists $defs{$1};
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do {
|
/^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @skstack, $skipping;
|
push @skstack, $skipping;
|
||||||
$endw = "ifclear";
|
$endw = "ifclear";
|
||||||
$skipping = 1 if exists $defs{$1};
|
$skipping = 1 if exists $defs{$1};
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@(ignore|menu|iftex)\b/ and do {
|
/^\@(ignore|menu|iftex)\b/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @skstack, $skipping;
|
push @skstack, $skipping;
|
||||||
$endw = $1;
|
$endw = $1;
|
||||||
$skipping = 1;
|
$skipping = 1;
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
next if $skipping;
|
next if $skipping;
|
||||||
@ -210,85 +210,85 @@ while(<$inf>) {
|
|||||||
|
|
||||||
# Inside a verbatim block, handle @var specially.
|
# Inside a verbatim block, handle @var specially.
|
||||||
if ($shift ne "") {
|
if ($shift ne "") {
|
||||||
s/\@var\{([^\}]*)\}/<$1>/g;
|
s/\@var\{([^\}]*)\}/<$1>/g;
|
||||||
}
|
}
|
||||||
|
|
||||||
# POD doesn't interpret E<> inside a verbatim block.
|
# POD doesn't interpret E<> inside a verbatim block.
|
||||||
if ($shift eq "") {
|
if ($shift eq "") {
|
||||||
s/</</g;
|
s/</</g;
|
||||||
s/>/>/g;
|
s/>/>/g;
|
||||||
} else {
|
} else {
|
||||||
s/</</g;
|
s/</</g;
|
||||||
s/>/>/g;
|
s/>/>/g;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Single line command handlers.
|
# Single line command handlers.
|
||||||
|
|
||||||
/^\@include\s+(.+)$/ and do {
|
/^\@include\s+(.+)$/ and do {
|
||||||
push @instack, $inf;
|
push @instack, $inf;
|
||||||
$inf = gensym();
|
$inf = gensym();
|
||||||
|
|
||||||
# Try cwd and $ibase.
|
# Try cwd and $ibase.
|
||||||
open($inf, "<" . $1)
|
open($inf, "<" . $1)
|
||||||
or open($inf, "<" . $ibase . "/" . $1)
|
or open($inf, "<" . $ibase . "/" . $1)
|
||||||
or die "cannot open $1 or $ibase/$1: $!\n";
|
or die "cannot open $1 or $ibase/$1: $!\n";
|
||||||
next;
|
next;
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
|
/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
|
||||||
and $_ = "\n=head2 $1\n";
|
and $_ = "\n=head2 $1\n";
|
||||||
/^\@subsection\s+(.+)$/
|
/^\@subsection\s+(.+)$/
|
||||||
and $_ = "\n=head3 $1\n";
|
and $_ = "\n=head3 $1\n";
|
||||||
|
|
||||||
# Block command handlers:
|
# Block command handlers:
|
||||||
/^\@itemize\s+(\@[a-z]+|\*|-)/ and do {
|
/^\@itemize\s+(\@[a-z]+|\*|-)/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @icstack, $ic;
|
push @icstack, $ic;
|
||||||
$ic = $1;
|
$ic = $1;
|
||||||
$_ = "\n=over 4\n";
|
$_ = "\n=over 4\n";
|
||||||
$endw = "itemize";
|
$endw = "itemize";
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do {
|
/^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @icstack, $ic;
|
push @icstack, $ic;
|
||||||
if (defined $1) {
|
if (defined $1) {
|
||||||
$ic = $1 . ".";
|
$ic = $1 . ".";
|
||||||
} else {
|
} else {
|
||||||
$ic = "1.";
|
$ic = "1.";
|
||||||
}
|
}
|
||||||
$_ = "\n=over 4\n";
|
$_ = "\n=over 4\n";
|
||||||
$endw = "enumerate";
|
$endw = "enumerate";
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
|
/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
push @icstack, $ic;
|
push @icstack, $ic;
|
||||||
$endw = $1;
|
$endw = $1;
|
||||||
$ic = $2;
|
$ic = $2;
|
||||||
$ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/;
|
$ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/;
|
||||||
$ic =~ s/\@(?:code|kbd)/C/;
|
$ic =~ s/\@(?:code|kbd)/C/;
|
||||||
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
|
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
|
||||||
$ic =~ s/\@(?:file)/F/;
|
$ic =~ s/\@(?:file)/F/;
|
||||||
$_ = "\n=over 4\n";
|
$_ = "\n=over 4\n";
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@((?:small)?example|display)/ and do {
|
/^\@((?:small)?example|display)/ and do {
|
||||||
push @endwstack, $endw;
|
push @endwstack, $endw;
|
||||||
$endw = $1;
|
$endw = $1;
|
||||||
$shift = "\t";
|
$shift = "\t";
|
||||||
$_ = ""; # need a paragraph break
|
$_ = ""; # need a paragraph break
|
||||||
};
|
};
|
||||||
|
|
||||||
/^\@itemx?\s*(.+)?$/ and do {
|
/^\@itemx?\s*(.+)?$/ and do {
|
||||||
if (defined $1) {
|
if (defined $1) {
|
||||||
# Entity escapes prevent munging by the <> processing below.
|
# Entity escapes prevent munging by the <> processing below.
|
||||||
$_ = "\n=item $ic\<$1\>\n";
|
$_ = "\n=item $ic\<$1\>\n";
|
||||||
} else {
|
} else {
|
||||||
$_ = "\n=item $ic\n";
|
$_ = "\n=item $ic\n";
|
||||||
$ic =~ y/A-Ya-y/B-Zb-z/;
|
$ic =~ y/A-Ya-y/B-Zb-z/;
|
||||||
$ic =~ s/(\d+)/$1 + 1/eg;
|
$ic =~ s/(\d+)/$1 + 1/eg;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
$section .= $shift.$_."\n";
|
$section .= $shift.$_."\n";
|
||||||
@ -304,13 +304,13 @@ $sects{NAME} = "$fn \- $tl\n";
|
|||||||
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
|
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
|
||||||
|
|
||||||
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES
|
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES
|
||||||
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
|
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
|
||||||
if(exists $sects{$sect}) {
|
if(exists $sects{$sect}) {
|
||||||
$head = $sect;
|
$head = $sect;
|
||||||
$head =~ s/SEEALSO/SEE ALSO/;
|
$head =~ s/SEEALSO/SEE ALSO/;
|
||||||
print "=head1 $head\n\n";
|
print "=head1 $head\n\n";
|
||||||
print scalar unmunge ($sects{$sect});
|
print scalar unmunge ($sects{$sect});
|
||||||
print "\n";
|
print "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,13 +325,13 @@ sub postprocess
|
|||||||
|
|
||||||
# @value{foo} is replaced by whatever 'foo' is defined as.
|
# @value{foo} is replaced by whatever 'foo' is defined as.
|
||||||
while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) {
|
while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) {
|
||||||
if (! exists $defs{$2}) {
|
if (! exists $defs{$2}) {
|
||||||
print STDERR "Option $2 not defined\n";
|
print STDERR "Option $2 not defined\n";
|
||||||
s/\Q$1\E//;
|
s/\Q$1\E//;
|
||||||
} else {
|
} else {
|
||||||
$value = $defs{$2};
|
$value = $defs{$2};
|
||||||
s/\Q$1\E/$value/;
|
s/\Q$1\E/$value/;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Formatting commands.
|
# Formatting commands.
|
||||||
@ -381,9 +381,9 @@ sub postprocess
|
|||||||
# processing because otherwise the regexp will choke on formatting
|
# processing because otherwise the regexp will choke on formatting
|
||||||
# inside @footnote.
|
# inside @footnote.
|
||||||
while (/\@footnote/g) {
|
while (/\@footnote/g) {
|
||||||
s/\@footnote\{([^\}]+)\}/[$fnno]/;
|
s/\@footnote\{([^\}]+)\}/[$fnno]/;
|
||||||
add_footnote($1, $fnno);
|
add_footnote($1, $fnno);
|
||||||
$fnno++;
|
$fnno++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return $_;
|
return $_;
|
||||||
@ -406,7 +406,7 @@ sub unmunge
|
|||||||
sub add_footnote
|
sub add_footnote
|
||||||
{
|
{
|
||||||
unless (exists $sects{FOOTNOTES}) {
|
unless (exists $sects{FOOTNOTES}) {
|
||||||
$sects{FOOTNOTES} = "\n=over 4\n\n";
|
$sects{FOOTNOTES} = "\n=over 4\n\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
$sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++;
|
$sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++;
|
||||||
@ -419,9 +419,9 @@ sub add_footnote
|
|||||||
my $genseq = 0;
|
my $genseq = 0;
|
||||||
sub gensym
|
sub gensym
|
||||||
{
|
{
|
||||||
my $name = "GEN" . $genseq++;
|
my $name = "GEN" . $genseq++;
|
||||||
my $ref = \*{$name};
|
my $ref = \*{$name};
|
||||||
delete $::{$name};
|
delete $::{$name};
|
||||||
return $ref;
|
return $ref;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
72
ffmpeg.c
72
ffmpeg.c
@ -579,7 +579,7 @@ static void do_audio_out(AVFormatContext *s,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ret = avcodec_encode_audio(enc, audio_out, size_out,
|
ret = avcodec_encode_audio(enc, audio_out, size_out,
|
||||||
(short *)buftmp);
|
(short *)buftmp);
|
||||||
audio_size += ret;
|
audio_size += ret;
|
||||||
pkt.stream_index= ost->index;
|
pkt.stream_index= ost->index;
|
||||||
pkt.data= audio_out;
|
pkt.data= audio_out;
|
||||||
@ -821,10 +821,10 @@ static void do_video_out(AVFormatContext *s,
|
|||||||
padcolor);
|
padcolor);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (enc->pix_fmt != PIX_FMT_YUV420P) {
|
if (enc->pix_fmt != PIX_FMT_YUV420P) {
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
av_free(buf);
|
av_free(buf);
|
||||||
/* create temporary picture */
|
/* create temporary picture */
|
||||||
size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height);
|
size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height);
|
||||||
buf = av_malloc(size);
|
buf = av_malloc(size);
|
||||||
@ -842,7 +842,7 @@ static void do_video_out(AVFormatContext *s,
|
|||||||
|
|
||||||
goto the_end;
|
goto the_end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (ost->video_crop) {
|
} else if (ost->video_crop) {
|
||||||
picture_crop_temp.data[0] = formatted_picture->data[0] +
|
picture_crop_temp.data[0] = formatted_picture->data[0] +
|
||||||
(ost->topBand * formatted_picture->linesize[0]) + ost->leftBand;
|
(ost->topBand * formatted_picture->linesize[0]) + ost->leftBand;
|
||||||
@ -921,7 +921,7 @@ static void do_video_out(AVFormatContext *s,
|
|||||||
avoid any copies. We support temorarily the older
|
avoid any copies. We support temorarily the older
|
||||||
method. */
|
method. */
|
||||||
AVFrame* old_frame = enc->coded_frame;
|
AVFrame* old_frame = enc->coded_frame;
|
||||||
enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack
|
enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack
|
||||||
pkt.data= (uint8_t *)final_picture;
|
pkt.data= (uint8_t *)final_picture;
|
||||||
pkt.size= sizeof(AVPicture);
|
pkt.size= sizeof(AVPicture);
|
||||||
if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
|
if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
|
||||||
@ -930,7 +930,7 @@ static void do_video_out(AVFormatContext *s,
|
|||||||
pkt.flags |= PKT_FLAG_KEY;
|
pkt.flags |= PKT_FLAG_KEY;
|
||||||
|
|
||||||
av_interleaved_write_frame(s, &pkt);
|
av_interleaved_write_frame(s, &pkt);
|
||||||
enc->coded_frame = old_frame;
|
enc->coded_frame = old_frame;
|
||||||
} else {
|
} else {
|
||||||
AVFrame big_picture;
|
AVFrame big_picture;
|
||||||
|
|
||||||
@ -1044,8 +1044,8 @@ static void do_video_stats(AVFormatContext *os, AVOutputStream *ost,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void print_report(AVFormatContext **output_files,
|
static void print_report(AVFormatContext **output_files,
|
||||||
AVOutputStream **ost_table, int nb_ostreams,
|
AVOutputStream **ost_table, int nb_ostreams,
|
||||||
int is_last_report)
|
int is_last_report)
|
||||||
{
|
{
|
||||||
char buf[1024];
|
char buf[1024];
|
||||||
AVOutputStream *ost;
|
AVOutputStream *ost;
|
||||||
@ -1138,9 +1138,9 @@ static void print_report(AVFormatContext **output_files,
|
|||||||
"size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s",
|
"size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s",
|
||||||
(double)total_size / 1024, ti1, bitrate);
|
(double)total_size / 1024, ti1, bitrate);
|
||||||
|
|
||||||
if (verbose > 1)
|
if (verbose > 1)
|
||||||
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
|
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
|
||||||
nb_frames_dup, nb_frames_drop);
|
nb_frames_dup, nb_frames_drop);
|
||||||
|
|
||||||
if (verbose >= 0)
|
if (verbose >= 0)
|
||||||
fprintf(stderr, "%s \r", buf);
|
fprintf(stderr, "%s \r", buf);
|
||||||
@ -1323,7 +1323,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* if output time reached then transcode raw format,
|
/* if output time reached then transcode raw format,
|
||||||
encode packets and output them */
|
encode packets and output them */
|
||||||
if (start_time == 0 || ist->pts >= start_time)
|
if (start_time == 0 || ist->pts >= start_time)
|
||||||
for(i=0;i<nb_ostreams;i++) {
|
for(i=0;i<nb_ostreams;i++) {
|
||||||
int frame_size;
|
int frame_size;
|
||||||
@ -1898,7 +1898,7 @@ static int av_encode(AVFormatContext **output_files,
|
|||||||
/* init pts */
|
/* init pts */
|
||||||
for(i=0;i<nb_istreams;i++) {
|
for(i=0;i<nb_istreams;i++) {
|
||||||
ist = ist_table[i];
|
ist = ist_table[i];
|
||||||
is = input_files[ist->file_index];
|
is = input_files[ist->file_index];
|
||||||
ist->pts = 0;
|
ist->pts = 0;
|
||||||
ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q);
|
ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q);
|
||||||
if(ist->st->start_time == AV_NOPTS_VALUE)
|
if(ist->st->start_time == AV_NOPTS_VALUE)
|
||||||
@ -2273,7 +2273,7 @@ static void opt_frame_rate(const char *arg)
|
|||||||
{
|
{
|
||||||
if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) {
|
if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) {
|
||||||
fprintf(stderr, "Incorrect frame rate\n");
|
fprintf(stderr, "Incorrect frame rate\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2289,7 +2289,7 @@ static void opt_frame_crop_top(const char *arg)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((frame_topBand) >= frame_height){
|
if ((frame_topBand) >= frame_height){
|
||||||
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
frame_height -= frame_topBand;
|
frame_height -= frame_topBand;
|
||||||
@ -2307,7 +2307,7 @@ static void opt_frame_crop_bottom(const char *arg)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((frame_bottomBand) >= frame_height){
|
if ((frame_bottomBand) >= frame_height){
|
||||||
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
frame_height -= frame_bottomBand;
|
frame_height -= frame_bottomBand;
|
||||||
@ -2325,7 +2325,7 @@ static void opt_frame_crop_left(const char *arg)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((frame_leftBand) >= frame_width){
|
if ((frame_leftBand) >= frame_width){
|
||||||
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
frame_width -= frame_leftBand;
|
frame_width -= frame_leftBand;
|
||||||
@ -2343,7 +2343,7 @@ static void opt_frame_crop_right(const char *arg)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((frame_rightBand) >= frame_width){
|
if ((frame_rightBand) >= frame_width){
|
||||||
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
frame_width -= frame_rightBand;
|
frame_width -= frame_rightBand;
|
||||||
@ -2364,7 +2364,7 @@ static void opt_frame_size(const char *arg)
|
|||||||
|
|
||||||
#define SCALEBITS 10
|
#define SCALEBITS 10
|
||||||
#define ONE_HALF (1 << (SCALEBITS - 1))
|
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||||
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
||||||
|
|
||||||
#define RGB_TO_Y(r, g, b) \
|
#define RGB_TO_Y(r, g, b) \
|
||||||
((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
|
((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
|
||||||
@ -2462,16 +2462,16 @@ static void opt_frame_aspect_ratio(const char *arg)
|
|||||||
p = strchr(arg, ':');
|
p = strchr(arg, ':');
|
||||||
if (p) {
|
if (p) {
|
||||||
x = strtol(arg, (char **)&arg, 10);
|
x = strtol(arg, (char **)&arg, 10);
|
||||||
if (arg == p)
|
if (arg == p)
|
||||||
y = strtol(arg+1, (char **)&arg, 10);
|
y = strtol(arg+1, (char **)&arg, 10);
|
||||||
if (x > 0 && y > 0)
|
if (x > 0 && y > 0)
|
||||||
ar = (double)x / (double)y;
|
ar = (double)x / (double)y;
|
||||||
} else
|
} else
|
||||||
ar = strtod(arg, (char **)&arg);
|
ar = strtod(arg, (char **)&arg);
|
||||||
|
|
||||||
if (!ar) {
|
if (!ar) {
|
||||||
fprintf(stderr, "Incorrect aspect ratio specification.\n");
|
fprintf(stderr, "Incorrect aspect ratio specification.\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
frame_aspect_ratio = ar;
|
frame_aspect_ratio = ar;
|
||||||
}
|
}
|
||||||
@ -2957,8 +2957,8 @@ static void opt_input_file(const char *filename)
|
|||||||
}
|
}
|
||||||
frame_height = enc->height;
|
frame_height = enc->height;
|
||||||
frame_width = enc->width;
|
frame_width = enc->width;
|
||||||
frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height;
|
frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height;
|
||||||
frame_pix_fmt = enc->pix_fmt;
|
frame_pix_fmt = enc->pix_fmt;
|
||||||
rfps = ic->streams[i]->r_frame_rate.num;
|
rfps = ic->streams[i]->r_frame_rate.num;
|
||||||
rfps_base = ic->streams[i]->r_frame_rate.den;
|
rfps_base = ic->streams[i]->r_frame_rate.den;
|
||||||
enc->workaround_bugs = workaround_bugs;
|
enc->workaround_bugs = workaround_bugs;
|
||||||
@ -3454,7 +3454,7 @@ static void opt_output_file(const char *filename)
|
|||||||
|
|
||||||
oc->timestamp = rec_timestamp;
|
oc->timestamp = rec_timestamp;
|
||||||
|
|
||||||
if (str_title)
|
if (str_title)
|
||||||
pstrcpy(oc->title, sizeof(oc->title), str_title);
|
pstrcpy(oc->title, sizeof(oc->title), str_title);
|
||||||
if (str_author)
|
if (str_author)
|
||||||
pstrcpy(oc->author, sizeof(oc->author), str_author);
|
pstrcpy(oc->author, sizeof(oc->author), str_author);
|
||||||
@ -3490,11 +3490,11 @@ static void opt_output_file(const char *filename)
|
|||||||
fprintf(stderr, "Not overwriting - exiting\n");
|
fprintf(stderr, "Not overwriting - exiting\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
fprintf(stderr,"File '%s' already exists. Exiting.\n", filename);
|
fprintf(stderr,"File '%s' already exists. Exiting.\n", filename);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3579,14 +3579,14 @@ static void prepare_grab(void)
|
|||||||
fmt1 = av_find_input_format(video_grab_format);
|
fmt1 = av_find_input_format(video_grab_format);
|
||||||
vp->device = video_device;
|
vp->device = video_device;
|
||||||
vp->channel = video_channel;
|
vp->channel = video_channel;
|
||||||
vp->standard = video_standard;
|
vp->standard = video_standard;
|
||||||
if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) {
|
if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) {
|
||||||
fprintf(stderr, "Could not find video grab device\n");
|
fprintf(stderr, "Could not find video grab device\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
/* If not enough info to get the stream parameters, we decode the
|
/* If not enough info to get the stream parameters, we decode the
|
||||||
first frames to get it. */
|
first frames to get it. */
|
||||||
if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) {
|
if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) {
|
||||||
fprintf(stderr, "Could not find video grab parameters\n");
|
fprintf(stderr, "Could not find video grab parameters\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -4276,11 +4276,11 @@ int main(int argc, char **argv)
|
|||||||
for(i=0;i<nb_output_files;i++) {
|
for(i=0;i<nb_output_files;i++) {
|
||||||
/* maybe av_close_output_file ??? */
|
/* maybe av_close_output_file ??? */
|
||||||
AVFormatContext *s = output_files[i];
|
AVFormatContext *s = output_files[i];
|
||||||
int j;
|
int j;
|
||||||
if (!(s->oformat->flags & AVFMT_NOFILE))
|
if (!(s->oformat->flags & AVFMT_NOFILE))
|
||||||
url_fclose(&s->pb);
|
url_fclose(&s->pb);
|
||||||
for(j=0;j<s->nb_streams;j++)
|
for(j=0;j<s->nb_streams;j++)
|
||||||
av_free(s->streams[j]);
|
av_free(s->streams[j]);
|
||||||
av_free(s);
|
av_free(s);
|
||||||
}
|
}
|
||||||
for(i=0;i<nb_input_files;i++)
|
for(i=0;i<nb_input_files;i++)
|
||||||
|
46
ffplay.c
46
ffplay.c
@ -1649,7 +1649,7 @@ static int stream_component_open(VideoState *is, int stream_index)
|
|||||||
|
|
||||||
memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
|
memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
|
||||||
packet_queue_init(&is->audioq);
|
packet_queue_init(&is->audioq);
|
||||||
SDL_PauseAudio(0);
|
SDL_PauseAudio(0);
|
||||||
break;
|
break;
|
||||||
case CODEC_TYPE_VIDEO:
|
case CODEC_TYPE_VIDEO:
|
||||||
is->video_stream = stream_index;
|
is->video_stream = stream_index;
|
||||||
@ -1937,11 +1937,11 @@ static int decode_thread(void *arg)
|
|||||||
}
|
}
|
||||||
ret = av_read_frame(ic, pkt);
|
ret = av_read_frame(ic, pkt);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
if (url_ferror(&ic->pb) == 0) {
|
if (url_ferror(&ic->pb) == 0) {
|
||||||
SDL_Delay(100); /* wait for user event */
|
SDL_Delay(100); /* wait for user event */
|
||||||
continue;
|
continue;
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (pkt->stream_index == is->audio_stream) {
|
if (pkt->stream_index == is->audio_stream) {
|
||||||
packet_queue_put(&is->audioq, pkt);
|
packet_queue_put(&is->audioq, pkt);
|
||||||
@ -2224,23 +2224,23 @@ void event_loop(void)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SDL_MOUSEBUTTONDOWN:
|
case SDL_MOUSEBUTTONDOWN:
|
||||||
if (cur_stream) {
|
if (cur_stream) {
|
||||||
int ns, hh, mm, ss;
|
int ns, hh, mm, ss;
|
||||||
int tns, thh, tmm, tss;
|
int tns, thh, tmm, tss;
|
||||||
tns = cur_stream->ic->duration/1000000LL;
|
tns = cur_stream->ic->duration/1000000LL;
|
||||||
thh = tns/3600;
|
thh = tns/3600;
|
||||||
tmm = (tns%3600)/60;
|
tmm = (tns%3600)/60;
|
||||||
tss = (tns%60);
|
tss = (tns%60);
|
||||||
frac = (double)event.button.x/(double)cur_stream->width;
|
frac = (double)event.button.x/(double)cur_stream->width;
|
||||||
ns = frac*tns;
|
ns = frac*tns;
|
||||||
hh = ns/3600;
|
hh = ns/3600;
|
||||||
mm = (ns%3600)/60;
|
mm = (ns%3600)/60;
|
||||||
ss = (ns%60);
|
ss = (ns%60);
|
||||||
fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100,
|
fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100,
|
||||||
hh, mm, ss, thh, tmm, tss);
|
hh, mm, ss, thh, tmm, tss);
|
||||||
stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0);
|
stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SDL_VIDEORESIZE:
|
case SDL_VIDEORESIZE:
|
||||||
if (cur_stream) {
|
if (cur_stream) {
|
||||||
screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0,
|
screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0,
|
||||||
@ -2452,7 +2452,7 @@ int main(int argc, char **argv)
|
|||||||
if (dpy) {
|
if (dpy) {
|
||||||
fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy));
|
fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy));
|
||||||
fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy));
|
fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy));
|
||||||
XCloseDisplay(dpy);
|
XCloseDisplay(dpy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
42
ffserver.c
42
ffserver.c
@ -1204,7 +1204,7 @@ static int http_parse_request(HTTPContext *c)
|
|||||||
pstrcpy(c->protocol, sizeof(c->protocol), protocol);
|
pstrcpy(c->protocol, sizeof(c->protocol), protocol);
|
||||||
|
|
||||||
if (ffserver_debug)
|
if (ffserver_debug)
|
||||||
http_log("New connection: %s %s\n", cmd, url);
|
http_log("New connection: %s %s\n", cmd, url);
|
||||||
|
|
||||||
/* find the filename and the optional info string in the request */
|
/* find the filename and the optional info string in the request */
|
||||||
p = url;
|
p = url;
|
||||||
@ -2001,7 +2001,7 @@ static int http_prepare_data(HTTPContext *c)
|
|||||||
c->fmt_ctx.nb_streams = c->stream->nb_streams;
|
c->fmt_ctx.nb_streams = c->stream->nb_streams;
|
||||||
for(i=0;i<c->fmt_ctx.nb_streams;i++) {
|
for(i=0;i<c->fmt_ctx.nb_streams;i++) {
|
||||||
AVStream *st;
|
AVStream *st;
|
||||||
AVStream *src;
|
AVStream *src;
|
||||||
st = av_mallocz(sizeof(AVStream));
|
st = av_mallocz(sizeof(AVStream));
|
||||||
st->codec= avcodec_alloc_context();
|
st->codec= avcodec_alloc_context();
|
||||||
c->fmt_ctx.streams[i] = st;
|
c->fmt_ctx.streams[i] = st;
|
||||||
@ -2012,8 +2012,8 @@ static int http_prepare_data(HTTPContext *c)
|
|||||||
else
|
else
|
||||||
src = c->stream->feed->streams[c->stream->feed_streams[i]];
|
src = c->stream->feed->streams[c->stream->feed_streams[i]];
|
||||||
|
|
||||||
*st = *src;
|
*st = *src;
|
||||||
st->priv_data = 0;
|
st->priv_data = 0;
|
||||||
st->codec->frame_number = 0; /* XXX: should be done in
|
st->codec->frame_number = 0; /* XXX: should be done in
|
||||||
AVStream, not in codec */
|
AVStream, not in codec */
|
||||||
/* I'm pretty sure that this is not correct...
|
/* I'm pretty sure that this is not correct...
|
||||||
@ -2452,8 +2452,8 @@ static int http_receive_data(HTTPContext *c)
|
|||||||
s.priv_data = av_mallocz(fmt_in->priv_data_size);
|
s.priv_data = av_mallocz(fmt_in->priv_data_size);
|
||||||
if (!s.priv_data)
|
if (!s.priv_data)
|
||||||
goto fail;
|
goto fail;
|
||||||
} else
|
} else
|
||||||
s.priv_data = NULL;
|
s.priv_data = NULL;
|
||||||
|
|
||||||
if (fmt_in->read_header(&s, 0) < 0) {
|
if (fmt_in->read_header(&s, 0) < 0) {
|
||||||
av_freep(&s.priv_data);
|
av_freep(&s.priv_data);
|
||||||
@ -3868,20 +3868,20 @@ static int parse_ffconfig(const char *filename)
|
|||||||
|
|
||||||
feed->child_argv[i] = av_malloc(30 + strlen(feed->filename));
|
feed->child_argv[i] = av_malloc(30 + strlen(feed->filename));
|
||||||
|
|
||||||
snprintf(feed->child_argv[i], 30+strlen(feed->filename),
|
snprintf(feed->child_argv[i], 30+strlen(feed->filename),
|
||||||
"http://%s:%d/%s",
|
"http://%s:%d/%s",
|
||||||
(my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" :
|
(my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" :
|
||||||
inet_ntoa(my_http_addr.sin_addr),
|
inet_ntoa(my_http_addr.sin_addr),
|
||||||
ntohs(my_http_addr.sin_port), feed->filename);
|
ntohs(my_http_addr.sin_port), feed->filename);
|
||||||
|
|
||||||
if (ffserver_debug)
|
if (ffserver_debug)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
fprintf(stdout, "Launch commandline: ");
|
fprintf(stdout, "Launch commandline: ");
|
||||||
for (j = 0; j <= i; j++)
|
for (j = 0; j <= i; j++)
|
||||||
fprintf(stdout, "%s ", feed->child_argv[j]);
|
fprintf(stdout, "%s ", feed->child_argv[j]);
|
||||||
fprintf(stdout, "\n");
|
fprintf(stdout, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (!strcasecmp(cmd, "ReadOnlyFile")) {
|
} else if (!strcasecmp(cmd, "ReadOnlyFile")) {
|
||||||
if (feed) {
|
if (feed) {
|
||||||
@ -4074,8 +4074,8 @@ static int parse_ffconfig(const char *filename)
|
|||||||
if (stream) {
|
if (stream) {
|
||||||
audio_enc.sample_rate = atoi(arg);
|
audio_enc.sample_rate = atoi(arg);
|
||||||
}
|
}
|
||||||
} else if (!strcasecmp(cmd, "AudioQuality")) {
|
} else if (!strcasecmp(cmd, "AudioQuality")) {
|
||||||
get_arg(arg, sizeof(arg), &p);
|
get_arg(arg, sizeof(arg), &p);
|
||||||
if (stream) {
|
if (stream) {
|
||||||
// audio_enc.quality = atof(arg) * 1000;
|
// audio_enc.quality = atof(arg) * 1000;
|
||||||
}
|
}
|
||||||
|
@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1};
|
|||||||
*/
|
*/
|
||||||
typedef struct EightBpsContext {
|
typedef struct EightBpsContext {
|
||||||
|
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
AVFrame pic;
|
AVFrame pic;
|
||||||
|
|
||||||
unsigned char planes;
|
unsigned char planes;
|
||||||
unsigned char planemap[4];
|
unsigned char planemap[4];
|
||||||
} EightBpsContext;
|
} EightBpsContext;
|
||||||
|
|
||||||
|
|
||||||
@ -59,87 +59,87 @@ typedef struct EightBpsContext {
|
|||||||
*/
|
*/
|
||||||
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
|
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
|
||||||
{
|
{
|
||||||
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
||||||
unsigned char *encoded = (unsigned char *)buf;
|
unsigned char *encoded = (unsigned char *)buf;
|
||||||
unsigned char *pixptr, *pixptr_end;
|
unsigned char *pixptr, *pixptr_end;
|
||||||
unsigned int height = avctx->height; // Real image height
|
unsigned int height = avctx->height; // Real image height
|
||||||
unsigned int dlen, p, row;
|
unsigned int dlen, p, row;
|
||||||
unsigned char *lp, *dp;
|
unsigned char *lp, *dp;
|
||||||
unsigned char count;
|
unsigned char count;
|
||||||
unsigned int px_inc;
|
unsigned int px_inc;
|
||||||
unsigned int planes = c->planes;
|
unsigned int planes = c->planes;
|
||||||
unsigned char *planemap = c->planemap;
|
unsigned char *planemap = c->planemap;
|
||||||
|
|
||||||
if(c->pic.data[0])
|
if(c->pic.data[0])
|
||||||
avctx->release_buffer(avctx, &c->pic);
|
avctx->release_buffer(avctx, &c->pic);
|
||||||
|
|
||||||
c->pic.reference = 0;
|
c->pic.reference = 0;
|
||||||
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
|
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
|
||||||
if(avctx->get_buffer(avctx, &c->pic) < 0){
|
if(avctx->get_buffer(avctx, &c->pic) < 0){
|
||||||
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set data pointer after line lengths */
|
/* Set data pointer after line lengths */
|
||||||
dp = encoded + planes * (height << 1);
|
dp = encoded + planes * (height << 1);
|
||||||
|
|
||||||
/* Ignore alpha plane, don't know what to do with it */
|
/* Ignore alpha plane, don't know what to do with it */
|
||||||
if (planes == 4)
|
if (planes == 4)
|
||||||
planes--;
|
planes--;
|
||||||
|
|
||||||
px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);
|
px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);
|
||||||
|
|
||||||
for (p = 0; p < planes; p++) {
|
for (p = 0; p < planes; p++) {
|
||||||
/* Lines length pointer for this plane */
|
/* Lines length pointer for this plane */
|
||||||
lp = encoded + p * (height << 1);
|
lp = encoded + p * (height << 1);
|
||||||
|
|
||||||
/* Decode a plane */
|
/* Decode a plane */
|
||||||
for(row = 0; row < height; row++) {
|
for(row = 0; row < height; row++) {
|
||||||
pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
|
pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
|
||||||
pixptr_end = pixptr + c->pic.linesize[0];
|
pixptr_end = pixptr + c->pic.linesize[0];
|
||||||
dlen = be2me_16(*(unsigned short *)(lp+row*2));
|
dlen = be2me_16(*(unsigned short *)(lp+row*2));
|
||||||
/* Decode a row of this plane */
|
/* Decode a row of this plane */
|
||||||
while(dlen > 0) {
|
while(dlen > 0) {
|
||||||
if(dp + 1 >= buf+buf_size) return -1;
|
if(dp + 1 >= buf+buf_size) return -1;
|
||||||
if ((count = *dp++) <= 127) {
|
if ((count = *dp++) <= 127) {
|
||||||
count++;
|
count++;
|
||||||
dlen -= count + 1;
|
dlen -= count + 1;
|
||||||
if (pixptr + count * px_inc > pixptr_end)
|
if (pixptr + count * px_inc > pixptr_end)
|
||||||
break;
|
break;
|
||||||
if(dp + count > buf+buf_size) return -1;
|
if(dp + count > buf+buf_size) return -1;
|
||||||
while(count--) {
|
while(count--) {
|
||||||
*pixptr = *dp++;
|
*pixptr = *dp++;
|
||||||
pixptr += px_inc;
|
pixptr += px_inc;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
count = 257 - count;
|
count = 257 - count;
|
||||||
if (pixptr + count * px_inc > pixptr_end)
|
if (pixptr + count * px_inc > pixptr_end)
|
||||||
break;
|
break;
|
||||||
while(count--) {
|
while(count--) {
|
||||||
*pixptr = *dp;
|
*pixptr = *dp;
|
||||||
pixptr += px_inc;
|
pixptr += px_inc;
|
||||||
}
|
}
|
||||||
dp++;
|
dp++;
|
||||||
dlen -= 2;
|
dlen -= 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (avctx->palctrl) {
|
if (avctx->palctrl) {
|
||||||
memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
|
memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
|
||||||
if (avctx->palctrl->palette_changed) {
|
if (avctx->palctrl->palette_changed) {
|
||||||
c->pic.palette_has_changed = 1;
|
c->pic.palette_has_changed = 1;
|
||||||
avctx->palctrl->palette_changed = 0;
|
avctx->palctrl->palette_changed = 0;
|
||||||
} else
|
} else
|
||||||
c->pic.palette_has_changed = 0;
|
c->pic.palette_has_changed = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
*data_size = sizeof(AVFrame);
|
*data_size = sizeof(AVFrame);
|
||||||
*(AVFrame*)data = c->pic;
|
*(AVFrame*)data = c->pic;
|
||||||
|
|
||||||
/* always report that the buffer was completely consumed */
|
/* always report that the buffer was completely consumed */
|
||||||
return buf_size;
|
return buf_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
|
|||||||
*/
|
*/
|
||||||
static int decode_init(AVCodecContext *avctx)
|
static int decode_init(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
||||||
|
|
||||||
c->avctx = avctx;
|
c->avctx = avctx;
|
||||||
avctx->has_b_frames = 0;
|
avctx->has_b_frames = 0;
|
||||||
|
|
||||||
c->pic.data[0] = NULL;
|
c->pic.data[0] = NULL;
|
||||||
|
|
||||||
if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
|
if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (avctx->bits_per_sample) {
|
switch (avctx->bits_per_sample) {
|
||||||
case 8:
|
case 8:
|
||||||
avctx->pix_fmt = PIX_FMT_PAL8;
|
avctx->pix_fmt = PIX_FMT_PAL8;
|
||||||
c->planes = 1;
|
c->planes = 1;
|
||||||
c->planemap[0] = 0; // 1st plane is palette indexes
|
c->planemap[0] = 0; // 1st plane is palette indexes
|
||||||
if (avctx->palctrl == NULL) {
|
if (avctx->palctrl == NULL) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
|
av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 24:
|
case 24:
|
||||||
avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
|
avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
|
||||||
c->planes = 3;
|
c->planes = 3;
|
||||||
c->planemap[0] = 2; // 1st plane is red
|
c->planemap[0] = 2; // 1st plane is red
|
||||||
c->planemap[1] = 1; // 2nd plane is green
|
c->planemap[1] = 1; // 2nd plane is green
|
||||||
c->planemap[2] = 0; // 3rd plane is blue
|
c->planemap[2] = 0; // 3rd plane is blue
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
avctx->pix_fmt = PIX_FMT_RGBA32;
|
avctx->pix_fmt = PIX_FMT_RGBA32;
|
||||||
c->planes = 4;
|
c->planes = 4;
|
||||||
#ifdef WORDS_BIGENDIAN
|
#ifdef WORDS_BIGENDIAN
|
||||||
c->planemap[0] = 1; // 1st plane is red
|
c->planemap[0] = 1; // 1st plane is red
|
||||||
c->planemap[1] = 2; // 2nd plane is green
|
c->planemap[1] = 2; // 2nd plane is green
|
||||||
c->planemap[2] = 3; // 3rd plane is blue
|
c->planemap[2] = 3; // 3rd plane is blue
|
||||||
c->planemap[3] = 0; // 4th plane is alpha???
|
c->planemap[3] = 0; // 4th plane is alpha???
|
||||||
#else
|
#else
|
||||||
c->planemap[0] = 2; // 1st plane is red
|
c->planemap[0] = 2; // 1st plane is red
|
||||||
c->planemap[1] = 1; // 2nd plane is green
|
c->planemap[1] = 1; // 2nd plane is green
|
||||||
c->planemap[2] = 0; // 3rd plane is blue
|
c->planemap[2] = 0; // 3rd plane is blue
|
||||||
c->planemap[3] = 3; // 4th plane is alpha???
|
c->planemap[3] = 3; // 4th plane is alpha???
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
|
av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx)
|
|||||||
*/
|
*/
|
||||||
static int decode_end(AVCodecContext *avctx)
|
static int decode_end(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
|
||||||
|
|
||||||
if (c->pic.data[0])
|
if (c->pic.data[0])
|
||||||
avctx->release_buffer(avctx, &c->pic);
|
avctx->release_buffer(avctx, &c->pic);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
AVCodec eightbps_decoder = {
|
AVCodec eightbps_decoder = {
|
||||||
"8bps",
|
"8bps",
|
||||||
CODEC_TYPE_VIDEO,
|
CODEC_TYPE_VIDEO,
|
||||||
CODEC_ID_8BPS,
|
CODEC_ID_8BPS,
|
||||||
sizeof(EightBpsContext),
|
sizeof(EightBpsContext),
|
||||||
decode_init,
|
decode_init,
|
||||||
NULL,
|
NULL,
|
||||||
decode_end,
|
decode_end,
|
||||||
decode_frame,
|
decode_frame,
|
||||||
CODEC_CAP_DR1,
|
CODEC_CAP_DR1,
|
||||||
};
|
};
|
||||||
|
@ -58,11 +58,11 @@ typedef struct AC3DecodeState {
|
|||||||
a52_state_t* (*a52_init)(uint32_t mm_accel);
|
a52_state_t* (*a52_init)(uint32_t mm_accel);
|
||||||
sample_t* (*a52_samples)(a52_state_t * state);
|
sample_t* (*a52_samples)(a52_state_t * state);
|
||||||
int (*a52_syncinfo)(uint8_t * buf, int * flags,
|
int (*a52_syncinfo)(uint8_t * buf, int * flags,
|
||||||
int * sample_rate, int * bit_rate);
|
int * sample_rate, int * bit_rate);
|
||||||
int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags,
|
int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags,
|
||||||
sample_t * level, sample_t bias);
|
sample_t * level, sample_t bias);
|
||||||
void (*a52_dynrng)(a52_state_t * state,
|
void (*a52_dynrng)(a52_state_t * state,
|
||||||
sample_t (* call) (sample_t, void *), void * data);
|
sample_t (* call) (sample_t, void *), void * data);
|
||||||
int (*a52_block)(a52_state_t * state);
|
int (*a52_block)(a52_state_t * state);
|
||||||
void (*a52_free)(a52_state_t * state);
|
void (*a52_free)(a52_state_t * state);
|
||||||
|
|
||||||
@ -105,7 +105,7 @@ static int a52_decode_init(AVCodecContext *avctx)
|
|||||||
if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo
|
if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo
|
||||||
|| !s->a52_frame || !s->a52_block || !s->a52_free)
|
|| !s->a52_frame || !s->a52_block || !s->a52_free)
|
||||||
{
|
{
|
||||||
dlclose(s->handle);
|
dlclose(s->handle);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -130,22 +130,22 @@ static int a52_decode_init(AVCodecContext *avctx)
|
|||||||
static inline int blah (int32_t i)
|
static inline int blah (int32_t i)
|
||||||
{
|
{
|
||||||
if (i > 0x43c07fff)
|
if (i > 0x43c07fff)
|
||||||
return 32767;
|
return 32767;
|
||||||
else if (i < 0x43bf8000)
|
else if (i < 0x43bf8000)
|
||||||
return -32768;
|
return -32768;
|
||||||
return i - 0x43c00000;
|
return i - 0x43c00000;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
|
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
|
||||||
{
|
{
|
||||||
int i, j, c;
|
int i, j, c;
|
||||||
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
nchannels *= 256;
|
nchannels *= 256;
|
||||||
for (i = 0; i < 256; i++) {
|
for (i = 0; i < 256; i++) {
|
||||||
for (c = 0; c < nchannels; c += 256)
|
for (c = 0; c < nchannels; c += 256)
|
||||||
s16[j++] = blah (f[i + c]);
|
s16[j++] = blah (f[i + c]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,7 +164,7 @@ static int a52_decode_frame(AVCodecContext *avctx,
|
|||||||
short *out_samples = data;
|
short *out_samples = data;
|
||||||
float level;
|
float level;
|
||||||
static const int ac3_channels[8] = {
|
static const int ac3_channels[8] = {
|
||||||
2, 1, 2, 3, 3, 4, 4, 5
|
2, 1, 2, 3, 3, 4, 4, 5
|
||||||
};
|
};
|
||||||
|
|
||||||
buf_ptr = buf;
|
buf_ptr = buf;
|
||||||
@ -186,20 +186,20 @@ static int a52_decode_frame(AVCodecContext *avctx,
|
|||||||
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
|
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
} else {
|
} else {
|
||||||
s->frame_size = len;
|
s->frame_size = len;
|
||||||
/* update codec info */
|
/* update codec info */
|
||||||
avctx->sample_rate = sample_rate;
|
avctx->sample_rate = sample_rate;
|
||||||
s->channels = ac3_channels[s->flags & 7];
|
s->channels = ac3_channels[s->flags & 7];
|
||||||
if (s->flags & A52_LFE)
|
if (s->flags & A52_LFE)
|
||||||
s->channels++;
|
s->channels++;
|
||||||
if (avctx->channels == 0)
|
if (avctx->channels == 0)
|
||||||
/* No specific number of channel requested */
|
/* No specific number of channel requested */
|
||||||
avctx->channels = s->channels;
|
avctx->channels = s->channels;
|
||||||
else if (s->channels < avctx->channels) {
|
else if (s->channels < avctx->channels) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
||||||
avctx->channels = s->channels;
|
avctx->channels = s->channels;
|
||||||
}
|
}
|
||||||
avctx->bit_rate = bit_rate;
|
avctx->bit_rate = bit_rate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (len < s->frame_size) {
|
} else if (len < s->frame_size) {
|
||||||
|
@ -54,23 +54,23 @@ stream_samples_t samples;
|
|||||||
static inline int blah (int32_t i)
|
static inline int blah (int32_t i)
|
||||||
{
|
{
|
||||||
if (i > 0x43c07fff)
|
if (i > 0x43c07fff)
|
||||||
return 32767;
|
return 32767;
|
||||||
else if (i < 0x43bf8000)
|
else if (i < 0x43bf8000)
|
||||||
return -32768;
|
return -32768;
|
||||||
else
|
else
|
||||||
return i - 0x43c00000;
|
return i - 0x43c00000;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
|
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
|
||||||
{
|
{
|
||||||
int i, j, c;
|
int i, j, c;
|
||||||
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
nchannels *= 256;
|
nchannels *= 256;
|
||||||
for (i = 0; i < 256; i++) {
|
for (i = 0; i < 256; i++) {
|
||||||
for (c = 0; c < nchannels; c += 256)
|
for (c = 0; c < nchannels; c += 256)
|
||||||
s16[j++] = blah (f[i + c]);
|
s16[j++] = blah (f[i + c]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,7 +89,7 @@ static int ac3_decode_frame(AVCodecContext *avctx,
|
|||||||
short *out_samples = data;
|
short *out_samples = data;
|
||||||
float level;
|
float level;
|
||||||
static const int ac3_channels[8] = {
|
static const int ac3_channels[8] = {
|
||||||
2, 1, 2, 3, 3, 4, 4, 5
|
2, 1, 2, 3, 3, 4, 4, 5
|
||||||
};
|
};
|
||||||
|
|
||||||
buf_ptr = buf;
|
buf_ptr = buf;
|
||||||
@ -111,20 +111,20 @@ static int ac3_decode_frame(AVCodecContext *avctx,
|
|||||||
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
|
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
} else {
|
} else {
|
||||||
s->frame_size = len;
|
s->frame_size = len;
|
||||||
/* update codec info */
|
/* update codec info */
|
||||||
avctx->sample_rate = sample_rate;
|
avctx->sample_rate = sample_rate;
|
||||||
s->channels = ac3_channels[s->flags & 7];
|
s->channels = ac3_channels[s->flags & 7];
|
||||||
if (s->flags & AC3_LFE)
|
if (s->flags & AC3_LFE)
|
||||||
s->channels++;
|
s->channels++;
|
||||||
if (avctx->channels == 0)
|
if (avctx->channels == 0)
|
||||||
/* No specific number of channel requested */
|
/* No specific number of channel requested */
|
||||||
avctx->channels = s->channels;
|
avctx->channels = s->channels;
|
||||||
else if (s->channels < avctx->channels) {
|
else if (s->channels < avctx->channels) {
|
||||||
av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
||||||
avctx->channels = s->channels;
|
avctx->channels = s->channels;
|
||||||
}
|
}
|
||||||
avctx->bit_rate = bit_rate;
|
avctx->bit_rate = bit_rate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (len < s->frame_size) {
|
} else if (len < s->frame_size) {
|
||||||
|
@ -337,8 +337,8 @@ static void fft_init(int ln)
|
|||||||
/* do a 2^n point complex fft on 2^ln points. */
|
/* do a 2^n point complex fft on 2^ln points. */
|
||||||
static void fft(IComplex *z, int ln)
|
static void fft(IComplex *z, int ln)
|
||||||
{
|
{
|
||||||
int j, l, np, np2;
|
int j, l, np, np2;
|
||||||
int nblocks, nloops;
|
int nblocks, nloops;
|
||||||
register IComplex *p,*q;
|
register IComplex *p,*q;
|
||||||
int tmp_re, tmp_im;
|
int tmp_re, tmp_im;
|
||||||
|
|
||||||
@ -472,7 +472,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
|
|||||||
exp_strategy[i][ch] = EXP_REUSE;
|
exp_strategy[i][ch] = EXP_REUSE;
|
||||||
}
|
}
|
||||||
if (is_lfe)
|
if (is_lfe)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* now select the encoding strategy type : if exponents are often
|
/* now select the encoding strategy type : if exponents are often
|
||||||
recoded, we use a coarse encoding */
|
recoded, we use a coarse encoding */
|
||||||
@ -493,7 +493,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
|
|||||||
exp_strategy[i][ch] = EXP_D15;
|
exp_strategy[i][ch] = EXP_D15;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
i = j;
|
i = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -553,9 +553,9 @@ static int encode_exp(uint8_t encoded_exp[N/2],
|
|||||||
/* Decrease the delta between each groups to within 2
|
/* Decrease the delta between each groups to within 2
|
||||||
* so that they can be differentially encoded */
|
* so that they can be differentially encoded */
|
||||||
for (i=1;i<=nb_groups;i++)
|
for (i=1;i<=nb_groups;i++)
|
||||||
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
|
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
|
||||||
for (i=nb_groups-1;i>=0;i--)
|
for (i=nb_groups-1;i>=0;i--)
|
||||||
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
|
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
|
||||||
|
|
||||||
/* now we have the exponent values the decoder will see */
|
/* now we have the exponent values the decoder will see */
|
||||||
encoded_exp[0] = exp1[0];
|
encoded_exp[0] = exp1[0];
|
||||||
@ -708,8 +708,8 @@ static int compute_bit_allocation(AC3EncodeContext *s,
|
|||||||
if(i==0) frame_bits += 4;
|
if(i==0) frame_bits += 4;
|
||||||
}
|
}
|
||||||
frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */
|
frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */
|
||||||
if (s->lfe)
|
if (s->lfe)
|
||||||
frame_bits++; /* lfeexpstr */
|
frame_bits++; /* lfeexpstr */
|
||||||
for(ch=0;ch<s->nb_channels;ch++) {
|
for(ch=0;ch<s->nb_channels;ch++) {
|
||||||
if (exp_strategy[i][ch] != EXP_REUSE)
|
if (exp_strategy[i][ch] != EXP_REUSE)
|
||||||
frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */
|
frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */
|
||||||
@ -736,11 +736,11 @@ static int compute_bit_allocation(AC3EncodeContext *s,
|
|||||||
|
|
||||||
csnroffst = s->csnroffst;
|
csnroffst = s->csnroffst;
|
||||||
while (csnroffst >= 0 &&
|
while (csnroffst >= 0 &&
|
||||||
bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
|
bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
|
||||||
csnroffst -= SNR_INC1;
|
csnroffst -= SNR_INC1;
|
||||||
if (csnroffst < 0) {
|
if (csnroffst < 0) {
|
||||||
av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n");
|
av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
while ((csnroffst + SNR_INC1) <= 63 &&
|
while ((csnroffst + SNR_INC1) <= 63 &&
|
||||||
bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
|
bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
|
||||||
@ -815,19 +815,19 @@ static int AC3_encode_init(AVCodecContext *avctx)
|
|||||||
int i, j, ch;
|
int i, j, ch;
|
||||||
float alpha;
|
float alpha;
|
||||||
static const uint8_t acmod_defs[6] = {
|
static const uint8_t acmod_defs[6] = {
|
||||||
0x01, /* C */
|
0x01, /* C */
|
||||||
0x02, /* L R */
|
0x02, /* L R */
|
||||||
0x03, /* L C R */
|
0x03, /* L C R */
|
||||||
0x06, /* L R SL SR */
|
0x06, /* L R SL SR */
|
||||||
0x07, /* L C R SL SR */
|
0x07, /* L C R SL SR */
|
||||||
0x07, /* L C R SL SR (+LFE) */
|
0x07, /* L C R SL SR (+LFE) */
|
||||||
};
|
};
|
||||||
|
|
||||||
avctx->frame_size = AC3_FRAME_SIZE;
|
avctx->frame_size = AC3_FRAME_SIZE;
|
||||||
|
|
||||||
/* number of channels */
|
/* number of channels */
|
||||||
if (channels < 1 || channels > 6)
|
if (channels < 1 || channels > 6)
|
||||||
return -1;
|
return -1;
|
||||||
s->acmod = acmod_defs[channels - 1];
|
s->acmod = acmod_defs[channels - 1];
|
||||||
s->lfe = (channels == 6) ? 1 : 0;
|
s->lfe = (channels == 6) ? 1 : 0;
|
||||||
s->nb_all_channels = channels;
|
s->nb_all_channels = channels;
|
||||||
@ -871,7 +871,7 @@ static int AC3_encode_init(AVCodecContext *avctx)
|
|||||||
s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
|
s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
|
||||||
}
|
}
|
||||||
if (s->lfe) {
|
if (s->lfe) {
|
||||||
s->nb_coefs[s->lfe_channel] = 7; /* fixed */
|
s->nb_coefs[s->lfe_channel] = 7; /* fixed */
|
||||||
}
|
}
|
||||||
/* initial snr offset */
|
/* initial snr offset */
|
||||||
s->csnroffst = 40;
|
s->csnroffst = 40;
|
||||||
@ -907,9 +907,9 @@ static void output_frame_header(AC3EncodeContext *s, unsigned char *frame)
|
|||||||
put_bits(&s->pb, 3, s->bsmod);
|
put_bits(&s->pb, 3, s->bsmod);
|
||||||
put_bits(&s->pb, 3, s->acmod);
|
put_bits(&s->pb, 3, s->acmod);
|
||||||
if ((s->acmod & 0x01) && s->acmod != 0x01)
|
if ((s->acmod & 0x01) && s->acmod != 0x01)
|
||||||
put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
|
put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
|
||||||
if (s->acmod & 0x04)
|
if (s->acmod & 0x04)
|
||||||
put_bits(&s->pb, 2, 1); /* XXX -6 dB */
|
put_bits(&s->pb, 2, 1); /* XXX -6 dB */
|
||||||
if (s->acmod == 0x02)
|
if (s->acmod == 0x02)
|
||||||
put_bits(&s->pb, 2, 0); /* surround not indicated */
|
put_bits(&s->pb, 2, 0); /* surround not indicated */
|
||||||
put_bits(&s->pb, 1, s->lfe); /* LFE */
|
put_bits(&s->pb, 1, s->lfe); /* LFE */
|
||||||
@ -995,20 +995,20 @@ static void output_audio_block(AC3EncodeContext *s,
|
|||||||
|
|
||||||
if (s->acmod == 2)
|
if (s->acmod == 2)
|
||||||
{
|
{
|
||||||
if(block_num==0)
|
if(block_num==0)
|
||||||
{
|
{
|
||||||
/* first block must define rematrixing (rematstr) */
|
/* first block must define rematrixing (rematstr) */
|
||||||
put_bits(&s->pb, 1, 1);
|
put_bits(&s->pb, 1, 1);
|
||||||
|
|
||||||
/* dummy rematrixing rematflg(1:4)=0 */
|
/* dummy rematrixing rematflg(1:4)=0 */
|
||||||
for (rbnd=0;rbnd<4;rbnd++)
|
for (rbnd=0;rbnd<4;rbnd++)
|
||||||
put_bits(&s->pb, 1, 0);
|
put_bits(&s->pb, 1, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* no matrixing (but should be used in the future) */
|
/* no matrixing (but should be used in the future) */
|
||||||
put_bits(&s->pb, 1, 0);
|
put_bits(&s->pb, 1, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
@ -1023,7 +1023,7 @@ static void output_audio_block(AC3EncodeContext *s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s->lfe) {
|
if (s->lfe) {
|
||||||
put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
|
put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(ch=0;ch<s->nb_channels;ch++) {
|
for(ch=0;ch<s->nb_channels;ch++) {
|
||||||
@ -1047,7 +1047,7 @@ static void output_audio_block(AC3EncodeContext *s,
|
|||||||
group_size = 4;
|
group_size = 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
|
nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
|
||||||
p = encoded_exp[ch];
|
p = encoded_exp[ch];
|
||||||
|
|
||||||
/* first exponent */
|
/* first exponent */
|
||||||
@ -1075,8 +1075,8 @@ static void output_audio_block(AC3EncodeContext *s,
|
|||||||
put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
|
put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ch != s->lfe_channel)
|
if (ch != s->lfe_channel)
|
||||||
put_bits(&s->pb, 2, 0); /* no gain range info */
|
put_bits(&s->pb, 2, 0); /* no gain range info */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* bit allocation info */
|
/* bit allocation info */
|
||||||
|
@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int adpcm_encode_frame(AVCodecContext *avctx,
|
static int adpcm_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame, int buf_size, void *data)
|
unsigned char *frame, int buf_size, void *data)
|
||||||
{
|
{
|
||||||
int n, i, st;
|
int n, i, st;
|
||||||
short *samples;
|
short *samples;
|
||||||
@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx)
|
|||||||
|
|
||||||
switch(avctx->codec->id) {
|
switch(avctx->codec->id) {
|
||||||
case CODEC_ID_ADPCM_CT:
|
case CODEC_ID_ADPCM_CT:
|
||||||
c->status[0].step = c->status[1].step = 511;
|
c->status[0].step = c->status[1].step = 511;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
|
|||||||
predictor = c->predictor;
|
predictor = c->predictor;
|
||||||
/* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
|
/* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
|
||||||
if(sign)
|
if(sign)
|
||||||
predictor = ((predictor * 254) >> 8) - diff;
|
predictor = ((predictor * 254) >> 8) - diff;
|
||||||
else
|
else
|
||||||
predictor = ((predictor * 254) >> 8) + diff;
|
predictor = ((predictor * 254) >> 8) + diff;
|
||||||
/* calculate new step and clamp it to range 511..32767 */
|
/* calculate new step and clamp it to range 511..32767 */
|
||||||
new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
|
new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
|
||||||
c->step = new_step;
|
c->step = new_step;
|
||||||
if(c->step < 511)
|
if(c->step < 511)
|
||||||
c->step = 511;
|
c->step = 511;
|
||||||
if(c->step > 32767)
|
if(c->step > 32767)
|
||||||
c->step = 32767;
|
c->step = 32767;
|
||||||
|
|
||||||
CLAMP_TO_SHORT(predictor);
|
CLAMP_TO_SHORT(predictor);
|
||||||
c->predictor = predictor;
|
c->predictor = predictor;
|
||||||
@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int adpcm_decode_frame(AVCodecContext *avctx,
|
static int adpcm_decode_frame(AVCodecContext *avctx,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
uint8_t *buf, int buf_size)
|
uint8_t *buf, int buf_size)
|
||||||
{
|
{
|
||||||
ADPCMContext *c = avctx->priv_data;
|
ADPCMContext *c = avctx->priv_data;
|
||||||
ADPCMChannelStatus *cs;
|
ADPCMChannelStatus *cs;
|
||||||
@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
|
|||||||
cs->predictor -= 0x10000;
|
cs->predictor -= 0x10000;
|
||||||
CLAMP_TO_SHORT(cs->predictor);
|
CLAMP_TO_SHORT(cs->predictor);
|
||||||
|
|
||||||
// XXX: is this correct ??: *samples++ = cs->predictor;
|
// XXX: is this correct ??: *samples++ = cs->predictor;
|
||||||
|
|
||||||
cs->step_index = *src++;
|
cs->step_index = *src++;
|
||||||
if (cs->step_index < 0) cs->step_index = 0;
|
if (cs->step_index < 0) cs->step_index = 0;
|
||||||
@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for(m=4; src < (buf + buf_size);) {
|
for(m=4; src < (buf + buf_size);) {
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
|
||||||
if (st)
|
if (st)
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3);
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3);
|
||||||
if (st) {
|
if (st) {
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3);
|
||||||
if (!--m) {
|
if (!--m) {
|
||||||
m=4;
|
m=4;
|
||||||
src+=4;
|
src+=4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CODEC_ID_ADPCM_4XM:
|
case CODEC_ID_ADPCM_4XM:
|
||||||
cs = &(c->status[0]);
|
cs = &(c->status[0]);
|
||||||
@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
m= (buf_size - (src - buf))>>st;
|
m= (buf_size - (src - buf))>>st;
|
||||||
for(i=0; i<m; i++) {
|
for(i=0; i<m; i++) {
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
|
||||||
if (st)
|
if (st)
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
|
||||||
if (st)
|
if (st)
|
||||||
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
|
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
src += m<<st;
|
src += m<<st;
|
||||||
|
|
||||||
@ -958,7 +958,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CODEC_ID_ADPCM_CT:
|
case CODEC_ID_ADPCM_CT:
|
||||||
while (src < buf + buf_size) {
|
while (src < buf + buf_size) {
|
||||||
if (st) {
|
if (st) {
|
||||||
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
|
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
|
||||||
(src[0] >> 4) & 0x0F);
|
(src[0] >> 4) & 0x0F);
|
||||||
@ -970,78 +970,78 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
|
|||||||
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
|
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
|
||||||
src[0] & 0x0F);
|
src[0] & 0x0F);
|
||||||
}
|
}
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CODEC_ID_ADPCM_SWF:
|
case CODEC_ID_ADPCM_SWF:
|
||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
const int *table;
|
const int *table;
|
||||||
int k0, signmask;
|
int k0, signmask;
|
||||||
int size = buf_size*8;
|
int size = buf_size*8;
|
||||||
|
|
||||||
init_get_bits(&gb, buf, size);
|
init_get_bits(&gb, buf, size);
|
||||||
|
|
||||||
// first frame, read bits & inital values
|
// first frame, read bits & inital values
|
||||||
if (!c->nb_bits)
|
if (!c->nb_bits)
|
||||||
{
|
{
|
||||||
c->nb_bits = get_bits(&gb, 2)+2;
|
c->nb_bits = get_bits(&gb, 2)+2;
|
||||||
// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
|
// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
table = swf_index_tables[c->nb_bits-2];
|
table = swf_index_tables[c->nb_bits-2];
|
||||||
k0 = 1 << (c->nb_bits-2);
|
k0 = 1 << (c->nb_bits-2);
|
||||||
signmask = 1 << (c->nb_bits-1);
|
signmask = 1 << (c->nb_bits-1);
|
||||||
|
|
||||||
while (get_bits_count(&gb) <= size)
|
while (get_bits_count(&gb) <= size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
c->nb_samples++;
|
c->nb_samples++;
|
||||||
// wrap around at every 4096 samples...
|
// wrap around at every 4096 samples...
|
||||||
if ((c->nb_samples & 0xfff) == 1)
|
if ((c->nb_samples & 0xfff) == 1)
|
||||||
{
|
{
|
||||||
for (i = 0; i <= st; i++)
|
for (i = 0; i <= st; i++)
|
||||||
{
|
{
|
||||||
*samples++ = c->status[i].predictor = get_sbits(&gb, 16);
|
*samples++ = c->status[i].predictor = get_sbits(&gb, 16);
|
||||||
c->status[i].step_index = get_bits(&gb, 6);
|
c->status[i].step_index = get_bits(&gb, 6);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// similar to IMA adpcm
|
// similar to IMA adpcm
|
||||||
for (i = 0; i <= st; i++)
|
for (i = 0; i <= st; i++)
|
||||||
{
|
{
|
||||||
int delta = get_bits(&gb, c->nb_bits);
|
int delta = get_bits(&gb, c->nb_bits);
|
||||||
int step = step_table[c->status[i].step_index];
|
int step = step_table[c->status[i].step_index];
|
||||||
long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
|
long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
|
||||||
int k = k0;
|
int k = k0;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (delta & k)
|
if (delta & k)
|
||||||
vpdiff += step;
|
vpdiff += step;
|
||||||
step >>= 1;
|
step >>= 1;
|
||||||
k >>= 1;
|
k >>= 1;
|
||||||
} while(k);
|
} while(k);
|
||||||
vpdiff += step;
|
vpdiff += step;
|
||||||
|
|
||||||
if (delta & signmask)
|
if (delta & signmask)
|
||||||
c->status[i].predictor -= vpdiff;
|
c->status[i].predictor -= vpdiff;
|
||||||
else
|
else
|
||||||
c->status[i].predictor += vpdiff;
|
c->status[i].predictor += vpdiff;
|
||||||
|
|
||||||
c->status[i].step_index += table[delta & (~signmask)];
|
c->status[i].step_index += table[delta & (~signmask)];
|
||||||
|
|
||||||
c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
|
c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
|
||||||
c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
|
c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
|
||||||
|
|
||||||
*samples++ = c->status[i].predictor;
|
*samples++ = c->status[i].predictor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// src += get_bits_count(&gb)*8;
|
// src += get_bits_count(&gb)*8;
|
||||||
src += size;
|
src += size;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CODEC_ID_ADPCM_YAMAHA:
|
case CODEC_ID_ADPCM_YAMAHA:
|
||||||
while (src < buf + buf_size) {
|
while (src < buf + buf_size) {
|
||||||
|
@ -35,7 +35,7 @@ void avcodec_register_all(void)
|
|||||||
static int inited = 0;
|
static int inited = 0;
|
||||||
|
|
||||||
if (inited != 0)
|
if (inited != 0)
|
||||||
return;
|
return;
|
||||||
inited = 1;
|
inited = 1;
|
||||||
|
|
||||||
/* encoders */
|
/* encoders */
|
||||||
|
@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x)
|
|||||||
} *) (p))->__l) = l; \
|
} *) (p))->__l) = l; \
|
||||||
} while (0)
|
} while (0)
|
||||||
struct unaligned_long { uint64_t l; } __attribute__((packed));
|
struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||||
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
||||||
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
||||||
|
|
||||||
#if GNUC_PREREQ(3,3)
|
#if GNUC_PREREQ(3,3)
|
||||||
#define prefetch(p) __builtin_prefetch((p), 0, 1)
|
#define prefetch(p) __builtin_prefetch((p), 0, 1)
|
||||||
#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
|
#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
|
||||||
#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
|
#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
|
||||||
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
|
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
|
||||||
#define cmpbge __builtin_alpha_cmpbge
|
#define cmpbge __builtin_alpha_cmpbge
|
||||||
/* Avoid warnings. */
|
/* Avoid warnings. */
|
||||||
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
|
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
|
||||||
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
|
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
|
||||||
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
|
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
|
||||||
#define zap __builtin_alpha_zap
|
#define zap __builtin_alpha_zap
|
||||||
#define zapnot __builtin_alpha_zapnot
|
#define zapnot __builtin_alpha_zapnot
|
||||||
#define amask __builtin_alpha_amask
|
#define amask __builtin_alpha_amask
|
||||||
#define implver __builtin_alpha_implver
|
#define implver __builtin_alpha_implver
|
||||||
#define rpcc __builtin_alpha_rpcc
|
#define rpcc __builtin_alpha_rpcc
|
||||||
#else
|
#else
|
||||||
#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
||||||
#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
||||||
@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
|||||||
#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||||
#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||||
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||||
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
|
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
|
||||||
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
|
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
|
||||||
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
|
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
|
||||||
#endif
|
#endif
|
||||||
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
|
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
|
||||||
|
|
||||||
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
|
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
|
||||||
#define minub8 __builtin_alpha_minub8
|
#define minub8 __builtin_alpha_minub8
|
||||||
#define minsb8 __builtin_alpha_minsb8
|
#define minsb8 __builtin_alpha_minsb8
|
||||||
#define minuw4 __builtin_alpha_minuw4
|
#define minuw4 __builtin_alpha_minuw4
|
||||||
#define minsw4 __builtin_alpha_minsw4
|
#define minsw4 __builtin_alpha_minsw4
|
||||||
#define maxub8 __builtin_alpha_maxub8
|
#define maxub8 __builtin_alpha_maxub8
|
||||||
#define maxsb8 __builtin_alpha_maxsb8
|
#define maxsb8 __builtin_alpha_maxsb8
|
||||||
#define maxuw4 __builtin_alpha_maxuw4
|
#define maxuw4 __builtin_alpha_maxuw4
|
||||||
#define maxsw4 __builtin_alpha_maxsw4
|
#define maxsw4 __builtin_alpha_maxsw4
|
||||||
#define perr __builtin_alpha_perr
|
#define perr __builtin_alpha_perr
|
||||||
#define pklb __builtin_alpha_pklb
|
#define pklb __builtin_alpha_pklb
|
||||||
#define pkwb __builtin_alpha_pkwb
|
#define pkwb __builtin_alpha_pkwb
|
||||||
#define unpkbl __builtin_alpha_unpkbl
|
#define unpkbl __builtin_alpha_unpkbl
|
||||||
#define unpkbw __builtin_alpha_unpkbw
|
#define unpkbw __builtin_alpha_unpkbw
|
||||||
#else
|
#else
|
||||||
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||||
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||||
@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
|||||||
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||||
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||||
#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
|
#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
|
||||||
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||||
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||||
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||||
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
||||||
|
|
||||||
#include <c_asm.h>
|
#include <c_asm.h>
|
||||||
#define ldq(p) (*(const uint64_t *) (p))
|
#define ldq(p) (*(const uint64_t *) (p))
|
||||||
@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
|||||||
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
|
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
|
||||||
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
|
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
|
||||||
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
|
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
|
||||||
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
||||||
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
|
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
|
||||||
#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
|
#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
|
||||||
#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
|
#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
|
||||||
@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
|||||||
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
|
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
|
||||||
#define amask(a) asm ("amask %a0,%v0", a)
|
#define amask(a) asm ("amask %a0,%v0", a)
|
||||||
#define implver() asm ("implver %v0")
|
#define implver() asm ("implver %v0")
|
||||||
#define rpcc() asm ("rpcc %v0")
|
#define rpcc() asm ("rpcc %v0")
|
||||||
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
|
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
|
||||||
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
|
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
|
||||||
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
|
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
|
||||||
|
@ -71,7 +71,7 @@ $unaligned:
|
|||||||
addq a1, a2, a1
|
addq a1, a2, a1
|
||||||
nop
|
nop
|
||||||
|
|
||||||
ldq_u t4, 0(a1)
|
ldq_u t4, 0(a1)
|
||||||
ldq_u t5, 8(a1)
|
ldq_u t5, 8(a1)
|
||||||
addq a1, a2, a1
|
addq a1, a2, a1
|
||||||
nop
|
nop
|
||||||
@ -120,20 +120,20 @@ $aligned:
|
|||||||
addq a1, a2, a1
|
addq a1, a2, a1
|
||||||
ldq t3, 0(a1)
|
ldq t3, 0(a1)
|
||||||
|
|
||||||
addq a0, a2, t4
|
addq a0, a2, t4
|
||||||
addq a1, a2, a1
|
addq a1, a2, a1
|
||||||
addq t4, a2, t5
|
addq t4, a2, t5
|
||||||
subq a3, 4, a3
|
subq a3, 4, a3
|
||||||
|
|
||||||
stq t0, 0(a0)
|
stq t0, 0(a0)
|
||||||
addq t5, a2, t6
|
addq t5, a2, t6
|
||||||
stq t1, 0(t4)
|
stq t1, 0(t4)
|
||||||
addq t6, a2, a0
|
addq t6, a2, a0
|
||||||
|
|
||||||
stq t2, 0(t5)
|
stq t2, 0(t5)
|
||||||
stq t3, 0(t6)
|
stq t3, 0(t6)
|
||||||
|
|
||||||
bne a3, $aligned
|
bne a3, $aligned
|
||||||
ret
|
ret
|
||||||
.end put_pixels_axp_asm
|
.end put_pixels_axp_asm
|
||||||
|
|
||||||
|
@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0 /* now done in assembly */
|
#if 0 /* now done in assembly */
|
||||||
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
|
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
|
||||||
{
|
{
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block)
|
|||||||
stq(v, block + 1 * 4);
|
stq(v, block + 1 * 4);
|
||||||
stq(w, block + 2 * 4);
|
stq(w, block + 2 * 4);
|
||||||
stq(w, block + 3 * 4);
|
stq(w, block + 3 * 4);
|
||||||
block += 4 * 4;
|
block += 4 * 4;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
|
@ -301,7 +301,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
|
|||||||
|
|
||||||
|
|
||||||
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||||
{
|
{
|
||||||
short serial_data[250] = {0};
|
short serial_data[250] = {0};
|
||||||
|
|
||||||
@ -440,7 +440,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||||
{
|
{
|
||||||
AMRContext *s = (AMRContext*)avctx->priv_data;
|
AMRContext *s = (AMRContext*)avctx->priv_data;
|
||||||
int written;
|
int written;
|
||||||
@ -584,7 +584,7 @@ static int amr_wb_encode_close(AVCodecContext * avctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int amr_wb_encode_frame(AVCodecContext *avctx,
|
static int amr_wb_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||||
{
|
{
|
||||||
AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
|
AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
|
||||||
int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
|
int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
|
||||||
|
@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
|
|||||||
#endif
|
#endif
|
||||||
c->idct_put= j_rev_dct_ARM_put;
|
c->idct_put= j_rev_dct_ARM_put;
|
||||||
c->idct_add= j_rev_dct_ARM_add;
|
c->idct_add= j_rev_dct_ARM_add;
|
||||||
c->idct = j_rev_dct_ARM;
|
c->idct = j_rev_dct_ARM;
|
||||||
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
|
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
|
||||||
} else if (idct_algo==FF_IDCT_SIMPLEARM){
|
} else if (idct_algo==FF_IDCT_SIMPLEARM){
|
||||||
c->idct_put= simple_idct_ARM_put;
|
c->idct_put= simple_idct_ARM_put;
|
||||||
c->idct_add= simple_idct_ARM_add;
|
c->idct_add= simple_idct_ARM_add;
|
||||||
c->idct = simple_idct_ARM;
|
c->idct = simple_idct_ARM;
|
||||||
c->idct_permutation_type= FF_NO_IDCT_PERM;
|
c->idct_permutation_type= FF_NO_IDCT_PERM;
|
||||||
#ifdef HAVE_IPP
|
#ifdef HAVE_IPP
|
||||||
} else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
|
} else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
|
||||||
#else
|
#else
|
||||||
|
@ -138,10 +138,10 @@ void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
|
|||||||
mm_flags = mm_support();
|
mm_flags = mm_support();
|
||||||
|
|
||||||
if (avctx->dsp_mask) {
|
if (avctx->dsp_mask) {
|
||||||
if (avctx->dsp_mask & FF_MM_FORCE)
|
if (avctx->dsp_mask & FF_MM_FORCE)
|
||||||
mm_flags |= (avctx->dsp_mask & 0xffff);
|
mm_flags |= (avctx->dsp_mask & 0xffff);
|
||||||
else
|
else
|
||||||
mm_flags &= ~(avctx->dsp_mask & 0xffff);
|
mm_flags &= ~(avctx->dsp_mask & 0xffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(mm_flags & MM_IWMMXT)) return;
|
if (!(mm_flags & MM_IWMMXT)) return;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
C-like prototype :
|
C-like prototype :
|
||||||
void j_rev_dct_ARM(DCTBLOCK data)
|
void j_rev_dct_ARM(DCTBLOCK data)
|
||||||
|
|
||||||
With DCTBLOCK being a pointer to an array of 64 'signed shorts'
|
With DCTBLOCK being a pointer to an array of 64 'signed shorts'
|
||||||
|
|
||||||
@ -51,336 +51,336 @@
|
|||||||
#define FIX_M_1_961570560_ID 40
|
#define FIX_M_1_961570560_ID 40
|
||||||
#define FIX_M_2_562915447_ID 44
|
#define FIX_M_2_562915447_ID 44
|
||||||
#define FIX_0xFFFF_ID 48
|
#define FIX_0xFFFF_ID 48
|
||||||
.text
|
.text
|
||||||
.align
|
.align
|
||||||
|
|
||||||
.global j_rev_dct_ARM
|
.global j_rev_dct_ARM
|
||||||
j_rev_dct_ARM:
|
j_rev_dct_ARM:
|
||||||
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
|
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
|
||||||
|
|
||||||
sub sp, sp, #4 @ reserve some space on the stack
|
sub sp, sp, #4 @ reserve some space on the stack
|
||||||
str r0, [ sp ] @ save the DCT pointer to the stack
|
str r0, [ sp ] @ save the DCT pointer to the stack
|
||||||
|
|
||||||
mov lr, r0 @ lr = pointer to the current row
|
mov lr, r0 @ lr = pointer to the current row
|
||||||
mov r12, #8 @ r12 = row-counter
|
mov r12, #8 @ r12 = row-counter
|
||||||
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
|
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
|
||||||
row_loop:
|
row_loop:
|
||||||
ldrsh r0, [lr, # 0] @ r0 = 'd0'
|
ldrsh r0, [lr, # 0] @ r0 = 'd0'
|
||||||
ldrsh r1, [lr, # 8] @ r1 = 'd1'
|
ldrsh r1, [lr, # 8] @ r1 = 'd1'
|
||||||
|
|
||||||
@ Optimization for row that have all items except the first set to 0
|
@ Optimization for row that have all items except the first set to 0
|
||||||
@ (this works as the DCTELEMS are always 4-byte aligned)
|
@ (this works as the DCTELEMS are always 4-byte aligned)
|
||||||
ldr r5, [lr, # 0]
|
ldr r5, [lr, # 0]
|
||||||
ldr r2, [lr, # 4]
|
ldr r2, [lr, # 4]
|
||||||
ldr r3, [lr, # 8]
|
ldr r3, [lr, # 8]
|
||||||
ldr r4, [lr, #12]
|
ldr r4, [lr, #12]
|
||||||
orr r3, r3, r4
|
orr r3, r3, r4
|
||||||
orr r3, r3, r2
|
orr r3, r3, r2
|
||||||
orrs r5, r3, r5
|
orrs r5, r3, r5
|
||||||
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
|
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
|
||||||
orrs r2, r3, r1
|
orrs r2, r3, r1
|
||||||
beq empty_row
|
beq empty_row
|
||||||
|
|
||||||
ldrsh r2, [lr, # 2] @ r2 = 'd2'
|
ldrsh r2, [lr, # 2] @ r2 = 'd2'
|
||||||
ldrsh r4, [lr, # 4] @ r4 = 'd4'
|
ldrsh r4, [lr, # 4] @ r4 = 'd4'
|
||||||
ldrsh r6, [lr, # 6] @ r6 = 'd6'
|
ldrsh r6, [lr, # 6] @ r6 = 'd6'
|
||||||
|
|
||||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||||
add r7, r2, r6
|
add r7, r2, r6
|
||||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||||
mul r7, r3, r7 @ r7 = z1
|
mul r7, r3, r7 @ r7 = z1
|
||||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||||
mla r6, r5, r6, r7 @ r6 = tmp2
|
mla r6, r5, r6, r7 @ r6 = tmp2
|
||||||
add r5, r0, r4 @ r5 = tmp0
|
add r5, r0, r4 @ r5 = tmp0
|
||||||
mla r2, r3, r2, r7 @ r2 = tmp3
|
mla r2, r3, r2, r7 @ r2 = tmp3
|
||||||
sub r3, r0, r4 @ r3 = tmp1
|
sub r3, r0, r4 @ r3 = tmp1
|
||||||
|
|
||||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||||
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
|
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
|
||||||
|
|
||||||
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||||
|
|
||||||
ldrsh r3, [lr, #10] @ r3 = 'd3'
|
ldrsh r3, [lr, #10] @ r3 = 'd3'
|
||||||
ldrsh r5, [lr, #12] @ r5 = 'd5'
|
ldrsh r5, [lr, #12] @ r5 = 'd5'
|
||||||
ldrsh r7, [lr, #14] @ r7 = 'd7'
|
ldrsh r7, [lr, #14] @ r7 = 'd7'
|
||||||
|
|
||||||
add r0, r3, r5 @ r0 = 'z2'
|
add r0, r3, r5 @ r0 = 'z2'
|
||||||
add r2, r1, r7 @ r2 = 'z1'
|
add r2, r1, r7 @ r2 = 'z1'
|
||||||
add r4, r3, r7 @ r4 = 'z3'
|
add r4, r3, r7 @ r4 = 'z3'
|
||||||
add r6, r1, r5 @ r6 = 'z4'
|
add r6, r1, r5 @ r6 = 'z4'
|
||||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||||
add r8, r4, r6 @ r8 = z3 + z4
|
add r8, r4, r6 @ r8 = z3 + z4
|
||||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||||
mul r8, r9, r8 @ r8 = 'z5'
|
mul r8, r9, r8 @ r8 = 'z5'
|
||||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||||
mul r2, r10, r2 @ r2 = 'z1'
|
mul r2, r10, r2 @ r2 = 'z1'
|
||||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||||
mul r0, r9, r0 @ r0 = 'z2'
|
mul r0, r9, r0 @ r0 = 'z2'
|
||||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||||
add r7, r7, r4 @ r7 = tmp0
|
add r7, r7, r4 @ r7 = tmp0
|
||||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||||
add r5, r5, r6 @ r5 = tmp1
|
add r5, r5, r6 @ r5 = tmp1
|
||||||
add r3, r3, r4 @ r3 = tmp2
|
add r3, r3, r4 @ r3 = tmp2
|
||||||
add r1, r1, r6 @ r1 = tmp3
|
add r1, r1, r6 @ r1 = tmp3
|
||||||
|
|
||||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
|
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
|
||||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||||
|
|
||||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
|
||||||
add r8, r0, r1
|
add r8, r0, r1
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, # 0]
|
strh r8, [lr, # 0]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
|
||||||
sub r8, r0, r1
|
sub r8, r0, r1
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, #14]
|
strh r8, [lr, #14]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
|
||||||
add r8, r6, r3
|
add r8, r6, r3
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, # 2]
|
strh r8, [lr, # 2]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
|
||||||
sub r8, r6, r3
|
sub r8, r6, r3
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, #12]
|
strh r8, [lr, #12]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
|
||||||
add r8, r4, r5
|
add r8, r4, r5
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, # 4]
|
strh r8, [lr, # 4]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
|
||||||
sub r8, r4, r5
|
sub r8, r4, r5
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, #10]
|
strh r8, [lr, #10]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
|
||||||
add r8, r2, r7
|
add r8, r2, r7
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, # 6]
|
strh r8, [lr, # 6]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
|
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
|
||||||
sub r8, r2, r7
|
sub r8, r2, r7
|
||||||
add r8, r8, #(1<<10)
|
add r8, r8, #(1<<10)
|
||||||
mov r8, r8, asr #11
|
mov r8, r8, asr #11
|
||||||
strh r8, [lr, # 8]
|
strh r8, [lr, # 8]
|
||||||
|
|
||||||
@ End of row loop
|
@ End of row loop
|
||||||
add lr, lr, #16
|
add lr, lr, #16
|
||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne row_loop
|
bne row_loop
|
||||||
beq start_column_loop
|
beq start_column_loop
|
||||||
|
|
||||||
empty_row:
|
empty_row:
|
||||||
ldr r1, [r11, #FIX_0xFFFF_ID]
|
ldr r1, [r11, #FIX_0xFFFF_ID]
|
||||||
mov r0, r0, lsl #2
|
mov r0, r0, lsl #2
|
||||||
and r0, r0, r1
|
and r0, r0, r1
|
||||||
add r0, r0, r0, lsl #16
|
add r0, r0, r0, lsl #16
|
||||||
str r0, [lr, # 0]
|
str r0, [lr, # 0]
|
||||||
str r0, [lr, # 4]
|
str r0, [lr, # 4]
|
||||||
str r0, [lr, # 8]
|
str r0, [lr, # 8]
|
||||||
str r0, [lr, #12]
|
str r0, [lr, #12]
|
||||||
|
|
||||||
end_of_row_loop:
|
end_of_row_loop:
|
||||||
@ End of loop
|
@ End of loop
|
||||||
add lr, lr, #16
|
add lr, lr, #16
|
||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne row_loop
|
bne row_loop
|
||||||
|
|
||||||
start_column_loop:
|
start_column_loop:
|
||||||
@ Start of column loop
|
@ Start of column loop
|
||||||
ldr lr, [ sp ]
|
ldr lr, [ sp ]
|
||||||
mov r12, #8
|
mov r12, #8
|
||||||
column_loop:
|
column_loop:
|
||||||
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
|
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
|
||||||
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
|
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
|
||||||
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
|
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
|
||||||
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
|
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
|
||||||
|
|
||||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||||
add r1, r2, r6
|
add r1, r2, r6
|
||||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||||
mul r1, r3, r1 @ r1 = z1
|
mul r1, r3, r1 @ r1 = z1
|
||||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||||
mla r6, r5, r6, r1 @ r6 = tmp2
|
mla r6, r5, r6, r1 @ r6 = tmp2
|
||||||
add r5, r0, r4 @ r5 = tmp0
|
add r5, r0, r4 @ r5 = tmp0
|
||||||
mla r2, r3, r2, r1 @ r2 = tmp3
|
mla r2, r3, r2, r1 @ r2 = tmp3
|
||||||
sub r3, r0, r4 @ r3 = tmp1
|
sub r3, r0, r4 @ r3 = tmp1
|
||||||
|
|
||||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||||
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
|
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
|
||||||
|
|
||||||
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
|
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
|
||||||
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
|
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
|
||||||
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
|
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
|
||||||
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
|
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
|
||||||
|
|
||||||
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
|
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
|
||||||
orr r9, r1, r3
|
orr r9, r1, r3
|
||||||
orr r10, r5, r7
|
orr r10, r5, r7
|
||||||
orrs r10, r9, r10
|
orrs r10, r9, r10
|
||||||
beq empty_odd_column
|
beq empty_odd_column
|
||||||
|
|
||||||
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||||
|
|
||||||
add r0, r3, r5 @ r0 = 'z2'
|
add r0, r3, r5 @ r0 = 'z2'
|
||||||
add r2, r1, r7 @ r2 = 'z1'
|
add r2, r1, r7 @ r2 = 'z1'
|
||||||
add r4, r3, r7 @ r4 = 'z3'
|
add r4, r3, r7 @ r4 = 'z3'
|
||||||
add r6, r1, r5 @ r6 = 'z4'
|
add r6, r1, r5 @ r6 = 'z4'
|
||||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||||
add r8, r4, r6
|
add r8, r4, r6
|
||||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||||
mul r8, r9, r8 @ r8 = 'z5'
|
mul r8, r9, r8 @ r8 = 'z5'
|
||||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||||
mul r2, r10, r2 @ r2 = 'z1'
|
mul r2, r10, r2 @ r2 = 'z1'
|
||||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||||
mul r0, r9, r0 @ r0 = 'z2'
|
mul r0, r9, r0 @ r0 = 'z2'
|
||||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||||
add r7, r7, r4 @ r7 = tmp0
|
add r7, r7, r4 @ r7 = tmp0
|
||||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||||
add r5, r5, r6 @ r5 = tmp1
|
add r5, r5, r6 @ r5 = tmp1
|
||||||
add r3, r3, r4 @ r3 = tmp2
|
add r3, r3, r4 @ r3 = tmp2
|
||||||
add r1, r1, r6 @ r1 = tmp3
|
add r1, r1, r6 @ r1 = tmp3
|
||||||
|
|
||||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
|
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
|
||||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||||
|
|
||||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||||
add r8, r0, r1
|
add r8, r0, r1
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #( 0*8)]
|
strh r8, [lr, #( 0*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||||
sub r8, r0, r1
|
sub r8, r0, r1
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #(14*8)]
|
strh r8, [lr, #(14*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||||
add r8, r4, r3
|
add r8, r4, r3
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #( 2*8)]
|
strh r8, [lr, #( 2*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||||
sub r8, r4, r3
|
sub r8, r4, r3
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #(12*8)]
|
strh r8, [lr, #(12*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||||
add r8, r6, r5
|
add r8, r6, r5
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #( 4*8)]
|
strh r8, [lr, #( 4*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||||
sub r8, r6, r5
|
sub r8, r6, r5
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #(10*8)]
|
strh r8, [lr, #(10*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||||
add r8, r2, r7
|
add r8, r2, r7
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #( 6*8)]
|
strh r8, [lr, #( 6*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||||
sub r8, r2, r7
|
sub r8, r2, r7
|
||||||
add r8, r8, #(1<<17)
|
add r8, r8, #(1<<17)
|
||||||
mov r8, r8, asr #18
|
mov r8, r8, asr #18
|
||||||
strh r8, [lr, #( 8*8)]
|
strh r8, [lr, #( 8*8)]
|
||||||
|
|
||||||
@ End of row loop
|
@ End of row loop
|
||||||
add lr, lr, #2
|
add lr, lr, #2
|
||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne column_loop
|
bne column_loop
|
||||||
beq the_end
|
beq the_end
|
||||||
|
|
||||||
empty_odd_column:
|
empty_odd_column:
|
||||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||||
add r0, r0, #(1<<17)
|
add r0, r0, #(1<<17)
|
||||||
mov r0, r0, asr #18
|
mov r0, r0, asr #18
|
||||||
strh r0, [lr, #( 0*8)]
|
strh r0, [lr, #( 0*8)]
|
||||||
strh r0, [lr, #(14*8)]
|
strh r0, [lr, #(14*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||||
add r4, r4, #(1<<17)
|
add r4, r4, #(1<<17)
|
||||||
mov r4, r4, asr #18
|
mov r4, r4, asr #18
|
||||||
strh r4, [lr, #( 2*8)]
|
strh r4, [lr, #( 2*8)]
|
||||||
strh r4, [lr, #(12*8)]
|
strh r4, [lr, #(12*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||||
add r6, r6, #(1<<17)
|
add r6, r6, #(1<<17)
|
||||||
mov r6, r6, asr #18
|
mov r6, r6, asr #18
|
||||||
strh r6, [lr, #( 4*8)]
|
strh r6, [lr, #( 4*8)]
|
||||||
strh r6, [lr, #(10*8)]
|
strh r6, [lr, #(10*8)]
|
||||||
|
|
||||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||||
add r2, r2, #(1<<17)
|
add r2, r2, #(1<<17)
|
||||||
mov r2, r2, asr #18
|
mov r2, r2, asr #18
|
||||||
strh r2, [lr, #( 6*8)]
|
strh r2, [lr, #( 6*8)]
|
||||||
strh r2, [lr, #( 8*8)]
|
strh r2, [lr, #( 8*8)]
|
||||||
|
|
||||||
@ End of row loop
|
@ End of row loop
|
||||||
add lr, lr, #2
|
add lr, lr, #2
|
||||||
subs r12, r12, #1
|
subs r12, r12, #1
|
||||||
bne column_loop
|
bne column_loop
|
||||||
|
|
||||||
the_end:
|
the_end:
|
||||||
@ The end....
|
@ The end....
|
||||||
add sp, sp, #4
|
add sp, sp, #4
|
||||||
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
|
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
|
||||||
|
|
||||||
const_array:
|
const_array:
|
||||||
.align
|
.align
|
||||||
.word FIX_0_298631336
|
.word FIX_0_298631336
|
||||||
.word FIX_0_541196100
|
.word FIX_0_541196100
|
||||||
.word FIX_0_765366865
|
.word FIX_0_765366865
|
||||||
.word FIX_1_175875602
|
.word FIX_1_175875602
|
||||||
.word FIX_1_501321110
|
.word FIX_1_501321110
|
||||||
.word FIX_2_053119869
|
.word FIX_2_053119869
|
||||||
.word FIX_3_072711026
|
.word FIX_3_072711026
|
||||||
.word FIX_M_0_390180644
|
.word FIX_M_0_390180644
|
||||||
.word FIX_M_0_899976223
|
.word FIX_M_0_899976223
|
||||||
.word FIX_M_1_847759065
|
.word FIX_M_1_847759065
|
||||||
.word FIX_M_1_961570560
|
.word FIX_M_1_961570560
|
||||||
.word FIX_M_2_562915447
|
.word FIX_M_2_562915447
|
||||||
.word FIX_0xFFFF
|
.word FIX_0xFFFF
|
||||||
|
@ -51,9 +51,9 @@
|
|||||||
#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
|
#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
|
||||||
|
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.align
|
.align
|
||||||
.global simple_idct_ARM
|
.global simple_idct_ARM
|
||||||
|
|
||||||
simple_idct_ARM:
|
simple_idct_ARM:
|
||||||
@@ void simple_idct_ARM(int16_t *block)
|
@@ void simple_idct_ARM(int16_t *block)
|
||||||
@ -120,8 +120,8 @@ __b_evaluation:
|
|||||||
ldr r11, [r12, #offW7] @ R11=W7
|
ldr r11, [r12, #offW7] @ R11=W7
|
||||||
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
teq r2, #0 @ if null avoid muls
|
teq r2, #0 @ if null avoid muls
|
||||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
||||||
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
@ -147,7 +147,7 @@ __b_evaluation:
|
|||||||
@@ MAC16(b3, -W1, row[7]);
|
@@ MAC16(b3, -W1, row[7]);
|
||||||
@@ MAC16(b1, -W5, row[7]);
|
@@ MAC16(b1, -W5, row[7]);
|
||||||
mov r3, r3, asr #16 @ R3=ROWr16[5]
|
mov r3, r3, asr #16 @ R3=ROWr16[5]
|
||||||
teq r3, #0 @ if null avoid muls
|
teq r3, #0 @ if null avoid muls
|
||||||
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
|
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
|
||||||
mov r4, r4, asr #16 @ R4=ROWr16[7]
|
mov r4, r4, asr #16 @ R4=ROWr16[7]
|
||||||
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
|
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
|
||||||
@ -155,7 +155,7 @@ __b_evaluation:
|
|||||||
rsbne r3, r3, #0 @ R3=-ROWr16[5]
|
rsbne r3, r3, #0 @ R3=-ROWr16[5]
|
||||||
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1
|
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1
|
||||||
@@ R3 is free now
|
@@ R3 is free now
|
||||||
teq r4, #0 @ if null avoid muls
|
teq r4, #0 @ if null avoid muls
|
||||||
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
|
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
|
||||||
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
|
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
|
||||||
rsbne r4, r4, #0 @ R4=-ROWr16[7]
|
rsbne r4, r4, #0 @ R4=-ROWr16[7]
|
||||||
@ -187,7 +187,7 @@ __a_evaluation:
|
|||||||
teq r2, #0
|
teq r2, #0
|
||||||
beq __end_bef_a_evaluation
|
beq __end_bef_a_evaluation
|
||||||
|
|
||||||
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
||||||
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
||||||
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
||||||
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
||||||
@ -203,7 +203,7 @@ __a_evaluation:
|
|||||||
@@ a2 -= W4*row[4]
|
@@ a2 -= W4*row[4]
|
||||||
@@ a3 += W4*row[4]
|
@@ a3 += W4*row[4]
|
||||||
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
|
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
|
||||||
teq r11, #0 @ if null avoid muls
|
teq r11, #0 @ if null avoid muls
|
||||||
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
|
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
|
||||||
@@ R9 is free now
|
@@ R9 is free now
|
||||||
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
|
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
|
||||||
@ -212,7 +212,7 @@ __a_evaluation:
|
|||||||
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
|
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
|
||||||
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
|
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
|
||||||
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
|
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
|
||||||
teq r9, #0 @ if null avoid muls
|
teq r9, #0 @ if null avoid muls
|
||||||
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
|
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
|
||||||
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
|
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
|
||||||
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
|
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
|
||||||
@ -294,165 +294,165 @@ __end_row_loop:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ at this point, R0=block, R1-R11 (free)
|
@@ at this point, R0=block, R1-R11 (free)
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
|
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
|
||||||
__col_loop:
|
__col_loop:
|
||||||
|
|
||||||
__b_evaluation2:
|
__b_evaluation2:
|
||||||
@@ at this point, R0=block (temp), R1-R11 (free)
|
@@ at this point, R0=block (temp), R1-R11 (free)
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
@@ proceed with b0-b3 first, followed by a0-a3
|
@@ proceed with b0-b3 first, followed by a0-a3
|
||||||
@@ MUL16(b0, W1, col[8x1]);
|
@@ MUL16(b0, W1, col[8x1]);
|
||||||
@@ MUL16(b1, W3, col[8x1]);
|
@@ MUL16(b1, W3, col[8x1]);
|
||||||
@@ MUL16(b2, W5, col[8x1]);
|
@@ MUL16(b2, W5, col[8x1]);
|
||||||
@@ MUL16(b3, W7, col[8x1]);
|
@@ MUL16(b3, W7, col[8x1]);
|
||||||
@@ MAC16(b0, W3, col[8x3]);
|
@@ MAC16(b0, W3, col[8x3]);
|
||||||
@@ MAC16(b1, -W7, col[8x3]);
|
@@ MAC16(b1, -W7, col[8x3]);
|
||||||
@@ MAC16(b2, -W1, col[8x3]);
|
@@ MAC16(b2, -W1, col[8x3]);
|
||||||
@@ MAC16(b3, -W5, col[8x3]);
|
@@ MAC16(b3, -W5, col[8x3]);
|
||||||
ldr r8, [r12, #offW1] @ R8=W1
|
ldr r8, [r12, #offW1] @ R8=W1
|
||||||
ldrsh r7, [r14, #16]
|
ldrsh r7, [r14, #16]
|
||||||
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
ldr r9, [r12, #offW3] @ R9=W3
|
ldr r9, [r12, #offW3] @ R9=W3
|
||||||
ldr r10, [r12, #offW5] @ R10=W5
|
ldr r10, [r12, #offW5] @ R10=W5
|
||||||
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
ldr r11, [r12, #offW7] @ R11=W7
|
ldr r11, [r12, #offW7] @ R11=W7
|
||||||
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
ldrsh r2, [r14, #48]
|
ldrsh r2, [r14, #48]
|
||||||
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
teq r2, #0 @ if 0, then avoid muls
|
teq r2, #0 @ if 0, then avoid muls
|
||||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
||||||
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||||
|
|
||||||
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
||||||
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
|
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
@@ MAC16(b0, W5, col[5x8]);
|
@@ MAC16(b0, W5, col[5x8]);
|
||||||
@@ MAC16(b2, W7, col[5x8]);
|
@@ MAC16(b2, W7, col[5x8]);
|
||||||
@@ MAC16(b3, W3, col[5x8]);
|
@@ MAC16(b3, W3, col[5x8]);
|
||||||
@@ MAC16(b1, -W1, col[5x8]);
|
@@ MAC16(b1, -W1, col[5x8]);
|
||||||
@@ MAC16(b0, W7, col[7x8]);
|
@@ MAC16(b0, W7, col[7x8]);
|
||||||
@@ MAC16(b2, W3, col[7x8]);
|
@@ MAC16(b2, W3, col[7x8]);
|
||||||
@@ MAC16(b3, -W1, col[7x8]);
|
@@ MAC16(b3, -W1, col[7x8]);
|
||||||
@@ MAC16(b1, -W5, col[7x8]);
|
@@ MAC16(b1, -W5, col[7x8]);
|
||||||
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
|
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
|
||||||
teq r3, #0 @ if 0 then avoid muls
|
teq r3, #0 @ if 0 then avoid muls
|
||||||
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
|
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
|
||||||
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
|
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
|
||||||
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
|
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
|
||||||
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
|
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
|
||||||
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
|
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
|
||||||
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1
|
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1
|
||||||
@@ R3 is free now
|
@@ R3 is free now
|
||||||
teq r4, #0 @ if 0 then avoid muls
|
teq r4, #0 @ if 0 then avoid muls
|
||||||
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
|
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
|
||||||
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
|
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
|
||||||
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
|
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
|
||||||
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
|
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
|
||||||
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
|
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
|
||||||
@@ R4 is free now
|
@@ R4 is free now
|
||||||
__end_b_evaluation2:
|
__end_b_evaluation2:
|
||||||
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
||||||
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
|
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
|
|
||||||
__a_evaluation2:
|
__a_evaluation2:
|
||||||
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
|
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
|
||||||
@@ a1 = a0 + W6 * row[2];
|
@@ a1 = a0 + W6 * row[2];
|
||||||
@@ a2 = a0 - W6 * row[2];
|
@@ a2 = a0 - W6 * row[2];
|
||||||
@@ a3 = a0 - W2 * row[2];
|
@@ a3 = a0 - W2 * row[2];
|
||||||
@@ a0 = a0 + W2 * row[2];
|
@@ a0 = a0 + W2 * row[2];
|
||||||
ldrsh r6, [r14, #0]
|
ldrsh r6, [r14, #0]
|
||||||
ldr r9, [r12, #offW4] @ R9=W4
|
ldr r9, [r12, #offW4] @ R9=W4
|
||||||
mul r6, r9, r6 @ R6=W4*ROWr16[0]
|
mul r6, r9, r6 @ R6=W4*ROWr16[0]
|
||||||
ldr r10, [r12, #offW6] @ R10=W6
|
ldr r10, [r12, #offW6] @ R10=W6
|
||||||
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
|
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
|
||||||
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
|
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
|
||||||
mul r11, r10, r4 @ R11=W6*ROWr16[2]
|
mul r11, r10, r4 @ R11=W6*ROWr16[2]
|
||||||
ldr r8, [r12, #offW2] @ R8=W2
|
ldr r8, [r12, #offW2] @ R8=W2
|
||||||
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
||||||
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
|
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
|
||||||
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
||||||
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
||||||
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
||||||
|
|
||||||
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
|
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
|
||||||
@@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
|
@@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
@@ a0 += W4*row[4]
|
@@ a0 += W4*row[4]
|
||||||
@@ a1 -= W4*row[4]
|
@@ a1 -= W4*row[4]
|
||||||
@@ a2 -= W4*row[4]
|
@@ a2 -= W4*row[4]
|
||||||
@@ a3 += W4*row[4]
|
@@ a3 += W4*row[4]
|
||||||
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
|
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
|
||||||
teq r11, #0 @ if null avoid muls
|
teq r11, #0 @ if null avoid muls
|
||||||
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
|
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
|
||||||
@@ R9 is free now
|
@@ R9 is free now
|
||||||
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
|
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
|
||||||
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
|
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
|
||||||
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
|
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
|
||||||
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
|
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
|
||||||
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
|
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
|
||||||
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
|
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
|
||||||
teq r9, #0 @ if null avoid muls
|
teq r9, #0 @ if null avoid muls
|
||||||
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
|
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
|
||||||
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
|
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
|
||||||
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
|
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
|
||||||
@@ a0 += W6*row[6];
|
@@ a0 += W6*row[6];
|
||||||
@@ a3 -= W6*row[6];
|
@@ a3 -= W6*row[6];
|
||||||
@@ a1 -= W2*row[6];
|
@@ a1 -= W2*row[6];
|
||||||
@@ a2 += W2*row[6];
|
@@ a2 += W2*row[6];
|
||||||
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
|
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
|
||||||
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
|
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
|
||||||
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
|
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
|
||||||
__end_a_evaluation2:
|
__end_a_evaluation2:
|
||||||
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
|
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
|
||||||
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
|
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
|
@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
|
||||||
@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
|
@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
|
||||||
@@ col[16] = ((a2 + b2) >> COL_SHIFT);
|
@@ col[16] = ((a2 + b2) >> COL_SHIFT);
|
||||||
@@ col[24] = ((a3 + b3) >> COL_SHIFT);
|
@@ col[24] = ((a3 + b3) >> COL_SHIFT);
|
||||||
@@ col[32] = ((a3 - b3) >> COL_SHIFT);
|
@@ col[32] = ((a3 - b3) >> COL_SHIFT);
|
||||||
@@ col[40] = ((a2 - b2) >> COL_SHIFT);
|
@@ col[40] = ((a2 - b2) >> COL_SHIFT);
|
||||||
@@ col[48] = ((a1 - b1) >> COL_SHIFT);
|
@@ col[48] = ((a1 - b1) >> COL_SHIFT);
|
||||||
@@ col[56] = ((a0 - b0) >> COL_SHIFT);
|
@@ col[56] = ((a0 - b0) >> COL_SHIFT);
|
||||||
@@@@@ no optimisation here @@@@@
|
@@@@@ no optimisation here @@@@@
|
||||||
add r8, r6, r0 @ R8=a0+b0
|
add r8, r6, r0 @ R8=a0+b0
|
||||||
add r9, r2, r1 @ R9=a1+b1
|
add r9, r2, r1 @ R9=a1+b1
|
||||||
mov r8, r8, asr #COL_SHIFT
|
mov r8, r8, asr #COL_SHIFT
|
||||||
mov r9, r9, asr #COL_SHIFT
|
mov r9, r9, asr #COL_SHIFT
|
||||||
strh r8, [r14, #0]
|
strh r8, [r14, #0]
|
||||||
strh r9, [r14, #16]
|
strh r9, [r14, #16]
|
||||||
add r8, r3, r5 @ R8=a2+b2
|
add r8, r3, r5 @ R8=a2+b2
|
||||||
add r9, r4, r7 @ R9=a3+b3
|
add r9, r4, r7 @ R9=a3+b3
|
||||||
mov r8, r8, asr #COL_SHIFT
|
mov r8, r8, asr #COL_SHIFT
|
||||||
mov r9, r9, asr #COL_SHIFT
|
mov r9, r9, asr #COL_SHIFT
|
||||||
strh r8, [r14, #32]
|
strh r8, [r14, #32]
|
||||||
strh r9, [r14, #48]
|
strh r9, [r14, #48]
|
||||||
sub r8, r4, r7 @ R8=a3-b3
|
sub r8, r4, r7 @ R8=a3-b3
|
||||||
sub r9, r3, r5 @ R9=a2-b2
|
sub r9, r3, r5 @ R9=a2-b2
|
||||||
mov r8, r8, asr #COL_SHIFT
|
mov r8, r8, asr #COL_SHIFT
|
||||||
mov r9, r9, asr #COL_SHIFT
|
mov r9, r9, asr #COL_SHIFT
|
||||||
strh r8, [r14, #64]
|
strh r8, [r14, #64]
|
||||||
strh r9, [r14, #80]
|
strh r9, [r14, #80]
|
||||||
sub r8, r2, r1 @ R8=a1-b1
|
sub r8, r2, r1 @ R8=a1-b1
|
||||||
sub r9, r6, r0 @ R9=a0-b0
|
sub r9, r6, r0 @ R9=a0-b0
|
||||||
mov r8, r8, asr #COL_SHIFT
|
mov r8, r8, asr #COL_SHIFT
|
||||||
mov r9, r9, asr #COL_SHIFT
|
mov r9, r9, asr #COL_SHIFT
|
||||||
strh r8, [r14, #96]
|
strh r8, [r14, #96]
|
||||||
strh r9, [r14, #112]
|
strh r9, [r14, #112]
|
||||||
|
|
||||||
__end_col_loop:
|
__end_col_loop:
|
||||||
@@ at this point, R0-R11 (free)
|
@@ at this point, R0-R11 (free)
|
||||||
@@ R12=__const_ptr_, R14=&block[n]
|
@@ R12=__const_ptr_, R14=&block[n]
|
||||||
ldr r0, [sp, #0] @ R0=block
|
ldr r0, [sp, #0] @ R0=block
|
||||||
teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
|
teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
|
||||||
sub r14, r14, #2
|
sub r14, r14, #2
|
||||||
bne __col_loop
|
bne __col_loop
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -466,15 +466,15 @@ __end_simple_idct_ARM:
|
|||||||
|
|
||||||
@@ kind of sub-function, here not to overload the common case.
|
@@ kind of sub-function, here not to overload the common case.
|
||||||
__end_bef_a_evaluation:
|
__end_bef_a_evaluation:
|
||||||
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
||||||
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
||||||
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
||||||
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
||||||
bal __end_a_evaluation
|
bal __end_a_evaluation
|
||||||
|
|
||||||
|
|
||||||
__constant_ptr__: @@ see #defines at the beginning of the source code for values.
|
__constant_ptr__: @@ see #defines at the beginning of the source code for values.
|
||||||
.align
|
.align
|
||||||
.word W1
|
.word W1
|
||||||
.word W2
|
.word W2
|
||||||
.word W3
|
.word W3
|
||||||
|
@ -15,21 +15,21 @@ extern "C" {
|
|||||||
#include <sys/types.h> /* size_t */
|
#include <sys/types.h> /* size_t */
|
||||||
|
|
||||||
//FIXME the following 2 really dont belong in here
|
//FIXME the following 2 really dont belong in here
|
||||||
#define FFMPEG_VERSION_INT 0x000409
|
#define FFMPEG_VERSION_INT 0x000409
|
||||||
#define FFMPEG_VERSION "CVS"
|
#define FFMPEG_VERSION "CVS"
|
||||||
|
|
||||||
#define AV_STRINGIFY(s) AV_TOSTRING(s)
|
#define AV_STRINGIFY(s) AV_TOSTRING(s)
|
||||||
#define AV_TOSTRING(s) #s
|
#define AV_TOSTRING(s) #s
|
||||||
|
|
||||||
#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)
|
#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)
|
||||||
#define LIBAVCODEC_VERSION 51.0.0
|
#define LIBAVCODEC_VERSION 51.0.0
|
||||||
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
|
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
|
||||||
|
|
||||||
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
|
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
|
||||||
|
|
||||||
#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
|
#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
|
||||||
#define AV_TIME_BASE 1000000
|
#define AV_TIME_BASE 1000000
|
||||||
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
|
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
|
||||||
|
|
||||||
enum CodecID {
|
enum CodecID {
|
||||||
CODEC_ID_NONE,
|
CODEC_ID_NONE,
|
||||||
@ -362,9 +362,9 @@ extern int motion_estimation_method;
|
|||||||
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata
|
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata
|
||||||
|
|
||||||
/* Unsupported options :
|
/* Unsupported options :
|
||||||
* Syntax Arithmetic coding (SAC)
|
* Syntax Arithmetic coding (SAC)
|
||||||
* Reference Picture Selection
|
* Reference Picture Selection
|
||||||
* Independant Segment Decoding */
|
* Independant Segment Decoding */
|
||||||
/* /Fx */
|
/* /Fx */
|
||||||
/* codec capabilities */
|
/* codec capabilities */
|
||||||
|
|
||||||
@ -646,9 +646,9 @@ typedef struct AVPanScan{
|
|||||||
*/\
|
*/\
|
||||||
int8_t *ref_index[2];
|
int8_t *ref_index[2];
|
||||||
|
|
||||||
#define FF_QSCALE_TYPE_MPEG1 0
|
#define FF_QSCALE_TYPE_MPEG1 0
|
||||||
#define FF_QSCALE_TYPE_MPEG2 1
|
#define FF_QSCALE_TYPE_MPEG2 1
|
||||||
#define FF_QSCALE_TYPE_H264 2
|
#define FF_QSCALE_TYPE_H264 2
|
||||||
|
|
||||||
#define FF_BUFFER_TYPE_INTERNAL 1
|
#define FF_BUFFER_TYPE_INTERNAL 1
|
||||||
#define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user)
|
#define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user)
|
||||||
@ -684,9 +684,9 @@ typedef struct AVCLASS AVClass;
|
|||||||
struct AVCLASS {
|
struct AVCLASS {
|
||||||
const char* class_name;
|
const char* class_name;
|
||||||
const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext
|
const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext
|
||||||
or AVFormatContext, which begin with an AVClass.
|
or AVFormatContext, which begin with an AVClass.
|
||||||
Needed because av_log is in libavcodec and has no visibility
|
Needed because av_log is in libavcodec and has no visibility
|
||||||
of AVIn/OutputFormat */
|
of AVIn/OutputFormat */
|
||||||
struct AVOption *option;
|
struct AVOption *option;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1252,18 +1252,18 @@ typedef struct AVCodecContext {
|
|||||||
* result into program crash)
|
* result into program crash)
|
||||||
*/
|
*/
|
||||||
unsigned dsp_mask;
|
unsigned dsp_mask;
|
||||||
#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */
|
#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */
|
||||||
/* lower 16 bits - CPU features */
|
/* lower 16 bits - CPU features */
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
#define FF_MM_MMX 0x0001 /* standard MMX */
|
#define FF_MM_MMX 0x0001 /* standard MMX */
|
||||||
#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */
|
#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */
|
||||||
#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
|
#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
|
||||||
#define FF_MM_SSE 0x0008 /* SSE functions */
|
#define FF_MM_SSE 0x0008 /* SSE functions */
|
||||||
#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */
|
#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */
|
||||||
#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
|
#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
|
||||||
#endif /* HAVE_MMX */
|
#endif /* HAVE_MMX */
|
||||||
#ifdef HAVE_IWMMXT
|
#ifdef HAVE_IWMMXT
|
||||||
#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */
|
#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */
|
||||||
#endif /* HAVE_IWMMXT */
|
#endif /* HAVE_IWMMXT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2223,7 +2223,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
|
|||||||
#define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */
|
#define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */
|
||||||
#define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */
|
#define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */
|
||||||
int img_get_alpha_info(const AVPicture *src,
|
int img_get_alpha_info(const AVPicture *src,
|
||||||
int pix_fmt, int width, int height);
|
int pix_fmt, int width, int height);
|
||||||
|
|
||||||
/* convert among pixel formats */
|
/* convert among pixel formats */
|
||||||
int img_convert(AVPicture *dst, int dst_pix_fmt,
|
int img_convert(AVPicture *dst, int dst_pix_fmt,
|
||||||
|
@ -35,20 +35,20 @@ typedef struct ThreadContext{
|
|||||||
|
|
||||||
// it's odd Be never patented that :D
|
// it's odd Be never patented that :D
|
||||||
struct benaphore {
|
struct benaphore {
|
||||||
vint32 atom;
|
vint32 atom;
|
||||||
sem_id sem;
|
sem_id sem;
|
||||||
};
|
};
|
||||||
static inline int lock_ben(struct benaphore *ben)
|
static inline int lock_ben(struct benaphore *ben)
|
||||||
{
|
{
|
||||||
if (atomic_add(&ben->atom, 1) > 0)
|
if (atomic_add(&ben->atom, 1) > 0)
|
||||||
return acquire_sem(ben->sem);
|
return acquire_sem(ben->sem);
|
||||||
return B_OK;
|
return B_OK;
|
||||||
}
|
}
|
||||||
static inline int unlock_ben(struct benaphore *ben)
|
static inline int unlock_ben(struct benaphore *ben)
|
||||||
{
|
{
|
||||||
if (atomic_add(&ben->atom, -1) > 1)
|
if (atomic_add(&ben->atom, -1) > 1)
|
||||||
return release_sem(ben->sem);
|
return release_sem(ben->sem);
|
||||||
return B_OK;
|
return B_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct benaphore av_thread_lib_ben;
|
static struct benaphore av_thread_lib_ben;
|
||||||
@ -155,25 +155,25 @@ fail:
|
|||||||
|
|
||||||
int avcodec_thread_lock_lib(void)
|
int avcodec_thread_lock_lib(void)
|
||||||
{
|
{
|
||||||
return lock_ben(&av_thread_lib_ben);
|
return lock_ben(&av_thread_lib_ben);
|
||||||
}
|
}
|
||||||
|
|
||||||
int avcodec_thread_unlock_lib(void)
|
int avcodec_thread_unlock_lib(void)
|
||||||
{
|
{
|
||||||
return unlock_ben(&av_thread_lib_ben);
|
return unlock_ben(&av_thread_lib_ben);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* our versions of _init and _fini (which are called by those actually from crt.o) */
|
/* our versions of _init and _fini (which are called by those actually from crt.o) */
|
||||||
|
|
||||||
void initialize_after(void)
|
void initialize_after(void)
|
||||||
{
|
{
|
||||||
av_thread_lib_ben.atom = 0;
|
av_thread_lib_ben.atom = 0;
|
||||||
av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
|
av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
|
||||||
}
|
}
|
||||||
|
|
||||||
void uninitialize_before(void)
|
void uninitialize_before(void)
|
||||||
{
|
{
|
||||||
delete_sem(av_thread_lib_ben.sem);
|
delete_sem(av_thread_lib_ben.sem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg)
|
|||||||
{
|
{
|
||||||
int bit= get_bits1(s);
|
int bit= get_bits1(s);
|
||||||
if(!bit)
|
if(!bit)
|
||||||
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
|
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
|
||||||
|
|
||||||
return bit;
|
return bit;
|
||||||
}
|
}
|
||||||
|
@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM {
|
|||||||
# ifdef __GNUC__
|
# ifdef __GNUC__
|
||||||
static inline uint32_t unaligned32(const void *v) {
|
static inline uint32_t unaligned32(const void *v) {
|
||||||
struct Unaligned {
|
struct Unaligned {
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
return ((const struct Unaligned *) v)->i;
|
return ((const struct Unaligned *) v)->i;
|
||||||
@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
|
|||||||
bit_buf = (bit_buf<<n) | value;
|
bit_buf = (bit_buf<<n) | value;
|
||||||
bit_left-=n;
|
bit_left-=n;
|
||||||
} else {
|
} else {
|
||||||
bit_buf<<=bit_left;
|
bit_buf<<=bit_left;
|
||||||
bit_buf |= value >> (n - bit_left);
|
bit_buf |= value >> (n - bit_left);
|
||||||
#ifdef UNALIGNED_STORES_ARE_BAD
|
#ifdef UNALIGNED_STORES_ARE_BAD
|
||||||
if (3 & (intptr_t) s->buf_ptr) {
|
if (3 & (intptr_t) s->buf_ptr) {
|
||||||
@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
|
|||||||
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
|
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
|
||||||
//printf("bitbuf = %08x\n", bit_buf);
|
//printf("bitbuf = %08x\n", bit_buf);
|
||||||
s->buf_ptr+=4;
|
s->buf_ptr+=4;
|
||||||
bit_left+=32 - n;
|
bit_left+=32 - n;
|
||||||
bit_buf = value;
|
bit_buf = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -212,21 +212,21 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
|
|||||||
# ifdef ALIGNED_BITSTREAM_WRITER
|
# ifdef ALIGNED_BITSTREAM_WRITER
|
||||||
# if defined(ARCH_X86) || defined(ARCH_X86_64)
|
# if defined(ARCH_X86) || defined(ARCH_X86_64)
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movl %0, %%ecx \n\t"
|
"movl %0, %%ecx \n\t"
|
||||||
"xorl %%eax, %%eax \n\t"
|
"xorl %%eax, %%eax \n\t"
|
||||||
"shrdl %%cl, %1, %%eax \n\t"
|
"shrdl %%cl, %1, %%eax \n\t"
|
||||||
"shrl %%cl, %1 \n\t"
|
"shrl %%cl, %1 \n\t"
|
||||||
"movl %0, %%ecx \n\t"
|
"movl %0, %%ecx \n\t"
|
||||||
"shrl $3, %%ecx \n\t"
|
"shrl $3, %%ecx \n\t"
|
||||||
"andl $0xFFFFFFFC, %%ecx \n\t"
|
"andl $0xFFFFFFFC, %%ecx \n\t"
|
||||||
"bswapl %1 \n\t"
|
"bswapl %1 \n\t"
|
||||||
"orl %1, (%2, %%ecx) \n\t"
|
"orl %1, (%2, %%ecx) \n\t"
|
||||||
"bswapl %%eax \n\t"
|
"bswapl %%eax \n\t"
|
||||||
"addl %3, %0 \n\t"
|
"addl %3, %0 \n\t"
|
||||||
"movl %%eax, 4(%2, %%ecx) \n\t"
|
"movl %%eax, 4(%2, %%ecx) \n\t"
|
||||||
: "=&r" (s->index), "=&r" (value)
|
: "=&r" (s->index), "=&r" (value)
|
||||||
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
|
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
|
||||||
: "%eax", "%ecx"
|
: "%eax", "%ecx"
|
||||||
);
|
);
|
||||||
# else
|
# else
|
||||||
int index= s->index;
|
int index= s->index;
|
||||||
@ -243,20 +243,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
|
|||||||
# else //ALIGNED_BITSTREAM_WRITER
|
# else //ALIGNED_BITSTREAM_WRITER
|
||||||
# if defined(ARCH_X86) || defined(ARCH_X86_64)
|
# if defined(ARCH_X86) || defined(ARCH_X86_64)
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movl $7, %%ecx \n\t"
|
"movl $7, %%ecx \n\t"
|
||||||
"andl %0, %%ecx \n\t"
|
"andl %0, %%ecx \n\t"
|
||||||
"addl %3, %%ecx \n\t"
|
"addl %3, %%ecx \n\t"
|
||||||
"negl %%ecx \n\t"
|
"negl %%ecx \n\t"
|
||||||
"shll %%cl, %1 \n\t"
|
"shll %%cl, %1 \n\t"
|
||||||
"bswapl %1 \n\t"
|
"bswapl %1 \n\t"
|
||||||
"movl %0, %%ecx \n\t"
|
"movl %0, %%ecx \n\t"
|
||||||
"shrl $3, %%ecx \n\t"
|
"shrl $3, %%ecx \n\t"
|
||||||
"orl %1, (%%ecx, %2) \n\t"
|
"orl %1, (%%ecx, %2) \n\t"
|
||||||
"addl %3, %0 \n\t"
|
"addl %3, %0 \n\t"
|
||||||
"movl $0, 4(%%ecx, %2) \n\t"
|
"movl $0, 4(%%ecx, %2) \n\t"
|
||||||
: "=&r" (s->index), "=&r" (value)
|
: "=&r" (s->index), "=&r" (value)
|
||||||
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
|
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
|
||||||
: "%ecx"
|
: "%ecx"
|
||||||
);
|
);
|
||||||
# else
|
# else
|
||||||
int index= s->index;
|
int index= s->index;
|
||||||
@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
|
|||||||
static inline uint8_t* pbBufPtr(PutBitContext *s)
|
static inline uint8_t* pbBufPtr(PutBitContext *s)
|
||||||
{
|
{
|
||||||
#ifdef ALT_BITSTREAM_WRITER
|
#ifdef ALT_BITSTREAM_WRITER
|
||||||
return s->buf + (s->index>>3);
|
return s->buf + (s->index>>3);
|
||||||
#else
|
#else
|
||||||
return s->buf_ptr;
|
return s->buf_ptr;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -290,10 +290,10 @@ static inline void skip_put_bytes(PutBitContext *s, int n){
|
|||||||
assert((put_bits_count(s)&7)==0);
|
assert((put_bits_count(s)&7)==0);
|
||||||
#ifdef ALT_BITSTREAM_WRITER
|
#ifdef ALT_BITSTREAM_WRITER
|
||||||
FIXME may need some cleaning of the buffer
|
FIXME may need some cleaning of the buffer
|
||||||
s->index += n<<3;
|
s->index += n<<3;
|
||||||
#else
|
#else
|
||||||
assert(s->bit_left==32);
|
assert(s->bit_left==32);
|
||||||
s->buf_ptr += n;
|
s->buf_ptr += n;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
|
|||||||
static inline int unaligned32_be(const void *v)
|
static inline int unaligned32_be(const void *v)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_ALIGN
|
#ifdef CONFIG_ALIGN
|
||||||
const uint8_t *p=v;
|
const uint8_t *p=v;
|
||||||
return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
|
return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
|
||||||
#else
|
#else
|
||||||
return be2me_32( unaligned32(v)); //original
|
return be2me_32( unaligned32(v)); //original
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){
|
|||||||
#if defined(ARCH_X86) || defined(ARCH_X86_64)
|
#if defined(ARCH_X86) || defined(ARCH_X86_64)
|
||||||
# define SKIP_CACHE(name, gb, num)\
|
# define SKIP_CACHE(name, gb, num)\
|
||||||
asm(\
|
asm(\
|
||||||
"shldl %2, %1, %0 \n\t"\
|
"shldl %2, %1, %0 \n\t"\
|
||||||
"shll %2, %1 \n\t"\
|
"shll %2, %1 \n\t"\
|
||||||
: "+r" (name##_cache0), "+r" (name##_cache1)\
|
: "+r" (name##_cache0), "+r" (name##_cache1)\
|
||||||
: "Ic" ((uint8_t)num)\
|
: "Ic" ((uint8_t)num)\
|
||||||
);
|
);
|
||||||
|
@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx,
|
|||||||
uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
|
uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
|
||||||
uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
|
uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
|
||||||
for(x=0; x<avctx->width; x+=4){
|
for(x=0; x<avctx->width; x+=4){
|
||||||
luma[3] = get_bits(&a->gb, 5) << 3;
|
luma[3] = get_bits(&a->gb, 5) << 3;
|
||||||
luma[2] = get_bits(&a->gb, 5) << 3;
|
luma[2] = get_bits(&a->gb, 5) << 3;
|
||||||
luma[1] = get_bits(&a->gb, 5) << 3;
|
luma[1] = get_bits(&a->gb, 5) << 3;
|
||||||
luma[0] = get_bits(&a->gb, 5) << 3;
|
luma[0] = get_bits(&a->gb, 5) << 3;
|
||||||
luma+= 4;
|
luma+= 4;
|
||||||
*(cb++) = get_bits(&a->gb, 6) << 2;
|
*(cb++) = get_bits(&a->gb, 6) << 2;
|
||||||
*(cr++) = get_bits(&a->gb, 6) << 2;
|
*(cr++) = get_bits(&a->gb, 6) << 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,14 +65,14 @@ int64_t gettime(void)
|
|||||||
static short idct_mmx_perm[64];
|
static short idct_mmx_perm[64];
|
||||||
|
|
||||||
static short idct_simple_mmx_perm[64]={
|
static short idct_simple_mmx_perm[64]={
|
||||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||||
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
||||||
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
||||||
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
||||||
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
||||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||||
};
|
};
|
||||||
|
|
||||||
void idct_mmx_init(void)
|
void idct_mmx_init(void)
|
||||||
@ -81,8 +81,8 @@ void idct_mmx_init(void)
|
|||||||
|
|
||||||
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
|
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
|
||||||
for (i = 0; i < 64; i++) {
|
for (i = 0; i < 64; i++) {
|
||||||
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
||||||
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
|
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -151,7 +151,7 @@ void dct_error(const char *name, int is_idct,
|
|||||||
for(i=0;i<64;i++)
|
for(i=0;i<64;i++)
|
||||||
block[idct_simple_mmx_perm[i]] = block1[i];
|
block[idct_simple_mmx_perm[i]] = block1[i];
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
for(i=0; i<64; i++)
|
for(i=0; i<64; i++)
|
||||||
block[i]= block1[i];
|
block[i]= block1[i];
|
||||||
}
|
}
|
||||||
@ -186,9 +186,9 @@ void dct_error(const char *name, int is_idct,
|
|||||||
if (v > err_inf)
|
if (v > err_inf)
|
||||||
err_inf = v;
|
err_inf = v;
|
||||||
err2 += v * v;
|
err2 += v * v;
|
||||||
sysErr[i] += block[i] - block1[i];
|
sysErr[i] += block[i] - block1[i];
|
||||||
blockSumErr += v;
|
blockSumErr += v;
|
||||||
if( abs(block[i])>maxout) maxout=abs(block[i]);
|
if( abs(block[i])>maxout) maxout=abs(block[i]);
|
||||||
}
|
}
|
||||||
if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
|
if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
|
||||||
#if 0 // print different matrix pairs
|
#if 0 // print different matrix pairs
|
||||||
@ -209,7 +209,7 @@ void dct_error(const char *name, int is_idct,
|
|||||||
|
|
||||||
#if 1 // dump systematic errors
|
#if 1 // dump systematic errors
|
||||||
for(i=0; i<64; i++){
|
for(i=0; i<64; i++){
|
||||||
if(i%8==0) printf("\n");
|
if(i%8==0) printf("\n");
|
||||||
printf("%5d ", (int)sysErr[i]);
|
printf("%5d ", (int)sysErr[i]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
@ -503,7 +503,7 @@ int main(int argc, char **argv)
|
|||||||
dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
|
dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
|
||||||
// dct_error("ODIVX-C", 1, odivx_idct_c, idct);
|
// dct_error("ODIVX-C", 1, odivx_idct_c, idct);
|
||||||
//printf(" test against odivx idct\n");
|
//printf(" test against odivx idct\n");
|
||||||
// dct_error("REF", 1, idct, odivx_idct_c);
|
// dct_error("REF", 1, idct, odivx_idct_c);
|
||||||
// dct_error("INT", 1, j_rev_dct, odivx_idct_c);
|
// dct_error("INT", 1, j_rev_dct, odivx_idct_c);
|
||||||
// dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
|
// dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
|
||||||
// dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
|
// dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
|
||||||
|
@ -124,14 +124,14 @@ const uint32_t inverse[256]={
|
|||||||
|
|
||||||
/* Input permutation for the simple_idct_mmx */
|
/* Input permutation for the simple_idct_mmx */
|
||||||
static const uint8_t simple_mmx_permutation[64]={
|
static const uint8_t simple_mmx_permutation[64]={
|
||||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||||
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
||||||
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
||||||
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
||||||
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
||||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int pix_sum_c(uint8_t * pix, int line_size)
|
static int pix_sum_c(uint8_t * pix, int line_size)
|
||||||
@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size)
|
|||||||
|
|
||||||
s = 0;
|
s = 0;
|
||||||
for (i = 0; i < 16; i++) {
|
for (i = 0; i < 16; i++) {
|
||||||
for (j = 0; j < 16; j += 8) {
|
for (j = 0; j < 16; j += 8) {
|
||||||
s += pix[0];
|
s += pix[0];
|
||||||
s += pix[1];
|
s += pix[1];
|
||||||
s += pix[2];
|
s += pix[2];
|
||||||
s += pix[3];
|
s += pix[3];
|
||||||
s += pix[4];
|
s += pix[4];
|
||||||
s += pix[5];
|
s += pix[5];
|
||||||
s += pix[6];
|
s += pix[6];
|
||||||
s += pix[7];
|
s += pix[7];
|
||||||
pix += 8;
|
pix += 8;
|
||||||
}
|
}
|
||||||
pix += line_size - 16;
|
pix += line_size - 16;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
|
|||||||
|
|
||||||
s = 0;
|
s = 0;
|
||||||
for (i = 0; i < 16; i++) {
|
for (i = 0; i < 16; i++) {
|
||||||
for (j = 0; j < 16; j += 8) {
|
for (j = 0; j < 16; j += 8) {
|
||||||
#if 0
|
#if 0
|
||||||
s += sq[pix[0]];
|
s += sq[pix[0]];
|
||||||
s += sq[pix[1]];
|
s += sq[pix[1]];
|
||||||
s += sq[pix[2]];
|
s += sq[pix[2]];
|
||||||
s += sq[pix[3]];
|
s += sq[pix[3]];
|
||||||
s += sq[pix[4]];
|
s += sq[pix[4]];
|
||||||
s += sq[pix[5]];
|
s += sq[pix[5]];
|
||||||
s += sq[pix[6]];
|
s += sq[pix[6]];
|
||||||
s += sq[pix[7]];
|
s += sq[pix[7]];
|
||||||
#else
|
#else
|
||||||
#if LONG_MAX > 2147483647
|
#if LONG_MAX > 2147483647
|
||||||
register uint64_t x=*(uint64_t*)pix;
|
register uint64_t x=*(uint64_t*)pix;
|
||||||
s += sq[x&0xff];
|
s += sq[x&0xff];
|
||||||
s += sq[(x>>8)&0xff];
|
s += sq[(x>>8)&0xff];
|
||||||
s += sq[(x>>16)&0xff];
|
s += sq[(x>>16)&0xff];
|
||||||
s += sq[(x>>24)&0xff];
|
s += sq[(x>>24)&0xff];
|
||||||
s += sq[(x>>32)&0xff];
|
s += sq[(x>>32)&0xff];
|
||||||
s += sq[(x>>40)&0xff];
|
s += sq[(x>>40)&0xff];
|
||||||
s += sq[(x>>48)&0xff];
|
s += sq[(x>>48)&0xff];
|
||||||
s += sq[(x>>56)&0xff];
|
s += sq[(x>>56)&0xff];
|
||||||
#else
|
#else
|
||||||
register uint32_t x=*(uint32_t*)pix;
|
register uint32_t x=*(uint32_t*)pix;
|
||||||
s += sq[x&0xff];
|
s += sq[x&0xff];
|
||||||
s += sq[(x>>8)&0xff];
|
s += sq[(x>>8)&0xff];
|
||||||
s += sq[(x>>16)&0xff];
|
s += sq[(x>>16)&0xff];
|
||||||
s += sq[(x>>24)&0xff];
|
s += sq[(x>>24)&0xff];
|
||||||
x=*(uint32_t*)(pix+4);
|
x=*(uint32_t*)(pix+4);
|
||||||
s += sq[x&0xff];
|
s += sq[x&0xff];
|
||||||
s += sq[(x>>8)&0xff];
|
s += sq[(x>>8)&0xff];
|
||||||
@ -197,9 +197,9 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
|
|||||||
s += sq[(x>>24)&0xff];
|
s += sq[(x>>24)&0xff];
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
pix += 8;
|
pix += 8;
|
||||||
}
|
}
|
||||||
pix += line_size - 16;
|
pix += line_size - 16;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -410,7 +410,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
||||||
const uint8_t *s2, int stride){
|
const uint8_t *s2, int stride){
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* read the pixels */
|
/* read the pixels */
|
||||||
@ -431,7 +431,7 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
|||||||
|
|
||||||
|
|
||||||
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||||
@ -453,7 +453,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||||
@ -471,7 +471,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||||
int line_size)
|
int line_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||||
@ -1214,7 +1214,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
|
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1225,7 +1225,7 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
|
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1236,7 +1236,7 @@ static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
|
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1247,7 +1247,7 @@ static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
|
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1258,7 +1258,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1269,7 +1269,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
|
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1280,7 +1280,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1291,7 +1291,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
|
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1311,7 +1311,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
|
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1322,7 +1322,7 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
|
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1333,7 +1333,7 @@ static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
|
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1344,7 +1344,7 @@ static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1355,7 +1355,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1366,7 +1366,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
|
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1377,7 +1377,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -1388,7 +1388,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
|
|||||||
int i,j;
|
int i,j;
|
||||||
for (i=0; i < height; i++) {
|
for (i=0; i < height; i++) {
|
||||||
for (j=0; j < width; j++) {
|
for (j=0; j < width; j++) {
|
||||||
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||||
}
|
}
|
||||||
src += stride;
|
src += stride;
|
||||||
dst += stride;
|
dst += stride;
|
||||||
@ -3666,15 +3666,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
if(avctx->dct_algo==FF_DCT_FASTINT) {
|
if(avctx->dct_algo==FF_DCT_FASTINT) {
|
||||||
c->fdct = fdct_ifast;
|
c->fdct = fdct_ifast;
|
||||||
c->fdct248 = fdct_ifast248;
|
c->fdct248 = fdct_ifast248;
|
||||||
}
|
}
|
||||||
else if(avctx->dct_algo==FF_DCT_FAAN) {
|
else if(avctx->dct_algo==FF_DCT_FAAN) {
|
||||||
c->fdct = ff_faandct;
|
c->fdct = ff_faandct;
|
||||||
c->fdct248 = ff_faandct248;
|
c->fdct248 = ff_faandct248;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
||||||
c->fdct248 = ff_fdct248_islow;
|
c->fdct248 = ff_fdct248_islow;
|
||||||
}
|
}
|
||||||
#endif //CONFIG_ENCODERS
|
#endif //CONFIG_ENCODERS
|
||||||
|
|
||||||
|
@ -151,7 +151,7 @@ typedef struct DSPContext {
|
|||||||
* global motion compensation.
|
* global motion compensation.
|
||||||
*/
|
*/
|
||||||
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
||||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
||||||
int (*pix_sum)(uint8_t * pix, int line_size);
|
int (*pix_sum)(uint8_t * pix, int line_size);
|
||||||
int (*pix_norm1)(uint8_t * pix, int line_size);
|
int (*pix_norm1)(uint8_t * pix, int line_size);
|
||||||
@ -342,7 +342,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
|
|||||||
|
|
||||||
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
|
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
|
||||||
|
|
||||||
#define BYTE_VEC32(c) ((c)*0x01010101UL)
|
#define BYTE_VEC32(c) ((c)*0x01010101UL)
|
||||||
|
|
||||||
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
|
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
|
||||||
{
|
{
|
||||||
|
@ -194,7 +194,7 @@ channels_multi (int flags)
|
|||||||
{
|
{
|
||||||
if (flags & DTS_LFE)
|
if (flags & DTS_LFE)
|
||||||
return 6;
|
return 6;
|
||||||
else if (flags & 1) /* center channel */
|
else if (flags & 1) /* center channel */
|
||||||
return 5;
|
return 5;
|
||||||
else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R)
|
else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R)
|
||||||
return 4;
|
return 4;
|
||||||
|
354
libavcodec/dv.c
354
libavcodec/dv.c
@ -84,7 +84,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
|
|||||||
j = perm[i];
|
j = perm[i];
|
||||||
s->dv_idct_shift[0][0][q][j] =
|
s->dv_idct_shift[0][0][q][j] =
|
||||||
dv_quant_shifts[q][dv_88_areas[i]] + 1;
|
dv_quant_shifts[q][dv_88_areas[i]] + 1;
|
||||||
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
|
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 248DCT */
|
/* 248DCT */
|
||||||
@ -92,7 +92,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
|
|||||||
/* 248 table */
|
/* 248 table */
|
||||||
s->dv_idct_shift[0][1][q][i] =
|
s->dv_idct_shift[0][1][q][i] =
|
||||||
dv_quant_shifts[q][dv_248_areas[i]] + 1;
|
dv_quant_shifts[q][dv_248_areas[i]] + 1;
|
||||||
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
|
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -114,35 +114,35 @@ static int dvvideo_init(AVCodecContext *avctx)
|
|||||||
done = 1;
|
done = 1;
|
||||||
|
|
||||||
dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
|
dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
|
||||||
if (!dv_vlc_map)
|
if (!dv_vlc_map)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
/* dv_anchor lets each thread know its Id */
|
/* dv_anchor lets each thread know its Id */
|
||||||
dv_anchor = av_malloc(12*27*sizeof(void*));
|
dv_anchor = av_malloc(12*27*sizeof(void*));
|
||||||
if (!dv_anchor) {
|
if (!dv_anchor) {
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
for (i=0; i<12*27; i++)
|
for (i=0; i<12*27; i++)
|
||||||
dv_anchor[i] = (void*)(size_t)i;
|
dv_anchor[i] = (void*)(size_t)i;
|
||||||
|
|
||||||
/* it's faster to include sign bit in a generic VLC parsing scheme */
|
/* it's faster to include sign bit in a generic VLC parsing scheme */
|
||||||
for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
|
for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
|
||||||
new_dv_vlc_bits[j] = dv_vlc_bits[i];
|
new_dv_vlc_bits[j] = dv_vlc_bits[i];
|
||||||
new_dv_vlc_len[j] = dv_vlc_len[i];
|
new_dv_vlc_len[j] = dv_vlc_len[i];
|
||||||
new_dv_vlc_run[j] = dv_vlc_run[i];
|
new_dv_vlc_run[j] = dv_vlc_run[i];
|
||||||
new_dv_vlc_level[j] = dv_vlc_level[i];
|
new_dv_vlc_level[j] = dv_vlc_level[i];
|
||||||
|
|
||||||
if (dv_vlc_level[i]) {
|
if (dv_vlc_level[i]) {
|
||||||
new_dv_vlc_bits[j] <<= 1;
|
new_dv_vlc_bits[j] <<= 1;
|
||||||
new_dv_vlc_len[j]++;
|
new_dv_vlc_len[j]++;
|
||||||
|
|
||||||
j++;
|
j++;
|
||||||
new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
|
new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
|
||||||
new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
|
new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
|
||||||
new_dv_vlc_run[j] = dv_vlc_run[i];
|
new_dv_vlc_run[j] = dv_vlc_run[i];
|
||||||
new_dv_vlc_level[j] = -dv_vlc_level[i];
|
new_dv_vlc_level[j] = -dv_vlc_level[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NOTE: as a trick, we use the fact the no codes are unused
|
/* NOTE: as a trick, we use the fact the no codes are unused
|
||||||
to accelerate the parsing of partial codes */
|
to accelerate the parsing of partial codes */
|
||||||
@ -150,10 +150,10 @@ static int dvvideo_init(AVCodecContext *avctx)
|
|||||||
new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
|
new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
|
||||||
|
|
||||||
dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
|
dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
|
||||||
if (!dv_rl_vlc) {
|
if (!dv_rl_vlc) {
|
||||||
av_free(dv_anchor);
|
av_free(dv_anchor);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
for(i = 0; i < dv_vlc.table_size; i++){
|
for(i = 0; i < dv_vlc.table_size; i++){
|
||||||
int code= dv_vlc.table[i][0];
|
int code= dv_vlc.table[i][0];
|
||||||
int len = dv_vlc.table[i][1];
|
int len = dv_vlc.table[i][1];
|
||||||
@ -170,49 +170,49 @@ static int dvvideo_init(AVCodecContext *avctx)
|
|||||||
dv_rl_vlc[i].level = level;
|
dv_rl_vlc[i].level = level;
|
||||||
dv_rl_vlc[i].run = run;
|
dv_rl_vlc[i].run = run;
|
||||||
}
|
}
|
||||||
free_vlc(&dv_vlc);
|
free_vlc(&dv_vlc);
|
||||||
|
|
||||||
for (i = 0; i < NB_DV_VLC - 1; i++) {
|
for (i = 0; i < NB_DV_VLC - 1; i++) {
|
||||||
if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
|
if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
|
||||||
continue;
|
continue;
|
||||||
#ifdef DV_CODEC_TINY_TARGET
|
#ifdef DV_CODEC_TINY_TARGET
|
||||||
if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
|
if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
|
||||||
continue;
|
continue;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
|
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
|
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
|
||||||
(!!dv_vlc_level[i]);
|
(!!dv_vlc_level[i]);
|
||||||
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
|
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
|
||||||
(!!dv_vlc_level[i]);
|
(!!dv_vlc_level[i]);
|
||||||
}
|
}
|
||||||
for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
|
for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
|
||||||
#ifdef DV_CODEC_TINY_TARGET
|
#ifdef DV_CODEC_TINY_TARGET
|
||||||
for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
|
for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
|
||||||
if (dv_vlc_map[i][j].size == 0) {
|
if (dv_vlc_map[i][j].size == 0) {
|
||||||
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
||||||
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
||||||
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
||||||
dv_vlc_map[0][j].size;
|
dv_vlc_map[0][j].size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
|
for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
|
||||||
if (dv_vlc_map[i][j].size == 0) {
|
if (dv_vlc_map[i][j].size == 0) {
|
||||||
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
||||||
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
||||||
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
||||||
dv_vlc_map[0][j].size;
|
dv_vlc_map[0][j].size;
|
||||||
}
|
}
|
||||||
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
|
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
|
||||||
dv_vlc_map[i][j].vlc | 1;
|
dv_vlc_map[i][j].vlc | 1;
|
||||||
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
|
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
|
||||||
dv_vlc_map[i][j].size;
|
dv_vlc_map[i][j].size;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generic DSP setup */
|
/* Generic DSP setup */
|
||||||
@ -241,7 +241,7 @@ static int dvvideo_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
/* FIXME: I really don't think this should be here */
|
/* FIXME: I really don't think this should be here */
|
||||||
if (dv_codec_profile(avctx))
|
if (dv_codec_profile(avctx))
|
||||||
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
|
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
|
||||||
avctx->coded_frame = &s->picture;
|
avctx->coded_frame = &s->picture;
|
||||||
s->avctx= avctx;
|
s->avctx= avctx;
|
||||||
|
|
||||||
@ -306,9 +306,9 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
|
|||||||
/* if we must parse a partial vlc, we do it here */
|
/* if we must parse a partial vlc, we do it here */
|
||||||
if (partial_bit_count > 0) {
|
if (partial_bit_count > 0) {
|
||||||
re_cache = ((unsigned)re_cache >> partial_bit_count) |
|
re_cache = ((unsigned)re_cache >> partial_bit_count) |
|
||||||
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
|
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
|
||||||
re_index -= partial_bit_count;
|
re_index -= partial_bit_count;
|
||||||
mb->partial_bit_count = 0;
|
mb->partial_bit_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the AC coefficients until last_index is reached */
|
/* get the AC coefficients until last_index is reached */
|
||||||
@ -318,30 +318,30 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
|
|||||||
#endif
|
#endif
|
||||||
/* our own optimized GET_RL_VLC */
|
/* our own optimized GET_RL_VLC */
|
||||||
index = NEG_USR32(re_cache, TEX_VLC_BITS);
|
index = NEG_USR32(re_cache, TEX_VLC_BITS);
|
||||||
vlc_len = dv_rl_vlc[index].len;
|
vlc_len = dv_rl_vlc[index].len;
|
||||||
if (vlc_len < 0) {
|
if (vlc_len < 0) {
|
||||||
index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
|
index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
|
||||||
vlc_len = TEX_VLC_BITS - vlc_len;
|
vlc_len = TEX_VLC_BITS - vlc_len;
|
||||||
}
|
}
|
||||||
level = dv_rl_vlc[index].level;
|
level = dv_rl_vlc[index].level;
|
||||||
run = dv_rl_vlc[index].run;
|
run = dv_rl_vlc[index].run;
|
||||||
|
|
||||||
/* gotta check if we're still within gb boundaries */
|
/* gotta check if we're still within gb boundaries */
|
||||||
if (re_index + vlc_len > last_index) {
|
if (re_index + vlc_len > last_index) {
|
||||||
/* should be < 16 bits otherwise a codeword could have been parsed */
|
/* should be < 16 bits otherwise a codeword could have been parsed */
|
||||||
mb->partial_bit_count = last_index - re_index;
|
mb->partial_bit_count = last_index - re_index;
|
||||||
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
|
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
|
||||||
re_index = last_index;
|
re_index = last_index;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
re_index += vlc_len;
|
re_index += vlc_len;
|
||||||
|
|
||||||
#ifdef VLC_DEBUG
|
#ifdef VLC_DEBUG
|
||||||
printf("run=%d level=%d\n", run, level);
|
printf("run=%d level=%d\n", run, level);
|
||||||
#endif
|
#endif
|
||||||
pos += run;
|
pos += run;
|
||||||
if (pos >= 64)
|
if (pos >= 64)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
assert(level);
|
assert(level);
|
||||||
pos1 = scan_table[pos];
|
pos1 = scan_table[pos];
|
||||||
@ -404,7 +404,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
|||||||
block = block1;
|
block = block1;
|
||||||
for(j = 0;j < 6; j++) {
|
for(j = 0;j < 6; j++) {
|
||||||
last_index = block_sizes[j];
|
last_index = block_sizes[j];
|
||||||
init_get_bits(&gb, buf_ptr, last_index);
|
init_get_bits(&gb, buf_ptr, last_index);
|
||||||
|
|
||||||
/* get the dc */
|
/* get the dc */
|
||||||
dc = get_sbits(&gb, 9);
|
dc = get_sbits(&gb, 9);
|
||||||
@ -444,7 +444,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
|||||||
block = block1;
|
block = block1;
|
||||||
mb = mb1;
|
mb = mb1;
|
||||||
init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
|
init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
|
||||||
flush_put_bits(&pb);
|
flush_put_bits(&pb);
|
||||||
for(j = 0;j < 6; j++, block += 64, mb++) {
|
for(j = 0;j < 6; j++, block += 64, mb++) {
|
||||||
if (mb->pos < 64 && get_bits_left(&gb) > 0) {
|
if (mb->pos < 64 && get_bits_left(&gb) > 0) {
|
||||||
dv_decode_ac(&gb, mb, block);
|
dv_decode_ac(&gb, mb, block);
|
||||||
@ -456,7 +456,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
|||||||
/* all blocks are finished, so the extra bytes can be used at
|
/* all blocks are finished, so the extra bytes can be used at
|
||||||
the video segment level */
|
the video segment level */
|
||||||
if (j >= 6)
|
if (j >= 6)
|
||||||
bit_copy(&vs_pb, &gb);
|
bit_copy(&vs_pb, &gb);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* we need a pass other the whole video segment */
|
/* we need a pass other the whole video segment */
|
||||||
@ -475,8 +475,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
|||||||
#endif
|
#endif
|
||||||
dv_decode_ac(&gb, mb, block);
|
dv_decode_ac(&gb, mb, block);
|
||||||
}
|
}
|
||||||
if (mb->pos >= 64 && mb->pos < 127)
|
if (mb->pos >= 64 && mb->pos < 127)
|
||||||
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
|
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
|
||||||
block += 64;
|
block += 64;
|
||||||
mb++;
|
mb++;
|
||||||
}
|
}
|
||||||
@ -508,7 +508,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
|||||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
|
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
|
||||||
uint64_t aligned_pixels[64/8];
|
uint64_t aligned_pixels[64/8];
|
||||||
uint8_t *pixels= (uint8_t*)aligned_pixels;
|
uint8_t *pixels= (uint8_t*)aligned_pixels;
|
||||||
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
|
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
|
||||||
int x, y, linesize;
|
int x, y, linesize;
|
||||||
/* NOTE: at end of line, the macroblock is handled as 420 */
|
/* NOTE: at end of line, the macroblock is handled as 420 */
|
||||||
idct_put(pixels, 8, block);
|
idct_put(pixels, 8, block);
|
||||||
@ -543,21 +543,21 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
|
|||||||
int size;
|
int size;
|
||||||
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
||||||
*vlc = dv_vlc_map[run][level].vlc | sign;
|
*vlc = dv_vlc_map[run][level].vlc | sign;
|
||||||
size = dv_vlc_map[run][level].size;
|
size = dv_vlc_map[run][level].size;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (level < DV_VLC_MAP_LEV_SIZE) {
|
if (level < DV_VLC_MAP_LEV_SIZE) {
|
||||||
*vlc = dv_vlc_map[0][level].vlc | sign;
|
*vlc = dv_vlc_map[0][level].vlc | sign;
|
||||||
size = dv_vlc_map[0][level].size;
|
size = dv_vlc_map[0][level].size;
|
||||||
} else {
|
} else {
|
||||||
*vlc = 0xfe00 | (level << 1) | sign;
|
*vlc = 0xfe00 | (level << 1) | sign;
|
||||||
size = 16;
|
size = 16;
|
||||||
}
|
}
|
||||||
if (run) {
|
if (run) {
|
||||||
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
|
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
|
||||||
(0x1f80 | (run - 1))) << size;
|
(0x1f80 | (run - 1))) << size;
|
||||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
@ -568,13 +568,13 @@ static always_inline int dv_rl2vlc_size(int run, int level)
|
|||||||
int size;
|
int size;
|
||||||
|
|
||||||
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
||||||
size = dv_vlc_map[run][level].size;
|
size = dv_vlc_map[run][level].size;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
|
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
|
||||||
if (run) {
|
if (run) {
|
||||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
@ -620,14 +620,14 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
|
|||||||
for (; size > (bits_left = put_bits_left(pb)); pb++) {
|
for (; size > (bits_left = put_bits_left(pb)); pb++) {
|
||||||
if (bits_left) {
|
if (bits_left) {
|
||||||
size -= bits_left;
|
size -= bits_left;
|
||||||
put_bits(pb, bits_left, vlc >> size);
|
put_bits(pb, bits_left, vlc >> size);
|
||||||
vlc = vlc & ((1<<size)-1);
|
vlc = vlc & ((1<<size)-1);
|
||||||
}
|
}
|
||||||
if (pb + 1 >= pb_end) {
|
if (pb + 1 >= pb_end) {
|
||||||
bi->partial_bit_count = size;
|
bi->partial_bit_count = size;
|
||||||
bi->partial_bit_buffer = vlc;
|
bi->partial_bit_buffer = vlc;
|
||||||
return pb;
|
return pb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Store VLC */
|
/* Store VLC */
|
||||||
@ -712,14 +712,14 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
|
|||||||
s = blk;
|
s = blk;
|
||||||
for(i=0; i<7; i++) {
|
for(i=0; i<7; i++) {
|
||||||
score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
|
score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
|
||||||
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
|
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
|
||||||
s += 8;
|
s += 8;
|
||||||
}
|
}
|
||||||
/* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
|
/* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
|
||||||
s = blk;
|
s = blk;
|
||||||
for(i=0; i<6; i++) {
|
for(i=0; i<6; i++) {
|
||||||
score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
|
score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
|
||||||
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
|
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
|
||||||
s += 8;
|
s += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -736,30 +736,30 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
|
|||||||
b = blks;
|
b = blks;
|
||||||
for (i=0; i<5; i++) {
|
for (i=0; i<5; i++) {
|
||||||
if (!qnos[i])
|
if (!qnos[i])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
qnos[i]--;
|
qnos[i]--;
|
||||||
size[i] = 0;
|
size[i] = 0;
|
||||||
for (j=0; j<6; j++, b++) {
|
for (j=0; j<6; j++, b++) {
|
||||||
for (a=0; a<4; a++) {
|
for (a=0; a<4; a++) {
|
||||||
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
|
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
|
||||||
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
|
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
|
||||||
b->area_q[a]++;
|
b->area_q[a]++;
|
||||||
prev= b->prev[a];
|
prev= b->prev[a];
|
||||||
for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
|
for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
|
||||||
b->mb[k] >>= 1;
|
b->mb[k] >>= 1;
|
||||||
if (b->mb[k]) {
|
if (b->mb[k]) {
|
||||||
b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
|
b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
|
||||||
prev= k;
|
prev= k;
|
||||||
} else {
|
} else {
|
||||||
b->next[prev] = b->next[k];
|
b->next[prev] = b->next[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b->prev[a+1]= prev;
|
b->prev[a+1]= prev;
|
||||||
}
|
}
|
||||||
size[i] += b->bit_size[a];
|
size[i] += b->bit_size[a];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
|
} while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
|
||||||
(qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
|
(qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
|
||||||
@ -797,68 +797,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
|
|||||||
mb_x = v & 0xff;
|
mb_x = v & 0xff;
|
||||||
mb_y = v >> 8;
|
mb_y = v >> 8;
|
||||||
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
|
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
|
||||||
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
|
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
|
||||||
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
|
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
|
||||||
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
|
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
|
||||||
do_edge_wrap = 0;
|
do_edge_wrap = 0;
|
||||||
qnos[mb_index] = 15; /* No quantization */
|
qnos[mb_index] = 15; /* No quantization */
|
||||||
ptr = dif + mb_index*80 + 4;
|
ptr = dif + mb_index*80 + 4;
|
||||||
for(j = 0;j < 6; j++) {
|
for(j = 0;j < 6; j++) {
|
||||||
if (j < 4) { /* Four Y blocks */
|
if (j < 4) { /* Four Y blocks */
|
||||||
/* NOTE: at end of line, the macroblock is handled as 420 */
|
/* NOTE: at end of line, the macroblock is handled as 420 */
|
||||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
|
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
|
||||||
data = y_ptr + (j * 8);
|
data = y_ptr + (j * 8);
|
||||||
} else {
|
} else {
|
||||||
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
|
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
|
||||||
}
|
}
|
||||||
linesize = s->picture.linesize[0];
|
linesize = s->picture.linesize[0];
|
||||||
} else { /* Cr and Cb blocks */
|
} else { /* Cr and Cb blocks */
|
||||||
/* don't ask Fabrice why they inverted Cb and Cr ! */
|
/* don't ask Fabrice why they inverted Cb and Cr ! */
|
||||||
data = s->picture.data[6 - j] + c_offset;
|
data = s->picture.data[6 - j] + c_offset;
|
||||||
linesize = s->picture.linesize[6 - j];
|
linesize = s->picture.linesize[6 - j];
|
||||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
|
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
|
||||||
do_edge_wrap = 1;
|
do_edge_wrap = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Everything is set up -- now just copy data -> DCT block */
|
/* Everything is set up -- now just copy data -> DCT block */
|
||||||
if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
|
if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
|
||||||
uint8_t* d;
|
uint8_t* d;
|
||||||
DCTELEM *b = block;
|
DCTELEM *b = block;
|
||||||
for (i=0;i<8;i++) {
|
for (i=0;i<8;i++) {
|
||||||
d = data + 8 * linesize;
|
d = data + 8 * linesize;
|
||||||
b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
|
b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
|
||||||
b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
|
b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
|
||||||
data += linesize;
|
data += linesize;
|
||||||
b += 8;
|
b += 8;
|
||||||
}
|
}
|
||||||
} else { /* Simple copy: 8x8 -> 8x8 */
|
} else { /* Simple copy: 8x8 -> 8x8 */
|
||||||
s->get_pixels(block, data, linesize);
|
s->get_pixels(block, data, linesize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
|
if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
|
||||||
enc_blk->dct_mode = dv_guess_dct_mode(block);
|
enc_blk->dct_mode = dv_guess_dct_mode(block);
|
||||||
else
|
else
|
||||||
enc_blk->dct_mode = 0;
|
enc_blk->dct_mode = 0;
|
||||||
enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
|
enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
|
||||||
enc_blk->partial_bit_count = 0;
|
enc_blk->partial_bit_count = 0;
|
||||||
enc_blk->partial_bit_buffer = 0;
|
enc_blk->partial_bit_buffer = 0;
|
||||||
enc_blk->cur_ac = 0;
|
enc_blk->cur_ac = 0;
|
||||||
|
|
||||||
s->fdct[enc_blk->dct_mode](block);
|
s->fdct[enc_blk->dct_mode](block);
|
||||||
|
|
||||||
dv_set_class_number(block, enc_blk,
|
dv_set_class_number(block, enc_blk,
|
||||||
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
|
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
|
||||||
|
|
||||||
init_put_bits(pb, ptr, block_sizes[j]/8);
|
init_put_bits(pb, ptr, block_sizes[j]/8);
|
||||||
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
|
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
|
||||||
put_bits(pb, 1, enc_blk->dct_mode);
|
put_bits(pb, 1, enc_blk->dct_mode);
|
||||||
put_bits(pb, 2, enc_blk->cno);
|
put_bits(pb, 2, enc_blk->cno);
|
||||||
|
|
||||||
vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
|
vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
|
||||||
enc_blk->bit_size[2] + enc_blk->bit_size[3];
|
enc_blk->bit_size[2] + enc_blk->bit_size[3];
|
||||||
++enc_blk;
|
++enc_blk;
|
||||||
++pb;
|
++pb;
|
||||||
ptr += block_sizes[j]/8;
|
ptr += block_sizes[j]/8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -898,7 +898,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl)
|
|||||||
DVVideoContext *s = avctx->priv_data;
|
DVVideoContext *s = avctx->priv_data;
|
||||||
int slice = (size_t)sl;
|
int slice = (size_t)sl;
|
||||||
dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
||||||
&s->sys->video_place[slice*5]);
|
&s->sys->video_place[slice*5]);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -907,7 +907,7 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl)
|
|||||||
DVVideoContext *s = avctx->priv_data;
|
DVVideoContext *s = avctx->priv_data;
|
||||||
int slice = (size_t)sl;
|
int slice = (size_t)sl;
|
||||||
dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
||||||
&s->sys->video_place[slice*5]);
|
&s->sys->video_place[slice*5]);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -940,7 +940,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
s->buf = buf;
|
s->buf = buf;
|
||||||
avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
|
avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
|
||||||
s->sys->difseg_size * 27);
|
s->sys->difseg_size * 27);
|
||||||
|
|
||||||
emms_c();
|
emms_c();
|
||||||
|
|
||||||
@ -958,7 +958,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
|
|||||||
|
|
||||||
s->sys = dv_codec_profile(c);
|
s->sys = dv_codec_profile(c);
|
||||||
if (!s->sys)
|
if (!s->sys)
|
||||||
return -1;
|
return -1;
|
||||||
if(buf_size < s->sys->frame_size)
|
if(buf_size < s->sys->frame_size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -969,7 +969,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
|
|||||||
|
|
||||||
s->buf = buf;
|
s->buf = buf;
|
||||||
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
|
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
|
||||||
s->sys->difseg_size * 27);
|
s->sys->difseg_size * 27);
|
||||||
|
|
||||||
emms_c();
|
emms_c();
|
||||||
return s->sys->frame_size;
|
return s->sys->frame_size;
|
||||||
|
@ -192,7 +192,7 @@ static void dvb_encode_rle4(uint8_t **pq,
|
|||||||
|
|
||||||
#define SCALEBITS 10
|
#define SCALEBITS 10
|
||||||
#define ONE_HALF (1 << (SCALEBITS - 1))
|
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||||
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
||||||
|
|
||||||
#define RGB_TO_Y_CCIR(r, g, b) \
|
#define RGB_TO_Y_CCIR(r, g, b) \
|
||||||
((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
|
((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
|
||||||
|
@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* guess the dc of blocks which dont have a undamaged dc
|
* guess the dc of blocks which dont have a undamaged dc
|
||||||
* @param w width in 8 pixel blocks
|
* @param w width in 8 pixel blocks
|
||||||
* @param h height in 8 pixel blocks
|
* @param h height in 8 pixel blocks
|
||||||
*/
|
*/
|
||||||
static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
|
static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
|
||||||
int b_x, b_y;
|
int b_x, b_y;
|
||||||
@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* simple horizontal deblocking filter used for error resilience
|
* simple horizontal deblocking filter used for error resilience
|
||||||
* @param w width in 8 pixel blocks
|
* @param w width in 8 pixel blocks
|
||||||
* @param h height in 8 pixel blocks
|
* @param h height in 8 pixel blocks
|
||||||
*/
|
*/
|
||||||
static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
|
static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
|
||||||
int b_x, b_y;
|
int b_x, b_y;
|
||||||
@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* simple vertical deblocking filter used for error resilience
|
* simple vertical deblocking filter used for error resilience
|
||||||
* @param w width in 8 pixel blocks
|
* @param w width in 8 pixel blocks
|
||||||
* @param h height in 8 pixel blocks
|
* @param h height in 8 pixel blocks
|
||||||
*/
|
*/
|
||||||
static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
|
static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
|
||||||
int b_x, b_y;
|
int b_x, b_y;
|
||||||
@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){
|
|||||||
s->mv_type = MV_TYPE_16X16;
|
s->mv_type = MV_TYPE_16X16;
|
||||||
s->mb_skipped=0;
|
s->mb_skipped=0;
|
||||||
|
|
||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
|
|
||||||
s->mb_x= mb_x;
|
s->mb_x= mb_x;
|
||||||
s->mb_y= mb_y;
|
s->mb_y= mb_y;
|
||||||
@ -476,7 +476,7 @@ int score_sum=0;
|
|||||||
s->mv_type = MV_TYPE_16X16;
|
s->mv_type = MV_TYPE_16X16;
|
||||||
s->mb_skipped=0;
|
s->mb_skipped=0;
|
||||||
|
|
||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
|
|
||||||
s->mb_x= mb_x;
|
s->mb_x= mb_x;
|
||||||
s->mb_y= mb_y;
|
s->mb_y= mb_y;
|
||||||
@ -582,7 +582,7 @@ static int is_intra_more_likely(MpegEncContext *s){
|
|||||||
uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
|
uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
|
||||||
uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize;
|
uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize;
|
||||||
|
|
||||||
is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
|
is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
|
||||||
is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
|
is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
|
||||||
}else{
|
}else{
|
||||||
if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
|
if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
|
||||||
@ -873,7 +873,7 @@ void ff_er_frame_end(MpegEncContext *s){
|
|||||||
s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
|
s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
|
|
||||||
s->mb_x= mb_x;
|
s->mb_x= mb_x;
|
||||||
s->mb_y= mb_y;
|
s->mb_y= mb_y;
|
||||||
|
@ -46,7 +46,7 @@ static int Faac_encode_init(AVCodecContext *avctx)
|
|||||||
/* check faac version */
|
/* check faac version */
|
||||||
faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle);
|
faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle);
|
||||||
if (faac_cfg->version != FAAC_CFG_VERSION) {
|
if (faac_cfg->version != FAAC_CFG_VERSION) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
|
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
|
||||||
faacEncClose(s->faac_handle);
|
faacEncClose(s->faac_handle);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -47,8 +47,8 @@ static const char* libfaadname = "libfaad.so.0";
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
void* handle; /* dlopen handle */
|
void* handle; /* dlopen handle */
|
||||||
void* faac_handle; /* FAAD library handle */
|
void* faac_handle; /* FAAD library handle */
|
||||||
int frame_size;
|
int frame_size;
|
||||||
int sample_size;
|
int sample_size;
|
||||||
int flags;
|
int flags;
|
||||||
@ -57,36 +57,36 @@ typedef struct {
|
|||||||
faacDecHandle FAADAPI (*faacDecOpen)(void);
|
faacDecHandle FAADAPI (*faacDecOpen)(void);
|
||||||
faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder);
|
faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder);
|
||||||
#ifndef FAAD2_VERSION
|
#ifndef FAAD2_VERSION
|
||||||
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||||
faacDecConfigurationPtr config);
|
faacDecConfigurationPtr config);
|
||||||
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||||
unsigned char *buffer,
|
unsigned char *buffer,
|
||||||
unsigned long *samplerate,
|
unsigned long *samplerate,
|
||||||
unsigned long *channels);
|
unsigned long *channels);
|
||||||
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||||
unsigned long SizeOfDecoderSpecificInfo,
|
unsigned long SizeOfDecoderSpecificInfo,
|
||||||
unsigned long *samplerate, unsigned long *channels);
|
unsigned long *samplerate, unsigned long *channels);
|
||||||
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||||
unsigned char *buffer,
|
unsigned char *buffer,
|
||||||
unsigned long *bytesconsumed,
|
unsigned long *bytesconsumed,
|
||||||
short *sample_buffer,
|
short *sample_buffer,
|
||||||
unsigned long *samples);
|
unsigned long *samples);
|
||||||
#else
|
#else
|
||||||
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||||
faacDecConfigurationPtr config);
|
faacDecConfigurationPtr config);
|
||||||
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||||
unsigned char *buffer,
|
unsigned char *buffer,
|
||||||
unsigned long buffer_size,
|
unsigned long buffer_size,
|
||||||
unsigned long *samplerate,
|
unsigned long *samplerate,
|
||||||
unsigned char *channels);
|
unsigned char *channels);
|
||||||
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||||
unsigned long SizeOfDecoderSpecificInfo,
|
unsigned long SizeOfDecoderSpecificInfo,
|
||||||
unsigned long *samplerate, unsigned char *channels);
|
unsigned long *samplerate, unsigned char *channels);
|
||||||
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||||
faacDecFrameInfo *hInfo,
|
faacDecFrameInfo *hInfo,
|
||||||
unsigned char *buffer,
|
unsigned char *buffer,
|
||||||
unsigned long buffer_size);
|
unsigned long buffer_size);
|
||||||
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
|
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void FAADAPI (*faacDecClose)(faacDecHandle hDecoder);
|
void FAADAPI (*faacDecClose)(faacDecHandle hDecoder);
|
||||||
@ -112,14 +112,14 @@ static int faac_init_mp4(AVCodecContext *avctx)
|
|||||||
int r = 0;
|
int r = 0;
|
||||||
|
|
||||||
if (avctx->extradata)
|
if (avctx->extradata)
|
||||||
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
|
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
|
||||||
avctx->extradata_size,
|
avctx->extradata_size,
|
||||||
&samplerate, &channels);
|
&samplerate, &channels);
|
||||||
// else r = s->faacDecInit(s->faac_handle ... );
|
// else r = s->faacDecInit(s->faac_handle ... );
|
||||||
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
|
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
|
||||||
r, samplerate, (long)channels, avctx->extradata_size);
|
r, samplerate, (long)channels, avctx->extradata_size);
|
||||||
avctx->sample_rate = samplerate;
|
avctx->sample_rate = samplerate;
|
||||||
avctx->channels = channels;
|
avctx->channels = channels;
|
||||||
|
|
||||||
@ -141,7 +141,7 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
|||||||
void *out;
|
void *out;
|
||||||
#endif
|
#endif
|
||||||
if(buf_size == 0)
|
if(buf_size == 0)
|
||||||
return 0;
|
return 0;
|
||||||
#ifndef FAAD2_VERSION
|
#ifndef FAAD2_VERSION
|
||||||
out = s->faacDecDecode(s->faac_handle,
|
out = s->faacDecDecode(s->faac_handle,
|
||||||
(unsigned char*)buf,
|
(unsigned char*)buf,
|
||||||
@ -150,16 +150,16 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
|||||||
&samples);
|
&samples);
|
||||||
samples *= s->sample_size;
|
samples *= s->sample_size;
|
||||||
if (data_size)
|
if (data_size)
|
||||||
*data_size = samples;
|
*data_size = samples;
|
||||||
return (buf_size < (int)bytesconsumed)
|
return (buf_size < (int)bytesconsumed)
|
||||||
? buf_size : (int)bytesconsumed;
|
? buf_size : (int)bytesconsumed;
|
||||||
#else
|
#else
|
||||||
|
|
||||||
out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size);
|
out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size);
|
||||||
|
|
||||||
if (frame_info.error > 0) {
|
if (frame_info.error > 0) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
|
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
|
||||||
s->faacDecGetErrorMessage(frame_info.error));
|
s->faacDecGetErrorMessage(frame_info.error));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -167,10 +167,10 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
|||||||
memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one
|
memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one
|
||||||
|
|
||||||
if (data_size)
|
if (data_size)
|
||||||
*data_size = frame_info.samples;
|
*data_size = frame_info.samples;
|
||||||
|
|
||||||
return (buf_size < (int)frame_info.bytesconsumed)
|
return (buf_size < (int)frame_info.bytesconsumed)
|
||||||
? buf_size : (int)frame_info.bytesconsumed;
|
? buf_size : (int)frame_info.bytesconsumed;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,8 +196,8 @@ static int faac_decode_init(AVCodecContext *avctx)
|
|||||||
s->handle = dlopen(libfaadname, RTLD_LAZY);
|
s->handle = dlopen(libfaadname, RTLD_LAZY);
|
||||||
if (!s->handle)
|
if (!s->handle)
|
||||||
{
|
{
|
||||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
|
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
|
||||||
libfaadname, dlerror());
|
libfaadname, dlerror());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#define dfaac(a, b) \
|
#define dfaac(a, b) \
|
||||||
@ -209,32 +209,32 @@ static int faac_decode_init(AVCodecContext *avctx)
|
|||||||
#endif /* CONFIG_FAADBIN */
|
#endif /* CONFIG_FAADBIN */
|
||||||
|
|
||||||
// resolve all needed function calls
|
// resolve all needed function calls
|
||||||
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
|
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
|
||||||
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
|
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
|
||||||
FAADAPI (*)(faacDecHandle)));
|
FAADAPI (*)(faacDecHandle)));
|
||||||
#ifndef FAAD2_VERSION
|
#ifndef FAAD2_VERSION
|
||||||
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
|
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
|
||||||
faacDecConfigurationPtr)));
|
faacDecConfigurationPtr)));
|
||||||
|
|
||||||
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||||
unsigned long*, unsigned long*)));
|
unsigned long*, unsigned long*)));
|
||||||
dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||||
unsigned long, unsigned long*,
|
unsigned long, unsigned long*,
|
||||||
unsigned long*)));
|
unsigned long*)));
|
||||||
dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
|
dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
|
||||||
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||||
unsigned long*, short*, unsigned long*)));
|
unsigned long*, short*, unsigned long*)));
|
||||||
#else
|
#else
|
||||||
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
|
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
|
||||||
faacDecConfigurationPtr)));
|
faacDecConfigurationPtr)));
|
||||||
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
|
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||||
unsigned long, unsigned long*, unsigned char*)));
|
unsigned long, unsigned long*, unsigned char*)));
|
||||||
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
|
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||||
unsigned long, unsigned long*,
|
unsigned long, unsigned long*,
|
||||||
unsigned char*)));
|
unsigned char*)));
|
||||||
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
|
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
|
||||||
unsigned char*, unsigned long)));
|
unsigned char*, unsigned long)));
|
||||||
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
|
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
|
||||||
#endif
|
#endif
|
||||||
#undef dfacc
|
#undef dfacc
|
||||||
|
|
||||||
@ -243,8 +243,8 @@ static int faac_decode_init(AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
if (err) {
|
if (err) {
|
||||||
dlclose(s->handle);
|
dlclose(s->handle);
|
||||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
|
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
|
||||||
err, libfaadname);
|
err, libfaadname);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -260,31 +260,31 @@ static int faac_decode_init(AVCodecContext *avctx)
|
|||||||
faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle);
|
faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle);
|
||||||
|
|
||||||
if (faac_cfg) {
|
if (faac_cfg) {
|
||||||
switch (avctx->bits_per_sample) {
|
switch (avctx->bits_per_sample) {
|
||||||
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
|
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
|
||||||
default:
|
default:
|
||||||
case 16:
|
case 16:
|
||||||
#ifdef FAAD2_VERSION
|
#ifdef FAAD2_VERSION
|
||||||
faac_cfg->outputFormat = FAAD_FMT_16BIT;
|
faac_cfg->outputFormat = FAAD_FMT_16BIT;
|
||||||
#endif
|
#endif
|
||||||
s->sample_size = 2;
|
s->sample_size = 2;
|
||||||
break;
|
break;
|
||||||
case 24:
|
case 24:
|
||||||
#ifdef FAAD2_VERSION
|
#ifdef FAAD2_VERSION
|
||||||
faac_cfg->outputFormat = FAAD_FMT_24BIT;
|
faac_cfg->outputFormat = FAAD_FMT_24BIT;
|
||||||
#endif
|
#endif
|
||||||
s->sample_size = 3;
|
s->sample_size = 3;
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
#ifdef FAAD2_VERSION
|
#ifdef FAAD2_VERSION
|
||||||
faac_cfg->outputFormat = FAAD_FMT_32BIT;
|
faac_cfg->outputFormat = FAAD_FMT_32BIT;
|
||||||
#endif
|
#endif
|
||||||
s->sample_size = 4;
|
s->sample_size = 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
|
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
|
||||||
faac_cfg->defObjectType = LC;
|
faac_cfg->defObjectType = LC;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->faacDecSetConfiguration(s->faac_handle, faac_cfg);
|
s->faacDecSetConfiguration(s->faac_handle, faac_cfg);
|
||||||
|
@ -204,15 +204,15 @@ void ff_faandct248(DCTELEM * data)
|
|||||||
data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
||||||
|
|
||||||
tmp10 = tmp4 + tmp7;
|
tmp10 = tmp4 + tmp7;
|
||||||
tmp11 = tmp5 + tmp6;
|
tmp11 = tmp5 + tmp6;
|
||||||
tmp12 = tmp5 - tmp6;
|
tmp12 = tmp5 - tmp6;
|
||||||
tmp13 = tmp4 - tmp7;
|
tmp13 = tmp4 - tmp7;
|
||||||
|
|
||||||
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
|
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
|
||||||
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
|
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
|
||||||
|
|
||||||
z1 = (tmp12 + tmp13)* A1;
|
z1 = (tmp12 + tmp13)* A1;
|
||||||
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
|
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
|
||||||
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -64,51 +64,51 @@ void init_fdct()
|
|||||||
void fdct(block)
|
void fdct(block)
|
||||||
short *block;
|
short *block;
|
||||||
{
|
{
|
||||||
register int i, j;
|
register int i, j;
|
||||||
double s;
|
double s;
|
||||||
double tmp[64];
|
double tmp[64];
|
||||||
|
|
||||||
for(i = 0; i < 8; i++)
|
for(i = 0; i < 8; i++)
|
||||||
for(j = 0; j < 8; j++)
|
for(j = 0; j < 8; j++)
|
||||||
{
|
{
|
||||||
s = 0.0;
|
s = 0.0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* for(k = 0; k < 8; k++)
|
* for(k = 0; k < 8; k++)
|
||||||
* s += c[j][k] * block[8 * i + k];
|
* s += c[j][k] * block[8 * i + k];
|
||||||
*/
|
*/
|
||||||
s += c[j][0] * block[8 * i + 0];
|
s += c[j][0] * block[8 * i + 0];
|
||||||
s += c[j][1] * block[8 * i + 1];
|
s += c[j][1] * block[8 * i + 1];
|
||||||
s += c[j][2] * block[8 * i + 2];
|
s += c[j][2] * block[8 * i + 2];
|
||||||
s += c[j][3] * block[8 * i + 3];
|
s += c[j][3] * block[8 * i + 3];
|
||||||
s += c[j][4] * block[8 * i + 4];
|
s += c[j][4] * block[8 * i + 4];
|
||||||
s += c[j][5] * block[8 * i + 5];
|
s += c[j][5] * block[8 * i + 5];
|
||||||
s += c[j][6] * block[8 * i + 6];
|
s += c[j][6] * block[8 * i + 6];
|
||||||
s += c[j][7] * block[8 * i + 7];
|
s += c[j][7] * block[8 * i + 7];
|
||||||
|
|
||||||
tmp[8 * i + j] = s;
|
tmp[8 * i + j] = s;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(j = 0; j < 8; j++)
|
for(j = 0; j < 8; j++)
|
||||||
for(i = 0; i < 8; i++)
|
for(i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
s = 0.0;
|
s = 0.0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* for(k = 0; k < 8; k++)
|
* for(k = 0; k < 8; k++)
|
||||||
* s += c[i][k] * tmp[8 * k + j];
|
* s += c[i][k] * tmp[8 * k + j];
|
||||||
*/
|
*/
|
||||||
s += c[i][0] * tmp[8 * 0 + j];
|
s += c[i][0] * tmp[8 * 0 + j];
|
||||||
s += c[i][1] * tmp[8 * 1 + j];
|
s += c[i][1] * tmp[8 * 1 + j];
|
||||||
s += c[i][2] * tmp[8 * 2 + j];
|
s += c[i][2] * tmp[8 * 2 + j];
|
||||||
s += c[i][3] * tmp[8 * 3 + j];
|
s += c[i][3] * tmp[8 * 3 + j];
|
||||||
s += c[i][4] * tmp[8 * 4 + j];
|
s += c[i][4] * tmp[8 * 4 + j];
|
||||||
s += c[i][5] * tmp[8 * 5 + j];
|
s += c[i][5] * tmp[8 * 5 + j];
|
||||||
s += c[i][6] * tmp[8 * 6 + j];
|
s += c[i][6] * tmp[8 * 6 + j];
|
||||||
s += c[i][7] * tmp[8 * 7 + j];
|
s += c[i][7] * tmp[8 * 7 + j];
|
||||||
s*=8.0;
|
s*=8.0;
|
||||||
|
|
||||||
block[8 * i + j] = (short)floor(s + 0.499999);
|
block[8 * i + j] = (short)floor(s + 0.499999);
|
||||||
/*
|
/*
|
||||||
* reason for adding 0.499999 instead of 0.5:
|
* reason for adding 0.499999 instead of 0.5:
|
||||||
* s is quite often x.5 (at least for i and/or j = 0 or 4)
|
* s is quite often x.5 (at least for i and/or j = 0 or 4)
|
||||||
|
@ -149,8 +149,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
|
|||||||
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
|
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
|
||||||
{
|
{
|
||||||
int ln = s->nbits;
|
int ln = s->nbits;
|
||||||
int j, np, np2;
|
int j, np, np2;
|
||||||
int nblocks, nloops;
|
int nblocks, nloops;
|
||||||
register FFTComplex *p, *q;
|
register FFTComplex *p, *q;
|
||||||
FFTComplex *exptab = s->exptab;
|
FFTComplex *exptab = s->exptab;
|
||||||
int l;
|
int l;
|
||||||
|
@ -31,30 +31,30 @@
|
|||||||
* instead of simply using 32bit integer arithmetic.
|
* instead of simply using 32bit integer arithmetic.
|
||||||
*/
|
*/
|
||||||
typedef struct Float11 {
|
typedef struct Float11 {
|
||||||
int sign; /**< 1bit sign */
|
int sign; /**< 1bit sign */
|
||||||
int exp; /**< 4bit exponent */
|
int exp; /**< 4bit exponent */
|
||||||
int mant; /**< 6bit mantissa */
|
int mant; /**< 6bit mantissa */
|
||||||
} Float11;
|
} Float11;
|
||||||
|
|
||||||
static inline Float11* i2f(int16_t i, Float11* f)
|
static inline Float11* i2f(int16_t i, Float11* f)
|
||||||
{
|
{
|
||||||
f->sign = (i < 0);
|
f->sign = (i < 0);
|
||||||
if (f->sign)
|
if (f->sign)
|
||||||
i = -i;
|
i = -i;
|
||||||
f->exp = av_log2_16bit(i) + !!i;
|
f->exp = av_log2_16bit(i) + !!i;
|
||||||
f->mant = i? (i<<6) >> f->exp :
|
f->mant = i? (i<<6) >> f->exp :
|
||||||
1<<5;
|
1<<5;
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int16_t mult(Float11* f1, Float11* f2)
|
static inline int16_t mult(Float11* f1, Float11* f2)
|
||||||
{
|
{
|
||||||
int res, exp;
|
int res, exp;
|
||||||
|
|
||||||
exp = f1->exp + f2->exp;
|
exp = f1->exp + f2->exp;
|
||||||
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
|
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
|
||||||
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
|
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
|
||||||
return (f1->sign ^ f2->sign) ? -res : res;
|
return (f1->sign ^ f2->sign) ? -res : res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int sgn(int value)
|
static inline int sgn(int value)
|
||||||
@ -63,32 +63,32 @@ static inline int sgn(int value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
typedef struct G726Tables {
|
typedef struct G726Tables {
|
||||||
int bits; /**< bits per sample */
|
int bits; /**< bits per sample */
|
||||||
int* quant; /**< quantization table */
|
int* quant; /**< quantization table */
|
||||||
int* iquant; /**< inverse quantization table */
|
int* iquant; /**< inverse quantization table */
|
||||||
int* W; /**< special table #1 ;-) */
|
int* W; /**< special table #1 ;-) */
|
||||||
int* F; /**< special table #2 */
|
int* F; /**< special table #2 */
|
||||||
} G726Tables;
|
} G726Tables;
|
||||||
|
|
||||||
typedef struct G726Context {
|
typedef struct G726Context {
|
||||||
G726Tables* tbls; /**< static tables needed for computation */
|
G726Tables* tbls; /**< static tables needed for computation */
|
||||||
|
|
||||||
Float11 sr[2]; /**< prev. reconstructed samples */
|
Float11 sr[2]; /**< prev. reconstructed samples */
|
||||||
Float11 dq[6]; /**< prev. difference */
|
Float11 dq[6]; /**< prev. difference */
|
||||||
int a[2]; /**< second order predictor coeffs */
|
int a[2]; /**< second order predictor coeffs */
|
||||||
int b[6]; /**< sixth order predictor coeffs */
|
int b[6]; /**< sixth order predictor coeffs */
|
||||||
int pk[2]; /**< signs of prev. 2 sez + dq */
|
int pk[2]; /**< signs of prev. 2 sez + dq */
|
||||||
|
|
||||||
int ap; /**< scale factor control */
|
int ap; /**< scale factor control */
|
||||||
int yu; /**< fast scale factor */
|
int yu; /**< fast scale factor */
|
||||||
int yl; /**< slow scale factor */
|
int yl; /**< slow scale factor */
|
||||||
int dms; /**< short average magnitude of F[i] */
|
int dms; /**< short average magnitude of F[i] */
|
||||||
int dml; /**< long average magnitude of F[i] */
|
int dml; /**< long average magnitude of F[i] */
|
||||||
int td; /**< tone detect */
|
int td; /**< tone detect */
|
||||||
|
|
||||||
int se; /**< estimated signal for the next iteration */
|
int se; /**< estimated signal for the next iteration */
|
||||||
int sez; /**< estimated second order prediction */
|
int sez; /**< estimated second order prediction */
|
||||||
int y; /**< quantizer scaling factor for the next iteration */
|
int y; /**< quantizer scaling factor for the next iteration */
|
||||||
} G726Context;
|
} G726Context;
|
||||||
|
|
||||||
static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */
|
static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */
|
||||||
@ -113,34 +113,34 @@ static int quant_tbl32[] = /**< 32kbit/s 4bits per sample
|
|||||||
{ -125, 79, 177, 245, 299, 348, 399, INT_MAX };
|
{ -125, 79, 177, 245, 299, 348, 399, INT_MAX };
|
||||||
static int iquant_tbl32[] =
|
static int iquant_tbl32[] =
|
||||||
{ INT_MIN, 4, 135, 213, 273, 323, 373, 425,
|
{ INT_MIN, 4, 135, 213, 273, 323, 373, 425,
|
||||||
425, 373, 323, 273, 213, 135, 4, INT_MIN };
|
425, 373, 323, 273, 213, 135, 4, INT_MIN };
|
||||||
static int W_tbl32[] =
|
static int W_tbl32[] =
|
||||||
{ -12, 18, 41, 64, 112, 198, 355, 1122,
|
{ -12, 18, 41, 64, 112, 198, 355, 1122,
|
||||||
1122, 355, 198, 112, 64, 41, 18, -12};
|
1122, 355, 198, 112, 64, 41, 18, -12};
|
||||||
static int F_tbl32[] =
|
static int F_tbl32[] =
|
||||||
{ 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
|
{ 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
|
||||||
|
|
||||||
static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */
|
static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */
|
||||||
{ -122, -16, 67, 138, 197, 249, 297, 338,
|
{ -122, -16, 67, 138, 197, 249, 297, 338,
|
||||||
377, 412, 444, 474, 501, 527, 552, INT_MAX };
|
377, 412, 444, 474, 501, 527, 552, INT_MAX };
|
||||||
static int iquant_tbl40[] =
|
static int iquant_tbl40[] =
|
||||||
{ INT_MIN, -66, 28, 104, 169, 224, 274, 318,
|
{ INT_MIN, -66, 28, 104, 169, 224, 274, 318,
|
||||||
358, 395, 429, 459, 488, 514, 539, 566,
|
358, 395, 429, 459, 488, 514, 539, 566,
|
||||||
566, 539, 514, 488, 459, 429, 395, 358,
|
566, 539, 514, 488, 459, 429, 395, 358,
|
||||||
318, 274, 224, 169, 104, 28, -66, INT_MIN };
|
318, 274, 224, 169, 104, 28, -66, INT_MIN };
|
||||||
static int W_tbl40[] =
|
static int W_tbl40[] =
|
||||||
{ 14, 14, 24, 39, 40, 41, 58, 100,
|
{ 14, 14, 24, 39, 40, 41, 58, 100,
|
||||||
141, 179, 219, 280, 358, 440, 529, 696,
|
141, 179, 219, 280, 358, 440, 529, 696,
|
||||||
696, 529, 440, 358, 280, 219, 179, 141,
|
696, 529, 440, 358, 280, 219, 179, 141,
|
||||||
100, 58, 41, 40, 39, 24, 14, 14 };
|
100, 58, 41, 40, 39, 24, 14, 14 };
|
||||||
static int F_tbl40[] =
|
static int F_tbl40[] =
|
||||||
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
|
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
|
||||||
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
|
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
|
||||||
|
|
||||||
static G726Tables G726Tables_pool[] =
|
static G726Tables G726Tables_pool[] =
|
||||||
{{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
|
{{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
|
||||||
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
|
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
|
||||||
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
|
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
|
||||||
{ 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
|
{ 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
|
||||||
|
|
||||||
|
|
||||||
@ -207,20 +207,20 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I)
|
|||||||
dq0 = dq ? sgn(dq) : 0;
|
dq0 = dq ? sgn(dq) : 0;
|
||||||
if (tr) {
|
if (tr) {
|
||||||
c->a[0] = 0;
|
c->a[0] = 0;
|
||||||
c->a[1] = 0;
|
c->a[1] = 0;
|
||||||
for (i=0; i<6; i++)
|
for (i=0; i<6; i++)
|
||||||
c->b[i] = 0;
|
c->b[i] = 0;
|
||||||
} else {
|
} else {
|
||||||
/* This is a bit crazy, but it really is +255 not +256 */
|
/* This is a bit crazy, but it really is +255 not +256 */
|
||||||
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
|
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
|
||||||
|
|
||||||
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
|
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
|
||||||
c->a[1] = clip(c->a[1], -12288, 12288);
|
c->a[1] = clip(c->a[1], -12288, 12288);
|
||||||
c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
|
c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
|
||||||
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
|
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
|
||||||
|
|
||||||
for (i=0; i<6; i++)
|
for (i=0; i<6; i++)
|
||||||
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
|
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Update Dq and Sr and Pk */
|
/* Update Dq and Sr and Pk */
|
||||||
@ -323,13 +323,13 @@ static int g726_init(AVCodecContext * avctx)
|
|||||||
|
|
||||||
if (avctx->channels != 1 ||
|
if (avctx->channels != 1 ||
|
||||||
(avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
|
(avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
|
||||||
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
|
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
|
if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
g726_reset(&c->c, avctx->bit_rate);
|
g726_reset(&c->c, avctx->bit_rate);
|
||||||
c->code_size = c->c.tbls->bits;
|
c->code_size = c->c.tbls->bits;
|
||||||
@ -384,12 +384,12 @@ static int g726_decode_frame(AVCodecContext *avctx,
|
|||||||
init_get_bits(&gb, buf, buf_size * 8);
|
init_get_bits(&gb, buf, buf_size * 8);
|
||||||
if (c->bits_left) {
|
if (c->bits_left) {
|
||||||
int s = c->code_size - c->bits_left;;
|
int s = c->code_size - c->bits_left;;
|
||||||
code = (c->bit_buffer << s) | get_bits(&gb, s);
|
code = (c->bit_buffer << s) | get_bits(&gb, s);
|
||||||
*samples++ = g726_decode(&c->c, code & mask);
|
*samples++ = g726_decode(&c->c, code & mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (get_bits_count(&gb) + c->code_size <= buf_size*8)
|
while (get_bits_count(&gb) + c->code_size <= buf_size*8)
|
||||||
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
|
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
|
||||||
|
|
||||||
c->bits_left = buf_size*8 - get_bits_count(&gb);
|
c->bits_left = buf_size*8 - get_bits_count(&gb);
|
||||||
c->bit_buffer = get_bits(&gb, c->bits_left);
|
c->bit_buffer = get_bits(&gb, c->bits_left);
|
||||||
|
@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es
|
|||||||
* read unsigned golomb rice code (shorten).
|
* read unsigned golomb rice code (shorten).
|
||||||
*/
|
*/
|
||||||
static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
|
static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
|
||||||
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
|
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -395,7 +395,7 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){
|
|||||||
*/
|
*/
|
||||||
static inline void set_se_golomb(PutBitContext *pb, int i){
|
static inline void set_se_golomb(PutBitContext *pb, int i){
|
||||||
// if (i>32767 || i<-32767)
|
// if (i>32767 || i<-32767)
|
||||||
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
|
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
|
||||||
#if 0
|
#if 0
|
||||||
if(i<=0) i= -2*i;
|
if(i<=0) i= -2*i;
|
||||||
else i= 2*i-1;
|
else i= 2*i-1;
|
||||||
|
@ -231,11 +231,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
(coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
|
(coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
|
||||||
put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
|
put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 1); /* marker */
|
put_bits(&s->pb, 1, 1); /* marker */
|
||||||
put_bits(&s->pb, 1, 0); /* h263 id */
|
put_bits(&s->pb, 1, 0); /* h263 id */
|
||||||
put_bits(&s->pb, 1, 0); /* split screen off */
|
put_bits(&s->pb, 1, 0); /* split screen off */
|
||||||
put_bits(&s->pb, 1, 0); /* camera off */
|
put_bits(&s->pb, 1, 0); /* camera off */
|
||||||
put_bits(&s->pb, 1, 0); /* freeze picture release off */
|
put_bits(&s->pb, 1, 0); /* freeze picture release off */
|
||||||
|
|
||||||
format = h263_get_picture_format(s->width, s->height);
|
format = h263_get_picture_format(s->width, s->height);
|
||||||
if (!s->h263_plus) {
|
if (!s->h263_plus) {
|
||||||
@ -245,12 +245,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
/* By now UMV IS DISABLED ON H.263v1, since the restrictions
|
/* By now UMV IS DISABLED ON H.263v1, since the restrictions
|
||||||
of H.263v1 UMV implies to check the predicted MV after
|
of H.263v1 UMV implies to check the predicted MV after
|
||||||
calculation of the current MB to see if we're on the limits */
|
calculation of the current MB to see if we're on the limits */
|
||||||
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
|
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
|
||||||
put_bits(&s->pb, 1, 0); /* SAC: off */
|
put_bits(&s->pb, 1, 0); /* SAC: off */
|
||||||
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
|
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
|
||||||
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
|
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
|
||||||
put_bits(&s->pb, 5, s->qscale);
|
put_bits(&s->pb, 5, s->qscale);
|
||||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||||
} else {
|
} else {
|
||||||
int ufep=1;
|
int ufep=1;
|
||||||
/* H.263v2 */
|
/* H.263v2 */
|
||||||
@ -286,9 +286,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
|
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
|
||||||
|
|
||||||
/* This should be here if PLUSPTYPE */
|
/* This should be here if PLUSPTYPE */
|
||||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||||
|
|
||||||
if (format == 7) {
|
if (format == 7) {
|
||||||
/* Custom Picture Format (CPFMT) */
|
/* Custom Picture Format (CPFMT) */
|
||||||
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
||||||
|
|
||||||
@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
|
if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
|
||||||
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
|
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
|
||||||
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
|
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(s->custom_pcf){
|
if(s->custom_pcf){
|
||||||
if(ufep){
|
if(ufep){
|
||||||
@ -320,7 +320,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
put_bits(&s->pb, 5, s->qscale);
|
put_bits(&s->pb, 5, s->qscale);
|
||||||
}
|
}
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 0); /* no PEI */
|
put_bits(&s->pb, 1, 0); /* no PEI */
|
||||||
|
|
||||||
if(s->h263_slice_structured){
|
if(s->h263_slice_structured){
|
||||||
put_bits(&s->pb, 1, 1);
|
put_bits(&s->pb, 1, 1);
|
||||||
@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64],
|
|||||||
}
|
}
|
||||||
|
|
||||||
void mpeg4_encode_mb(MpegEncContext * s,
|
void mpeg4_encode_mb(MpegEncContext * s,
|
||||||
DCTELEM block[6][64],
|
DCTELEM block[6][64],
|
||||||
int motion_x, int motion_y)
|
int motion_x, int motion_y)
|
||||||
{
|
{
|
||||||
int cbpc, cbpy, pred_x, pred_y;
|
int cbpc, cbpy, pred_x, pred_y;
|
||||||
PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb;
|
PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb;
|
||||||
@ -884,7 +884,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
|
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
|
||||||
put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
|
put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
|
||||||
put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
|
put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
|
||||||
if(cbp) put_bits(&s->pb, 6, cbp);
|
if(cbp) put_bits(&s->pb, 6, cbp);
|
||||||
@ -998,7 +998,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
if(pic==NULL || pic->pict_type!=B_TYPE) break;
|
if(pic==NULL || pic->pict_type!=B_TYPE) break;
|
||||||
|
|
||||||
b_pic= pic->data[0] + offset + 16; //FIXME +16
|
b_pic= pic->data[0] + offset + 16; //FIXME +16
|
||||||
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
|
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
|
||||||
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
|
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
|
||||||
s->mb_skipped=0;
|
s->mb_skipped=0;
|
||||||
break;
|
break;
|
||||||
@ -1021,7 +1021,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
cbpc = cbp & 3;
|
cbpc = cbp & 3;
|
||||||
cbpy = cbp >> 2;
|
cbpy = cbp >> 2;
|
||||||
cbpy ^= 0xf;
|
cbpy ^= 0xf;
|
||||||
@ -1121,7 +1121,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
int dc_diff[6]; //dc values with the dc prediction subtracted
|
int dc_diff[6]; //dc values with the dc prediction subtracted
|
||||||
int dir[6]; //prediction direction
|
int dir[6]; //prediction direction
|
||||||
int zigzag_last_index[6];
|
int zigzag_last_index[6];
|
||||||
uint8_t *scan_table[6];
|
uint8_t *scan_table[6];
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for(i=0; i<6; i++){
|
for(i=0; i<6; i++){
|
||||||
@ -1152,7 +1152,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
intra_MCBPC_code[cbpc]);
|
intra_MCBPC_code[cbpc]);
|
||||||
} else {
|
} else {
|
||||||
if(s->dquant) cbpc+=8;
|
if(s->dquant) cbpc+=8;
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
inter_MCBPC_bits[cbpc + 4],
|
inter_MCBPC_bits[cbpc + 4],
|
||||||
inter_MCBPC_code[cbpc + 4]);
|
inter_MCBPC_code[cbpc + 4]);
|
||||||
@ -1185,8 +1185,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void h263_encode_mb(MpegEncContext * s,
|
void h263_encode_mb(MpegEncContext * s,
|
||||||
DCTELEM block[6][64],
|
DCTELEM block[6][64],
|
||||||
int motion_x, int motion_y)
|
int motion_x, int motion_y)
|
||||||
{
|
{
|
||||||
int cbpc, cbpy, i, cbp, pred_x, pred_y;
|
int cbpc, cbpy, i, cbp, pred_x, pred_y;
|
||||||
int16_t pred_dc;
|
int16_t pred_dc;
|
||||||
@ -1211,7 +1211,7 @@ void h263_encode_mb(MpegEncContext * s,
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
|
|
||||||
cbpc = cbp & 3;
|
cbpc = cbp & 3;
|
||||||
cbpy = cbp >> 2;
|
cbpy = cbp >> 2;
|
||||||
@ -1346,14 +1346,14 @@ void h263_encode_mb(MpegEncContext * s,
|
|||||||
intra_MCBPC_code[cbpc]);
|
intra_MCBPC_code[cbpc]);
|
||||||
} else {
|
} else {
|
||||||
if(s->dquant) cbpc+=8;
|
if(s->dquant) cbpc+=8;
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
inter_MCBPC_bits[cbpc + 4],
|
inter_MCBPC_bits[cbpc + 4],
|
||||||
inter_MCBPC_code[cbpc + 4]);
|
inter_MCBPC_code[cbpc + 4]);
|
||||||
}
|
}
|
||||||
if (s->h263_aic) {
|
if (s->h263_aic) {
|
||||||
/* XXX: currently, we do not try to use ac prediction */
|
/* XXX: currently, we do not try to use ac prediction */
|
||||||
put_bits(&s->pb, 1, 0); /* no AC prediction */
|
put_bits(&s->pb, 1, 0); /* no AC prediction */
|
||||||
}
|
}
|
||||||
cbpy = cbp >> 2;
|
cbpy = cbp >> 2;
|
||||||
put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
|
put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
|
||||||
@ -1796,7 +1796,7 @@ static void init_uni_dc_tab(void)
|
|||||||
v = abs(level);
|
v = abs(level);
|
||||||
while (v) {
|
while (v) {
|
||||||
v >>= 1;
|
v >>= 1;
|
||||||
size++;
|
size++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (level < 0)
|
if (level < 0)
|
||||||
@ -2318,14 +2318,14 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
|||||||
put_bits(&s->pb, 16, 0);
|
put_bits(&s->pb, 16, 0);
|
||||||
put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */
|
put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 0); /* random access vol */
|
put_bits(&s->pb, 1, 0); /* random access vol */
|
||||||
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
|
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
|
||||||
if(s->workaround_bugs & FF_BUG_MS) {
|
if(s->workaround_bugs & FF_BUG_MS) {
|
||||||
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
|
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
|
||||||
} else {
|
} else {
|
||||||
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
|
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
|
||||||
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
|
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
|
||||||
put_bits(&s->pb, 3, 1); /* is obj layer priority */
|
put_bits(&s->pb, 3, 1); /* is obj layer priority */
|
||||||
}
|
}
|
||||||
|
|
||||||
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
||||||
@ -2337,37 +2337,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(s->workaround_bugs & FF_BUG_MS) { //
|
if(s->workaround_bugs & FF_BUG_MS) { //
|
||||||
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
|
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
|
||||||
} else {
|
} else {
|
||||||
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
|
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
|
||||||
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
|
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
|
||||||
put_bits(&s->pb, 1, s->low_delay);
|
put_bits(&s->pb, 1, s->low_delay);
|
||||||
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
|
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
|
||||||
}
|
}
|
||||||
|
|
||||||
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
|
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
|
||||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||||
|
|
||||||
put_bits(&s->pb, 16, s->avctx->time_base.den);
|
put_bits(&s->pb, 16, s->avctx->time_base.den);
|
||||||
if (s->time_increment_bits < 1)
|
if (s->time_increment_bits < 1)
|
||||||
s->time_increment_bits = 1;
|
s->time_increment_bits = 1;
|
||||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||||
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
|
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
|
||||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||||
put_bits(&s->pb, 13, s->width); /* vol width */
|
put_bits(&s->pb, 13, s->width); /* vol width */
|
||||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||||
put_bits(&s->pb, 13, s->height); /* vol height */
|
put_bits(&s->pb, 13, s->height); /* vol height */
|
||||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||||
put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
|
put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
|
||||||
put_bits(&s->pb, 1, 1); /* obmc disable */
|
put_bits(&s->pb, 1, 1); /* obmc disable */
|
||||||
if (vo_ver_id == 1) {
|
if (vo_ver_id == 1) {
|
||||||
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
|
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
|
||||||
}else{
|
}else{
|
||||||
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
|
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
|
||||||
}
|
}
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
|
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
|
||||||
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
|
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
|
||||||
|
|
||||||
if(s->mpeg_quant){
|
if(s->mpeg_quant){
|
||||||
ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
|
ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
|
||||||
@ -2376,27 +2376,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
|||||||
|
|
||||||
if (vo_ver_id != 1)
|
if (vo_ver_id != 1)
|
||||||
put_bits(&s->pb, 1, s->quarter_sample);
|
put_bits(&s->pb, 1, s->quarter_sample);
|
||||||
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
|
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
|
||||||
s->resync_marker= s->rtp_mode;
|
s->resync_marker= s->rtp_mode;
|
||||||
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
|
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
|
||||||
put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
|
put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
|
||||||
if(s->data_partitioning){
|
if(s->data_partitioning){
|
||||||
put_bits(&s->pb, 1, 0); /* no rvlc */
|
put_bits(&s->pb, 1, 0); /* no rvlc */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vo_ver_id != 1){
|
if (vo_ver_id != 1){
|
||||||
put_bits(&s->pb, 1, 0); /* newpred */
|
put_bits(&s->pb, 1, 0); /* newpred */
|
||||||
put_bits(&s->pb, 1, 0); /* reduced res vop */
|
put_bits(&s->pb, 1, 0); /* reduced res vop */
|
||||||
}
|
}
|
||||||
put_bits(&s->pb, 1, 0); /* scalability */
|
put_bits(&s->pb, 1, 0); /* scalability */
|
||||||
|
|
||||||
ff_mpeg4_stuffing(&s->pb);
|
ff_mpeg4_stuffing(&s->pb);
|
||||||
|
|
||||||
/* user data */
|
/* user data */
|
||||||
if(!(s->flags & CODEC_FLAG_BITEXACT)){
|
if(!(s->flags & CODEC_FLAG_BITEXACT)){
|
||||||
put_bits(&s->pb, 16, 0);
|
put_bits(&s->pb, 16, 0);
|
||||||
put_bits(&s->pb, 16, 0x1B2); /* user_data */
|
put_bits(&s->pb, 16, 0x1B2); /* user_data */
|
||||||
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
|
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2421,9 +2421,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
|
|
||||||
//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
|
//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
|
||||||
|
|
||||||
put_bits(&s->pb, 16, 0); /* vop header */
|
put_bits(&s->pb, 16, 0); /* vop header */
|
||||||
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
|
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
|
||||||
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
|
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
|
||||||
|
|
||||||
assert(s->time>=0);
|
assert(s->time>=0);
|
||||||
time_div= s->time/s->avctx->time_base.den;
|
time_div= s->time/s->avctx->time_base.den;
|
||||||
@ -2435,15 +2435,15 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
|
|
||||||
put_bits(&s->pb, 1, 0);
|
put_bits(&s->pb, 1, 0);
|
||||||
|
|
||||||
put_bits(&s->pb, 1, 1); /* marker */
|
put_bits(&s->pb, 1, 1); /* marker */
|
||||||
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
|
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
|
||||||
put_bits(&s->pb, 1, 1); /* marker */
|
put_bits(&s->pb, 1, 1); /* marker */
|
||||||
put_bits(&s->pb, 1, 1); /* vop coded */
|
put_bits(&s->pb, 1, 1); /* vop coded */
|
||||||
if ( s->pict_type == P_TYPE
|
if ( s->pict_type == P_TYPE
|
||||||
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
|
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
|
||||||
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
|
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
|
||||||
}
|
}
|
||||||
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
|
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
|
||||||
if(!s->progressive_sequence){
|
if(!s->progressive_sequence){
|
||||||
put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
|
put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
|
||||||
put_bits(&s->pb, 1, s->alternate_scan);
|
put_bits(&s->pb, 1, s->alternate_scan);
|
||||||
@ -2453,9 +2453,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
|||||||
put_bits(&s->pb, 5, s->qscale);
|
put_bits(&s->pb, 5, s->qscale);
|
||||||
|
|
||||||
if (s->pict_type != I_TYPE)
|
if (s->pict_type != I_TYPE)
|
||||||
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
|
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
|
||||||
if (s->pict_type == B_TYPE)
|
if (s->pict_type == B_TYPE)
|
||||||
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
|
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
|
||||||
// printf("****frame %d\n", picture_number);
|
// printf("****frame %d\n", picture_number);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2492,9 +2492,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
|
|||||||
|
|
||||||
/* find prediction */
|
/* find prediction */
|
||||||
if (n < 4) {
|
if (n < 4) {
|
||||||
scale = s->y_dc_scale;
|
scale = s->y_dc_scale;
|
||||||
} else {
|
} else {
|
||||||
scale = s->c_dc_scale;
|
scale = s->c_dc_scale;
|
||||||
}
|
}
|
||||||
if(IS_3IV1)
|
if(IS_3IV1)
|
||||||
scale= 8;
|
scale= 8;
|
||||||
@ -2520,10 +2520,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (abs(a - b) < abs(b - c)) {
|
if (abs(a - b) < abs(b - c)) {
|
||||||
pred = c;
|
pred = c;
|
||||||
*dir_ptr = 1; /* top */
|
*dir_ptr = 1; /* top */
|
||||||
} else {
|
} else {
|
||||||
pred = a;
|
pred = a;
|
||||||
*dir_ptr = 0; /* left */
|
*dir_ptr = 0; /* left */
|
||||||
}
|
}
|
||||||
/* we assume pred is positive */
|
/* we assume pred is positive */
|
||||||
@ -2629,11 +2629,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
|
|||||||
// if(level<-255 || level>255) printf("dc overflow\n");
|
// if(level<-255 || level>255) printf("dc overflow\n");
|
||||||
level+=256;
|
level+=256;
|
||||||
if (n < 4) {
|
if (n < 4) {
|
||||||
/* luminance */
|
/* luminance */
|
||||||
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
|
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
|
||||||
} else {
|
} else {
|
||||||
/* chrominance */
|
/* chrominance */
|
||||||
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
|
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
int size, v;
|
int size, v;
|
||||||
@ -2641,25 +2641,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
|
|||||||
size = 0;
|
size = 0;
|
||||||
v = abs(level);
|
v = abs(level);
|
||||||
while (v) {
|
while (v) {
|
||||||
v >>= 1;
|
v >>= 1;
|
||||||
size++;
|
size++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n < 4) {
|
if (n < 4) {
|
||||||
/* luminance */
|
/* luminance */
|
||||||
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
|
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
|
||||||
} else {
|
} else {
|
||||||
/* chrominance */
|
/* chrominance */
|
||||||
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
|
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* encode remaining bits */
|
/* encode remaining bits */
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
if (level < 0)
|
if (level < 0)
|
||||||
level = (-level) ^ ((1 << size) - 1);
|
level = (-level) ^ ((1 << size) - 1);
|
||||||
put_bits(&s->pb, size, level);
|
put_bits(&s->pb, size, level);
|
||||||
if (size > 8)
|
if (size > 8)
|
||||||
put_bits(&s->pb, 1, 1);
|
put_bits(&s->pb, 1, 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -2689,16 +2689,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
|||||||
const int last_index = s->block_last_index[n];
|
const int last_index = s->block_last_index[n];
|
||||||
|
|
||||||
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
||||||
/* mpeg4 based DC predictor */
|
/* mpeg4 based DC predictor */
|
||||||
mpeg4_encode_dc(dc_pb, intra_dc, n);
|
mpeg4_encode_dc(dc_pb, intra_dc, n);
|
||||||
if(last_index<1) return;
|
if(last_index<1) return;
|
||||||
i = 1;
|
i = 1;
|
||||||
rl = &rl_intra;
|
rl = &rl_intra;
|
||||||
bits_tab= uni_mpeg4_intra_rl_bits;
|
bits_tab= uni_mpeg4_intra_rl_bits;
|
||||||
len_tab = uni_mpeg4_intra_rl_len;
|
len_tab = uni_mpeg4_intra_rl_len;
|
||||||
} else {
|
} else {
|
||||||
if(last_index<0) return;
|
if(last_index<0) return;
|
||||||
i = 0;
|
i = 0;
|
||||||
rl = &rl_inter;
|
rl = &rl_inter;
|
||||||
bits_tab= uni_mpeg4_inter_rl_bits;
|
bits_tab= uni_mpeg4_inter_rl_bits;
|
||||||
len_tab = uni_mpeg4_inter_rl_len;
|
len_tab = uni_mpeg4_inter_rl_len;
|
||||||
@ -2708,9 +2708,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
|||||||
last_non_zero = i - 1;
|
last_non_zero = i - 1;
|
||||||
#if 1
|
#if 1
|
||||||
for (; i < last_index; i++) {
|
for (; i < last_index; i++) {
|
||||||
int level = block[ scan_table[i] ];
|
int level = block[ scan_table[i] ];
|
||||||
if (level) {
|
if (level) {
|
||||||
int run = i - last_non_zero - 1;
|
int run = i - last_non_zero - 1;
|
||||||
level+=64;
|
level+=64;
|
||||||
if((level&(~127)) == 0){
|
if((level&(~127)) == 0){
|
||||||
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
||||||
@ -2718,11 +2718,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
|||||||
}else{ //ESC3
|
}else{ //ESC3
|
||||||
put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
|
put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
|
||||||
}
|
}
|
||||||
last_non_zero = i;
|
last_non_zero = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*if(i<=last_index)*/{
|
/*if(i<=last_index)*/{
|
||||||
int level = block[ scan_table[i] ];
|
int level = block[ scan_table[i] ];
|
||||||
int run = i - last_non_zero - 1;
|
int run = i - last_non_zero - 1;
|
||||||
level+=64;
|
level+=64;
|
||||||
if((level&(~127)) == 0){
|
if((level&(~127)) == 0){
|
||||||
@ -2734,17 +2734,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (; i <= last_index; i++) {
|
for (; i <= last_index; i++) {
|
||||||
const int slevel = block[ scan_table[i] ];
|
const int slevel = block[ scan_table[i] ];
|
||||||
if (slevel) {
|
if (slevel) {
|
||||||
int level;
|
int level;
|
||||||
int run = i - last_non_zero - 1;
|
int run = i - last_non_zero - 1;
|
||||||
last = (i == last_index);
|
last = (i == last_index);
|
||||||
sign = 0;
|
sign = 0;
|
||||||
level = slevel;
|
level = slevel;
|
||||||
if (level < 0) {
|
if (level < 0) {
|
||||||
sign = 1;
|
sign = 1;
|
||||||
level = -level;
|
level = -level;
|
||||||
}
|
}
|
||||||
code = get_rl_index(rl, last, run, level);
|
code = get_rl_index(rl, last, run, level);
|
||||||
put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
|
put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
|
||||||
if (code == rl->n) {
|
if (code == rl->n) {
|
||||||
@ -2786,8 +2786,8 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
|||||||
} else {
|
} else {
|
||||||
put_bits(ac_pb, 1, sign);
|
put_bits(ac_pb, 1, sign);
|
||||||
}
|
}
|
||||||
last_non_zero = i;
|
last_non_zero = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -2802,15 +2802,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
|||||||
int len=0;
|
int len=0;
|
||||||
|
|
||||||
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
||||||
/* mpeg4 based DC predictor */
|
/* mpeg4 based DC predictor */
|
||||||
len += mpeg4_get_dc_length(intra_dc, n);
|
len += mpeg4_get_dc_length(intra_dc, n);
|
||||||
if(last_index<1) return len;
|
if(last_index<1) return len;
|
||||||
i = 1;
|
i = 1;
|
||||||
rl = &rl_intra;
|
rl = &rl_intra;
|
||||||
len_tab = uni_mpeg4_intra_rl_len;
|
len_tab = uni_mpeg4_intra_rl_len;
|
||||||
} else {
|
} else {
|
||||||
if(last_index<0) return 0;
|
if(last_index<0) return 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
rl = &rl_inter;
|
rl = &rl_inter;
|
||||||
len_tab = uni_mpeg4_inter_rl_len;
|
len_tab = uni_mpeg4_inter_rl_len;
|
||||||
}
|
}
|
||||||
@ -2818,9 +2818,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
|||||||
/* AC coefs */
|
/* AC coefs */
|
||||||
last_non_zero = i - 1;
|
last_non_zero = i - 1;
|
||||||
for (; i < last_index; i++) {
|
for (; i < last_index; i++) {
|
||||||
int level = block[ scan_table[i] ];
|
int level = block[ scan_table[i] ];
|
||||||
if (level) {
|
if (level) {
|
||||||
int run = i - last_non_zero - 1;
|
int run = i - last_non_zero - 1;
|
||||||
level+=64;
|
level+=64;
|
||||||
if((level&(~127)) == 0){
|
if((level&(~127)) == 0){
|
||||||
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
||||||
@ -2828,11 +2828,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
|||||||
}else{ //ESC3
|
}else{ //ESC3
|
||||||
len += 7+2+1+6+1+12+1;
|
len += 7+2+1+6+1+12+1;
|
||||||
}
|
}
|
||||||
last_non_zero = i;
|
last_non_zero = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*if(i<=last_index)*/{
|
/*if(i<=last_index)*/{
|
||||||
int level = block[ scan_table[i] ];
|
int level = block[ scan_table[i] ];
|
||||||
int run = i - last_non_zero - 1;
|
int run = i - last_non_zero - 1;
|
||||||
level+=64;
|
level+=64;
|
||||||
if((level&(~127)) == 0){
|
if((level&(~127)) == 0){
|
||||||
@ -3251,7 +3251,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
|
|||||||
//FIXME reduced res stuff here
|
//FIXME reduced res stuff here
|
||||||
|
|
||||||
if (s->pict_type != I_TYPE) {
|
if (s->pict_type != I_TYPE) {
|
||||||
int f_code = get_bits(&s->gb, 3); /* fcode_for */
|
int f_code = get_bits(&s->gb, 3); /* fcode_for */
|
||||||
if(f_code==0){
|
if(f_code==0){
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
|
||||||
}
|
}
|
||||||
@ -4741,7 +4741,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
|||||||
|
|
||||||
if(intra) {
|
if(intra) {
|
||||||
if(s->qscale < s->intra_dc_threshold){
|
if(s->qscale < s->intra_dc_threshold){
|
||||||
/* DC coef */
|
/* DC coef */
|
||||||
if(s->partitioned_frame){
|
if(s->partitioned_frame){
|
||||||
level = s->dc_val[0][ s->block_index[n] ];
|
level = s->dc_val[0][ s->block_index[n] ];
|
||||||
if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
|
if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
|
||||||
@ -4898,7 +4898,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (level>0) level= level * qmul + qadd;
|
if (level>0) level= level * qmul + qadd;
|
||||||
else level= level * qmul - qadd;
|
else level= level * qmul - qadd;
|
||||||
|
|
||||||
if((unsigned)(level + 2048) > 4095){
|
if((unsigned)(level + 2048) > 4095){
|
||||||
@ -5014,18 +5014,18 @@ int h263_decode_picture_header(MpegEncContext *s)
|
|||||||
}
|
}
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
||||||
return -1; /* h263 id */
|
return -1; /* h263 id */
|
||||||
}
|
}
|
||||||
skip_bits1(&s->gb); /* split screen off */
|
skip_bits1(&s->gb); /* split screen off */
|
||||||
skip_bits1(&s->gb); /* camera off */
|
skip_bits1(&s->gb); /* camera off */
|
||||||
skip_bits1(&s->gb); /* freeze picture release off */
|
skip_bits1(&s->gb); /* freeze picture release off */
|
||||||
|
|
||||||
format = get_bits(&s->gb, 3);
|
format = get_bits(&s->gb, 3);
|
||||||
/*
|
/*
|
||||||
0 forbidden
|
0 forbidden
|
||||||
1 sub-QCIF
|
1 sub-QCIF
|
||||||
10 QCIF
|
10 QCIF
|
||||||
7 extended PTYPE (PLUSPTYPE)
|
7 extended PTYPE (PLUSPTYPE)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (format != 7 && format != 6) {
|
if (format != 7 && format != 6) {
|
||||||
@ -5042,17 +5042,17 @@ int h263_decode_picture_header(MpegEncContext *s)
|
|||||||
|
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
|
av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
|
||||||
return -1; /* SAC: off */
|
return -1; /* SAC: off */
|
||||||
}
|
}
|
||||||
s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
|
s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
|
||||||
s->unrestricted_mv = s->h263_long_vectors || s->obmc;
|
s->unrestricted_mv = s->h263_long_vectors || s->obmc;
|
||||||
|
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
|
av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
|
||||||
return -1; /* not PB frame */
|
return -1; /* not PB frame */
|
||||||
}
|
}
|
||||||
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
||||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||||
|
|
||||||
s->width = width;
|
s->width = width;
|
||||||
s->height = height;
|
s->height = height;
|
||||||
@ -5511,17 +5511,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
}
|
}
|
||||||
s->low_delay= get_bits1(gb);
|
s->low_delay= get_bits1(gb);
|
||||||
if(get_bits1(gb)){ /* vbv parameters */
|
if(get_bits1(gb)){ /* vbv parameters */
|
||||||
get_bits(gb, 15); /* first_half_bitrate */
|
get_bits(gb, 15); /* first_half_bitrate */
|
||||||
skip_bits1(gb); /* marker */
|
skip_bits1(gb); /* marker */
|
||||||
get_bits(gb, 15); /* latter_half_bitrate */
|
get_bits(gb, 15); /* latter_half_bitrate */
|
||||||
skip_bits1(gb); /* marker */
|
skip_bits1(gb); /* marker */
|
||||||
get_bits(gb, 15); /* first_half_vbv_buffer_size */
|
get_bits(gb, 15); /* first_half_vbv_buffer_size */
|
||||||
skip_bits1(gb); /* marker */
|
skip_bits1(gb); /* marker */
|
||||||
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
|
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
|
||||||
get_bits(gb, 11); /* first_half_vbv_occupancy */
|
get_bits(gb, 11); /* first_half_vbv_occupancy */
|
||||||
skip_bits1(gb); /* marker */
|
skip_bits1(gb); /* marker */
|
||||||
get_bits(gb, 15); /* latter_half_vbv_occupancy */
|
get_bits(gb, 15); /* latter_half_vbv_occupancy */
|
||||||
skip_bits1(gb); /* marker */
|
skip_bits1(gb); /* marker */
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
// set low delay flag only once the smartest? low delay detection won't be overriden
|
// set low delay flag only once the smartest? low delay detection won't be overriden
|
||||||
@ -5628,7 +5628,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
/* load custom intra matrix */
|
/* load custom intra matrix */
|
||||||
if(get_bits1(gb)){
|
if(get_bits1(gb)){
|
||||||
int last=0;
|
int last=0;
|
||||||
for(i=0; i<64; i++){
|
for(i=0; i<64; i++){
|
||||||
int j;
|
int j;
|
||||||
v= get_bits(gb, 8);
|
v= get_bits(gb, 8);
|
||||||
if(v==0) break;
|
if(v==0) break;
|
||||||
@ -5641,7 +5641,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
|
|
||||||
/* replicate last value */
|
/* replicate last value */
|
||||||
for(; i<64; i++){
|
for(; i<64; i++){
|
||||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||||
s->intra_matrix[j]= last;
|
s->intra_matrix[j]= last;
|
||||||
s->chroma_intra_matrix[j]= last;
|
s->chroma_intra_matrix[j]= last;
|
||||||
}
|
}
|
||||||
@ -5650,7 +5650,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
/* load custom non intra matrix */
|
/* load custom non intra matrix */
|
||||||
if(get_bits1(gb)){
|
if(get_bits1(gb)){
|
||||||
int last=0;
|
int last=0;
|
||||||
for(i=0; i<64; i++){
|
for(i=0; i<64; i++){
|
||||||
int j;
|
int j;
|
||||||
v= get_bits(gb, 8);
|
v= get_bits(gb, 8);
|
||||||
if(v==0) break;
|
if(v==0) break;
|
||||||
@ -5663,7 +5663,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
|
|
||||||
/* replicate last value */
|
/* replicate last value */
|
||||||
for(; i<64; i++){
|
for(; i<64; i++){
|
||||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||||
s->inter_matrix[j]= last;
|
s->inter_matrix[j]= last;
|
||||||
s->chroma_inter_matrix[j]= last;
|
s->chroma_inter_matrix[j]= last;
|
||||||
}
|
}
|
||||||
@ -5794,7 +5794,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
|
|||||||
static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
||||||
int time_incr, time_increment;
|
int time_incr, time_increment;
|
||||||
|
|
||||||
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
|
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
|
||||||
if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
|
if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
|
av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
|
||||||
s->low_delay=0;
|
s->low_delay=0;
|
||||||
@ -5877,9 +5877,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
|
if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
|
||||||
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
|
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
|
||||||
/* rounding type for motion estimation */
|
/* rounding type for motion estimation */
|
||||||
s->no_rounding = get_bits1(gb);
|
s->no_rounding = get_bits1(gb);
|
||||||
} else {
|
} else {
|
||||||
s->no_rounding = 0;
|
s->no_rounding = 0;
|
||||||
}
|
}
|
||||||
//FIXME reduced res stuff
|
//FIXME reduced res stuff
|
||||||
|
|
||||||
@ -5938,7 +5938,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s->pict_type != I_TYPE) {
|
if (s->pict_type != I_TYPE) {
|
||||||
s->f_code = get_bits(gb, 3); /* fcode_for */
|
s->f_code = get_bits(gb, 3); /* fcode_for */
|
||||||
if(s->f_code==0){
|
if(s->f_code==0){
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
|
||||||
return -1; // makes no sense to continue, as the MV decoding will break very quickly
|
return -1; // makes no sense to continue, as the MV decoding will break very quickly
|
||||||
@ -6094,15 +6094,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
|
|||||||
|
|
||||||
if (get_bits1(&s->gb) != 1) {
|
if (get_bits1(&s->gb) != 1) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
|
||||||
return -1; /* marker */
|
return -1; /* marker */
|
||||||
}
|
}
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
||||||
return -1; /* h263 id */
|
return -1; /* h263 id */
|
||||||
}
|
}
|
||||||
skip_bits1(&s->gb); /* split screen off */
|
skip_bits1(&s->gb); /* split screen off */
|
||||||
skip_bits1(&s->gb); /* camera off */
|
skip_bits1(&s->gb); /* camera off */
|
||||||
skip_bits1(&s->gb); /* freeze picture release off */
|
skip_bits1(&s->gb); /* freeze picture release off */
|
||||||
|
|
||||||
format = get_bits(&s->gb, 3);
|
format = get_bits(&s->gb, 3);
|
||||||
if (format != 7) {
|
if (format != 7) {
|
||||||
@ -6118,23 +6118,23 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
|
|||||||
|
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
|
av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
|
||||||
return -1; /* SAC: off */
|
return -1; /* SAC: off */
|
||||||
}
|
}
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
s->obmc= 1;
|
s->obmc= 1;
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
|
av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
|
||||||
// return -1; /* advanced prediction mode: off */
|
// return -1; /* advanced prediction mode: off */
|
||||||
}
|
}
|
||||||
if (get_bits1(&s->gb) != 0) {
|
if (get_bits1(&s->gb) != 0) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
|
av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
|
||||||
return -1; /* PB frame mode */
|
return -1; /* PB frame mode */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* skip unknown header garbage */
|
/* skip unknown header garbage */
|
||||||
skip_bits(&s->gb, 41);
|
skip_bits(&s->gb, 41);
|
||||||
|
|
||||||
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
||||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||||
|
|
||||||
/* PEI */
|
/* PEI */
|
||||||
while (get_bits1(&s->gb) != 0) {
|
while (get_bits1(&s->gb) != 0) {
|
||||||
@ -6208,7 +6208,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
|
|||||||
if (s->dropable)
|
if (s->dropable)
|
||||||
s->pict_type = P_TYPE;
|
s->pict_type = P_TYPE;
|
||||||
|
|
||||||
skip_bits1(&s->gb); /* deblocking flag */
|
skip_bits1(&s->gb); /* deblocking flag */
|
||||||
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
||||||
|
|
||||||
s->h263_plus = 0;
|
s->h263_plus = 0;
|
||||||
|
@ -147,15 +147,15 @@ typedef struct H264Context{
|
|||||||
MpegEncContext s;
|
MpegEncContext s;
|
||||||
int nal_ref_idc;
|
int nal_ref_idc;
|
||||||
int nal_unit_type;
|
int nal_unit_type;
|
||||||
#define NAL_SLICE 1
|
#define NAL_SLICE 1
|
||||||
#define NAL_DPA 2
|
#define NAL_DPA 2
|
||||||
#define NAL_DPB 3
|
#define NAL_DPB 3
|
||||||
#define NAL_DPC 4
|
#define NAL_DPC 4
|
||||||
#define NAL_IDR_SLICE 5
|
#define NAL_IDR_SLICE 5
|
||||||
#define NAL_SEI 6
|
#define NAL_SEI 6
|
||||||
#define NAL_SPS 7
|
#define NAL_SPS 7
|
||||||
#define NAL_PPS 8
|
#define NAL_PPS 8
|
||||||
#define NAL_AUD 9
|
#define NAL_AUD 9
|
||||||
#define NAL_END_SEQUENCE 10
|
#define NAL_END_SEQUENCE 10
|
||||||
#define NAL_END_STREAM 11
|
#define NAL_END_STREAM 11
|
||||||
#define NAL_FILLER_DATA 12
|
#define NAL_FILLER_DATA 12
|
||||||
@ -1461,7 +1461,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
|
|||||||
int i, si, di;
|
int i, si, di;
|
||||||
uint8_t *dst;
|
uint8_t *dst;
|
||||||
|
|
||||||
// src[0]&0x80; //forbidden bit
|
// src[0]&0x80; //forbidden bit
|
||||||
h->nal_ref_idc= src[0]>>5;
|
h->nal_ref_idc= src[0]>>5;
|
||||||
h->nal_unit_type= src[0]&0x1F;
|
h->nal_unit_type= src[0]&0x1F;
|
||||||
|
|
||||||
@ -7545,8 +7545,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
|
|||||||
case NAL_SPS_EXT:
|
case NAL_SPS_EXT:
|
||||||
case NAL_AUXILIARY_SLICE:
|
case NAL_AUXILIARY_SLICE:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
|
av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
|
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
|
||||||
#define cpuid(index,eax,ebx,ecx,edx)\
|
#define cpuid(index,eax,ebx,ecx,edx)\
|
||||||
__asm __volatile\
|
__asm __volatile\
|
||||||
("mov %%"REG_b", %%"REG_S"\n\t"\
|
("mov %%"REG_b", %%"REG_S"\n\t"\
|
||||||
"cpuid\n\t"\
|
"cpuid\n\t"\
|
||||||
"xchg %%"REG_b", %%"REG_S\
|
"xchg %%"REG_b", %%"REG_S\
|
||||||
: "=a" (eax), "=S" (ebx),\
|
: "=a" (eax), "=S" (ebx),\
|
||||||
@ -89,8 +89,8 @@ int mm_support(void)
|
|||||||
edx == 0x48727561 &&
|
edx == 0x48727561 &&
|
||||||
ecx == 0x736c7561) { /* "CentaurHauls" */
|
ecx == 0x736c7561) { /* "CentaurHauls" */
|
||||||
/* VIA C3 */
|
/* VIA C3 */
|
||||||
if(ext_caps & (1<<24))
|
if(ext_caps & (1<<24))
|
||||||
rval |= MM_MMXEXT;
|
rval |= MM_MMXEXT;
|
||||||
} else if (ebx == 0x69727943 &&
|
} else if (ebx == 0x69727943 &&
|
||||||
edx == 0x736e4978 &&
|
edx == 0x736e4978 &&
|
||||||
ecx == 0x64616574) {
|
ecx == 0x64616574) {
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
|
|||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"lea (%3, %3), %%"REG_a" \n\t"
|
"lea (%3, %3), %%"REG_a" \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm1 \n\t"
|
"movq 1(%1), %%mm1 \n\t"
|
||||||
"movq (%1, %3), %%mm2 \n\t"
|
"movq (%1, %3), %%mm2 \n\t"
|
||||||
"movq 1(%1, %3), %%mm3 \n\t"
|
"movq 1(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm1 \n\t"
|
"movq 1(%1), %%mm1 \n\t"
|
||||||
"movq (%1, %3), %%mm2 \n\t"
|
"movq (%1, %3), %%mm2 \n\t"
|
||||||
"movq 1(%1, %3), %%mm3 \n\t"
|
"movq 1(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"subl $4, %0 \n\t"
|
"subl $4, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels), "+D"(block)
|
:"+g"(h), "+S"(pixels), "+D"(block)
|
||||||
:"r"((long)line_size)
|
:"r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"testl $1, %0 \n\t"
|
"testl $1, %0 \n\t"
|
||||||
" jz 1f \n\t"
|
" jz 1f \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq (%2), %%mm1 \n\t"
|
"movq (%2), %%mm1 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
"add $8, %2 \n\t"
|
"add $8, %2 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"decl %0 \n\t"
|
"decl %0 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq (%2), %%mm1 \n\t"
|
"movq (%2), %%mm1 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
"movq (%1), %%mm2 \n\t"
|
"movq (%1), %%mm2 \n\t"
|
||||||
"movq 8(%2), %%mm3 \n\t"
|
"movq 8(%2), %%mm3 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"movq %%mm5, (%3) \n\t"
|
"movq %%mm5, (%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 16(%2), %%mm1 \n\t"
|
"movq 16(%2), %%mm1 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
"movq (%1), %%mm2 \n\t"
|
"movq (%1), %%mm2 \n\t"
|
||||||
"movq 24(%2), %%mm3 \n\t"
|
"movq 24(%2), %%mm3 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
"add $32, %2 \n\t"
|
"add $32, %2 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"movq %%mm5, (%3) \n\t"
|
"movq %%mm5, (%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"subl $4, %0 \n\t"
|
"subl $4, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
|
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
|
||||||
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
||||||
#else
|
#else
|
||||||
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
||||||
#endif
|
#endif
|
||||||
:"S"((long)src1Stride), "D"((long)dstStride)
|
:"S"((long)src1Stride), "D"((long)dstStride)
|
||||||
:"memory");
|
:"memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"lea (%3, %3), %%"REG_a" \n\t"
|
"lea (%3, %3), %%"REG_a" \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm1 \n\t"
|
"movq 1(%1), %%mm1 \n\t"
|
||||||
"movq (%1, %3), %%mm2 \n\t"
|
"movq (%1, %3), %%mm2 \n\t"
|
||||||
"movq 1(%1, %3), %%mm3 \n\t"
|
"movq 1(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"movq 8(%1), %%mm0 \n\t"
|
"movq 8(%1), %%mm0 \n\t"
|
||||||
"movq 9(%1), %%mm1 \n\t"
|
"movq 9(%1), %%mm1 \n\t"
|
||||||
"movq 8(%1, %3), %%mm2 \n\t"
|
"movq 8(%1, %3), %%mm2 \n\t"
|
||||||
"movq 9(%1, %3), %%mm3 \n\t"
|
"movq 9(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, 8(%2) \n\t"
|
"movq %%mm4, 8(%2) \n\t"
|
||||||
"movq %%mm5, 8(%2, %3) \n\t"
|
"movq %%mm5, 8(%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm1 \n\t"
|
"movq 1(%1), %%mm1 \n\t"
|
||||||
"movq (%1, %3), %%mm2 \n\t"
|
"movq (%1, %3), %%mm2 \n\t"
|
||||||
"movq 1(%1, %3), %%mm3 \n\t"
|
"movq 1(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"movq 8(%1), %%mm0 \n\t"
|
"movq 8(%1), %%mm0 \n\t"
|
||||||
"movq 9(%1), %%mm1 \n\t"
|
"movq 9(%1), %%mm1 \n\t"
|
||||||
"movq 8(%1, %3), %%mm2 \n\t"
|
"movq 8(%1, %3), %%mm2 \n\t"
|
||||||
"movq 9(%1, %3), %%mm3 \n\t"
|
"movq 9(%1, %3), %%mm3 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, 8(%2) \n\t"
|
"movq %%mm4, 8(%2) \n\t"
|
||||||
"movq %%mm5, 8(%2, %3) \n\t"
|
"movq %%mm5, 8(%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"subl $4, %0 \n\t"
|
"subl $4, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels), "+D"(block)
|
:"+g"(h), "+S"(pixels), "+D"(block)
|
||||||
:"r"((long)line_size)
|
:"r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"testl $1, %0 \n\t"
|
"testl $1, %0 \n\t"
|
||||||
" jz 1f \n\t"
|
" jz 1f \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq (%2), %%mm1 \n\t"
|
"movq (%2), %%mm1 \n\t"
|
||||||
"movq 8(%1), %%mm2 \n\t"
|
"movq 8(%1), %%mm2 \n\t"
|
||||||
"movq 8(%2), %%mm3 \n\t"
|
"movq 8(%2), %%mm3 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
"add $16, %2 \n\t"
|
"add $16, %2 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"movq %%mm5, 8(%3) \n\t"
|
"movq %%mm5, 8(%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"decl %0 \n\t"
|
"decl %0 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq (%2), %%mm1 \n\t"
|
"movq (%2), %%mm1 \n\t"
|
||||||
"movq 8(%1), %%mm2 \n\t"
|
"movq 8(%1), %%mm2 \n\t"
|
||||||
"movq 8(%2), %%mm3 \n\t"
|
"movq 8(%2), %%mm3 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"movq %%mm5, 8(%3) \n\t"
|
"movq %%mm5, 8(%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 16(%2), %%mm1 \n\t"
|
"movq 16(%2), %%mm1 \n\t"
|
||||||
"movq 8(%1), %%mm2 \n\t"
|
"movq 8(%1), %%mm2 \n\t"
|
||||||
"movq 24(%2), %%mm3 \n\t"
|
"movq 24(%2), %%mm3 \n\t"
|
||||||
"add %4, %1 \n\t"
|
"add %4, %1 \n\t"
|
||||||
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
|
||||||
"movq %%mm4, (%3) \n\t"
|
"movq %%mm4, (%3) \n\t"
|
||||||
"movq %%mm5, 8(%3) \n\t"
|
"movq %%mm5, 8(%3) \n\t"
|
||||||
"add %5, %3 \n\t"
|
"add %5, %3 \n\t"
|
||||||
"add $32, %2 \n\t"
|
"add $32, %2 \n\t"
|
||||||
"subl $2, %0 \n\t"
|
"subl $2, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
|
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
|
||||||
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
||||||
#else
|
#else
|
||||||
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
|
||||||
#endif
|
#endif
|
||||||
:"S"((long)src1Stride), "D"((long)dstStride)
|
:"S"((long)src1Stride), "D"((long)dstStride)
|
||||||
:"memory");
|
:"memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"lea (%3, %3), %%"REG_a" \n\t"
|
"lea (%3, %3), %%"REG_a" \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %3), %%mm1 \n\t"
|
"movq (%1, %3), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"),%%mm2 \n\t"
|
"movq (%1, %%"REG_a"),%%mm2 \n\t"
|
||||||
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
|
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"movq (%1, %3), %%mm1 \n\t"
|
"movq (%1, %3), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"),%%mm0 \n\t"
|
"movq (%1, %%"REG_a"),%%mm0 \n\t"
|
||||||
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
|
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
|
||||||
"movq %%mm4, (%2) \n\t"
|
"movq %%mm4, (%2) \n\t"
|
||||||
"movq %%mm5, (%2, %3) \n\t"
|
"movq %%mm5, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
"subl $4, %0 \n\t"
|
"subl $4, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels), "+D"(block)
|
:"+g"(h), "+S"(pixels), "+D"(block)
|
||||||
:"r"((long)line_size)
|
:"r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
|
|||||||
MOVQ_ZERO(mm7);
|
MOVQ_ZERO(mm7);
|
||||||
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
|
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm4 \n\t"
|
"movq 1(%1), %%mm4 \n\t"
|
||||||
"movq %%mm0, %%mm1 \n\t"
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
"movq %%mm4, %%mm5 \n\t"
|
"movq %%mm4, %%mm5 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm5 \n\t"
|
"punpckhbw %%mm7, %%mm5 \n\t"
|
||||||
"paddusw %%mm0, %%mm4 \n\t"
|
"paddusw %%mm0, %%mm4 \n\t"
|
||||||
"paddusw %%mm1, %%mm5 \n\t"
|
"paddusw %%mm1, %%mm5 \n\t"
|
||||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||||
"add %3, %1 \n\t"
|
"add %3, %1 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq %%mm0, %%mm1 \n\t"
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
"movq %%mm2, %%mm3 \n\t"
|
"movq %%mm2, %%mm3 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||||
"paddusw %%mm2, %%mm0 \n\t"
|
"paddusw %%mm2, %%mm0 \n\t"
|
||||||
"paddusw %%mm3, %%mm1 \n\t"
|
"paddusw %%mm3, %%mm1 \n\t"
|
||||||
"paddusw %%mm6, %%mm4 \n\t"
|
"paddusw %%mm6, %%mm4 \n\t"
|
||||||
"paddusw %%mm6, %%mm5 \n\t"
|
"paddusw %%mm6, %%mm5 \n\t"
|
||||||
"paddusw %%mm0, %%mm4 \n\t"
|
"paddusw %%mm0, %%mm4 \n\t"
|
||||||
"paddusw %%mm1, %%mm5 \n\t"
|
"paddusw %%mm1, %%mm5 \n\t"
|
||||||
"psrlw $2, %%mm4 \n\t"
|
"psrlw $2, %%mm4 \n\t"
|
||||||
"psrlw $2, %%mm5 \n\t"
|
"psrlw $2, %%mm5 \n\t"
|
||||||
"packuswb %%mm5, %%mm4 \n\t"
|
"packuswb %%mm5, %%mm4 \n\t"
|
||||||
"movq %%mm4, (%2, %%"REG_a") \n\t"
|
"movq %%mm4, (%2, %%"REG_a") \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
|
|
||||||
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
|
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
|
||||||
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq %%mm2, %%mm3 \n\t"
|
"movq %%mm2, %%mm3 \n\t"
|
||||||
"movq %%mm4, %%mm5 \n\t"
|
"movq %%mm4, %%mm5 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm5 \n\t"
|
"punpckhbw %%mm7, %%mm5 \n\t"
|
||||||
"paddusw %%mm2, %%mm4 \n\t"
|
"paddusw %%mm2, %%mm4 \n\t"
|
||||||
"paddusw %%mm3, %%mm5 \n\t"
|
"paddusw %%mm3, %%mm5 \n\t"
|
||||||
"paddusw %%mm6, %%mm0 \n\t"
|
"paddusw %%mm6, %%mm0 \n\t"
|
||||||
"paddusw %%mm6, %%mm1 \n\t"
|
"paddusw %%mm6, %%mm1 \n\t"
|
||||||
"paddusw %%mm4, %%mm0 \n\t"
|
"paddusw %%mm4, %%mm0 \n\t"
|
||||||
"paddusw %%mm5, %%mm1 \n\t"
|
"paddusw %%mm5, %%mm1 \n\t"
|
||||||
"psrlw $2, %%mm0 \n\t"
|
"psrlw $2, %%mm0 \n\t"
|
||||||
"psrlw $2, %%mm1 \n\t"
|
"psrlw $2, %%mm1 \n\t"
|
||||||
"packuswb %%mm1, %%mm0 \n\t"
|
"packuswb %%mm1, %%mm0 \n\t"
|
||||||
"movq %%mm0, (%2, %%"REG_a") \n\t"
|
"movq %%mm0, (%2, %%"REG_a") \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
|
|
||||||
"subl $2, %0 \n\t"
|
"subl $2, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels)
|
:"+g"(h), "+S"(pixels)
|
||||||
:"D"(block), "r"((long)line_size)
|
:"D"(block), "r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
// avg_pixels
|
// avg_pixels
|
||||||
@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movd %0, %%mm0 \n\t"
|
"movd %0, %%mm0 \n\t"
|
||||||
"movd %1, %%mm1 \n\t"
|
"movd %1, %%mm1 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
"movd %%mm2, %0 \n\t"
|
"movd %%mm2, %0 \n\t"
|
||||||
:"+m"(*block)
|
:"+m"(*block)
|
||||||
:"m"(*pixels)
|
:"m"(*pixels)
|
||||||
:"memory");
|
:"memory");
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
}
|
}
|
||||||
while (--h);
|
while (--h);
|
||||||
}
|
}
|
||||||
@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %0, %%mm0 \n\t"
|
"movq %0, %%mm0 \n\t"
|
||||||
"movq %1, %%mm1 \n\t"
|
"movq %1, %%mm1 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
"movq %%mm2, %0 \n\t"
|
"movq %%mm2, %0 \n\t"
|
||||||
:"+m"(*block)
|
:"+m"(*block)
|
||||||
:"m"(*pixels)
|
:"m"(*pixels)
|
||||||
:"memory");
|
:"memory");
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
}
|
}
|
||||||
while (--h);
|
while (--h);
|
||||||
}
|
}
|
||||||
@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %0, %%mm0 \n\t"
|
"movq %0, %%mm0 \n\t"
|
||||||
"movq %1, %%mm1 \n\t"
|
"movq %1, %%mm1 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
"movq %%mm2, %0 \n\t"
|
"movq %%mm2, %0 \n\t"
|
||||||
"movq 8%0, %%mm0 \n\t"
|
"movq 8%0, %%mm0 \n\t"
|
||||||
"movq 8%1, %%mm1 \n\t"
|
"movq 8%1, %%mm1 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
"movq %%mm2, 8%0 \n\t"
|
"movq %%mm2, 8%0 \n\t"
|
||||||
:"+m"(*block)
|
:"+m"(*block)
|
||||||
:"m"(*pixels)
|
:"m"(*pixels)
|
||||||
:"memory");
|
:"memory");
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
}
|
}
|
||||||
while (--h);
|
while (--h);
|
||||||
}
|
}
|
||||||
@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %1, %%mm0 \n\t"
|
"movq %1, %%mm0 \n\t"
|
||||||
"movq 1%1, %%mm1 \n\t"
|
"movq 1%1, %%mm1 \n\t"
|
||||||
"movq %0, %%mm3 \n\t"
|
"movq %0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, %0 \n\t"
|
"movq %%mm0, %0 \n\t"
|
||||||
:"+m"(*block)
|
:"+m"(*block)
|
||||||
:"m"(*pixels)
|
:"m"(*pixels)
|
||||||
:"memory");
|
:"memory");
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
} while (--h);
|
} while (--h);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %1, %%mm0 \n\t"
|
"movq %1, %%mm0 \n\t"
|
||||||
"movq %2, %%mm1 \n\t"
|
"movq %2, %%mm1 \n\t"
|
||||||
"movq %0, %%mm3 \n\t"
|
"movq %0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, %0 \n\t"
|
"movq %%mm0, %0 \n\t"
|
||||||
:"+m"(*dst)
|
:"+m"(*dst)
|
||||||
:"m"(*src1), "m"(*src2)
|
:"m"(*src1), "m"(*src2)
|
||||||
:"memory");
|
:"memory");
|
||||||
dst += dstStride;
|
dst += dstStride;
|
||||||
src1 += src1Stride;
|
src1 += src1Stride;
|
||||||
src2 += 8;
|
src2 += 8;
|
||||||
} while (--h);
|
} while (--h);
|
||||||
@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %1, %%mm0 \n\t"
|
"movq %1, %%mm0 \n\t"
|
||||||
"movq 1%1, %%mm1 \n\t"
|
"movq 1%1, %%mm1 \n\t"
|
||||||
"movq %0, %%mm3 \n\t"
|
"movq %0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, %0 \n\t"
|
"movq %%mm0, %0 \n\t"
|
||||||
"movq 8%1, %%mm0 \n\t"
|
"movq 8%1, %%mm0 \n\t"
|
||||||
"movq 9%1, %%mm1 \n\t"
|
"movq 9%1, %%mm1 \n\t"
|
||||||
"movq 8%0, %%mm3 \n\t"
|
"movq 8%0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, 8%0 \n\t"
|
"movq %%mm0, 8%0 \n\t"
|
||||||
:"+m"(*block)
|
:"+m"(*block)
|
||||||
:"m"(*pixels)
|
:"m"(*pixels)
|
||||||
:"memory");
|
:"memory");
|
||||||
pixels += line_size;
|
pixels += line_size;
|
||||||
block += line_size;
|
block += line_size;
|
||||||
} while (--h);
|
} while (--h);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t
|
|||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
JUMPALIGN();
|
JUMPALIGN();
|
||||||
do {
|
do {
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq %1, %%mm0 \n\t"
|
"movq %1, %%mm0 \n\t"
|
||||||
"movq %2, %%mm1 \n\t"
|
"movq %2, %%mm1 \n\t"
|
||||||
"movq %0, %%mm3 \n\t"
|
"movq %0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, %0 \n\t"
|
"movq %%mm0, %0 \n\t"
|
||||||
"movq 8%1, %%mm0 \n\t"
|
"movq 8%1, %%mm0 \n\t"
|
||||||
"movq 8%2, %%mm1 \n\t"
|
"movq 8%2, %%mm1 \n\t"
|
||||||
"movq 8%0, %%mm3 \n\t"
|
"movq 8%0, %%mm3 \n\t"
|
||||||
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
|
||||||
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
|
||||||
"movq %%mm0, 8%0 \n\t"
|
"movq %%mm0, 8%0 \n\t"
|
||||||
:"+m"(*dst)
|
:"+m"(*dst)
|
||||||
:"m"(*src1), "m"(*src2)
|
:"m"(*src1), "m"(*src2)
|
||||||
:"memory");
|
:"memory");
|
||||||
dst += dstStride;
|
dst += dstStride;
|
||||||
src1 += src1Stride;
|
src1 += src1Stride;
|
||||||
src2 += 16;
|
src2 += 16;
|
||||||
} while (--h);
|
} while (--h);
|
||||||
@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
|
|||||||
{
|
{
|
||||||
MOVQ_BFE(mm6);
|
MOVQ_BFE(mm6);
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"lea (%3, %3), %%"REG_a" \n\t"
|
"lea (%3, %3), %%"REG_a" \n\t"
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %3), %%mm1 \n\t"
|
"movq (%1, %3), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
|
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
|
||||||
"movq (%2), %%mm3 \n\t"
|
"movq (%2), %%mm3 \n\t"
|
||||||
PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
|
PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
|
||||||
"movq (%2, %3), %%mm3 \n\t"
|
"movq (%2, %3), %%mm3 \n\t"
|
||||||
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
|
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
|
||||||
"movq %%mm0, (%2) \n\t"
|
"movq %%mm0, (%2) \n\t"
|
||||||
"movq %%mm1, (%2, %3) \n\t"
|
"movq %%mm1, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
|
|
||||||
"movq (%1, %3), %%mm1 \n\t"
|
"movq (%1, %3), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
|
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
|
||||||
"movq (%2), %%mm3 \n\t"
|
"movq (%2), %%mm3 \n\t"
|
||||||
PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
|
PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
|
||||||
"movq (%2, %3), %%mm3 \n\t"
|
"movq (%2, %3), %%mm3 \n\t"
|
||||||
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
|
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
|
||||||
"movq %%mm2, (%2) \n\t"
|
"movq %%mm2, (%2) \n\t"
|
||||||
"movq %%mm1, (%2, %3) \n\t"
|
"movq %%mm1, (%2, %3) \n\t"
|
||||||
"add %%"REG_a", %1 \n\t"
|
"add %%"REG_a", %1 \n\t"
|
||||||
"add %%"REG_a", %2 \n\t"
|
"add %%"REG_a", %2 \n\t"
|
||||||
|
|
||||||
"subl $4, %0 \n\t"
|
"subl $4, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels), "+D"(block)
|
:"+g"(h), "+S"(pixels), "+D"(block)
|
||||||
:"r"((long)line_size)
|
:"r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
// this routine is 'slightly' suboptimal but mostly unused
|
// this routine is 'slightly' suboptimal but mostly unused
|
||||||
@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
|
|||||||
MOVQ_ZERO(mm7);
|
MOVQ_ZERO(mm7);
|
||||||
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
|
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
"movq 1(%1), %%mm4 \n\t"
|
"movq 1(%1), %%mm4 \n\t"
|
||||||
"movq %%mm0, %%mm1 \n\t"
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
"movq %%mm4, %%mm5 \n\t"
|
"movq %%mm4, %%mm5 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm5 \n\t"
|
"punpckhbw %%mm7, %%mm5 \n\t"
|
||||||
"paddusw %%mm0, %%mm4 \n\t"
|
"paddusw %%mm0, %%mm4 \n\t"
|
||||||
"paddusw %%mm1, %%mm5 \n\t"
|
"paddusw %%mm1, %%mm5 \n\t"
|
||||||
"xor %%"REG_a", %%"REG_a" \n\t"
|
"xor %%"REG_a", %%"REG_a" \n\t"
|
||||||
"add %3, %1 \n\t"
|
"add %3, %1 \n\t"
|
||||||
".balign 8 \n\t"
|
".balign 8 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq %%mm0, %%mm1 \n\t"
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
"movq %%mm2, %%mm3 \n\t"
|
"movq %%mm2, %%mm3 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||||
"paddusw %%mm2, %%mm0 \n\t"
|
"paddusw %%mm2, %%mm0 \n\t"
|
||||||
"paddusw %%mm3, %%mm1 \n\t"
|
"paddusw %%mm3, %%mm1 \n\t"
|
||||||
"paddusw %%mm6, %%mm4 \n\t"
|
"paddusw %%mm6, %%mm4 \n\t"
|
||||||
"paddusw %%mm6, %%mm5 \n\t"
|
"paddusw %%mm6, %%mm5 \n\t"
|
||||||
"paddusw %%mm0, %%mm4 \n\t"
|
"paddusw %%mm0, %%mm4 \n\t"
|
||||||
"paddusw %%mm1, %%mm5 \n\t"
|
"paddusw %%mm1, %%mm5 \n\t"
|
||||||
"psrlw $2, %%mm4 \n\t"
|
"psrlw $2, %%mm4 \n\t"
|
||||||
"psrlw $2, %%mm5 \n\t"
|
"psrlw $2, %%mm5 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"packuswb %%mm5, %%mm4 \n\t"
|
"packuswb %%mm5, %%mm4 \n\t"
|
||||||
"pcmpeqd %%mm2, %%mm2 \n\t"
|
"pcmpeqd %%mm2, %%mm2 \n\t"
|
||||||
"paddb %%mm2, %%mm2 \n\t"
|
"paddb %%mm2, %%mm2 \n\t"
|
||||||
PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
|
PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
|
||||||
"movq %%mm5, (%2, %%"REG_a") \n\t"
|
"movq %%mm5, (%2, %%"REG_a") \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
|
|
||||||
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
|
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
|
||||||
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq %%mm2, %%mm3 \n\t"
|
"movq %%mm2, %%mm3 \n\t"
|
||||||
"movq %%mm4, %%mm5 \n\t"
|
"movq %%mm4, %%mm5 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm5 \n\t"
|
"punpckhbw %%mm7, %%mm5 \n\t"
|
||||||
"paddusw %%mm2, %%mm4 \n\t"
|
"paddusw %%mm2, %%mm4 \n\t"
|
||||||
"paddusw %%mm3, %%mm5 \n\t"
|
"paddusw %%mm3, %%mm5 \n\t"
|
||||||
"paddusw %%mm6, %%mm0 \n\t"
|
"paddusw %%mm6, %%mm0 \n\t"
|
||||||
"paddusw %%mm6, %%mm1 \n\t"
|
"paddusw %%mm6, %%mm1 \n\t"
|
||||||
"paddusw %%mm4, %%mm0 \n\t"
|
"paddusw %%mm4, %%mm0 \n\t"
|
||||||
"paddusw %%mm5, %%mm1 \n\t"
|
"paddusw %%mm5, %%mm1 \n\t"
|
||||||
"psrlw $2, %%mm0 \n\t"
|
"psrlw $2, %%mm0 \n\t"
|
||||||
"psrlw $2, %%mm1 \n\t"
|
"psrlw $2, %%mm1 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"packuswb %%mm1, %%mm0 \n\t"
|
"packuswb %%mm1, %%mm0 \n\t"
|
||||||
"pcmpeqd %%mm2, %%mm2 \n\t"
|
"pcmpeqd %%mm2, %%mm2 \n\t"
|
||||||
"paddb %%mm2, %%mm2 \n\t"
|
"paddb %%mm2, %%mm2 \n\t"
|
||||||
PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
|
PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
|
||||||
"movq %%mm1, (%2, %%"REG_a") \n\t"
|
"movq %%mm1, (%2, %%"REG_a") \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
|
|
||||||
"subl $2, %0 \n\t"
|
"subl $2, %0 \n\t"
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
:"+g"(h), "+S"(pixels)
|
:"+g"(h), "+S"(pixels)
|
||||||
:"D"(block), "r"((long)line_size)
|
:"D"(block), "r"((long)line_size)
|
||||||
:REG_a, "memory");
|
:REG_a, "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
//FIXME optimize
|
//FIXME optimize
|
||||||
|
@ -30,21 +30,21 @@
|
|||||||
//
|
//
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
|
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
|
||||||
#define SHIFT_FRW_COL BITS_FRW_ACC
|
#define SHIFT_FRW_COL BITS_FRW_ACC
|
||||||
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
|
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
|
||||||
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
|
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
|
||||||
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
|
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
|
||||||
|
|
||||||
//concatenated table, for forward DCT transformation
|
//concatenated table, for forward DCT transformation
|
||||||
static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
|
static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
|
||||||
13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5
|
13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5
|
||||||
27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5
|
27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5
|
||||||
-21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5
|
-21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
|
static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
|
||||||
23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5
|
23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
|
static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
|
||||||
@ -351,62 +351,62 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
|
|||||||
static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
|
static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
|
||||||
{
|
{
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".macro FDCT_ROW_SSE2_H1 i t \n\t"
|
".macro FDCT_ROW_SSE2_H1 i t \n\t"
|
||||||
"movq \\i(%0), %%xmm2 \n\t"
|
"movq \\i(%0), %%xmm2 \n\t"
|
||||||
"movq \\i+8(%0), %%xmm0 \n\t"
|
"movq \\i+8(%0), %%xmm0 \n\t"
|
||||||
"movdqa \\t+32(%1), %%xmm3 \n\t"
|
"movdqa \\t+32(%1), %%xmm3 \n\t"
|
||||||
"movdqa \\t+48(%1), %%xmm7 \n\t"
|
"movdqa \\t+48(%1), %%xmm7 \n\t"
|
||||||
"movdqa \\t(%1), %%xmm4 \n\t"
|
"movdqa \\t(%1), %%xmm4 \n\t"
|
||||||
"movdqa \\t+16(%1), %%xmm5 \n\t"
|
"movdqa \\t+16(%1), %%xmm5 \n\t"
|
||||||
".endm \n\t"
|
".endm \n\t"
|
||||||
".macro FDCT_ROW_SSE2_H2 i t \n\t"
|
".macro FDCT_ROW_SSE2_H2 i t \n\t"
|
||||||
"movq \\i(%0), %%xmm2 \n\t"
|
"movq \\i(%0), %%xmm2 \n\t"
|
||||||
"movq \\i+8(%0), %%xmm0 \n\t"
|
"movq \\i+8(%0), %%xmm0 \n\t"
|
||||||
"movdqa \\t+32(%1), %%xmm3 \n\t"
|
"movdqa \\t+32(%1), %%xmm3 \n\t"
|
||||||
"movdqa \\t+48(%1), %%xmm7 \n\t"
|
"movdqa \\t+48(%1), %%xmm7 \n\t"
|
||||||
".endm \n\t"
|
".endm \n\t"
|
||||||
".macro FDCT_ROW_SSE2 i \n\t"
|
".macro FDCT_ROW_SSE2 i \n\t"
|
||||||
"movq %%xmm2, %%xmm1 \n\t"
|
"movq %%xmm2, %%xmm1 \n\t"
|
||||||
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
|
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
|
||||||
"paddsw %%xmm0, %%xmm1 \n\t"
|
"paddsw %%xmm0, %%xmm1 \n\t"
|
||||||
"psubsw %%xmm0, %%xmm2 \n\t"
|
"psubsw %%xmm0, %%xmm2 \n\t"
|
||||||
"punpckldq %%xmm2, %%xmm1 \n\t"
|
"punpckldq %%xmm2, %%xmm1 \n\t"
|
||||||
"pshufd $78, %%xmm1, %%xmm2 \n\t"
|
"pshufd $78, %%xmm1, %%xmm2 \n\t"
|
||||||
"pmaddwd %%xmm2, %%xmm3 \n\t"
|
"pmaddwd %%xmm2, %%xmm3 \n\t"
|
||||||
"pmaddwd %%xmm1, %%xmm7 \n\t"
|
"pmaddwd %%xmm1, %%xmm7 \n\t"
|
||||||
"pmaddwd %%xmm5, %%xmm2 \n\t"
|
"pmaddwd %%xmm5, %%xmm2 \n\t"
|
||||||
"pmaddwd %%xmm4, %%xmm1 \n\t"
|
"pmaddwd %%xmm4, %%xmm1 \n\t"
|
||||||
"paddd %%xmm7, %%xmm3 \n\t"
|
"paddd %%xmm7, %%xmm3 \n\t"
|
||||||
"paddd %%xmm2, %%xmm1 \n\t"
|
"paddd %%xmm2, %%xmm1 \n\t"
|
||||||
"paddd %%xmm6, %%xmm3 \n\t"
|
"paddd %%xmm6, %%xmm3 \n\t"
|
||||||
"paddd %%xmm6, %%xmm1 \n\t"
|
"paddd %%xmm6, %%xmm1 \n\t"
|
||||||
"psrad %3, %%xmm3 \n\t"
|
"psrad %3, %%xmm3 \n\t"
|
||||||
"psrad %3, %%xmm1 \n\t"
|
"psrad %3, %%xmm1 \n\t"
|
||||||
"packssdw %%xmm3, %%xmm1 \n\t"
|
"packssdw %%xmm3, %%xmm1 \n\t"
|
||||||
"movdqa %%xmm1, \\i(%4) \n\t"
|
"movdqa %%xmm1, \\i(%4) \n\t"
|
||||||
".endm \n\t"
|
".endm \n\t"
|
||||||
"movdqa (%2), %%xmm6 \n\t"
|
"movdqa (%2), %%xmm6 \n\t"
|
||||||
"FDCT_ROW_SSE2_H1 0 0 \n\t"
|
"FDCT_ROW_SSE2_H1 0 0 \n\t"
|
||||||
"FDCT_ROW_SSE2 0 \n\t"
|
"FDCT_ROW_SSE2 0 \n\t"
|
||||||
"FDCT_ROW_SSE2_H2 64 0 \n\t"
|
"FDCT_ROW_SSE2_H2 64 0 \n\t"
|
||||||
"FDCT_ROW_SSE2 64 \n\t"
|
"FDCT_ROW_SSE2 64 \n\t"
|
||||||
|
|
||||||
"FDCT_ROW_SSE2_H1 16 64 \n\t"
|
"FDCT_ROW_SSE2_H1 16 64 \n\t"
|
||||||
"FDCT_ROW_SSE2 16 \n\t"
|
"FDCT_ROW_SSE2 16 \n\t"
|
||||||
"FDCT_ROW_SSE2_H2 112 64 \n\t"
|
"FDCT_ROW_SSE2_H2 112 64 \n\t"
|
||||||
"FDCT_ROW_SSE2 112 \n\t"
|
"FDCT_ROW_SSE2 112 \n\t"
|
||||||
|
|
||||||
"FDCT_ROW_SSE2_H1 32 128 \n\t"
|
"FDCT_ROW_SSE2_H1 32 128 \n\t"
|
||||||
"FDCT_ROW_SSE2 32 \n\t"
|
"FDCT_ROW_SSE2 32 \n\t"
|
||||||
"FDCT_ROW_SSE2_H2 96 128 \n\t"
|
"FDCT_ROW_SSE2_H2 96 128 \n\t"
|
||||||
"FDCT_ROW_SSE2 96 \n\t"
|
"FDCT_ROW_SSE2 96 \n\t"
|
||||||
|
|
||||||
"FDCT_ROW_SSE2_H1 48 192 \n\t"
|
"FDCT_ROW_SSE2_H1 48 192 \n\t"
|
||||||
"FDCT_ROW_SSE2 48 \n\t"
|
"FDCT_ROW_SSE2 48 \n\t"
|
||||||
"FDCT_ROW_SSE2_H2 80 192 \n\t"
|
"FDCT_ROW_SSE2_H2 80 192 \n\t"
|
||||||
"FDCT_ROW_SSE2 80 \n\t"
|
"FDCT_ROW_SSE2 80 \n\t"
|
||||||
:
|
:
|
||||||
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
|
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a)
|
|||||||
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
|
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
|
||||||
{
|
{
|
||||||
int ln = s->nbits;
|
int ln = s->nbits;
|
||||||
int j, np, np2;
|
int j, np, np2;
|
||||||
int nblocks, nloops;
|
int nblocks, nloops;
|
||||||
register FFTComplex *p, *q;
|
register FFTComplex *p, *q;
|
||||||
FFTComplex *cptr, *cptr1;
|
FFTComplex *cptr, *cptr1;
|
||||||
int k;
|
int k;
|
||||||
|
@ -47,9 +47,9 @@
|
|||||||
SUMSUB_BADC( d13, s02, s13, d02 )
|
SUMSUB_BADC( d13, s02, s13, d02 )
|
||||||
|
|
||||||
#define SBUTTERFLY(a,b,t,n)\
|
#define SBUTTERFLY(a,b,t,n)\
|
||||||
"movq " #a ", " #t " \n\t" /* abcd */\
|
"movq " #a ", " #t " \n\t" /* abcd */\
|
||||||
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
|
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
|
||||||
"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
|
"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
|
||||||
|
|
||||||
#define TRANSPOSE4(a,b,c,d,t)\
|
#define TRANSPOSE4(a,b,c,d,t)\
|
||||||
SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
|
SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
|
||||||
@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
|
|||||||
/* motion compensation */
|
/* motion compensation */
|
||||||
|
|
||||||
#define QPEL_H264V(A,B,C,D,E,F,OP)\
|
#define QPEL_H264V(A,B,C,D,E,F,OP)\
|
||||||
"movd (%0), "#F" \n\t"\
|
"movd (%0), "#F" \n\t"\
|
||||||
"movq "#C", %%mm6 \n\t"\
|
"movq "#C", %%mm6 \n\t"\
|
||||||
"paddw "#D", %%mm6 \n\t"\
|
"paddw "#D", %%mm6 \n\t"\
|
||||||
"psllw $2, %%mm6 \n\t"\
|
"psllw $2, %%mm6 \n\t"\
|
||||||
"psubw "#B", %%mm6 \n\t"\
|
"psubw "#B", %%mm6 \n\t"\
|
||||||
"psubw "#E", %%mm6 \n\t"\
|
"psubw "#E", %%mm6 \n\t"\
|
||||||
"pmullw %4, %%mm6 \n\t"\
|
"pmullw %4, %%mm6 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, "#F" \n\t"\
|
"punpcklbw %%mm7, "#F" \n\t"\
|
||||||
"paddw %5, "#A" \n\t"\
|
"paddw %5, "#A" \n\t"\
|
||||||
"paddw "#F", "#A" \n\t"\
|
"paddw "#F", "#A" \n\t"\
|
||||||
"paddw "#A", %%mm6 \n\t"\
|
"paddw "#A", %%mm6 \n\t"\
|
||||||
"psraw $5, %%mm6 \n\t"\
|
"psraw $5, %%mm6 \n\t"\
|
||||||
"packuswb %%mm6, %%mm6 \n\t"\
|
"packuswb %%mm6, %%mm6 \n\t"\
|
||||||
OP(%%mm6, (%1), A, d)\
|
OP(%%mm6, (%1), A, d)\
|
||||||
"add %3, %1 \n\t"
|
"add %3, %1 \n\t"
|
||||||
|
|
||||||
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
|
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
|
||||||
"movd (%0), "#F" \n\t"\
|
"movd (%0), "#F" \n\t"\
|
||||||
"movq "#C", %%mm6 \n\t"\
|
"movq "#C", %%mm6 \n\t"\
|
||||||
"paddw "#D", %%mm6 \n\t"\
|
"paddw "#D", %%mm6 \n\t"\
|
||||||
"psllw $2, %%mm6 \n\t"\
|
"psllw $2, %%mm6 \n\t"\
|
||||||
"psubw "#B", %%mm6 \n\t"\
|
"psubw "#B", %%mm6 \n\t"\
|
||||||
"psubw "#E", %%mm6 \n\t"\
|
"psubw "#E", %%mm6 \n\t"\
|
||||||
"pmullw %3, %%mm6 \n\t"\
|
"pmullw %3, %%mm6 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, "#F" \n\t"\
|
"punpcklbw %%mm7, "#F" \n\t"\
|
||||||
"paddw "#F", "#A" \n\t"\
|
"paddw "#F", "#A" \n\t"\
|
||||||
"paddw "#A", %%mm6 \n\t"\
|
"paddw "#A", %%mm6 \n\t"\
|
||||||
"movq %%mm6, "#OF"(%1) \n\t"
|
"movq %%mm6, "#OF"(%1) \n\t"
|
||||||
|
|
||||||
#define QPEL_H264(OPNAME, OP, MMX)\
|
#define QPEL_H264(OPNAME, OP, MMX)\
|
||||||
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
||||||
int h=4;\
|
int h=4;\
|
||||||
\
|
\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movq %5, %%mm4 \n\t"\
|
"movq %5, %%mm4 \n\t"\
|
||||||
"movq %6, %%mm5 \n\t"\
|
"movq %6, %%mm5 \n\t"\
|
||||||
"1: \n\t"\
|
"1: \n\t"\
|
||||||
"movd -1(%0), %%mm1 \n\t"\
|
"movd -1(%0), %%mm1 \n\t"\
|
||||||
"movd (%0), %%mm2 \n\t"\
|
"movd (%0), %%mm2 \n\t"\
|
||||||
"movd 1(%0), %%mm3 \n\t"\
|
"movd 1(%0), %%mm3 \n\t"\
|
||||||
"movd 2(%0), %%mm0 \n\t"\
|
"movd 2(%0), %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"\
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"paddw %%mm0, %%mm1 \n\t"\
|
"paddw %%mm0, %%mm1 \n\t"\
|
||||||
"paddw %%mm3, %%mm2 \n\t"\
|
"paddw %%mm3, %%mm2 \n\t"\
|
||||||
"movd -2(%0), %%mm0 \n\t"\
|
"movd -2(%0), %%mm0 \n\t"\
|
||||||
"movd 3(%0), %%mm3 \n\t"\
|
"movd 3(%0), %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"paddw %%mm3, %%mm0 \n\t"\
|
"paddw %%mm3, %%mm0 \n\t"\
|
||||||
"psllw $2, %%mm2 \n\t"\
|
"psllw $2, %%mm2 \n\t"\
|
||||||
"psubw %%mm1, %%mm2 \n\t"\
|
"psubw %%mm1, %%mm2 \n\t"\
|
||||||
"pmullw %%mm4, %%mm2 \n\t"\
|
"pmullw %%mm4, %%mm2 \n\t"\
|
||||||
"paddw %%mm5, %%mm0 \n\t"\
|
"paddw %%mm5, %%mm0 \n\t"\
|
||||||
"paddw %%mm2, %%mm0 \n\t"\
|
"paddw %%mm2, %%mm0 \n\t"\
|
||||||
"psraw $5, %%mm0 \n\t"\
|
"psraw $5, %%mm0 \n\t"\
|
||||||
"packuswb %%mm0, %%mm0 \n\t"\
|
"packuswb %%mm0, %%mm0 \n\t"\
|
||||||
OP(%%mm0, (%1),%%mm6, d)\
|
OP(%%mm0, (%1),%%mm6, d)\
|
||||||
"add %3, %0 \n\t"\
|
"add %3, %0 \n\t"\
|
||||||
"add %4, %1 \n\t"\
|
"add %4, %1 \n\t"\
|
||||||
"decl %2 \n\t"\
|
"decl %2 \n\t"\
|
||||||
" jnz 1b \n\t"\
|
" jnz 1b \n\t"\
|
||||||
: "+a"(src), "+c"(dst), "+m"(h)\
|
: "+a"(src), "+c"(dst), "+m"(h)\
|
||||||
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
|
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
|
||||||
: "memory"\
|
: "memory"\
|
||||||
@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
|
|||||||
static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
||||||
src -= 2*srcStride;\
|
src -= 2*srcStride;\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movd (%0), %%mm0 \n\t"\
|
"movd (%0), %%mm0 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm1 \n\t"\
|
"movd (%0), %%mm1 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm2 \n\t"\
|
"movd (%0), %%mm2 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm3 \n\t"\
|
"movd (%0), %%mm3 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm4 \n\t"\
|
"movd (%0), %%mm4 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"\
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"\
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
||||||
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
||||||
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
||||||
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
||||||
@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
src -= 2*srcStride+2;\
|
src -= 2*srcStride+2;\
|
||||||
while(w--){\
|
while(w--){\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movd (%0), %%mm0 \n\t"\
|
"movd (%0), %%mm0 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm1 \n\t"\
|
"movd (%0), %%mm1 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm2 \n\t"\
|
"movd (%0), %%mm2 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm3 \n\t"\
|
"movd (%0), %%mm3 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm4 \n\t"\
|
"movd (%0), %%mm4 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"\
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"\
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
||||||
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
|
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
|
||||||
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
|
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
|
||||||
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
|
||||||
@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
}\
|
}\
|
||||||
tmp -= 3*4;\
|
tmp -= 3*4;\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"movq %4, %%mm6 \n\t"\
|
"movq %4, %%mm6 \n\t"\
|
||||||
"1: \n\t"\
|
"1: \n\t"\
|
||||||
"movq (%0), %%mm0 \n\t"\
|
"movq (%0), %%mm0 \n\t"\
|
||||||
"paddw 10(%0), %%mm0 \n\t"\
|
"paddw 10(%0), %%mm0 \n\t"\
|
||||||
"movq 2(%0), %%mm1 \n\t"\
|
"movq 2(%0), %%mm1 \n\t"\
|
||||||
"paddw 8(%0), %%mm1 \n\t"\
|
"paddw 8(%0), %%mm1 \n\t"\
|
||||||
"movq 4(%0), %%mm2 \n\t"\
|
"movq 4(%0), %%mm2 \n\t"\
|
||||||
"paddw 6(%0), %%mm2 \n\t"\
|
"paddw 6(%0), %%mm2 \n\t"\
|
||||||
"psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
|
"psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
|
||||||
"psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
|
"psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
|
||||||
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
|
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
|
||||||
"paddsw %%mm2, %%mm0 \n\t"\
|
"paddsw %%mm2, %%mm0 \n\t"\
|
||||||
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\
|
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\
|
||||||
"paddw %%mm6, %%mm2 \n\t"\
|
"paddw %%mm6, %%mm2 \n\t"\
|
||||||
"paddw %%mm2, %%mm0 \n\t"\
|
"paddw %%mm2, %%mm0 \n\t"\
|
||||||
"psraw $6, %%mm0 \n\t"\
|
"psraw $6, %%mm0 \n\t"\
|
||||||
"packuswb %%mm0, %%mm0 \n\t"\
|
"packuswb %%mm0, %%mm0 \n\t"\
|
||||||
OP(%%mm0, (%1),%%mm7, d)\
|
OP(%%mm0, (%1),%%mm7, d)\
|
||||||
"add $24, %0 \n\t"\
|
"add $24, %0 \n\t"\
|
||||||
"add %3, %1 \n\t"\
|
"add %3, %1 \n\t"\
|
||||||
"decl %2 \n\t"\
|
"decl %2 \n\t"\
|
||||||
" jnz 1b \n\t"\
|
" jnz 1b \n\t"\
|
||||||
: "+a"(tmp), "+c"(dst), "+m"(h)\
|
: "+a"(tmp), "+c"(dst), "+m"(h)\
|
||||||
: "S"((long)dstStride), "m"(ff_pw_32)\
|
: "S"((long)dstStride), "m"(ff_pw_32)\
|
||||||
: "memory"\
|
: "memory"\
|
||||||
@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
||||||
int h=8;\
|
int h=8;\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movq %5, %%mm6 \n\t"\
|
"movq %5, %%mm6 \n\t"\
|
||||||
"1: \n\t"\
|
"1: \n\t"\
|
||||||
"movq (%0), %%mm0 \n\t"\
|
"movq (%0), %%mm0 \n\t"\
|
||||||
"movq 1(%0), %%mm2 \n\t"\
|
"movq 1(%0), %%mm2 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq %%mm0, %%mm1 \n\t"\
|
||||||
"movq %%mm2, %%mm3 \n\t"\
|
"movq %%mm2, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"\
|
"punpckhbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"\
|
"punpckhbw %%mm7, %%mm3 \n\t"\
|
||||||
"paddw %%mm2, %%mm0 \n\t"\
|
"paddw %%mm2, %%mm0 \n\t"\
|
||||||
"paddw %%mm3, %%mm1 \n\t"\
|
"paddw %%mm3, %%mm1 \n\t"\
|
||||||
"psllw $2, %%mm0 \n\t"\
|
"psllw $2, %%mm0 \n\t"\
|
||||||
"psllw $2, %%mm1 \n\t"\
|
"psllw $2, %%mm1 \n\t"\
|
||||||
"movq -1(%0), %%mm2 \n\t"\
|
"movq -1(%0), %%mm2 \n\t"\
|
||||||
"movq 2(%0), %%mm4 \n\t"\
|
"movq 2(%0), %%mm4 \n\t"\
|
||||||
"movq %%mm2, %%mm3 \n\t"\
|
"movq %%mm2, %%mm3 \n\t"\
|
||||||
"movq %%mm4, %%mm5 \n\t"\
|
"movq %%mm4, %%mm5 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"\
|
"punpckhbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"\
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
||||||
"punpckhbw %%mm7, %%mm5 \n\t"\
|
"punpckhbw %%mm7, %%mm5 \n\t"\
|
||||||
"paddw %%mm4, %%mm2 \n\t"\
|
"paddw %%mm4, %%mm2 \n\t"\
|
||||||
"paddw %%mm3, %%mm5 \n\t"\
|
"paddw %%mm3, %%mm5 \n\t"\
|
||||||
"psubw %%mm2, %%mm0 \n\t"\
|
"psubw %%mm2, %%mm0 \n\t"\
|
||||||
"psubw %%mm5, %%mm1 \n\t"\
|
"psubw %%mm5, %%mm1 \n\t"\
|
||||||
"pmullw %%mm6, %%mm0 \n\t"\
|
"pmullw %%mm6, %%mm0 \n\t"\
|
||||||
"pmullw %%mm6, %%mm1 \n\t"\
|
"pmullw %%mm6, %%mm1 \n\t"\
|
||||||
"movd -2(%0), %%mm2 \n\t"\
|
"movd -2(%0), %%mm2 \n\t"\
|
||||||
"movd 7(%0), %%mm5 \n\t"\
|
"movd 7(%0), %%mm5 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm5 \n\t"\
|
"punpcklbw %%mm7, %%mm5 \n\t"\
|
||||||
"paddw %%mm3, %%mm2 \n\t"\
|
"paddw %%mm3, %%mm2 \n\t"\
|
||||||
"paddw %%mm5, %%mm4 \n\t"\
|
"paddw %%mm5, %%mm4 \n\t"\
|
||||||
"movq %6, %%mm5 \n\t"\
|
"movq %6, %%mm5 \n\t"\
|
||||||
"paddw %%mm5, %%mm2 \n\t"\
|
"paddw %%mm5, %%mm2 \n\t"\
|
||||||
"paddw %%mm5, %%mm4 \n\t"\
|
"paddw %%mm5, %%mm4 \n\t"\
|
||||||
"paddw %%mm2, %%mm0 \n\t"\
|
"paddw %%mm2, %%mm0 \n\t"\
|
||||||
"paddw %%mm4, %%mm1 \n\t"\
|
"paddw %%mm4, %%mm1 \n\t"\
|
||||||
"psraw $5, %%mm0 \n\t"\
|
"psraw $5, %%mm0 \n\t"\
|
||||||
"psraw $5, %%mm1 \n\t"\
|
"psraw $5, %%mm1 \n\t"\
|
||||||
"packuswb %%mm1, %%mm0 \n\t"\
|
"packuswb %%mm1, %%mm0 \n\t"\
|
||||||
OP(%%mm0, (%1),%%mm5, q)\
|
OP(%%mm0, (%1),%%mm5, q)\
|
||||||
"add %3, %0 \n\t"\
|
"add %3, %0 \n\t"\
|
||||||
"add %4, %1 \n\t"\
|
"add %4, %1 \n\t"\
|
||||||
"decl %2 \n\t"\
|
"decl %2 \n\t"\
|
||||||
" jnz 1b \n\t"\
|
" jnz 1b \n\t"\
|
||||||
: "+a"(src), "+c"(dst), "+m"(h)\
|
: "+a"(src), "+c"(dst), "+m"(h)\
|
||||||
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
|
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
|
||||||
: "memory"\
|
: "memory"\
|
||||||
@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
|
|||||||
\
|
\
|
||||||
while(h--){\
|
while(h--){\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movd (%0), %%mm0 \n\t"\
|
"movd (%0), %%mm0 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm1 \n\t"\
|
"movd (%0), %%mm1 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm2 \n\t"\
|
"movd (%0), %%mm2 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm3 \n\t"\
|
"movd (%0), %%mm3 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm4 \n\t"\
|
"movd (%0), %%mm4 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"\
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"\
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
||||||
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
||||||
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
||||||
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
||||||
@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
src -= 2*srcStride+2;\
|
src -= 2*srcStride+2;\
|
||||||
while(w--){\
|
while(w--){\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"pxor %%mm7, %%mm7 \n\t"\
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
"movd (%0), %%mm0 \n\t"\
|
"movd (%0), %%mm0 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm1 \n\t"\
|
"movd (%0), %%mm1 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm2 \n\t"\
|
"movd (%0), %%mm2 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm3 \n\t"\
|
"movd (%0), %%mm3 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"movd (%0), %%mm4 \n\t"\
|
"movd (%0), %%mm4 \n\t"\
|
||||||
"add %2, %0 \n\t"\
|
"add %2, %0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"\
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"\
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"\
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"\
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
||||||
"punpcklbw %%mm7, %%mm4 \n\t"\
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
||||||
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\
|
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\
|
||||||
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\
|
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\
|
||||||
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\
|
||||||
@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
}\
|
}\
|
||||||
tmp -= 4*4;\
|
tmp -= 4*4;\
|
||||||
asm volatile(\
|
asm volatile(\
|
||||||
"movq %4, %%mm6 \n\t"\
|
"movq %4, %%mm6 \n\t"\
|
||||||
"1: \n\t"\
|
"1: \n\t"\
|
||||||
"movq (%0), %%mm0 \n\t"\
|
"movq (%0), %%mm0 \n\t"\
|
||||||
"movq 8(%0), %%mm3 \n\t"\
|
"movq 8(%0), %%mm3 \n\t"\
|
||||||
"movq 2(%0), %%mm1 \n\t"\
|
"movq 2(%0), %%mm1 \n\t"\
|
||||||
"movq 10(%0), %%mm4 \n\t"\
|
"movq 10(%0), %%mm4 \n\t"\
|
||||||
"paddw %%mm4, %%mm0 \n\t"\
|
"paddw %%mm4, %%mm0 \n\t"\
|
||||||
"paddw %%mm3, %%mm1 \n\t"\
|
"paddw %%mm3, %%mm1 \n\t"\
|
||||||
"paddw 18(%0), %%mm3 \n\t"\
|
"paddw 18(%0), %%mm3 \n\t"\
|
||||||
"paddw 16(%0), %%mm4 \n\t"\
|
"paddw 16(%0), %%mm4 \n\t"\
|
||||||
"movq 4(%0), %%mm2 \n\t"\
|
"movq 4(%0), %%mm2 \n\t"\
|
||||||
"movq 12(%0), %%mm5 \n\t"\
|
"movq 12(%0), %%mm5 \n\t"\
|
||||||
"paddw 6(%0), %%mm2 \n\t"\
|
"paddw 6(%0), %%mm2 \n\t"\
|
||||||
"paddw 14(%0), %%mm5 \n\t"\
|
"paddw 14(%0), %%mm5 \n\t"\
|
||||||
"psubw %%mm1, %%mm0 \n\t"\
|
"psubw %%mm1, %%mm0 \n\t"\
|
||||||
"psubw %%mm4, %%mm3 \n\t"\
|
"psubw %%mm4, %%mm3 \n\t"\
|
||||||
"psraw $2, %%mm0 \n\t"\
|
"psraw $2, %%mm0 \n\t"\
|
||||||
"psraw $2, %%mm3 \n\t"\
|
"psraw $2, %%mm3 \n\t"\
|
||||||
"psubw %%mm1, %%mm0 \n\t"\
|
"psubw %%mm1, %%mm0 \n\t"\
|
||||||
"psubw %%mm4, %%mm3 \n\t"\
|
"psubw %%mm4, %%mm3 \n\t"\
|
||||||
"paddsw %%mm2, %%mm0 \n\t"\
|
"paddsw %%mm2, %%mm0 \n\t"\
|
||||||
"paddsw %%mm5, %%mm3 \n\t"\
|
"paddsw %%mm5, %%mm3 \n\t"\
|
||||||
"psraw $2, %%mm0 \n\t"\
|
"psraw $2, %%mm0 \n\t"\
|
||||||
"psraw $2, %%mm3 \n\t"\
|
"psraw $2, %%mm3 \n\t"\
|
||||||
"paddw %%mm6, %%mm2 \n\t"\
|
"paddw %%mm6, %%mm2 \n\t"\
|
||||||
"paddw %%mm6, %%mm5 \n\t"\
|
"paddw %%mm6, %%mm5 \n\t"\
|
||||||
"paddw %%mm2, %%mm0 \n\t"\
|
"paddw %%mm2, %%mm0 \n\t"\
|
||||||
"paddw %%mm5, %%mm3 \n\t"\
|
"paddw %%mm5, %%mm3 \n\t"\
|
||||||
"psraw $6, %%mm0 \n\t"\
|
"psraw $6, %%mm0 \n\t"\
|
||||||
"psraw $6, %%mm3 \n\t"\
|
"psraw $6, %%mm3 \n\t"\
|
||||||
"packuswb %%mm3, %%mm0 \n\t"\
|
"packuswb %%mm3, %%mm0 \n\t"\
|
||||||
OP(%%mm0, (%1),%%mm7, q)\
|
OP(%%mm0, (%1),%%mm7, q)\
|
||||||
"add $32, %0 \n\t"\
|
"add $32, %0 \n\t"\
|
||||||
"add %3, %1 \n\t"\
|
"add %3, %1 \n\t"\
|
||||||
"decl %2 \n\t"\
|
"decl %2 \n\t"\
|
||||||
" jnz 1b \n\t"\
|
" jnz 1b \n\t"\
|
||||||
: "+a"(tmp), "+c"(dst), "+m"(h)\
|
: "+a"(tmp), "+c"(dst), "+m"(h)\
|
||||||
: "S"((long)dstStride), "m"(ff_pw_32)\
|
: "S"((long)dstStride), "m"(ff_pw_32)\
|
||||||
: "memory"\
|
: "memory"\
|
||||||
@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
|
|||||||
}\
|
}\
|
||||||
|
|
||||||
|
|
||||||
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
|
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
|
||||||
#define AVG_3DNOW_OP(a,b,temp, size) \
|
#define AVG_3DNOW_OP(a,b,temp, size) \
|
||||||
"mov" #size " " #b ", " #temp " \n\t"\
|
"mov" #size " " #b ", " #temp " \n\t"\
|
||||||
"pavgusb " #temp ", " #a " \n\t"\
|
"pavgusb " #temp ", " #a " \n\t"\
|
||||||
"mov" #size " " #a ", " #b " \n\t"
|
"mov" #size " " #a ", " #b " \n\t"
|
||||||
#define AVG_MMX2_OP(a,b,temp, size) \
|
#define AVG_MMX2_OP(a,b,temp, size) \
|
||||||
"mov" #size " " #b ", " #temp " \n\t"\
|
"mov" #size " " #b ", " #temp " \n\t"\
|
||||||
"pavgb " #temp ", " #a " \n\t"\
|
"pavgb " #temp ", " #a " \n\t"\
|
||||||
"mov" #size " " #a ", " #b " \n\t"
|
"mov" #size " " #a ", " #b " \n\t"
|
||||||
|
|
||||||
QPEL_H264(put_, PUT_OP, 3dnow)
|
QPEL_H264(put_, PUT_OP, 3dnow)
|
||||||
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
||||||
|
@ -38,7 +38,7 @@
|
|||||||
#if 0
|
#if 0
|
||||||
/* C row IDCT - its just here to document the MMXEXT and MMX versions */
|
/* C row IDCT - its just here to document the MMXEXT and MMX versions */
|
||||||
static inline void idct_row (int16_t * row, int offset,
|
static inline void idct_row (int16_t * row, int offset,
|
||||||
int16_t * table, int32_t * rounder)
|
int16_t * table, int32_t * rounder)
|
||||||
{
|
{
|
||||||
int C1, C2, C3, C4, C5, C6, C7;
|
int C1, C2, C3, C4, C5, C6, C7;
|
||||||
int a0, a1, a2, a3, b0, b1, b2, b3;
|
int a0, a1, a2, a3, b0, b1, b2, b3;
|
||||||
@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset,
|
|||||||
|
|
||||||
/* MMXEXT row IDCT */
|
/* MMXEXT row IDCT */
|
||||||
|
|
||||||
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
|
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
|
||||||
c4, c6, c4, c6, \
|
c4, c6, c4, c6, \
|
||||||
c1, c3, -c1, -c5, \
|
c1, c3, -c1, -c5, \
|
||||||
c5, c7, c3, -c7, \
|
c5, c7, c3, -c7, \
|
||||||
c4, -c6, c4, -c6, \
|
c4, -c6, c4, -c6, \
|
||||||
-c4, c2, c4, -c2, \
|
-c4, c2, c4, -c2, \
|
||||||
c5, -c1, c3, -c1, \
|
c5, -c1, c3, -c1, \
|
||||||
c7, c3, c7, -c5 }
|
c7, c3, c7, -c5 }
|
||||||
|
|
||||||
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
|
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
|
||||||
{
|
{
|
||||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||||
|
|
||||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||||
|
|
||||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
|
static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
|
||||||
{
|
{
|
||||||
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
|
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
|
||||||
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
|
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
|
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
|
||||||
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
|
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
|
||||||
|
|
||||||
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
|
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
|
||||||
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
|
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
|
||||||
|
|
||||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||||
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
|
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
|
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
|
||||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
|
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
|
||||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
|
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
|
||||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||||
|
|
||||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||||
|
|
||||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||||
|
|
||||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||||
|
|
||||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||||
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
|
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
|
||||||
|
|
||||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||||
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
|
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmxext_row_tail (int16_t * row, int store)
|
static inline void mmxext_row_tail (int16_t * row, int store)
|
||||||
{
|
{
|
||||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||||
|
|
||||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||||
|
|
||||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||||
|
|
||||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||||
|
|
||||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||||
|
|
||||||
/* slot */
|
/* slot */
|
||||||
|
|
||||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmxext_row_mid (int16_t * row, int store,
|
static inline void mmxext_row_mid (int16_t * row, int store,
|
||||||
int offset, const int16_t * table)
|
int offset, const int16_t * table)
|
||||||
{
|
{
|
||||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||||
|
|
||||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||||
|
|
||||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||||
|
|
||||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||||
|
|
||||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||||
|
|
||||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||||
|
|
||||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* MMX row IDCT */
|
/* MMX row IDCT */
|
||||||
|
|
||||||
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
|
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
|
||||||
c4, c6, -c4, -c2, \
|
c4, c6, -c4, -c2, \
|
||||||
c1, c3, c3, -c7, \
|
c1, c3, c3, -c7, \
|
||||||
c5, c7, -c1, -c5, \
|
c5, c7, -c1, -c5, \
|
||||||
c4, -c6, c4, -c2, \
|
c4, -c6, c4, -c2, \
|
||||||
-c4, c2, c4, -c6, \
|
-c4, c2, c4, -c6, \
|
||||||
c5, -c1, c7, -c5, \
|
c5, -c1, c7, -c5, \
|
||||||
c7, c3, c3, -c1 }
|
c7, c3, c3, -c1 }
|
||||||
|
|
||||||
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
|
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
|
||||||
{
|
{
|
||||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||||
|
|
||||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||||
|
|
||||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||||
|
|
||||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmx_row (const int16_t * table, const int32_t * rounder)
|
static inline void mmx_row (const int16_t * table, const int32_t * rounder)
|
||||||
{
|
{
|
||||||
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
|
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
|
||||||
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
|
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
|
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
|
||||||
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
|
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
|
||||||
|
|
||||||
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
|
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
|
||||||
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
|
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
|
||||||
|
|
||||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||||
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
|
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
|
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
|
||||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
|
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
|
||||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||||
|
|
||||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
|
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
|
||||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||||
|
|
||||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||||
|
|
||||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||||
|
|
||||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||||
|
|
||||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||||
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
|
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
|
||||||
|
|
||||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||||
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
|
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmx_row_tail (int16_t * row, int store)
|
static inline void mmx_row_tail (int16_t * row, int store)
|
||||||
{
|
{
|
||||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||||
|
|
||||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||||
|
|
||||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||||
|
|
||||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||||
|
|
||||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||||
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
|
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
|
||||||
|
|
||||||
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
|
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
|
||||||
|
|
||||||
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
|
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
|
||||||
|
|
||||||
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
|
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
|
||||||
|
|
||||||
/* slot */
|
/* slot */
|
||||||
|
|
||||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mmx_row_mid (int16_t * row, int store,
|
static inline void mmx_row_mid (int16_t * row, int store,
|
||||||
int offset, const int16_t * table)
|
int offset, const int16_t * table)
|
||||||
{
|
{
|
||||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||||
|
|
||||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||||
|
|
||||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||||
|
|
||||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||||
|
|
||||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||||
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
|
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
|
||||||
|
|
||||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||||
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
|
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
|
||||||
|
|
||||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||||
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
|
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
|
||||||
|
|
||||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||||
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
|
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
|
||||||
|
|
||||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||||
|
|
||||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset)
|
|||||||
/* column code adapted from peter gubanov */
|
/* column code adapted from peter gubanov */
|
||||||
/* http://www.elecard.com/peter/idct.shtml */
|
/* http://www.elecard.com/peter/idct.shtml */
|
||||||
|
|
||||||
movq_m2r (*_T1, mm0); // mm0 = T1
|
movq_m2r (*_T1, mm0); // mm0 = T1
|
||||||
|
|
||||||
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
|
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
|
||||||
movq_r2r (mm0, mm2); // mm2 = T1
|
movq_r2r (mm0, mm2); // mm2 = T1
|
||||||
|
|
||||||
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
|
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
|
||||||
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
|
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
|
||||||
|
|
||||||
movq_m2r (*_T3, mm5); // mm5 = T3
|
movq_m2r (*_T3, mm5); // mm5 = T3
|
||||||
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
|
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
|
||||||
|
|
||||||
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
|
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
|
||||||
movq_r2r (mm5, mm7); // mm7 = T3-1
|
movq_r2r (mm5, mm7); // mm7 = T3-1
|
||||||
|
|
||||||
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
|
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
|
||||||
psubsw_r2r (mm4, mm0); // mm0 = v17
|
psubsw_r2r (mm4, mm0); // mm0 = v17
|
||||||
|
|
||||||
movq_m2r (*_T2, mm4); // mm4 = T2
|
movq_m2r (*_T2, mm4); // mm4 = T2
|
||||||
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
|
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
|
||||||
|
|
||||||
paddsw_r2r (mm2, mm1); // mm1 = u17
|
paddsw_r2r (mm2, mm1); // mm1 = u17
|
||||||
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
|
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
|
||||||
|
|
||||||
/* slot */
|
/* slot */
|
||||||
|
|
||||||
movq_r2r (mm4, mm2); // mm2 = T2
|
movq_r2r (mm4, mm2); // mm2 = T2
|
||||||
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
|
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
|
||||||
|
|
||||||
pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
|
pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
|
||||||
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
|
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
|
||||||
|
|
||||||
psubsw_r2r (mm6, mm5); // mm5 = v35
|
psubsw_r2r (mm6, mm5); // mm5 = v35
|
||||||
paddsw_r2r (mm3, mm7); // mm7 = u35
|
paddsw_r2r (mm3, mm7); // mm7 = u35
|
||||||
|
|
||||||
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
|
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
|
||||||
movq_r2r (mm0, mm6); // mm6 = v17
|
movq_r2r (mm0, mm6); // mm6 = v17
|
||||||
|
|
||||||
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
|
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
|
||||||
psubsw_r2r (mm5, mm0); // mm0 = b3
|
psubsw_r2r (mm5, mm0); // mm0 = b3
|
||||||
|
|
||||||
psubsw_r2r (mm3, mm4); // mm4 = v26
|
psubsw_r2r (mm3, mm4); // mm4 = v26
|
||||||
paddsw_r2r (mm6, mm5); // mm5 = v12
|
paddsw_r2r (mm6, mm5); // mm5 = v12
|
||||||
|
|
||||||
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
|
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
|
||||||
movq_r2r (mm1, mm6); // mm6 = u17
|
movq_r2r (mm1, mm6); // mm6 = u17
|
||||||
|
|
||||||
paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
|
paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
|
||||||
paddsw_r2r (mm7, mm6); // mm6 = b0
|
paddsw_r2r (mm7, mm6); // mm6 = b0
|
||||||
|
|
||||||
psubsw_r2r (mm7, mm1); // mm1 = u12
|
psubsw_r2r (mm7, mm1); // mm1 = u12
|
||||||
movq_r2r (mm1, mm7); // mm7 = u12
|
movq_r2r (mm1, mm7); // mm7 = u12
|
||||||
|
|
||||||
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
|
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
|
||||||
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
|
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
|
||||||
|
|
||||||
movq_m2r (*_C4, mm0); // mm0 = C4/2
|
movq_m2r (*_C4, mm0); // mm0 = C4/2
|
||||||
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
|
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
|
||||||
|
|
||||||
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
|
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
|
||||||
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
|
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
|
||||||
|
|
||||||
movq_r2r (mm4, mm6); // mm6 = v26
|
movq_r2r (mm4, mm6); // mm6 = v26
|
||||||
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
|
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
|
||||||
|
|
||||||
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
|
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
|
||||||
movq_r2r (mm3, mm0); // mm0 = x0
|
movq_r2r (mm3, mm0); // mm0 = x0
|
||||||
|
|
||||||
psubsw_r2r (mm5, mm3); // mm3 = v04
|
psubsw_r2r (mm5, mm3); // mm3 = v04
|
||||||
paddsw_r2r (mm5, mm0); // mm0 = u04
|
paddsw_r2r (mm5, mm0); // mm0 = u04
|
||||||
|
|
||||||
paddsw_r2r (mm3, mm4); // mm4 = a1
|
paddsw_r2r (mm3, mm4); // mm4 = a1
|
||||||
movq_r2r (mm0, mm5); // mm5 = u04
|
movq_r2r (mm0, mm5); // mm5 = u04
|
||||||
|
|
||||||
psubsw_r2r (mm6, mm3); // mm3 = a2
|
psubsw_r2r (mm6, mm3); // mm3 = a2
|
||||||
paddsw_r2r (mm2, mm5); // mm5 = a0
|
paddsw_r2r (mm2, mm5); // mm5 = a0
|
||||||
|
|
||||||
paddsw_r2r (mm1, mm1); // mm1 = b1
|
paddsw_r2r (mm1, mm1); // mm1 = b1
|
||||||
psubsw_r2r (mm2, mm0); // mm0 = a3
|
psubsw_r2r (mm2, mm0); // mm0 = a3
|
||||||
|
|
||||||
paddsw_r2r (mm7, mm7); // mm7 = b2
|
paddsw_r2r (mm7, mm7); // mm7 = b2
|
||||||
movq_r2r (mm3, mm2); // mm2 = a2
|
movq_r2r (mm3, mm2); // mm2 = a2
|
||||||
|
|
||||||
movq_r2r (mm4, mm6); // mm6 = a1
|
movq_r2r (mm4, mm6); // mm6 = a1
|
||||||
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
|
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
|
||||||
|
|
||||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
|
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
|
||||||
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
|
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
|
||||||
|
|
||||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
|
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
|
||||||
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
|
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
|
||||||
|
|
||||||
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
|
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
|
||||||
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
|
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
|
||||||
|
|
||||||
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
|
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
|
||||||
movq_r2r (mm5, mm7); // mm7 = a0
|
movq_r2r (mm5, mm7); // mm7 = a0
|
||||||
|
|
||||||
movq_r2m (mm4, *(col+offset+1*8)); // save y1
|
movq_r2m (mm4, *(col+offset+1*8)); // save y1
|
||||||
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
|
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
|
||||||
|
|
||||||
movq_r2m (mm3, *(col+offset+2*8)); // save y2
|
movq_r2m (mm3, *(col+offset+2*8)); // save y2
|
||||||
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
|
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
|
||||||
|
|
||||||
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
|
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
|
||||||
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
|
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
|
||||||
|
|
||||||
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
|
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
|
||||||
movq_r2r (mm0, mm3); // mm3 = a3
|
movq_r2r (mm0, mm3); // mm3 = a3
|
||||||
|
|
||||||
movq_r2m (mm2, *(col+offset+5*8)); // save y5
|
movq_r2m (mm2, *(col+offset+5*8)); // save y5
|
||||||
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
|
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
|
||||||
|
|
||||||
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
|
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
|
||||||
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
|
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
|
||||||
|
|
||||||
movq_r2m (mm5, *(col+offset+0*8)); // save y0
|
movq_r2m (mm5, *(col+offset+0*8)); // save y0
|
||||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
|
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
|
||||||
|
|
||||||
movq_r2m (mm6, *(col+offset+6*8)); // save y6
|
movq_r2m (mm6, *(col+offset+6*8)); // save y6
|
||||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
|
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
|
||||||
|
|
||||||
movq_r2m (mm7, *(col+offset+7*8)); // save y7
|
movq_r2m (mm7, *(col+offset+7*8)); // save y7
|
||||||
|
|
||||||
movq_r2m (mm3, *(col+offset+4*8)); // save y4
|
movq_r2m (mm3, *(col+offset+4*8)); // save y4
|
||||||
|
|
||||||
movq_r2m (mm4, *(col+offset+3*8)); // save y3
|
movq_r2m (mm4, *(col+offset+3*8)); // save y3
|
||||||
|
|
||||||
#undef T1
|
#undef T1
|
||||||
#undef T2
|
#undef T2
|
||||||
@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) =
|
|||||||
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
|
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
|
||||||
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
|
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
|
||||||
static const int32_t rounder1[] ATTR_ALIGN(8) =
|
static const int32_t rounder1[] ATTR_ALIGN(8) =
|
||||||
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
|
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
|
||||||
static const int32_t rounder7[] ATTR_ALIGN(8) =
|
static const int32_t rounder7[] ATTR_ALIGN(8) =
|
||||||
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
|
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
|
||||||
static const int32_t rounder2[] ATTR_ALIGN(8) =
|
static const int32_t rounder2[] ATTR_ALIGN(8) =
|
||||||
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
|
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
|
||||||
static const int32_t rounder6[] ATTR_ALIGN(8) =
|
static const int32_t rounder6[] ATTR_ALIGN(8) =
|
||||||
rounder (-0.25); /* C2 * (C6-C2)/2 */
|
rounder (-0.25); /* C2 * (C6-C2)/2 */
|
||||||
static const int32_t rounder3[] ATTR_ALIGN(8) =
|
static const int32_t rounder3[] ATTR_ALIGN(8) =
|
||||||
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
|
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
|
||||||
static const int32_t rounder5[] ATTR_ALIGN(8) =
|
static const int32_t rounder5[] ATTR_ALIGN(8) =
|
||||||
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
|
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
|
||||||
|
|
||||||
#undef COL_SHIFT
|
#undef COL_SHIFT
|
||||||
#undef ROW_SHIFT
|
#undef ROW_SHIFT
|
||||||
|
|
||||||
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
|
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
|
||||||
void idct (int16_t * block) \
|
void idct (int16_t * block) \
|
||||||
{ \
|
{ \
|
||||||
static const int16_t table04[] ATTR_ALIGN(16) = \
|
static const int16_t table04[] ATTR_ALIGN(16) = \
|
||||||
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
|
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
|
||||||
static const int16_t table17[] ATTR_ALIGN(16) = \
|
static const int16_t table17[] ATTR_ALIGN(16) = \
|
||||||
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
|
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
|
||||||
static const int16_t table26[] ATTR_ALIGN(16) = \
|
static const int16_t table26[] ATTR_ALIGN(16) = \
|
||||||
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
|
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
|
||||||
static const int16_t table35[] ATTR_ALIGN(16) = \
|
static const int16_t table35[] ATTR_ALIGN(16) = \
|
||||||
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
|
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
|
||||||
\
|
\
|
||||||
idct_row_head (block, 0*8, table04); \
|
idct_row_head (block, 0*8, table04); \
|
||||||
idct_row (table04, rounder0); \
|
idct_row (table04, rounder0); \
|
||||||
idct_row_mid (block, 0*8, 4*8, table04); \
|
idct_row_mid (block, 0*8, 4*8, table04); \
|
||||||
idct_row (table04, rounder4); \
|
idct_row (table04, rounder4); \
|
||||||
idct_row_mid (block, 4*8, 1*8, table17); \
|
idct_row_mid (block, 4*8, 1*8, table17); \
|
||||||
idct_row (table17, rounder1); \
|
idct_row (table17, rounder1); \
|
||||||
idct_row_mid (block, 1*8, 7*8, table17); \
|
idct_row_mid (block, 1*8, 7*8, table17); \
|
||||||
idct_row (table17, rounder7); \
|
idct_row (table17, rounder7); \
|
||||||
idct_row_mid (block, 7*8, 2*8, table26); \
|
idct_row_mid (block, 7*8, 2*8, table26); \
|
||||||
idct_row (table26, rounder2); \
|
idct_row (table26, rounder2); \
|
||||||
idct_row_mid (block, 2*8, 6*8, table26); \
|
idct_row_mid (block, 2*8, 6*8, table26); \
|
||||||
idct_row (table26, rounder6); \
|
idct_row (table26, rounder6); \
|
||||||
idct_row_mid (block, 6*8, 3*8, table35); \
|
idct_row_mid (block, 6*8, 3*8, table35); \
|
||||||
idct_row (table35, rounder3); \
|
idct_row (table35, rounder3); \
|
||||||
idct_row_mid (block, 3*8, 5*8, table35); \
|
idct_row_mid (block, 3*8, 5*8, table35); \
|
||||||
idct_row (table35, rounder5); \
|
idct_row (table35, rounder5); \
|
||||||
idct_row_tail (block, 5*8); \
|
idct_row_tail (block, 5*8); \
|
||||||
\
|
\
|
||||||
idct_col (block, 0); \
|
idct_col (block, 0); \
|
||||||
idct_col (block, 4); \
|
idct_col (block, 4); \
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_mmx_idct(DCTELEM *block);
|
void ff_mmx_idct(DCTELEM *block);
|
||||||
void ff_mmxext_idct(DCTELEM *block);
|
void ff_mmxext_idct(DCTELEM *block);
|
||||||
|
|
||||||
declare_idct (ff_mmxext_idct, mmxext_table,
|
declare_idct (ff_mmxext_idct, mmxext_table,
|
||||||
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
|
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
|
||||||
|
|
||||||
declare_idct (ff_mmx_idct, mmx_table,
|
declare_idct (ff_mmx_idct, mmx_table,
|
||||||
mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
|
mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
|
||||||
|
|
||||||
|
@ -27,257 +27,257 @@
|
|||||||
* values by ULL, lest they be truncated by the compiler)
|
* values by ULL, lest they be truncated by the compiler)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
long long q; /* Quadword (64-bit) value */
|
long long q; /* Quadword (64-bit) value */
|
||||||
unsigned long long uq; /* Unsigned Quadword */
|
unsigned long long uq; /* Unsigned Quadword */
|
||||||
int d[2]; /* 2 Doubleword (32-bit) values */
|
int d[2]; /* 2 Doubleword (32-bit) values */
|
||||||
unsigned int ud[2]; /* 2 Unsigned Doubleword */
|
unsigned int ud[2]; /* 2 Unsigned Doubleword */
|
||||||
short w[4]; /* 4 Word (16-bit) values */
|
short w[4]; /* 4 Word (16-bit) values */
|
||||||
unsigned short uw[4]; /* 4 Unsigned Word */
|
unsigned short uw[4]; /* 4 Unsigned Word */
|
||||||
char b[8]; /* 8 Byte (8-bit) values */
|
char b[8]; /* 8 Byte (8-bit) values */
|
||||||
unsigned char ub[8]; /* 8 Unsigned Byte */
|
unsigned char ub[8]; /* 8 Unsigned Byte */
|
||||||
float s[2]; /* Single-precision (32-bit) value */
|
float s[2]; /* Single-precision (32-bit) value */
|
||||||
} mmx_t; /* On an 8-byte (64-bit) boundary */
|
} mmx_t; /* On an 8-byte (64-bit) boundary */
|
||||||
|
|
||||||
|
|
||||||
#define mmx_i2r(op,imm,reg) \
|
#define mmx_i2r(op,imm,reg) \
|
||||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||||
: /* nothing */ \
|
: /* nothing */ \
|
||||||
: "i" (imm) )
|
: "i" (imm) )
|
||||||
|
|
||||||
#define mmx_m2r(op,mem,reg) \
|
#define mmx_m2r(op,mem,reg) \
|
||||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||||
: /* nothing */ \
|
: /* nothing */ \
|
||||||
: "m" (mem))
|
: "m" (mem))
|
||||||
|
|
||||||
#define mmx_r2m(op,reg,mem) \
|
#define mmx_r2m(op,reg,mem) \
|
||||||
__asm__ __volatile__ (#op " %%" #reg ", %0" \
|
__asm__ __volatile__ (#op " %%" #reg ", %0" \
|
||||||
: "=m" (mem) \
|
: "=m" (mem) \
|
||||||
: /* nothing */ )
|
: /* nothing */ )
|
||||||
|
|
||||||
#define mmx_r2r(op,regs,regd) \
|
#define mmx_r2r(op,regs,regd) \
|
||||||
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
|
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
|
||||||
|
|
||||||
|
|
||||||
#define emms() __asm__ __volatile__ ("emms")
|
#define emms() __asm__ __volatile__ ("emms")
|
||||||
|
|
||||||
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
|
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
|
||||||
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
|
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
|
||||||
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
|
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
|
||||||
|
|
||||||
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
|
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
|
||||||
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
|
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
|
||||||
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
|
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
|
||||||
|
|
||||||
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
|
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
|
||||||
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
|
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
|
||||||
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
|
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
|
||||||
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
|
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
|
||||||
|
|
||||||
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
|
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
|
||||||
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
|
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
|
||||||
|
|
||||||
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
|
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
|
||||||
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
|
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
|
||||||
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
|
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
|
||||||
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
|
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
|
||||||
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
|
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
|
||||||
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
|
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
|
||||||
|
|
||||||
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
|
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
|
||||||
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
|
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
|
||||||
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
|
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
|
||||||
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
|
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
|
||||||
|
|
||||||
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
|
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
|
||||||
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
|
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
|
||||||
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
|
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
|
||||||
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
|
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
|
||||||
|
|
||||||
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
|
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
|
||||||
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
|
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
|
||||||
|
|
||||||
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
|
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
|
||||||
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
|
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
|
||||||
|
|
||||||
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
|
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
|
||||||
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
|
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
|
||||||
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
|
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
|
||||||
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
|
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
|
||||||
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
|
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
|
||||||
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
|
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
|
||||||
|
|
||||||
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
|
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
|
||||||
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
|
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
|
||||||
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
|
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
|
||||||
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
|
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
|
||||||
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
|
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
|
||||||
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
|
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
|
||||||
|
|
||||||
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
|
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
|
||||||
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
|
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
|
||||||
|
|
||||||
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
|
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
|
||||||
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
|
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
|
||||||
|
|
||||||
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
|
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
|
||||||
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
|
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
|
||||||
|
|
||||||
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
|
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
|
||||||
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
|
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
|
||||||
|
|
||||||
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
|
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
|
||||||
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
|
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
|
||||||
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
|
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
|
||||||
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
|
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
|
||||||
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
|
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
|
||||||
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
|
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
|
||||||
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
|
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
|
||||||
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
|
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
|
||||||
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
|
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
|
||||||
|
|
||||||
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
|
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
|
||||||
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
|
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
|
||||||
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
|
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
|
||||||
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
|
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
|
||||||
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
|
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
|
||||||
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
|
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
|
||||||
|
|
||||||
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
|
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
|
||||||
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
|
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
|
||||||
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
|
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
|
||||||
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
|
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
|
||||||
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
|
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
|
||||||
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
|
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
|
||||||
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
|
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
|
||||||
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
|
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
|
||||||
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
|
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
|
||||||
|
|
||||||
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
|
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
|
||||||
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
|
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
|
||||||
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
|
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
|
||||||
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
|
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
|
||||||
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
|
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
|
||||||
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
|
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
|
||||||
|
|
||||||
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
|
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
|
||||||
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
|
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
|
||||||
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
|
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
|
||||||
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
|
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
|
||||||
|
|
||||||
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
|
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
|
||||||
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
|
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
|
||||||
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
|
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
|
||||||
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
|
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
|
||||||
|
|
||||||
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
|
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
|
||||||
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
|
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
|
||||||
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
|
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
|
||||||
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
|
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
|
||||||
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
|
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
|
||||||
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
|
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
|
||||||
|
|
||||||
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
|
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
|
||||||
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
|
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
|
||||||
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
|
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
|
||||||
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
|
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
|
||||||
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
|
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
|
||||||
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
|
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
|
||||||
|
|
||||||
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
|
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
|
||||||
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
|
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
|
||||||
|
|
||||||
|
|
||||||
/* 3DNOW extensions */
|
/* 3DNOW extensions */
|
||||||
|
|
||||||
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
|
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
|
||||||
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
|
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
|
||||||
|
|
||||||
|
|
||||||
/* AMD MMX extensions - also available in intel SSE */
|
/* AMD MMX extensions - also available in intel SSE */
|
||||||
|
|
||||||
|
|
||||||
#define mmx_m2ri(op,mem,reg,imm) \
|
#define mmx_m2ri(op,mem,reg,imm) \
|
||||||
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
|
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
|
||||||
: /* nothing */ \
|
: /* nothing */ \
|
||||||
: "X" (mem), "X" (imm))
|
: "X" (mem), "X" (imm))
|
||||||
#define mmx_r2ri(op,regs,regd,imm) \
|
#define mmx_r2ri(op,regs,regd,imm) \
|
||||||
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
|
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
|
||||||
: /* nothing */ \
|
: /* nothing */ \
|
||||||
: "X" (imm) )
|
: "X" (imm) )
|
||||||
|
|
||||||
#define mmx_fetch(mem,hint) \
|
#define mmx_fetch(mem,hint) \
|
||||||
__asm__ __volatile__ ("prefetch" #hint " %0" \
|
__asm__ __volatile__ ("prefetch" #hint " %0" \
|
||||||
: /* nothing */ \
|
: /* nothing */ \
|
||||||
: "X" (mem))
|
: "X" (mem))
|
||||||
|
|
||||||
|
|
||||||
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
|
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
|
||||||
|
|
||||||
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
|
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
|
||||||
|
|
||||||
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
|
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
|
||||||
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
|
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
|
||||||
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
|
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
|
||||||
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
|
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
|
||||||
|
|
||||||
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
|
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
|
||||||
|
|
||||||
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
|
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
|
||||||
|
|
||||||
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
|
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
|
||||||
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
|
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
|
||||||
|
|
||||||
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
|
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
|
||||||
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
|
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
|
||||||
|
|
||||||
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
|
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
|
||||||
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
|
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
|
||||||
|
|
||||||
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
|
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
|
||||||
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
|
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
|
||||||
|
|
||||||
#define pmovmskb(mmreg,reg) \
|
#define pmovmskb(mmreg,reg) \
|
||||||
__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
|
__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
|
||||||
|
|
||||||
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
|
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
|
||||||
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
|
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
|
||||||
|
|
||||||
#define prefetcht0(mem) mmx_fetch (mem, t0)
|
#define prefetcht0(mem) mmx_fetch (mem, t0)
|
||||||
#define prefetcht1(mem) mmx_fetch (mem, t1)
|
#define prefetcht1(mem) mmx_fetch (mem, t1)
|
||||||
#define prefetcht2(mem) mmx_fetch (mem, t2)
|
#define prefetcht2(mem) mmx_fetch (mem, t2)
|
||||||
#define prefetchnta(mem) mmx_fetch (mem, nta)
|
#define prefetchnta(mem) mmx_fetch (mem, nta)
|
||||||
|
|
||||||
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
|
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
|
||||||
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
|
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
|
||||||
|
|
||||||
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
|
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
|
||||||
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
|
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
|
||||||
|
|
||||||
#define sfence() __asm__ __volatile__ ("sfence\n\t")
|
#define sfence() __asm__ __volatile__ ("sfence\n\t")
|
||||||
|
|
||||||
/* SSE2 */
|
/* SSE2 */
|
||||||
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
|
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
|
||||||
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
|
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
|
||||||
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
|
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
|
||||||
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
|
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
|
||||||
|
|
||||||
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
|
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
|
||||||
|
|
||||||
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
|
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
|
||||||
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
|
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
|
||||||
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
|
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
|
||||||
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
|
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
|
||||||
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
|
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
|
||||||
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
|
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
|
||||||
|
|
||||||
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
|
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
|
||||||
|
|
||||||
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
|
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
|
||||||
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
|
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
|
||||||
|
|
||||||
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
|
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
|
||||||
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
|
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
|
||||||
|
|
||||||
|
|
||||||
#endif /* AVCODEC_I386MMX_H */
|
#endif /* AVCODEC_I386MMX_H */
|
||||||
|
@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
|||||||
{
|
{
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm4 \n\t"
|
"movq (%2, %%"REG_a"), %%mm4 \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
"psubusb %%mm0, %%mm2 \n\t"
|
"psubusb %%mm0, %%mm2 \n\t"
|
||||||
"psubusb %%mm4, %%mm0 \n\t"
|
"psubusb %%mm4, %%mm0 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm5 \n\t"
|
"movq (%2, %%"REG_a"), %%mm5 \n\t"
|
||||||
"psubusb %%mm1, %%mm3 \n\t"
|
"psubusb %%mm1, %%mm3 \n\t"
|
||||||
"psubusb %%mm5, %%mm1 \n\t"
|
"psubusb %%mm5, %%mm1 \n\t"
|
||||||
"por %%mm2, %%mm0 \n\t"
|
"por %%mm2, %%mm0 \n\t"
|
||||||
"por %%mm1, %%mm3 \n\t"
|
"por %%mm1, %%mm3 \n\t"
|
||||||
"movq %%mm0, %%mm1 \n\t"
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
"movq %%mm3, %%mm2 \n\t"
|
"movq %%mm3, %%mm2 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||||
"paddw %%mm1, %%mm0 \n\t"
|
"paddw %%mm1, %%mm0 \n\t"
|
||||||
"paddw %%mm3, %%mm2 \n\t"
|
"paddw %%mm3, %%mm2 \n\t"
|
||||||
"paddw %%mm2, %%mm0 \n\t"
|
"paddw %%mm2, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
|||||||
{
|
{
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||||
"psadbw %%mm2, %%mm0 \n\t"
|
"psadbw %%mm2, %%mm0 \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"psadbw %%mm1, %%mm3 \n\t"
|
"psadbw %%mm1, %%mm3 \n\t"
|
||||||
"paddw %%mm3, %%mm0 \n\t"
|
"paddw %%mm3, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %3, %%"REG_a" \n\t"
|
"add %3, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
|
|||||||
{
|
{
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||||
"pavgb %%mm2, %%mm0 \n\t"
|
"pavgb %%mm2, %%mm0 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||||
"psadbw %%mm2, %%mm0 \n\t"
|
"psadbw %%mm2, %%mm0 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"pavgb %%mm1, %%mm3 \n\t"
|
"pavgb %%mm1, %%mm3 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||||
"psadbw %%mm1, %%mm3 \n\t"
|
"psadbw %%mm1, %%mm3 \n\t"
|
||||||
"paddw %%mm3, %%mm0 \n\t"
|
"paddw %%mm3, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
|||||||
{ //FIXME reuse src
|
{ //FIXME reuse src
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"movq "MANGLE(bone)", %%mm5 \n\t"
|
"movq "MANGLE(bone)", %%mm5 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"pavgb %%mm2, %%mm0 \n\t"
|
"pavgb %%mm2, %%mm0 \n\t"
|
||||||
"pavgb %%mm1, %%mm3 \n\t"
|
"pavgb %%mm1, %%mm3 \n\t"
|
||||||
"psubusb %%mm5, %%mm3 \n\t"
|
"psubusb %%mm5, %%mm3 \n\t"
|
||||||
"pavgb %%mm3, %%mm0 \n\t"
|
"pavgb %%mm3, %%mm0 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||||
"psadbw %%mm2, %%mm0 \n\t"
|
"psadbw %%mm2, %%mm0 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
|
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
|
||||||
"pavgb %%mm3, %%mm1 \n\t"
|
"pavgb %%mm3, %%mm1 \n\t"
|
||||||
"pavgb %%mm4, %%mm2 \n\t"
|
"pavgb %%mm4, %%mm2 \n\t"
|
||||||
"psubusb %%mm5, %%mm2 \n\t"
|
"psubusb %%mm5, %%mm2 \n\t"
|
||||||
"pavgb %%mm1, %%mm2 \n\t"
|
"pavgb %%mm1, %%mm2 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||||
"psadbw %%mm1, %%mm2 \n\t"
|
"psadbw %%mm1, %%mm2 \n\t"
|
||||||
"paddw %%mm2, %%mm0 \n\t"
|
"paddw %%mm2, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
|
|||||||
{
|
{
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||||
"paddw %%mm0, %%mm1 \n\t"
|
"paddw %%mm0, %%mm1 \n\t"
|
||||||
"paddw %%mm2, %%mm3 \n\t"
|
"paddw %%mm2, %%mm3 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||||
"paddw %%mm5, %%mm1 \n\t"
|
"paddw %%mm5, %%mm1 \n\t"
|
||||||
"paddw %%mm5, %%mm3 \n\t"
|
"paddw %%mm5, %%mm3 \n\t"
|
||||||
"psrlw $1, %%mm1 \n\t"
|
"psrlw $1, %%mm1 \n\t"
|
||||||
"psrlw $1, %%mm3 \n\t"
|
"psrlw $1, %%mm3 \n\t"
|
||||||
"packuswb %%mm3, %%mm1 \n\t"
|
"packuswb %%mm3, %%mm1 \n\t"
|
||||||
"psubusb %%mm1, %%mm4 \n\t"
|
"psubusb %%mm1, %%mm4 \n\t"
|
||||||
"psubusb %%mm2, %%mm1 \n\t"
|
"psubusb %%mm2, %%mm1 \n\t"
|
||||||
"por %%mm4, %%mm1 \n\t"
|
"por %%mm4, %%mm1 \n\t"
|
||||||
"movq %%mm1, %%mm0 \n\t"
|
"movq %%mm1, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"paddw %%mm1, %%mm0 \n\t"
|
"paddw %%mm1, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
|||||||
{
|
{
|
||||||
long len= -(stride*h);
|
long len= -(stride*h);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq %%mm0, %%mm4 \n\t"
|
"movq %%mm0, %%mm4 \n\t"
|
||||||
"movq %%mm1, %%mm2 \n\t"
|
"movq %%mm1, %%mm2 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||||
"paddw %%mm1, %%mm0 \n\t"
|
"paddw %%mm1, %%mm0 \n\t"
|
||||||
"paddw %%mm2, %%mm4 \n\t"
|
"paddw %%mm2, %%mm4 \n\t"
|
||||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||||
"movq %%mm2, %%mm1 \n\t"
|
"movq %%mm2, %%mm1 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||||
"paddw %%mm0, %%mm2 \n\t"
|
"paddw %%mm0, %%mm2 \n\t"
|
||||||
"paddw %%mm4, %%mm1 \n\t"
|
"paddw %%mm4, %%mm1 \n\t"
|
||||||
"movq %%mm3, %%mm4 \n\t"
|
"movq %%mm3, %%mm4 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||||
"paddw %%mm3, %%mm2 \n\t"
|
"paddw %%mm3, %%mm2 \n\t"
|
||||||
"paddw %%mm4, %%mm1 \n\t"
|
"paddw %%mm4, %%mm1 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm3 \n\t"
|
"movq (%3, %%"REG_a"), %%mm3 \n\t"
|
||||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||||
"paddw %%mm5, %%mm2 \n\t"
|
"paddw %%mm5, %%mm2 \n\t"
|
||||||
"paddw %%mm5, %%mm1 \n\t"
|
"paddw %%mm5, %%mm1 \n\t"
|
||||||
"psrlw $2, %%mm2 \n\t"
|
"psrlw $2, %%mm2 \n\t"
|
||||||
"psrlw $2, %%mm1 \n\t"
|
"psrlw $2, %%mm1 \n\t"
|
||||||
"packuswb %%mm1, %%mm2 \n\t"
|
"packuswb %%mm1, %%mm2 \n\t"
|
||||||
"psubusb %%mm2, %%mm3 \n\t"
|
"psubusb %%mm2, %%mm3 \n\t"
|
||||||
"psubusb %%mm4, %%mm2 \n\t"
|
"psubusb %%mm4, %%mm2 \n\t"
|
||||||
"por %%mm3, %%mm2 \n\t"
|
"por %%mm3, %%mm2 \n\t"
|
||||||
"movq %%mm2, %%mm0 \n\t"
|
"movq %%mm2, %%mm0 \n\t"
|
||||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||||
"paddw %%mm2, %%mm0 \n\t"
|
"paddw %%mm2, %%mm0 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"add %4, %%"REG_a" \n\t"
|
"add %4, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
: "+a" (len)
|
: "+a" (len)
|
||||||
: "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
|
: "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
|
||||||
);
|
);
|
||||||
@ -243,13 +243,13 @@ static inline int sum_mmx(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movq %%mm6, %%mm0 \n\t"
|
"movq %%mm6, %%mm0 \n\t"
|
||||||
"psrlq $32, %%mm6 \n\t"
|
"psrlq $32, %%mm6 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"movq %%mm6, %%mm0 \n\t"
|
"movq %%mm6, %%mm0 \n\t"
|
||||||
"psrlq $16, %%mm6 \n\t"
|
"psrlq $16, %%mm6 \n\t"
|
||||||
"paddw %%mm0, %%mm6 \n\t"
|
"paddw %%mm0, %%mm6 \n\t"
|
||||||
"movd %%mm6, %0 \n\t"
|
"movd %%mm6, %0 \n\t"
|
||||||
: "=r" (ret)
|
: "=r" (ret)
|
||||||
);
|
);
|
||||||
return ret&0xFFFF;
|
return ret&0xFFFF;
|
||||||
@ -259,7 +259,7 @@ static inline int sum_mmx2(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %%mm6, %0 \n\t"
|
"movd %%mm6, %0 \n\t"
|
||||||
: "=r" (ret)
|
: "=r" (ret)
|
||||||
);
|
);
|
||||||
return ret;
|
return ret;
|
||||||
@ -270,8 +270,8 @@ static inline int sum_mmx2(void)
|
|||||||
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
assert(h==8);\
|
assert(h==8);\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t":);\
|
"pxor %%mm6, %%mm6 \n\t":);\
|
||||||
\
|
\
|
||||||
sad8_1_ ## suf(blk1, blk2, stride, 8);\
|
sad8_1_ ## suf(blk1, blk2, stride, 8);\
|
||||||
\
|
\
|
||||||
@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
|
|||||||
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
assert(h==8);\
|
assert(h==8);\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[1]) \
|
:: "m"(round_tab[1]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
|
|||||||
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
assert(h==8);\
|
assert(h==8);\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[1]) \
|
:: "m"(round_tab[1]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
|
|||||||
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
assert(h==8);\
|
assert(h==8);\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[2]) \
|
:: "m"(round_tab[2]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
|
|||||||
\
|
\
|
||||||
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t":);\
|
"pxor %%mm6, %%mm6 \n\t":);\
|
||||||
\
|
\
|
||||||
sad8_1_ ## suf(blk1 , blk2 , stride, h);\
|
sad8_1_ ## suf(blk1 , blk2 , stride, h);\
|
||||||
sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
|
sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
|
||||||
@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int
|
|||||||
}\
|
}\
|
||||||
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[1]) \
|
:: "m"(round_tab[1]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
|
|||||||
}\
|
}\
|
||||||
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[1]) \
|
:: "m"(round_tab[1]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
|
|||||||
}\
|
}\
|
||||||
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
|
||||||
{\
|
{\
|
||||||
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
asm volatile("pxor %%mm7, %%mm7 \n\t"\
|
||||||
"pxor %%mm6, %%mm6 \n\t"\
|
"pxor %%mm6, %%mm6 \n\t"\
|
||||||
"movq %0, %%mm5 \n\t"\
|
"movq %0, %%mm5 \n\t"\
|
||||||
:: "m"(round_tab[2]) \
|
:: "m"(round_tab[2]) \
|
||||||
);\
|
);\
|
||||||
\
|
\
|
||||||
@ -384,15 +384,15 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->pix_abs[1][2] = sad8_y2_mmx;
|
c->pix_abs[1][2] = sad8_y2_mmx;
|
||||||
c->pix_abs[1][3] = sad8_xy2_mmx;
|
c->pix_abs[1][3] = sad8_xy2_mmx;
|
||||||
|
|
||||||
c->sad[0]= sad16_mmx;
|
c->sad[0]= sad16_mmx;
|
||||||
c->sad[1]= sad8_mmx;
|
c->sad[1]= sad8_mmx;
|
||||||
}
|
}
|
||||||
if (mm_flags & MM_MMXEXT) {
|
if (mm_flags & MM_MMXEXT) {
|
||||||
c->pix_abs[0][0] = sad16_mmx2;
|
c->pix_abs[0][0] = sad16_mmx2;
|
||||||
c->pix_abs[1][0] = sad8_mmx2;
|
c->pix_abs[1][0] = sad8_mmx2;
|
||||||
|
|
||||||
c->sad[0]= sad16_mmx2;
|
c->sad[0]= sad16_mmx2;
|
||||||
c->sad[1]= sad8_mmx2;
|
c->sad[1]= sad8_mmx2;
|
||||||
|
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->pix_abs[0][1] = sad16_x2_mmx2;
|
c->pix_abs[0][1] = sad16_x2_mmx2;
|
||||||
|
@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
|
|||||||
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
||||||
//printf("%d %d ", qmul, qadd);
|
//printf("%d %d ", qmul, qadd);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %1, %%mm6 \n\t" //qmul
|
"movd %1, %%mm6 \n\t" //qmul
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"movd %2, %%mm5 \n\t" //qadd
|
"movd %2, %%mm5 \n\t" //qadd
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"packssdw %%mm5, %%mm5 \n\t"
|
"packssdw %%mm5, %%mm5 \n\t"
|
||||||
"packssdw %%mm5, %%mm5 \n\t"
|
"packssdw %%mm5, %%mm5 \n\t"
|
||||||
"psubw %%mm5, %%mm7 \n\t"
|
"psubw %%mm5, %%mm7 \n\t"
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %3), %%mm0 \n\t"
|
"movq (%0, %3), %%mm0 \n\t"
|
||||||
"movq 8(%0, %3), %%mm1 \n\t"
|
"movq 8(%0, %3), %%mm1 \n\t"
|
||||||
|
|
||||||
"pmullw %%mm6, %%mm0 \n\t"
|
"pmullw %%mm6, %%mm0 \n\t"
|
||||||
"pmullw %%mm6, %%mm1 \n\t"
|
"pmullw %%mm6, %%mm1 \n\t"
|
||||||
|
|
||||||
"movq (%0, %3), %%mm2 \n\t"
|
"movq (%0, %3), %%mm2 \n\t"
|
||||||
"movq 8(%0, %3), %%mm3 \n\t"
|
"movq 8(%0, %3), %%mm3 \n\t"
|
||||||
|
|
||||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
|
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
|
|
||||||
"paddw %%mm7, %%mm0 \n\t"
|
"paddw %%mm7, %%mm0 \n\t"
|
||||||
"paddw %%mm7, %%mm1 \n\t"
|
"paddw %%mm7, %%mm1 \n\t"
|
||||||
|
|
||||||
"pxor %%mm0, %%mm2 \n\t"
|
"pxor %%mm0, %%mm2 \n\t"
|
||||||
"pxor %%mm1, %%mm3 \n\t"
|
"pxor %%mm1, %%mm3 \n\t"
|
||||||
|
|
||||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
|
|
||||||
"pandn %%mm2, %%mm0 \n\t"
|
"pandn %%mm2, %%mm0 \n\t"
|
||||||
"pandn %%mm3, %%mm1 \n\t"
|
"pandn %%mm3, %%mm1 \n\t"
|
||||||
|
|
||||||
"movq %%mm0, (%0, %3) \n\t"
|
"movq %%mm0, (%0, %3) \n\t"
|
||||||
"movq %%mm1, 8(%0, %3) \n\t"
|
"movq %%mm1, 8(%0, %3) \n\t"
|
||||||
|
|
||||||
"add $16, %3 \n\t"
|
"add $16, %3 \n\t"
|
||||||
"jng 1b \n\t"
|
"jng 1b \n\t"
|
||||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||||
: "memory"
|
: "memory"
|
||||||
);
|
);
|
||||||
block[0]= level;
|
block[0]= level;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,52 +120,52 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
|
|||||||
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
||||||
//printf("%d %d ", qmul, qadd);
|
//printf("%d %d ", qmul, qadd);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %1, %%mm6 \n\t" //qmul
|
"movd %1, %%mm6 \n\t" //qmul
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"movd %2, %%mm5 \n\t" //qadd
|
"movd %2, %%mm5 \n\t" //qadd
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"packssdw %%mm5, %%mm5 \n\t"
|
"packssdw %%mm5, %%mm5 \n\t"
|
||||||
"packssdw %%mm5, %%mm5 \n\t"
|
"packssdw %%mm5, %%mm5 \n\t"
|
||||||
"psubw %%mm5, %%mm7 \n\t"
|
"psubw %%mm5, %%mm7 \n\t"
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %3), %%mm0 \n\t"
|
"movq (%0, %3), %%mm0 \n\t"
|
||||||
"movq 8(%0, %3), %%mm1 \n\t"
|
"movq 8(%0, %3), %%mm1 \n\t"
|
||||||
|
|
||||||
"pmullw %%mm6, %%mm0 \n\t"
|
"pmullw %%mm6, %%mm0 \n\t"
|
||||||
"pmullw %%mm6, %%mm1 \n\t"
|
"pmullw %%mm6, %%mm1 \n\t"
|
||||||
|
|
||||||
"movq (%0, %3), %%mm2 \n\t"
|
"movq (%0, %3), %%mm2 \n\t"
|
||||||
"movq 8(%0, %3), %%mm3 \n\t"
|
"movq 8(%0, %3), %%mm3 \n\t"
|
||||||
|
|
||||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
|
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
|
|
||||||
"paddw %%mm7, %%mm0 \n\t"
|
"paddw %%mm7, %%mm0 \n\t"
|
||||||
"paddw %%mm7, %%mm1 \n\t"
|
"paddw %%mm7, %%mm1 \n\t"
|
||||||
|
|
||||||
"pxor %%mm0, %%mm2 \n\t"
|
"pxor %%mm0, %%mm2 \n\t"
|
||||||
"pxor %%mm1, %%mm3 \n\t"
|
"pxor %%mm1, %%mm3 \n\t"
|
||||||
|
|
||||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
|
|
||||||
"pandn %%mm2, %%mm0 \n\t"
|
"pandn %%mm2, %%mm0 \n\t"
|
||||||
"pandn %%mm3, %%mm1 \n\t"
|
"pandn %%mm3, %%mm1 \n\t"
|
||||||
|
|
||||||
"movq %%mm0, (%0, %3) \n\t"
|
"movq %%mm0, (%0, %3) \n\t"
|
||||||
"movq %%mm1, 8(%0, %3) \n\t"
|
"movq %%mm1, 8(%0, %3) \n\t"
|
||||||
|
|
||||||
"add $16, %3 \n\t"
|
"add $16, %3 \n\t"
|
||||||
"jng 1b \n\t"
|
"jng 1b \n\t"
|
||||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||||
: "memory"
|
: "memory"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -216,54 +216,54 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
|
|||||||
/* XXX: only mpeg1 */
|
/* XXX: only mpeg1 */
|
||||||
quant_matrix = s->intra_matrix;
|
quant_matrix = s->intra_matrix;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||||
"psrlw $15, %%mm7 \n\t"
|
"psrlw $15, %%mm7 \n\t"
|
||||||
"movd %2, %%mm6 \n\t"
|
"movd %2, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"mov %3, %%"REG_a" \n\t"
|
"mov %3, %%"REG_a" \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pxor %%mm2, %%mm2 \n\t"
|
"pxor %%mm2, %%mm2 \n\t"
|
||||||
"pxor %%mm3, %%mm3 \n\t"
|
"pxor %%mm3, %%mm3 \n\t"
|
||||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||||
"psraw $3, %%mm0 \n\t"
|
"psraw $3, %%mm0 \n\t"
|
||||||
"psraw $3, %%mm1 \n\t"
|
"psraw $3, %%mm1 \n\t"
|
||||||
"psubw %%mm7, %%mm0 \n\t"
|
"psubw %%mm7, %%mm0 \n\t"
|
||||||
"psubw %%mm7, %%mm1 \n\t"
|
"psubw %%mm7, %%mm1 \n\t"
|
||||||
"por %%mm7, %%mm0 \n\t"
|
"por %%mm7, %%mm0 \n\t"
|
||||||
"por %%mm7, %%mm1 \n\t"
|
"por %%mm7, %%mm1 \n\t"
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t"
|
"psubw %%mm2, %%mm0 \n\t"
|
||||||
"psubw %%mm3, %%mm1 \n\t"
|
"psubw %%mm3, %%mm1 \n\t"
|
||||||
"pandn %%mm0, %%mm4 \n\t"
|
"pandn %%mm0, %%mm4 \n\t"
|
||||||
"pandn %%mm1, %%mm5 \n\t"
|
"pandn %%mm1, %%mm5 \n\t"
|
||||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||||
|
|
||||||
"add $16, %%"REG_a" \n\t"
|
"add $16, %%"REG_a" \n\t"
|
||||||
"js 1b \n\t"
|
"js 1b \n\t"
|
||||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||||
: "%"REG_a, "memory"
|
: "%"REG_a, "memory"
|
||||||
);
|
);
|
||||||
block[0]= block0;
|
block[0]= block0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
|
|||||||
|
|
||||||
quant_matrix = s->inter_matrix;
|
quant_matrix = s->inter_matrix;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||||
"psrlw $15, %%mm7 \n\t"
|
"psrlw $15, %%mm7 \n\t"
|
||||||
"movd %2, %%mm6 \n\t"
|
"movd %2, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"mov %3, %%"REG_a" \n\t"
|
"mov %3, %%"REG_a" \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pxor %%mm2, %%mm2 \n\t"
|
"pxor %%mm2, %%mm2 \n\t"
|
||||||
"pxor %%mm3, %%mm3 \n\t"
|
"pxor %%mm3, %%mm3 \n\t"
|
||||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||||
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
|
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
|
||||||
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
|
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
|
||||||
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||||
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||||
"psraw $4, %%mm0 \n\t"
|
"psraw $4, %%mm0 \n\t"
|
||||||
"psraw $4, %%mm1 \n\t"
|
"psraw $4, %%mm1 \n\t"
|
||||||
"psubw %%mm7, %%mm0 \n\t"
|
"psubw %%mm7, %%mm0 \n\t"
|
||||||
"psubw %%mm7, %%mm1 \n\t"
|
"psubw %%mm7, %%mm1 \n\t"
|
||||||
"por %%mm7, %%mm0 \n\t"
|
"por %%mm7, %%mm0 \n\t"
|
||||||
"por %%mm7, %%mm1 \n\t"
|
"por %%mm7, %%mm1 \n\t"
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t"
|
"psubw %%mm2, %%mm0 \n\t"
|
||||||
"psubw %%mm3, %%mm1 \n\t"
|
"psubw %%mm3, %%mm1 \n\t"
|
||||||
"pandn %%mm0, %%mm4 \n\t"
|
"pandn %%mm0, %%mm4 \n\t"
|
||||||
"pandn %%mm1, %%mm5 \n\t"
|
"pandn %%mm1, %%mm5 \n\t"
|
||||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||||
|
|
||||||
"add $16, %%"REG_a" \n\t"
|
"add $16, %%"REG_a" \n\t"
|
||||||
"js 1b \n\t"
|
"js 1b \n\t"
|
||||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||||
: "%"REG_a, "memory"
|
: "%"REG_a, "memory"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
|
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
|
||||||
@ -351,50 +351,50 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
|
|||||||
block0 = block[0] * s->c_dc_scale;
|
block0 = block[0] * s->c_dc_scale;
|
||||||
quant_matrix = s->intra_matrix;
|
quant_matrix = s->intra_matrix;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||||
"psrlw $15, %%mm7 \n\t"
|
"psrlw $15, %%mm7 \n\t"
|
||||||
"movd %2, %%mm6 \n\t"
|
"movd %2, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"mov %3, %%"REG_a" \n\t"
|
"mov %3, %%"REG_a" \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pxor %%mm2, %%mm2 \n\t"
|
"pxor %%mm2, %%mm2 \n\t"
|
||||||
"pxor %%mm3, %%mm3 \n\t"
|
"pxor %%mm3, %%mm3 \n\t"
|
||||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||||
"psraw $3, %%mm0 \n\t"
|
"psraw $3, %%mm0 \n\t"
|
||||||
"psraw $3, %%mm1 \n\t"
|
"psraw $3, %%mm1 \n\t"
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t"
|
"psubw %%mm2, %%mm0 \n\t"
|
||||||
"psubw %%mm3, %%mm1 \n\t"
|
"psubw %%mm3, %%mm1 \n\t"
|
||||||
"pandn %%mm0, %%mm4 \n\t"
|
"pandn %%mm0, %%mm4 \n\t"
|
||||||
"pandn %%mm1, %%mm5 \n\t"
|
"pandn %%mm1, %%mm5 \n\t"
|
||||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||||
|
|
||||||
"add $16, %%"REG_a" \n\t"
|
"add $16, %%"REG_a" \n\t"
|
||||||
"jng 1b \n\t"
|
"jng 1b \n\t"
|
||||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||||
: "%"REG_a, "memory"
|
: "%"REG_a, "memory"
|
||||||
);
|
);
|
||||||
block[0]= block0;
|
block[0]= block0;
|
||||||
//Note, we dont do mismatch control for intra as errors cannot accumulate
|
//Note, we dont do mismatch control for intra as errors cannot accumulate
|
||||||
}
|
}
|
||||||
@ -412,68 +412,68 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
|
|||||||
|
|
||||||
quant_matrix = s->inter_matrix;
|
quant_matrix = s->inter_matrix;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||||
"psrlq $48, %%mm7 \n\t"
|
"psrlq $48, %%mm7 \n\t"
|
||||||
"movd %2, %%mm6 \n\t"
|
"movd %2, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"packssdw %%mm6, %%mm6 \n\t"
|
"packssdw %%mm6, %%mm6 \n\t"
|
||||||
"mov %3, %%"REG_a" \n\t"
|
"mov %3, %%"REG_a" \n\t"
|
||||||
".balign 16\n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||||
"pxor %%mm2, %%mm2 \n\t"
|
"pxor %%mm2, %%mm2 \n\t"
|
||||||
"pxor %%mm3, %%mm3 \n\t"
|
"pxor %%mm3, %%mm3 \n\t"
|
||||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
|
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
|
||||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
|
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
|
||||||
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||||
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||||
"pxor %%mm4, %%mm4 \n\t"
|
"pxor %%mm4, %%mm4 \n\t"
|
||||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||||
"psrlw $4, %%mm0 \n\t"
|
"psrlw $4, %%mm0 \n\t"
|
||||||
"psrlw $4, %%mm1 \n\t"
|
"psrlw $4, %%mm1 \n\t"
|
||||||
"pxor %%mm2, %%mm0 \n\t"
|
"pxor %%mm2, %%mm0 \n\t"
|
||||||
"pxor %%mm3, %%mm1 \n\t"
|
"pxor %%mm3, %%mm1 \n\t"
|
||||||
"psubw %%mm2, %%mm0 \n\t"
|
"psubw %%mm2, %%mm0 \n\t"
|
||||||
"psubw %%mm3, %%mm1 \n\t"
|
"psubw %%mm3, %%mm1 \n\t"
|
||||||
"pandn %%mm0, %%mm4 \n\t"
|
"pandn %%mm0, %%mm4 \n\t"
|
||||||
"pandn %%mm1, %%mm5 \n\t"
|
"pandn %%mm1, %%mm5 \n\t"
|
||||||
"pxor %%mm4, %%mm7 \n\t"
|
"pxor %%mm4, %%mm7 \n\t"
|
||||||
"pxor %%mm5, %%mm7 \n\t"
|
"pxor %%mm5, %%mm7 \n\t"
|
||||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||||
|
|
||||||
"add $16, %%"REG_a" \n\t"
|
"add $16, %%"REG_a" \n\t"
|
||||||
"jng 1b \n\t"
|
"jng 1b \n\t"
|
||||||
"movd 124(%0, %3), %%mm0 \n\t"
|
"movd 124(%0, %3), %%mm0 \n\t"
|
||||||
"movq %%mm7, %%mm6 \n\t"
|
"movq %%mm7, %%mm6 \n\t"
|
||||||
"psrlq $32, %%mm7 \n\t"
|
"psrlq $32, %%mm7 \n\t"
|
||||||
"pxor %%mm6, %%mm7 \n\t"
|
"pxor %%mm6, %%mm7 \n\t"
|
||||||
"movq %%mm7, %%mm6 \n\t"
|
"movq %%mm7, %%mm6 \n\t"
|
||||||
"psrlq $16, %%mm7 \n\t"
|
"psrlq $16, %%mm7 \n\t"
|
||||||
"pxor %%mm6, %%mm7 \n\t"
|
"pxor %%mm6, %%mm7 \n\t"
|
||||||
"pslld $31, %%mm7 \n\t"
|
"pslld $31, %%mm7 \n\t"
|
||||||
"psrlq $15, %%mm7 \n\t"
|
"psrlq $15, %%mm7 \n\t"
|
||||||
"pxor %%mm7, %%mm0 \n\t"
|
"pxor %%mm7, %%mm0 \n\t"
|
||||||
"movd %%mm0, 124(%0, %3) \n\t"
|
"movd %%mm0, 124(%0, %3) \n\t"
|
||||||
|
|
||||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
|
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
|
||||||
: "%"REG_a, "memory"
|
: "%"REG_a, "memory"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* draw the edges of width 'w' of an image of size width, height
|
/* draw the edges of width 'w' of an image of size width, height
|
||||||
@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|||||||
ptr = buf;
|
ptr = buf;
|
||||||
if(w==8)
|
if(w==8)
|
||||||
{
|
{
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movd (%0), %%mm0 \n\t"
|
"movd (%0), %%mm0 \n\t"
|
||||||
"punpcklbw %%mm0, %%mm0 \n\t"
|
"punpcklbw %%mm0, %%mm0 \n\t"
|
||||||
"punpcklwd %%mm0, %%mm0 \n\t"
|
"punpcklwd %%mm0, %%mm0 \n\t"
|
||||||
"punpckldq %%mm0, %%mm0 \n\t"
|
"punpckldq %%mm0, %%mm0 \n\t"
|
||||||
"movq %%mm0, -8(%0) \n\t"
|
"movq %%mm0, -8(%0) \n\t"
|
||||||
"movq -8(%0, %2), %%mm1 \n\t"
|
"movq -8(%0, %2), %%mm1 \n\t"
|
||||||
"punpckhbw %%mm1, %%mm1 \n\t"
|
"punpckhbw %%mm1, %%mm1 \n\t"
|
||||||
"punpckhwd %%mm1, %%mm1 \n\t"
|
"punpckhwd %%mm1, %%mm1 \n\t"
|
||||||
"punpckhdq %%mm1, %%mm1 \n\t"
|
"punpckhdq %%mm1, %%mm1 \n\t"
|
||||||
"movq %%mm1, (%0, %2) \n\t"
|
"movq %%mm1, (%0, %2) \n\t"
|
||||||
"add %1, %0 \n\t"
|
"add %1, %0 \n\t"
|
||||||
"cmp %3, %0 \n\t"
|
"cmp %3, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (ptr)
|
: "+r" (ptr)
|
||||||
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movd (%0), %%mm0 \n\t"
|
"movd (%0), %%mm0 \n\t"
|
||||||
"punpcklbw %%mm0, %%mm0 \n\t"
|
"punpcklbw %%mm0, %%mm0 \n\t"
|
||||||
"punpcklwd %%mm0, %%mm0 \n\t"
|
"punpcklwd %%mm0, %%mm0 \n\t"
|
||||||
"punpckldq %%mm0, %%mm0 \n\t"
|
"punpckldq %%mm0, %%mm0 \n\t"
|
||||||
"movq %%mm0, -8(%0) \n\t"
|
"movq %%mm0, -8(%0) \n\t"
|
||||||
"movq %%mm0, -16(%0) \n\t"
|
"movq %%mm0, -16(%0) \n\t"
|
||||||
"movq -8(%0, %2), %%mm1 \n\t"
|
"movq -8(%0, %2), %%mm1 \n\t"
|
||||||
"punpckhbw %%mm1, %%mm1 \n\t"
|
"punpckhbw %%mm1, %%mm1 \n\t"
|
||||||
"punpckhwd %%mm1, %%mm1 \n\t"
|
"punpckhwd %%mm1, %%mm1 \n\t"
|
||||||
"punpckhdq %%mm1, %%mm1 \n\t"
|
"punpckhdq %%mm1, %%mm1 \n\t"
|
||||||
"movq %%mm1, (%0, %2) \n\t"
|
"movq %%mm1, (%0, %2) \n\t"
|
||||||
"movq %%mm1, 8(%0, %2) \n\t"
|
"movq %%mm1, 8(%0, %2) \n\t"
|
||||||
"add %1, %0 \n\t"
|
"add %1, %0 \n\t"
|
||||||
"cmp %3, %0 \n\t"
|
"cmp %3, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (ptr)
|
: "+r" (ptr)
|
||||||
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i=0;i<w;i+=4) {
|
for(i=0;i<w;i+=4) {
|
||||||
/* top and bottom (and hopefully also the corners) */
|
/* top and bottom (and hopefully also the corners) */
|
||||||
ptr= buf - (i + 1) * wrap - w;
|
ptr= buf - (i + 1) * wrap - w;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %0), %%mm0 \n\t"
|
"movq (%1, %0), %%mm0 \n\t"
|
||||||
"movq %%mm0, (%0) \n\t"
|
"movq %%mm0, (%0) \n\t"
|
||||||
"movq %%mm0, (%0, %2) \n\t"
|
"movq %%mm0, (%0, %2) \n\t"
|
||||||
"movq %%mm0, (%0, %2, 2) \n\t"
|
"movq %%mm0, (%0, %2, 2) \n\t"
|
||||||
"movq %%mm0, (%0, %3) \n\t"
|
"movq %%mm0, (%0, %3) \n\t"
|
||||||
"add $8, %0 \n\t"
|
"add $8, %0 \n\t"
|
||||||
"cmp %4, %0 \n\t"
|
"cmp %4, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (ptr)
|
: "+r" (ptr)
|
||||||
: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
|
: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
|
||||||
);
|
);
|
||||||
ptr= last_line + (i + 1) * wrap - w;
|
ptr= last_line + (i + 1) * wrap - w;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movq (%1, %0), %%mm0 \n\t"
|
"movq (%1, %0), %%mm0 \n\t"
|
||||||
"movq %%mm0, (%0) \n\t"
|
"movq %%mm0, (%0) \n\t"
|
||||||
"movq %%mm0, (%0, %2) \n\t"
|
"movq %%mm0, (%0, %2) \n\t"
|
||||||
"movq %%mm0, (%0, %2, 2) \n\t"
|
"movq %%mm0, (%0, %2, 2) \n\t"
|
||||||
"movq %%mm0, (%0, %3) \n\t"
|
"movq %%mm0, (%0, %3) \n\t"
|
||||||
"add $8, %0 \n\t"
|
"add $8, %0 \n\t"
|
||||||
"cmp %4, %0 \n\t"
|
"cmp %4, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (ptr)
|
: "+r" (ptr)
|
||||||
: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
|
: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -572,47 +572,47 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
|
|||||||
s->dct_count[intra]++;
|
s->dct_count[intra]++;
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"pxor %%mm0, %%mm0 \n\t"
|
"pxor %%mm0, %%mm0 \n\t"
|
||||||
"pxor %%mm1, %%mm1 \n\t"
|
"pxor %%mm1, %%mm1 \n\t"
|
||||||
"movq (%0), %%mm2 \n\t"
|
"movq (%0), %%mm2 \n\t"
|
||||||
"movq 8(%0), %%mm3 \n\t"
|
"movq 8(%0), %%mm3 \n\t"
|
||||||
"pcmpgtw %%mm2, %%mm0 \n\t"
|
"pcmpgtw %%mm2, %%mm0 \n\t"
|
||||||
"pcmpgtw %%mm3, %%mm1 \n\t"
|
"pcmpgtw %%mm3, %%mm1 \n\t"
|
||||||
"pxor %%mm0, %%mm2 \n\t"
|
"pxor %%mm0, %%mm2 \n\t"
|
||||||
"pxor %%mm1, %%mm3 \n\t"
|
"pxor %%mm1, %%mm3 \n\t"
|
||||||
"psubw %%mm0, %%mm2 \n\t"
|
"psubw %%mm0, %%mm2 \n\t"
|
||||||
"psubw %%mm1, %%mm3 \n\t"
|
"psubw %%mm1, %%mm3 \n\t"
|
||||||
"movq %%mm2, %%mm4 \n\t"
|
"movq %%mm2, %%mm4 \n\t"
|
||||||
"movq %%mm3, %%mm5 \n\t"
|
"movq %%mm3, %%mm5 \n\t"
|
||||||
"psubusw (%2), %%mm2 \n\t"
|
"psubusw (%2), %%mm2 \n\t"
|
||||||
"psubusw 8(%2), %%mm3 \n\t"
|
"psubusw 8(%2), %%mm3 \n\t"
|
||||||
"pxor %%mm0, %%mm2 \n\t"
|
"pxor %%mm0, %%mm2 \n\t"
|
||||||
"pxor %%mm1, %%mm3 \n\t"
|
"pxor %%mm1, %%mm3 \n\t"
|
||||||
"psubw %%mm0, %%mm2 \n\t"
|
"psubw %%mm0, %%mm2 \n\t"
|
||||||
"psubw %%mm1, %%mm3 \n\t"
|
"psubw %%mm1, %%mm3 \n\t"
|
||||||
"movq %%mm2, (%0) \n\t"
|
"movq %%mm2, (%0) \n\t"
|
||||||
"movq %%mm3, 8(%0) \n\t"
|
"movq %%mm3, 8(%0) \n\t"
|
||||||
"movq %%mm4, %%mm2 \n\t"
|
"movq %%mm4, %%mm2 \n\t"
|
||||||
"movq %%mm5, %%mm3 \n\t"
|
"movq %%mm5, %%mm3 \n\t"
|
||||||
"punpcklwd %%mm7, %%mm4 \n\t"
|
"punpcklwd %%mm7, %%mm4 \n\t"
|
||||||
"punpckhwd %%mm7, %%mm2 \n\t"
|
"punpckhwd %%mm7, %%mm2 \n\t"
|
||||||
"punpcklwd %%mm7, %%mm5 \n\t"
|
"punpcklwd %%mm7, %%mm5 \n\t"
|
||||||
"punpckhwd %%mm7, %%mm3 \n\t"
|
"punpckhwd %%mm7, %%mm3 \n\t"
|
||||||
"paddd (%1), %%mm4 \n\t"
|
"paddd (%1), %%mm4 \n\t"
|
||||||
"paddd 8(%1), %%mm2 \n\t"
|
"paddd 8(%1), %%mm2 \n\t"
|
||||||
"paddd 16(%1), %%mm5 \n\t"
|
"paddd 16(%1), %%mm5 \n\t"
|
||||||
"paddd 24(%1), %%mm3 \n\t"
|
"paddd 24(%1), %%mm3 \n\t"
|
||||||
"movq %%mm4, (%1) \n\t"
|
"movq %%mm4, (%1) \n\t"
|
||||||
"movq %%mm2, 8(%1) \n\t"
|
"movq %%mm2, 8(%1) \n\t"
|
||||||
"movq %%mm5, 16(%1) \n\t"
|
"movq %%mm5, 16(%1) \n\t"
|
||||||
"movq %%mm3, 24(%1) \n\t"
|
"movq %%mm3, 24(%1) \n\t"
|
||||||
"add $16, %0 \n\t"
|
"add $16, %0 \n\t"
|
||||||
"add $32, %1 \n\t"
|
"add $32, %1 \n\t"
|
||||||
"add $16, %2 \n\t"
|
"add $16, %2 \n\t"
|
||||||
"cmp %3, %0 \n\t"
|
"cmp %3, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (block), "+r" (sum), "+r" (offset)
|
: "+r" (block), "+r" (sum), "+r" (offset)
|
||||||
: "r"(block+64)
|
: "r"(block+64)
|
||||||
);
|
);
|
||||||
@ -626,47 +626,47 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
|
|||||||
s->dct_count[intra]++;
|
s->dct_count[intra]++;
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pxor %%xmm7, %%xmm7 \n\t"
|
"pxor %%xmm7, %%xmm7 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"pxor %%xmm0, %%xmm0 \n\t"
|
"pxor %%xmm0, %%xmm0 \n\t"
|
||||||
"pxor %%xmm1, %%xmm1 \n\t"
|
"pxor %%xmm1, %%xmm1 \n\t"
|
||||||
"movdqa (%0), %%xmm2 \n\t"
|
"movdqa (%0), %%xmm2 \n\t"
|
||||||
"movdqa 16(%0), %%xmm3 \n\t"
|
"movdqa 16(%0), %%xmm3 \n\t"
|
||||||
"pcmpgtw %%xmm2, %%xmm0 \n\t"
|
"pcmpgtw %%xmm2, %%xmm0 \n\t"
|
||||||
"pcmpgtw %%xmm3, %%xmm1 \n\t"
|
"pcmpgtw %%xmm3, %%xmm1 \n\t"
|
||||||
"pxor %%xmm0, %%xmm2 \n\t"
|
"pxor %%xmm0, %%xmm2 \n\t"
|
||||||
"pxor %%xmm1, %%xmm3 \n\t"
|
"pxor %%xmm1, %%xmm3 \n\t"
|
||||||
"psubw %%xmm0, %%xmm2 \n\t"
|
"psubw %%xmm0, %%xmm2 \n\t"
|
||||||
"psubw %%xmm1, %%xmm3 \n\t"
|
"psubw %%xmm1, %%xmm3 \n\t"
|
||||||
"movdqa %%xmm2, %%xmm4 \n\t"
|
"movdqa %%xmm2, %%xmm4 \n\t"
|
||||||
"movdqa %%xmm3, %%xmm5 \n\t"
|
"movdqa %%xmm3, %%xmm5 \n\t"
|
||||||
"psubusw (%2), %%xmm2 \n\t"
|
"psubusw (%2), %%xmm2 \n\t"
|
||||||
"psubusw 16(%2), %%xmm3 \n\t"
|
"psubusw 16(%2), %%xmm3 \n\t"
|
||||||
"pxor %%xmm0, %%xmm2 \n\t"
|
"pxor %%xmm0, %%xmm2 \n\t"
|
||||||
"pxor %%xmm1, %%xmm3 \n\t"
|
"pxor %%xmm1, %%xmm3 \n\t"
|
||||||
"psubw %%xmm0, %%xmm2 \n\t"
|
"psubw %%xmm0, %%xmm2 \n\t"
|
||||||
"psubw %%xmm1, %%xmm3 \n\t"
|
"psubw %%xmm1, %%xmm3 \n\t"
|
||||||
"movdqa %%xmm2, (%0) \n\t"
|
"movdqa %%xmm2, (%0) \n\t"
|
||||||
"movdqa %%xmm3, 16(%0) \n\t"
|
"movdqa %%xmm3, 16(%0) \n\t"
|
||||||
"movdqa %%xmm4, %%xmm6 \n\t"
|
"movdqa %%xmm4, %%xmm6 \n\t"
|
||||||
"movdqa %%xmm5, %%xmm0 \n\t"
|
"movdqa %%xmm5, %%xmm0 \n\t"
|
||||||
"punpcklwd %%xmm7, %%xmm4 \n\t"
|
"punpcklwd %%xmm7, %%xmm4 \n\t"
|
||||||
"punpckhwd %%xmm7, %%xmm6 \n\t"
|
"punpckhwd %%xmm7, %%xmm6 \n\t"
|
||||||
"punpcklwd %%xmm7, %%xmm5 \n\t"
|
"punpcklwd %%xmm7, %%xmm5 \n\t"
|
||||||
"punpckhwd %%xmm7, %%xmm0 \n\t"
|
"punpckhwd %%xmm7, %%xmm0 \n\t"
|
||||||
"paddd (%1), %%xmm4 \n\t"
|
"paddd (%1), %%xmm4 \n\t"
|
||||||
"paddd 16(%1), %%xmm6 \n\t"
|
"paddd 16(%1), %%xmm6 \n\t"
|
||||||
"paddd 32(%1), %%xmm5 \n\t"
|
"paddd 32(%1), %%xmm5 \n\t"
|
||||||
"paddd 48(%1), %%xmm0 \n\t"
|
"paddd 48(%1), %%xmm0 \n\t"
|
||||||
"movdqa %%xmm4, (%1) \n\t"
|
"movdqa %%xmm4, (%1) \n\t"
|
||||||
"movdqa %%xmm6, 16(%1) \n\t"
|
"movdqa %%xmm6, 16(%1) \n\t"
|
||||||
"movdqa %%xmm5, 32(%1) \n\t"
|
"movdqa %%xmm5, 32(%1) \n\t"
|
||||||
"movdqa %%xmm0, 48(%1) \n\t"
|
"movdqa %%xmm0, 48(%1) \n\t"
|
||||||
"add $32, %0 \n\t"
|
"add $32, %0 \n\t"
|
||||||
"add $64, %1 \n\t"
|
"add $64, %1 \n\t"
|
||||||
"add $32, %2 \n\t"
|
"add $32, %2 \n\t"
|
||||||
"cmp %3, %0 \n\t"
|
"cmp %3, %0 \n\t"
|
||||||
" jb 1b \n\t"
|
" jb 1b \n\t"
|
||||||
: "+r" (block), "+r" (sum), "+r" (offset)
|
: "+r" (block), "+r" (sum), "+r" (offset)
|
||||||
: "r"(block+64)
|
: "r"(block+64)
|
||||||
);
|
);
|
||||||
@ -705,10 +705,10 @@ void MPV_common_init_mmx(MpegEncContext *s)
|
|||||||
draw_edges = draw_edges_mmx;
|
draw_edges = draw_edges_mmx;
|
||||||
|
|
||||||
if (mm_flags & MM_SSE2) {
|
if (mm_flags & MM_SSE2) {
|
||||||
s->denoise_dct= denoise_dct_sse2;
|
s->denoise_dct= denoise_dct_sse2;
|
||||||
} else {
|
} else {
|
||||||
s->denoise_dct= denoise_dct_mmx;
|
s->denoise_dct= denoise_dct_mmx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
|
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
|
||||||
if(mm_flags & MM_SSE2){
|
if(mm_flags & MM_SSE2){
|
||||||
|
@ -21,26 +21,26 @@
|
|||||||
#undef PMAXW
|
#undef PMAXW
|
||||||
#ifdef HAVE_MMX2
|
#ifdef HAVE_MMX2
|
||||||
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
|
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
|
||||||
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
|
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
|
||||||
#define PMAX(a,b) \
|
#define PMAX(a,b) \
|
||||||
"pshufw $0x0E," #a ", " #b " \n\t"\
|
"pshufw $0x0E," #a ", " #b " \n\t"\
|
||||||
PMAXW(b, a)\
|
PMAXW(b, a)\
|
||||||
"pshufw $0x01," #a ", " #b " \n\t"\
|
"pshufw $0x01," #a ", " #b " \n\t"\
|
||||||
PMAXW(b, a)
|
PMAXW(b, a)
|
||||||
#else
|
#else
|
||||||
#define SPREADW(a) \
|
#define SPREADW(a) \
|
||||||
"punpcklwd " #a ", " #a " \n\t"\
|
"punpcklwd " #a ", " #a " \n\t"\
|
||||||
"punpcklwd " #a ", " #a " \n\t"
|
"punpcklwd " #a ", " #a " \n\t"
|
||||||
#define PMAXW(a,b) \
|
#define PMAXW(a,b) \
|
||||||
"psubusw " #a ", " #b " \n\t"\
|
"psubusw " #a ", " #b " \n\t"\
|
||||||
"paddw " #a ", " #b " \n\t"
|
"paddw " #a ", " #b " \n\t"
|
||||||
#define PMAX(a,b) \
|
#define PMAX(a,b) \
|
||||||
"movq " #a ", " #b " \n\t"\
|
"movq " #a ", " #b " \n\t"\
|
||||||
"psrlq $32, " #a " \n\t"\
|
"psrlq $32, " #a " \n\t"\
|
||||||
PMAXW(b, a)\
|
PMAXW(b, a)\
|
||||||
"movq " #a ", " #b " \n\t"\
|
"movq " #a ", " #b " \n\t"\
|
||||||
"psrlq $16, " #a " \n\t"\
|
"psrlq $16, " #a " \n\t"\
|
||||||
PMAXW(b, a)
|
PMAXW(b, a)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -71,18 +71,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
|||||||
if (!s->h263_aic) {
|
if (!s->h263_aic) {
|
||||||
#if 1
|
#if 1
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mul %%ecx \n\t"
|
"mul %%ecx \n\t"
|
||||||
: "=d" (level), "=a"(dummy)
|
: "=d" (level), "=a"(dummy)
|
||||||
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
|
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"xorl %%edx, %%edx \n\t"
|
"xorl %%edx, %%edx \n\t"
|
||||||
"divw %%cx \n\t"
|
"divw %%cx \n\t"
|
||||||
"movzwl %%ax, %%eax \n\t"
|
"movzwl %%ax, %%eax \n\t"
|
||||||
: "=a" (level)
|
: "=a" (level)
|
||||||
: "a" ((block[0]>>2) + q), "c" (q<<1)
|
: "a" ((block[0]>>2) + q), "c" (q<<1)
|
||||||
: "%edx"
|
: "%edx"
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
} else
|
} else
|
||||||
@ -103,94 +103,94 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
|||||||
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
|
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||||
SPREADW(%%mm3)
|
SPREADW(%%mm3)
|
||||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||||
"movq (%2), %%mm5 \n\t" // qmat[0]
|
"movq (%2), %%mm5 \n\t" // qmat[0]
|
||||||
"pxor %%mm6, %%mm6 \n\t"
|
"pxor %%mm6, %%mm6 \n\t"
|
||||||
"psubw (%3), %%mm6 \n\t" // -bias[0]
|
"psubw (%3), %%mm6 \n\t" // -bias[0]
|
||||||
"mov $-128, %%"REG_a" \n\t"
|
"mov $-128, %%"REG_a" \n\t"
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||||
"pxor %%mm1, %%mm0 \n\t"
|
"pxor %%mm1, %%mm0 \n\t"
|
||||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||||
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
||||||
"por %%mm0, %%mm4 \n\t"
|
"por %%mm0, %%mm4 \n\t"
|
||||||
"pxor %%mm1, %%mm0 \n\t"
|
"pxor %%mm1, %%mm0 \n\t"
|
||||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||||
"pandn %%mm1, %%mm0 \n\t"
|
"pandn %%mm1, %%mm0 \n\t"
|
||||||
PMAXW(%%mm0, %%mm3)
|
PMAXW(%%mm0, %%mm3)
|
||||||
"add $8, %%"REG_a" \n\t"
|
"add $8, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
PMAX(%%mm3, %%mm0)
|
PMAX(%%mm3, %%mm0)
|
||||||
"movd %%mm3, %%"REG_a" \n\t"
|
"movd %%mm3, %%"REG_a" \n\t"
|
||||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||||
: "+a" (last_non_zero_p1)
|
: "+a" (last_non_zero_p1)
|
||||||
: "r" (block+64), "r" (qmat), "r" (bias),
|
: "r" (block+64), "r" (qmat), "r" (bias),
|
||||||
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
||||||
);
|
);
|
||||||
// note: the asm is split because gcc doesn't like that many operands ...
|
// note: the asm is split because gcc doesn't like that many operands ...
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||||
SPREADW(%%mm1)
|
SPREADW(%%mm1)
|
||||||
"psubusw %%mm1, %%mm4 \n\t"
|
"psubusw %%mm1, %%mm4 \n\t"
|
||||||
"packuswb %%mm4, %%mm4 \n\t"
|
"packuswb %%mm4, %%mm4 \n\t"
|
||||||
"movd %%mm4, %0 \n\t" // *overflow
|
"movd %%mm4, %0 \n\t" // *overflow
|
||||||
: "=g" (*overflow)
|
: "=g" (*overflow)
|
||||||
: "g" (s->max_qcoeff)
|
: "g" (s->max_qcoeff)
|
||||||
);
|
);
|
||||||
}else{ // FMT_H263
|
}else{ // FMT_H263
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||||
SPREADW(%%mm3)
|
SPREADW(%%mm3)
|
||||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||||
"mov $-128, %%"REG_a" \n\t"
|
"mov $-128, %%"REG_a" \n\t"
|
||||||
".balign 16 \n\t"
|
".balign 16 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||||
"pxor %%mm1, %%mm0 \n\t"
|
"pxor %%mm1, %%mm0 \n\t"
|
||||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||||
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
|
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
|
||||||
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||||
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
|
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
|
||||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
||||||
"por %%mm0, %%mm4 \n\t"
|
"por %%mm0, %%mm4 \n\t"
|
||||||
"pxor %%mm1, %%mm0 \n\t"
|
"pxor %%mm1, %%mm0 \n\t"
|
||||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||||
"pandn %%mm1, %%mm0 \n\t"
|
"pandn %%mm1, %%mm0 \n\t"
|
||||||
PMAXW(%%mm0, %%mm3)
|
PMAXW(%%mm0, %%mm3)
|
||||||
"add $8, %%"REG_a" \n\t"
|
"add $8, %%"REG_a" \n\t"
|
||||||
" js 1b \n\t"
|
" js 1b \n\t"
|
||||||
PMAX(%%mm3, %%mm0)
|
PMAX(%%mm3, %%mm0)
|
||||||
"movd %%mm3, %%"REG_a" \n\t"
|
"movd %%mm3, %%"REG_a" \n\t"
|
||||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||||
: "+a" (last_non_zero_p1)
|
: "+a" (last_non_zero_p1)
|
||||||
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
|
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
|
||||||
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
||||||
);
|
);
|
||||||
// note: the asm is split because gcc doesn't like that many operands ...
|
// note: the asm is split because gcc doesn't like that many operands ...
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||||
SPREADW(%%mm1)
|
SPREADW(%%mm1)
|
||||||
"psubusw %%mm1, %%mm4 \n\t"
|
"psubusw %%mm1, %%mm4 \n\t"
|
||||||
"packuswb %%mm4, %%mm4 \n\t"
|
"packuswb %%mm4, %%mm4 \n\t"
|
||||||
"movd %%mm4, %0 \n\t" // *overflow
|
"movd %%mm4, %0 \n\t" // *overflow
|
||||||
: "=g" (*overflow)
|
: "=g" (*overflow)
|
||||||
: "g" (s->max_qcoeff)
|
: "g" (s->max_qcoeff)
|
||||||
);
|
);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -257,13 +257,13 @@ enum PixelFormat avcodec_get_pix_fmt(const char* name)
|
|||||||
|
|
||||||
for (i=0; i < PIX_FMT_NB; i++)
|
for (i=0; i < PIX_FMT_NB; i++)
|
||||||
if (!strcmp(pix_fmt_info[i].name, name))
|
if (!strcmp(pix_fmt_info[i].name, name))
|
||||||
break;
|
break;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Picture fields are filled with 'ptr' addresses. Also returns the size. */
|
/* Picture fields are filled with 'ptr' addresses. Also returns the size. */
|
||||||
int avpicture_fill(AVPicture *picture, uint8_t *ptr,
|
int avpicture_fill(AVPicture *picture, uint8_t *ptr,
|
||||||
int pix_fmt, int width, int height)
|
int pix_fmt, int width, int height)
|
||||||
{
|
{
|
||||||
int size, w2, h2, size2;
|
int size, w2, h2, size2;
|
||||||
PixFmtInfo *pinfo;
|
PixFmtInfo *pinfo;
|
||||||
@ -373,36 +373,36 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
|
|||||||
pix_fmt == PIX_FMT_RGB565 ||
|
pix_fmt == PIX_FMT_RGB565 ||
|
||||||
pix_fmt == PIX_FMT_RGB555)
|
pix_fmt == PIX_FMT_RGB555)
|
||||||
w = width * 2;
|
w = width * 2;
|
||||||
else if (pix_fmt == PIX_FMT_UYVY411)
|
else if (pix_fmt == PIX_FMT_UYVY411)
|
||||||
w = width + width/2;
|
w = width + width/2;
|
||||||
else if (pix_fmt == PIX_FMT_PAL8)
|
else if (pix_fmt == PIX_FMT_PAL8)
|
||||||
w = width;
|
w = width;
|
||||||
else
|
else
|
||||||
w = width * (pf->depth * pf->nb_channels / 8);
|
w = width * (pf->depth * pf->nb_channels / 8);
|
||||||
|
|
||||||
data_planes = 1;
|
data_planes = 1;
|
||||||
h = height;
|
h = height;
|
||||||
} else {
|
} else {
|
||||||
data_planes = pf->nb_channels;
|
data_planes = pf->nb_channels;
|
||||||
w = (width*pf->depth + 7)/8;
|
w = (width*pf->depth + 7)/8;
|
||||||
h = height;
|
h = height;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0; i<data_planes; i++) {
|
for (i=0; i<data_planes; i++) {
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
w = width >> pf->x_chroma_shift;
|
w = width >> pf->x_chroma_shift;
|
||||||
h = height >> pf->y_chroma_shift;
|
h = height >> pf->y_chroma_shift;
|
||||||
}
|
}
|
||||||
s = src->data[i];
|
s = src->data[i];
|
||||||
for(j=0; j<h; j++) {
|
for(j=0; j<h; j++) {
|
||||||
memcpy(dest, s, w);
|
memcpy(dest, s, w);
|
||||||
dest += w;
|
dest += w;
|
||||||
s += src->linesize[i];
|
s += src->linesize[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pf->pixel_type == FF_PIXEL_PALETTE)
|
if (pf->pixel_type == FF_PIXEL_PALETTE)
|
||||||
memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
|
memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt)
|
|||||||
case PIX_FMT_RGB555:
|
case PIX_FMT_RGB555:
|
||||||
bits = 16;
|
bits = 16;
|
||||||
break;
|
break;
|
||||||
case PIX_FMT_UYVY411:
|
case PIX_FMT_UYVY411:
|
||||||
bits = 12;
|
bits = 12;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bits = pf->depth * pf->nb_channels;
|
bits = pf->depth * pf->nb_channels;
|
||||||
break;
|
break;
|
||||||
@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src,
|
|||||||
case PIX_FMT_RGB555:
|
case PIX_FMT_RGB555:
|
||||||
bits = 16;
|
bits = 16;
|
||||||
break;
|
break;
|
||||||
case PIX_FMT_UYVY411:
|
case PIX_FMT_UYVY411:
|
||||||
bits = 12;
|
bits = 12;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bits = pf->depth * pf->nb_channels;
|
bits = pf->depth * pf->nb_channels;
|
||||||
break;
|
break;
|
||||||
@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src,
|
|||||||
cr = cr1;
|
cr = cr1;
|
||||||
for(w = width; w >= 4; w -= 4) {
|
for(w = width; w >= 4; w -= 4) {
|
||||||
cb[0] = p[0];
|
cb[0] = p[0];
|
||||||
lum[0] = p[1];
|
lum[0] = p[1];
|
||||||
lum[1] = p[2];
|
lum[1] = p[2];
|
||||||
cr[0] = p[3];
|
cr[0] = p[3];
|
||||||
lum[2] = p[4];
|
lum[2] = p[4];
|
||||||
lum[3] = p[5];
|
lum[3] = p[5];
|
||||||
p += 6;
|
p += 6;
|
||||||
lum += 4;
|
lum += 4;
|
||||||
cb++;
|
cb++;
|
||||||
@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
|
|||||||
|
|
||||||
#define SCALEBITS 10
|
#define SCALEBITS 10
|
||||||
#define ONE_HALF (1 << (SCALEBITS - 1))
|
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||||
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
||||||
|
|
||||||
#define YUV_TO_RGB1_CCIR(cb1, cr1)\
|
#define YUV_TO_RGB1_CCIR(cb1, cr1)\
|
||||||
{\
|
{\
|
||||||
@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
|
|||||||
static inline int C_JPEG_TO_CCIR(int y) {
|
static inline int C_JPEG_TO_CCIR(int y) {
|
||||||
y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
|
y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
|
||||||
if (y < 16)
|
if (y < 16)
|
||||||
y = 16;
|
y = 16;
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1681,7 +1681,7 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src,
|
|||||||
|
|
||||||
typedef struct ConvertEntry {
|
typedef struct ConvertEntry {
|
||||||
void (*convert)(AVPicture *dst,
|
void (*convert)(AVPicture *dst,
|
||||||
const AVPicture *src, int width, int height);
|
const AVPicture *src, int width, int height);
|
||||||
} ConvertEntry;
|
} ConvertEntry;
|
||||||
|
|
||||||
/* Add each new conversion function in this table. In order to be able
|
/* Add each new conversion function in this table. In order to be able
|
||||||
@ -1721,7 +1721,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
|
|||||||
[PIX_FMT_RGBA32] = {
|
[PIX_FMT_RGBA32] = {
|
||||||
.convert = yuv420p_to_rgba32
|
.convert = yuv420p_to_rgba32
|
||||||
},
|
},
|
||||||
[PIX_FMT_UYVY422] = {
|
[PIX_FMT_UYVY422] = {
|
||||||
.convert = yuv420p_to_uyvy422,
|
.convert = yuv420p_to_uyvy422,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
|
|||||||
* @return ored mask of FF_ALPHA_xxx constants
|
* @return ored mask of FF_ALPHA_xxx constants
|
||||||
*/
|
*/
|
||||||
int img_get_alpha_info(const AVPicture *src,
|
int img_get_alpha_info(const AVPicture *src,
|
||||||
int pix_fmt, int width, int height)
|
int pix_fmt, int width, int height)
|
||||||
{
|
{
|
||||||
PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
|
PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
|
||||||
int ret;
|
int ret;
|
||||||
@ -2300,10 +2300,10 @@ int img_get_alpha_info(const AVPicture *src,
|
|||||||
|
|
||||||
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
||||||
static void deinterlace_line(uint8_t *dst,
|
static void deinterlace_line(uint8_t *dst,
|
||||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||||
const uint8_t *lum,
|
const uint8_t *lum,
|
||||||
int size)
|
int size)
|
||||||
{
|
{
|
||||||
#ifndef HAVE_MMX
|
#ifndef HAVE_MMX
|
||||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||||
@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
|
static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
|
||||||
int width, int height)
|
int width, int height)
|
||||||
{
|
{
|
||||||
uint8_t *src_m1, *src_0, *src_p1, *src_p2;
|
uint8_t *src_m1, *src_0, *src_p1, *src_p2;
|
||||||
int y;
|
int y;
|
||||||
@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
|
|||||||
if (pix_fmt != PIX_FMT_YUV420P &&
|
if (pix_fmt != PIX_FMT_YUV420P &&
|
||||||
pix_fmt != PIX_FMT_YUV422P &&
|
pix_fmt != PIX_FMT_YUV422P &&
|
||||||
pix_fmt != PIX_FMT_YUV444P &&
|
pix_fmt != PIX_FMT_YUV444P &&
|
||||||
pix_fmt != PIX_FMT_YUV411P)
|
pix_fmt != PIX_FMT_YUV411P)
|
||||||
return -1;
|
return -1;
|
||||||
if ((width & 3) != 0 || (height & 3) != 0)
|
if ((width & 3) != 0 || (height & 3) != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -821,7 +821,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
|
|||||||
#ifdef RGBA_IN
|
#ifdef RGBA_IN
|
||||||
|
|
||||||
static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
|
static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
|
||||||
int width, int height)
|
int width, int height)
|
||||||
{
|
{
|
||||||
const unsigned char *p;
|
const unsigned char *p;
|
||||||
int src_wrap, ret, x, y;
|
int src_wrap, ret, x, y;
|
||||||
|
@ -64,8 +64,8 @@ static inline int get_phase(int pos)
|
|||||||
|
|
||||||
/* This function must be optimized */
|
/* This function must be optimized */
|
||||||
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
|
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||||
int src_width, int src_start, int src_incr,
|
int src_width, int src_start, int src_incr,
|
||||||
int16_t *filters)
|
int16_t *filters)
|
||||||
{
|
{
|
||||||
int src_pos, phase, sum, i;
|
int src_pos, phase, sum, i;
|
||||||
const uint8_t *s;
|
const uint8_t *s;
|
||||||
@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
|
|||||||
|
|
||||||
/* This function must be optimized */
|
/* This function must be optimized */
|
||||||
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||||
int wrap, int16_t *filter)
|
int wrap, int16_t *filter)
|
||||||
{
|
{
|
||||||
int sum, i;
|
int sum, i;
|
||||||
const uint8_t *s;
|
const uint8_t *s;
|
||||||
@ -167,7 +167,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
|||||||
|
|
||||||
/* XXX: do four pixels at a time */
|
/* XXX: do four pixels at a time */
|
||||||
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
|
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
|
||||||
const uint8_t *src, int src_width,
|
const uint8_t *src, int src_width,
|
||||||
int src_start, int src_incr, int16_t *filters)
|
int src_start, int src_incr, int16_t *filters)
|
||||||
{
|
{
|
||||||
int src_pos, phase;
|
int src_pos, phase;
|
||||||
@ -212,7 +212,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
|
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||||
int wrap, int16_t *filter)
|
int wrap, int16_t *filter)
|
||||||
{
|
{
|
||||||
int sum, i, v;
|
int sum, i, v;
|
||||||
const uint8_t *s;
|
const uint8_t *s;
|
||||||
@ -277,18 +277,18 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_ALTIVEC
|
#ifdef HAVE_ALTIVEC
|
||||||
typedef union {
|
typedef union {
|
||||||
vector unsigned char v;
|
vector unsigned char v;
|
||||||
unsigned char c[16];
|
unsigned char c[16];
|
||||||
} vec_uc_t;
|
} vec_uc_t;
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
vector signed short v;
|
vector signed short v;
|
||||||
signed short s[8];
|
signed short s[8];
|
||||||
} vec_ss_t;
|
} vec_ss_t;
|
||||||
|
|
||||||
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
|
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||||
int wrap, int16_t *filter)
|
int wrap, int16_t *filter)
|
||||||
{
|
{
|
||||||
int sum, i;
|
int sum, i;
|
||||||
const uint8_t *s;
|
const uint8_t *s;
|
||||||
@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
|
|||||||
|
|
||||||
/* slow version to handle limit cases. Does not need optimisation */
|
/* slow version to handle limit cases. Does not need optimisation */
|
||||||
static void h_resample_slow(uint8_t *dst, int dst_width,
|
static void h_resample_slow(uint8_t *dst, int dst_width,
|
||||||
const uint8_t *src, int src_width,
|
const uint8_t *src, int src_width,
|
||||||
int src_start, int src_incr, int16_t *filters)
|
int src_start, int src_incr, int16_t *filters)
|
||||||
{
|
{
|
||||||
int src_pos, phase, sum, j, v, i;
|
int src_pos, phase, sum, j, v, i;
|
||||||
@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||||
int src_width, int src_start, int src_incr,
|
int src_width, int src_start, int src_incr,
|
||||||
int16_t *filters)
|
int16_t *filters)
|
||||||
{
|
{
|
||||||
int n, src_end;
|
int n, src_end;
|
||||||
|
|
||||||
@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
|
|||||||
ImgReSampleContext *s;
|
ImgReSampleContext *s;
|
||||||
|
|
||||||
if (!owidth || !oheight || !iwidth || !iheight)
|
if (!owidth || !oheight || !iwidth || !iheight)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
s = av_mallocz(sizeof(ImgReSampleContext));
|
s = av_mallocz(sizeof(ImgReSampleContext));
|
||||||
if (!s)
|
if (!s)
|
||||||
|
@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s)
|
|||||||
for (i=0; i < 128; ++i) {
|
for (i=0; i < 128; ++i) {
|
||||||
s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
|
s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
|
||||||
s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120)
|
s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120)
|
||||||
? 236 : 2*((i + 2) - ((i + 1) % 3)));
|
? 236 : 2*((i + 2) - ((i + 1) % 3)));
|
||||||
s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
|
s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
|
||||||
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
|
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
|
||||||
s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6));
|
s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6));
|
||||||
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
|
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
|
||||||
s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
|
s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
|
||||||
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
|
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
|
||||||
}
|
}
|
||||||
|
|
||||||
s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
|
s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
|
||||||
@ -84,8 +84,8 @@ static void build_modpred(Indeo3DecodeContext *s)
|
|||||||
for (i=0; i < 24; ++i) {
|
for (i=0; i < 24; ++i) {
|
||||||
for (j=0; j < 256; ++j) {
|
for (j=0; j < 256; ++j) {
|
||||||
s->corrector_type[i*256+j] = (j < corrector_type_0[i])
|
s->corrector_type[i*256+j] = (j < corrector_type_0[i])
|
||||||
? 1 : ((j < 248 || (i == 16 && j == 248))
|
? 1 : ((j < 248 || (i == 16 && j == 248))
|
||||||
? 0 : corrector_type_2[j - 248]);
|
? 0 : corrector_type_2[j - 248]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -83,10 +83,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#if CONST_BITS == 8
|
#if CONST_BITS == 8
|
||||||
#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
|
#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
|
||||||
#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
|
#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
|
||||||
#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
|
#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
|
||||||
#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
|
#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
|
||||||
#else
|
#else
|
||||||
#define FIX_0_382683433 FIX(0.382683433)
|
#define FIX_0_382683433 FIX(0.382683433)
|
||||||
#define FIX_0_541196100 FIX(0.541196100)
|
#define FIX_0_541196100 FIX(0.541196100)
|
||||||
@ -135,7 +135,7 @@ static always_inline void row_fdct(DCTELEM * data){
|
|||||||
|
|
||||||
/* Even part */
|
/* Even part */
|
||||||
|
|
||||||
tmp10 = tmp0 + tmp3; /* phase 2 */
|
tmp10 = tmp0 + tmp3; /* phase 2 */
|
||||||
tmp13 = tmp0 - tmp3;
|
tmp13 = tmp0 - tmp3;
|
||||||
tmp11 = tmp1 + tmp2;
|
tmp11 = tmp1 + tmp2;
|
||||||
tmp12 = tmp1 - tmp2;
|
tmp12 = tmp1 - tmp2;
|
||||||
@ -144,30 +144,30 @@ static always_inline void row_fdct(DCTELEM * data){
|
|||||||
dataptr[4] = tmp10 - tmp11;
|
dataptr[4] = tmp10 - tmp11;
|
||||||
|
|
||||||
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
|
||||||
dataptr[2] = tmp13 + z1; /* phase 5 */
|
dataptr[2] = tmp13 + z1; /* phase 5 */
|
||||||
dataptr[6] = tmp13 - z1;
|
dataptr[6] = tmp13 - z1;
|
||||||
|
|
||||||
/* Odd part */
|
/* Odd part */
|
||||||
|
|
||||||
tmp10 = tmp4 + tmp5; /* phase 2 */
|
tmp10 = tmp4 + tmp5; /* phase 2 */
|
||||||
tmp11 = tmp5 + tmp6;
|
tmp11 = tmp5 + tmp6;
|
||||||
tmp12 = tmp6 + tmp7;
|
tmp12 = tmp6 + tmp7;
|
||||||
|
|
||||||
/* The rotator is modified from fig 4-8 to avoid extra negations. */
|
/* The rotator is modified from fig 4-8 to avoid extra negations. */
|
||||||
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
|
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
|
||||||
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
|
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
|
||||||
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
||||||
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
||||||
|
|
||||||
z11 = tmp7 + z3; /* phase 5 */
|
z11 = tmp7 + z3; /* phase 5 */
|
||||||
z13 = tmp7 - z3;
|
z13 = tmp7 - z3;
|
||||||
|
|
||||||
dataptr[5] = z13 + z2; /* phase 6 */
|
dataptr[5] = z13 + z2; /* phase 6 */
|
||||||
dataptr[3] = z13 - z2;
|
dataptr[3] = z13 - z2;
|
||||||
dataptr[1] = z11 + z4;
|
dataptr[1] = z11 + z4;
|
||||||
dataptr[7] = z11 - z4;
|
dataptr[7] = z11 - z4;
|
||||||
|
|
||||||
dataptr += DCTSIZE; /* advance pointer to next row */
|
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,7 +202,7 @@ fdct_ifast (DCTELEM * data)
|
|||||||
|
|
||||||
/* Even part */
|
/* Even part */
|
||||||
|
|
||||||
tmp10 = tmp0 + tmp3; /* phase 2 */
|
tmp10 = tmp0 + tmp3; /* phase 2 */
|
||||||
tmp13 = tmp0 - tmp3;
|
tmp13 = tmp0 - tmp3;
|
||||||
tmp11 = tmp1 + tmp2;
|
tmp11 = tmp1 + tmp2;
|
||||||
tmp12 = tmp1 - tmp2;
|
tmp12 = tmp1 - tmp2;
|
||||||
@ -216,7 +216,7 @@ fdct_ifast (DCTELEM * data)
|
|||||||
|
|
||||||
/* Odd part */
|
/* Odd part */
|
||||||
|
|
||||||
tmp10 = tmp4 + tmp5; /* phase 2 */
|
tmp10 = tmp4 + tmp5; /* phase 2 */
|
||||||
tmp11 = tmp5 + tmp6;
|
tmp11 = tmp5 + tmp6;
|
||||||
tmp12 = tmp6 + tmp7;
|
tmp12 = tmp6 + tmp7;
|
||||||
|
|
||||||
@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data)
|
|||||||
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
||||||
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
||||||
|
|
||||||
z11 = tmp7 + z3; /* phase 5 */
|
z11 = tmp7 + z3; /* phase 5 */
|
||||||
z13 = tmp7 - z3;
|
z13 = tmp7 - z3;
|
||||||
|
|
||||||
dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
|
dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
|
||||||
@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data)
|
|||||||
dataptr[DCTSIZE*1] = z11 + z4;
|
dataptr[DCTSIZE*1] = z11 + z4;
|
||||||
dataptr[DCTSIZE*7] = z11 - z4;
|
dataptr[DCTSIZE*7] = z11 - z4;
|
||||||
|
|
||||||
dataptr++; /* advance pointer to next column */
|
dataptr++; /* advance pointer to next column */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -293,7 +293,7 @@ fdct_ifast248 (DCTELEM * data)
|
|||||||
dataptr[DCTSIZE*3] = tmp13 + z1;
|
dataptr[DCTSIZE*3] = tmp13 + z1;
|
||||||
dataptr[DCTSIZE*7] = tmp13 - z1;
|
dataptr[DCTSIZE*7] = tmp13 - z1;
|
||||||
|
|
||||||
dataptr++; /* advance pointer to next column */
|
dataptr++; /* advance pointer to next column */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,10 +92,10 @@
|
|||||||
|
|
||||||
#if BITS_IN_JSAMPLE == 8
|
#if BITS_IN_JSAMPLE == 8
|
||||||
#define CONST_BITS 13
|
#define CONST_BITS 13
|
||||||
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
|
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
|
||||||
#else
|
#else
|
||||||
#define CONST_BITS 13
|
#define CONST_BITS 13
|
||||||
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
|
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
|
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
|
||||||
@ -106,18 +106,18 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#if CONST_BITS == 13
|
#if CONST_BITS == 13
|
||||||
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
|
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
|
||||||
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
|
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
|
||||||
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
|
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
|
||||||
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
|
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
|
||||||
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
|
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
|
||||||
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
|
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
|
||||||
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
|
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
|
||||||
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
|
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
|
||||||
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
|
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
|
||||||
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
|
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
|
||||||
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
|
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
|
||||||
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
|
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
|
||||||
#else
|
#else
|
||||||
#define FIX_0_298631336 FIX(0.298631336)
|
#define FIX_0_298631336 FIX(0.298631336)
|
||||||
#define FIX_0_390180644 FIX(0.390180644)
|
#define FIX_0_390180644 FIX(0.390180644)
|
||||||
@ -185,9 +185,9 @@ static always_inline void row_fdct(DCTELEM * data){
|
|||||||
|
|
||||||
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
CONST_BITS-PASS1_BITS);
|
CONST_BITS-PASS1_BITS);
|
||||||
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
CONST_BITS-PASS1_BITS);
|
CONST_BITS-PASS1_BITS);
|
||||||
|
|
||||||
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
|
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
|
||||||
* cK represents cos(K*pi/16).
|
* cK represents cos(K*pi/16).
|
||||||
@ -217,7 +217,7 @@ static always_inline void row_fdct(DCTELEM * data){
|
|||||||
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
|
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
|
||||||
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
|
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
|
||||||
|
|
||||||
dataptr += DCTSIZE; /* advance pointer to next row */
|
dataptr += DCTSIZE; /* advance pointer to next row */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -267,9 +267,9 @@ ff_jpeg_fdct_islow (DCTELEM * data)
|
|||||||
|
|
||||||
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
|
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
|
||||||
* cK represents cos(K*pi/16).
|
* cK represents cos(K*pi/16).
|
||||||
@ -295,15 +295,15 @@ ff_jpeg_fdct_islow (DCTELEM * data)
|
|||||||
z4 += z5;
|
z4 += z5;
|
||||||
|
|
||||||
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
|
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
|
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
|
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
|
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
dataptr++; /* advance pointer to next column */
|
dataptr++; /* advance pointer to next column */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -350,9 +350,9 @@ ff_fdct248_islow (DCTELEM * data)
|
|||||||
|
|
||||||
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
tmp10 = tmp4 + tmp7;
|
tmp10 = tmp4 + tmp7;
|
||||||
tmp11 = tmp5 + tmp6;
|
tmp11 = tmp5 + tmp6;
|
||||||
@ -364,10 +364,10 @@ ff_fdct248_islow (DCTELEM * data)
|
|||||||
|
|
||||||
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
|
||||||
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
CONST_BITS+PASS1_BITS);
|
CONST_BITS+PASS1_BITS);
|
||||||
|
|
||||||
dataptr++; /* advance pointer to next column */
|
dataptr++; /* advance pointer to next column */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
1270
libavcodec/jrevdct.c
1270
libavcodec/jrevdct.c
File diff suppressed because it is too large
Load Diff
@ -81,8 +81,8 @@
|
|||||||
*/
|
*/
|
||||||
typedef struct LclContext {
|
typedef struct LclContext {
|
||||||
|
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
AVFrame pic;
|
AVFrame pic;
|
||||||
PutBitContext pb;
|
PutBitContext pb;
|
||||||
|
|
||||||
// Image type
|
// Image type
|
||||||
@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha
|
|||||||
*/
|
*/
|
||||||
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
|
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
|
||||||
{
|
{
|
||||||
LclContext * const c = (LclContext *)avctx->priv_data;
|
LclContext * const c = (LclContext *)avctx->priv_data;
|
||||||
unsigned char *encoded = (unsigned char *)buf;
|
unsigned char *encoded = (unsigned char *)buf;
|
||||||
unsigned int pixel_ptr;
|
unsigned int pixel_ptr;
|
||||||
int row, col;
|
int row, col;
|
||||||
unsigned char *outptr;
|
unsigned char *outptr;
|
||||||
@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
|
|||||||
#endif
|
#endif
|
||||||
unsigned int len = buf_size;
|
unsigned int len = buf_size;
|
||||||
|
|
||||||
if(c->pic.data[0])
|
if(c->pic.data[0])
|
||||||
avctx->release_buffer(avctx, &c->pic);
|
avctx->release_buffer(avctx, &c->pic);
|
||||||
|
|
||||||
c->pic.reference = 0;
|
c->pic.reference = 0;
|
||||||
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
|
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
|
||||||
if(avctx->get_buffer(avctx, &c->pic) < 0){
|
if(avctx->get_buffer(avctx, &c->pic) < 0){
|
||||||
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
outptr = c->pic.data[0]; // Output image pointer
|
outptr = c->pic.data[0]; // Output image pointer
|
||||||
|
|
||||||
@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
|
|||||||
pixel_ptr = row * width * 3;
|
pixel_ptr = row * width * 3;
|
||||||
yq = encoded[pixel_ptr++];
|
yq = encoded[pixel_ptr++];
|
||||||
uqvq = encoded[pixel_ptr++];
|
uqvq = encoded[pixel_ptr++];
|
||||||
uqvq+=(encoded[pixel_ptr++] << 8);
|
uqvq+=(encoded[pixel_ptr++] << 8);
|
||||||
for (col = 1; col < width; col++) {
|
for (col = 1; col < width; col++) {
|
||||||
encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
|
encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
|
||||||
uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8));
|
uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8));
|
||||||
@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
|
|||||||
c->zstream.avail_in = avctx->width*3;
|
c->zstream.avail_in = avctx->width*3;
|
||||||
zret = deflate(&(c->zstream), Z_NO_FLUSH);
|
zret = deflate(&(c->zstream), Z_NO_FLUSH);
|
||||||
if (zret != Z_OK) {
|
if (zret != Z_OK) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
|
av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
zret = deflate(&(c->zstream), Z_FINISH);
|
zret = deflate(&(c->zstream), Z_FINISH);
|
||||||
@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx)
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) {
|
if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
|
av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression);
|
av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression);
|
||||||
@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx)
|
|||||||
*/
|
*/
|
||||||
static int decode_end(AVCodecContext *avctx)
|
static int decode_end(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
LclContext * const c = (LclContext *)avctx->priv_data;
|
LclContext * const c = (LclContext *)avctx->priv_data;
|
||||||
|
|
||||||
if (c->pic.data[0])
|
if (c->pic.data[0])
|
||||||
avctx->release_buffer(avctx, &c->pic);
|
avctx->release_buffer(avctx, &c->pic);
|
||||||
#ifdef CONFIG_ZLIB
|
#ifdef CONFIG_ZLIB
|
||||||
inflateEnd(&(c->zstream));
|
inflateEnd(&(c->zstream));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -883,28 +883,28 @@ static int encode_end(AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AVCodec mszh_decoder = {
|
AVCodec mszh_decoder = {
|
||||||
"mszh",
|
"mszh",
|
||||||
CODEC_TYPE_VIDEO,
|
CODEC_TYPE_VIDEO,
|
||||||
CODEC_ID_MSZH,
|
CODEC_ID_MSZH,
|
||||||
sizeof(LclContext),
|
sizeof(LclContext),
|
||||||
decode_init,
|
decode_init,
|
||||||
NULL,
|
NULL,
|
||||||
decode_end,
|
decode_end,
|
||||||
decode_frame,
|
decode_frame,
|
||||||
CODEC_CAP_DR1,
|
CODEC_CAP_DR1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
AVCodec zlib_decoder = {
|
AVCodec zlib_decoder = {
|
||||||
"zlib",
|
"zlib",
|
||||||
CODEC_TYPE_VIDEO,
|
CODEC_TYPE_VIDEO,
|
||||||
CODEC_ID_ZLIB,
|
CODEC_ID_ZLIB,
|
||||||
sizeof(LclContext),
|
sizeof(LclContext),
|
||||||
decode_init,
|
decode_init,
|
||||||
NULL,
|
NULL,
|
||||||
decode_end,
|
decode_end,
|
||||||
decode_frame,
|
decode_frame,
|
||||||
CODEC_CAP_DR1,
|
CODEC_CAP_DR1,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -42,7 +42,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3],
|
|||||||
uint8_t * dst[3], int dstStride[3],
|
uint8_t * dst[3], int dstStride[3],
|
||||||
int horizontalSize, int verticalSize,
|
int horizontalSize, int verticalSize,
|
||||||
QP_STORE_T *QP_store, int QP_stride,
|
QP_STORE_T *QP_store, int QP_stride,
|
||||||
pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
|
pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -26,35 +26,35 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
|
#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
|
||||||
do { \
|
do { \
|
||||||
__typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \
|
__typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \
|
||||||
__typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \
|
__typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \
|
||||||
__typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \
|
__typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \
|
||||||
__typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \
|
__typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \
|
||||||
tempA1 = vec_mergeh (src_a, src_e); \
|
tempA1 = vec_mergeh (src_a, src_e); \
|
||||||
tempB1 = vec_mergel (src_a, src_e); \
|
tempB1 = vec_mergel (src_a, src_e); \
|
||||||
tempC1 = vec_mergeh (src_b, src_f); \
|
tempC1 = vec_mergeh (src_b, src_f); \
|
||||||
tempD1 = vec_mergel (src_b, src_f); \
|
tempD1 = vec_mergel (src_b, src_f); \
|
||||||
tempE1 = vec_mergeh (src_c, src_g); \
|
tempE1 = vec_mergeh (src_c, src_g); \
|
||||||
tempF1 = vec_mergel (src_c, src_g); \
|
tempF1 = vec_mergel (src_c, src_g); \
|
||||||
tempG1 = vec_mergeh (src_d, src_h); \
|
tempG1 = vec_mergeh (src_d, src_h); \
|
||||||
tempH1 = vec_mergel (src_d, src_h); \
|
tempH1 = vec_mergel (src_d, src_h); \
|
||||||
tempA2 = vec_mergeh (tempA1, tempE1); \
|
tempA2 = vec_mergeh (tempA1, tempE1); \
|
||||||
tempB2 = vec_mergel (tempA1, tempE1); \
|
tempB2 = vec_mergel (tempA1, tempE1); \
|
||||||
tempC2 = vec_mergeh (tempB1, tempF1); \
|
tempC2 = vec_mergeh (tempB1, tempF1); \
|
||||||
tempD2 = vec_mergel (tempB1, tempF1); \
|
tempD2 = vec_mergel (tempB1, tempF1); \
|
||||||
tempE2 = vec_mergeh (tempC1, tempG1); \
|
tempE2 = vec_mergeh (tempC1, tempG1); \
|
||||||
tempF2 = vec_mergel (tempC1, tempG1); \
|
tempF2 = vec_mergel (tempC1, tempG1); \
|
||||||
tempG2 = vec_mergeh (tempD1, tempH1); \
|
tempG2 = vec_mergeh (tempD1, tempH1); \
|
||||||
tempH2 = vec_mergel (tempD1, tempH1); \
|
tempH2 = vec_mergel (tempD1, tempH1); \
|
||||||
src_a = vec_mergeh (tempA2, tempE2); \
|
src_a = vec_mergeh (tempA2, tempE2); \
|
||||||
src_b = vec_mergel (tempA2, tempE2); \
|
src_b = vec_mergel (tempA2, tempE2); \
|
||||||
src_c = vec_mergeh (tempB2, tempF2); \
|
src_c = vec_mergeh (tempB2, tempF2); \
|
||||||
src_d = vec_mergel (tempB2, tempF2); \
|
src_d = vec_mergel (tempB2, tempF2); \
|
||||||
src_e = vec_mergeh (tempC2, tempG2); \
|
src_e = vec_mergeh (tempC2, tempG2); \
|
||||||
src_f = vec_mergel (tempC2, tempG2); \
|
src_f = vec_mergel (tempC2, tempG2); \
|
||||||
src_g = vec_mergeh (tempD2, tempH2); \
|
src_g = vec_mergeh (tempD2, tempH2); \
|
||||||
src_h = vec_mergel (tempD2, tempH2); \
|
src_h = vec_mergel (tempD2, tempH2); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
|
|||||||
|
|
||||||
vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;
|
vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;
|
||||||
|
|
||||||
#define LOAD_LINE(i) \
|
#define LOAD_LINE(i) \
|
||||||
register int j##i = i * stride; \
|
register int j##i = i * stride; \
|
||||||
vector unsigned char perm##i = vec_lvsl(j##i, src2); \
|
vector unsigned char perm##i = vec_lvsl(j##i, src2); \
|
||||||
const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \
|
const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \
|
||||||
vector unsigned char v_srcA2##i; \
|
vector unsigned char v_srcA2##i; \
|
||||||
if (two_vectors) \
|
if (two_vectors) \
|
||||||
v_srcA2##i = vec_ld(j##i + 16, src2); \
|
v_srcA2##i = vec_ld(j##i + 16, src2); \
|
||||||
const vector unsigned char v_srcA##i = \
|
const vector unsigned char v_srcA##i = \
|
||||||
vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \
|
vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \
|
||||||
v_srcAss##i = \
|
v_srcAss##i = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)zero, \
|
(vector signed short)vec_mergeh((vector signed char)zero, \
|
||||||
(vector signed char)v_srcA##i)
|
(vector signed char)v_srcA##i)
|
||||||
|
|
||||||
#define LOAD_LINE_ALIGNED(i) \
|
#define LOAD_LINE_ALIGNED(i) \
|
||||||
register int j##i = i * stride; \
|
register int j##i = i * stride; \
|
||||||
const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \
|
const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \
|
||||||
v_srcAss##i = \
|
v_srcAss##i = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)zero, \
|
(vector signed short)vec_mergeh((vector signed char)zero, \
|
||||||
(vector signed char)v_srcA##i)
|
(vector signed char)v_srcA##i)
|
||||||
|
|
||||||
// special casing the aligned case is worthwhile, as all calls from
|
// special casing the aligned case is worthwhile, as all calls from
|
||||||
// the (transposed) horizontal deblocks will be aligned, in addition
|
// the (transposed) horizontal deblocks will be aligned, in addition
|
||||||
@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
|
|||||||
#undef LOAD_LINE
|
#undef LOAD_LINE
|
||||||
#undef LOAD_LINE_ALIGNED
|
#undef LOAD_LINE_ALIGNED
|
||||||
|
|
||||||
#define ITER(i, j) \
|
#define ITER(i, j) \
|
||||||
const vector signed short v_diff##i = \
|
const vector signed short v_diff##i = \
|
||||||
vec_sub(v_srcAss##i, v_srcAss##j); \
|
vec_sub(v_srcAss##i, v_srcAss##j); \
|
||||||
const vector signed short v_sum##i = \
|
const vector signed short v_sum##i = \
|
||||||
vec_add(v_diff##i, v_dcOffset); \
|
vec_add(v_diff##i, v_dcOffset); \
|
||||||
const vector signed short v_comp##i = \
|
const vector signed short v_comp##i = \
|
||||||
(vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
|
(vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
|
||||||
v_dcThreshold); \
|
v_dcThreshold); \
|
||||||
const vector signed short v_part##i = vec_and(mask, v_comp##i); \
|
const vector signed short v_part##i = vec_and(mask, v_comp##i); \
|
||||||
v_numEq = vec_sum4s(v_part##i, v_numEq);
|
v_numEq = vec_sum4s(v_part##i, v_numEq);
|
||||||
|
|
||||||
ITER(0, 1);
|
ITER(0, 1);
|
||||||
@ -167,13 +167,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
|
|||||||
if (numEq > c->ppMode.flatnessThreshold)
|
if (numEq > c->ppMode.flatnessThreshold)
|
||||||
{
|
{
|
||||||
const vector unsigned char mmoP1 = (const vector unsigned char)
|
const vector unsigned char mmoP1 = (const vector unsigned char)
|
||||||
AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
|
AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
|
||||||
0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
|
0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
|
||||||
const vector unsigned char mmoP2 = (const vector unsigned char)
|
const vector unsigned char mmoP2 = (const vector unsigned char)
|
||||||
AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
|
AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
|
||||||
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
|
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
|
||||||
const vector unsigned char mmoP = (const vector unsigned char)
|
const vector unsigned char mmoP = (const vector unsigned char)
|
||||||
vec_lvsl(8, (unsigned char*)0);
|
vec_lvsl(8, (unsigned char*)0);
|
||||||
|
|
||||||
vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
|
vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
|
||||||
vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
|
vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
|
||||||
@ -185,9 +185,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
|
|||||||
vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);
|
vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);
|
||||||
|
|
||||||
if (vec_any_gt(mmoSum, v4QP))
|
if (vec_any_gt(mmoSum, v4QP))
|
||||||
return 0;
|
return 0;
|
||||||
else
|
else
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else return 2;
|
else return 2;
|
||||||
}
|
}
|
||||||
@ -218,21 +218,21 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
|
|||||||
vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
|
vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
|
||||||
|
|
||||||
#define LOAD_LINE(i) \
|
#define LOAD_LINE(i) \
|
||||||
const vector unsigned char perml##i = \
|
const vector unsigned char perml##i = \
|
||||||
vec_lvsl(i * stride, src2); \
|
vec_lvsl(i * stride, src2); \
|
||||||
vbA##i = vec_ld(i * stride, src2); \
|
vbA##i = vec_ld(i * stride, src2); \
|
||||||
vbB##i = vec_ld(i * stride + 16, src2); \
|
vbB##i = vec_ld(i * stride + 16, src2); \
|
||||||
vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \
|
vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \
|
||||||
vb##i = \
|
vb##i = \
|
||||||
(vector signed short)vec_mergeh((vector unsigned char)zero, \
|
(vector signed short)vec_mergeh((vector unsigned char)zero, \
|
||||||
(vector unsigned char)vbT##i)
|
(vector unsigned char)vbT##i)
|
||||||
|
|
||||||
#define LOAD_LINE_ALIGNED(i) \
|
#define LOAD_LINE_ALIGNED(i) \
|
||||||
register int j##i = i * stride; \
|
register int j##i = i * stride; \
|
||||||
vbT##i = vec_ld(j##i, src2); \
|
vbT##i = vec_ld(j##i, src2); \
|
||||||
vb##i = \
|
vb##i = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)zero, \
|
(vector signed short)vec_mergeh((vector signed char)zero, \
|
||||||
(vector signed char)vbT##i)
|
(vector signed char)vbT##i)
|
||||||
|
|
||||||
// special casing the aligned case is worthwhile, as all calls from
|
// special casing the aligned case is worthwhile, as all calls from
|
||||||
// the (transposed) horizontal deblocks will be aligned, in addition
|
// the (transposed) horizontal deblocks will be aligned, in addition
|
||||||
@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
|
|||||||
const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
|
const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
|
||||||
const vector signed short v_sumsB9 = vec_add(temp91, v_last);
|
const vector signed short v_sumsB9 = vec_add(temp91, v_last);
|
||||||
|
|
||||||
#define COMPUTE_VR(i, j, k) \
|
#define COMPUTE_VR(i, j, k) \
|
||||||
const vector signed short temps1##i = \
|
const vector signed short temps1##i = \
|
||||||
vec_add(v_sumsB##i, v_sumsB##k); \
|
vec_add(v_sumsB##i, v_sumsB##k); \
|
||||||
const vector signed short temps2##i = \
|
const vector signed short temps2##i = \
|
||||||
vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
|
vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
|
||||||
const vector signed short vr##j = vec_sra(temps2##i, v_4)
|
const vector signed short vr##j = vec_sra(temps2##i, v_4)
|
||||||
|
|
||||||
COMPUTE_VR(0, 1, 2);
|
COMPUTE_VR(0, 1, 2);
|
||||||
@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
|
|||||||
|
|
||||||
const vector signed char neg1 = vec_splat_s8(-1);
|
const vector signed char neg1 = vec_splat_s8(-1);
|
||||||
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||||
|
|
||||||
#define PACK_AND_STORE(i) \
|
#define PACK_AND_STORE(i) \
|
||||||
const vector unsigned char perms##i = \
|
const vector unsigned char perms##i = \
|
||||||
vec_lvsr(i * stride, src2); \
|
vec_lvsr(i * stride, src2); \
|
||||||
const vector unsigned char vf##i = \
|
const vector unsigned char vf##i = \
|
||||||
vec_packsu(vr##i, (vector signed short)zero); \
|
vec_packsu(vr##i, (vector signed short)zero); \
|
||||||
const vector unsigned char vg##i = \
|
const vector unsigned char vg##i = \
|
||||||
vec_perm(vf##i, vbT##i, permHH); \
|
vec_perm(vf##i, vbT##i, permHH); \
|
||||||
const vector unsigned char mask##i = \
|
const vector unsigned char mask##i = \
|
||||||
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
|
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
|
||||||
const vector unsigned char vg2##i = \
|
const vector unsigned char vg2##i = \
|
||||||
vec_perm(vg##i, vg##i, perms##i); \
|
vec_perm(vg##i, vg##i, perms##i); \
|
||||||
const vector unsigned char svA##i = \
|
const vector unsigned char svA##i = \
|
||||||
vec_sel(vbA##i, vg2##i, mask##i); \
|
vec_sel(vbA##i, vg2##i, mask##i); \
|
||||||
const vector unsigned char svB##i = \
|
const vector unsigned char svB##i = \
|
||||||
vec_sel(vg2##i, vbB##i, mask##i); \
|
vec_sel(vg2##i, vbB##i, mask##i); \
|
||||||
vec_st(svA##i, i * stride, src2); \
|
vec_st(svA##i, i * stride, src2); \
|
||||||
vec_st(svB##i, i * stride + 16, src2)
|
vec_st(svB##i, i * stride + 16, src2)
|
||||||
|
|
||||||
#define PACK_AND_STORE_ALIGNED(i) \
|
#define PACK_AND_STORE_ALIGNED(i) \
|
||||||
const vector unsigned char vf##i = \
|
const vector unsigned char vf##i = \
|
||||||
vec_packsu(vr##i, (vector signed short)zero); \
|
vec_packsu(vr##i, (vector signed short)zero); \
|
||||||
const vector unsigned char vg##i = \
|
const vector unsigned char vg##i = \
|
||||||
vec_perm(vf##i, vbT##i, permHH); \
|
vec_perm(vf##i, vbT##i, permHH); \
|
||||||
vec_st(vg##i, i * stride, src2)
|
vec_st(vg##i, i * stride, src2)
|
||||||
|
|
||||||
// special casing the aligned case is worthwhile, as all calls from
|
// special casing the aligned case is worthwhile, as all calls from
|
||||||
@ -398,17 +398,17 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
|
|||||||
vqp = vec_splat(vqp, 0);
|
vqp = vec_splat(vqp, 0);
|
||||||
|
|
||||||
#define LOAD_LINE(i) \
|
#define LOAD_LINE(i) \
|
||||||
const vector unsigned char perm##i = \
|
const vector unsigned char perm##i = \
|
||||||
vec_lvsl(i * stride, src2); \
|
vec_lvsl(i * stride, src2); \
|
||||||
const vector unsigned char vbA##i = \
|
const vector unsigned char vbA##i = \
|
||||||
vec_ld(i * stride, src2); \
|
vec_ld(i * stride, src2); \
|
||||||
const vector unsigned char vbB##i = \
|
const vector unsigned char vbB##i = \
|
||||||
vec_ld(i * stride + 16, src2); \
|
vec_ld(i * stride + 16, src2); \
|
||||||
const vector unsigned char vbT##i = \
|
const vector unsigned char vbT##i = \
|
||||||
vec_perm(vbA##i, vbB##i, perm##i); \
|
vec_perm(vbA##i, vbB##i, perm##i); \
|
||||||
const vector signed short vb##i = \
|
const vector signed short vb##i = \
|
||||||
(vector signed short)vec_mergeh((vector unsigned char)zero, \
|
(vector signed short)vec_mergeh((vector unsigned char)zero, \
|
||||||
(vector unsigned char)vbT##i)
|
(vector unsigned char)vbT##i)
|
||||||
|
|
||||||
src2 += stride*3;
|
src2 += stride*3;
|
||||||
|
|
||||||
@ -426,7 +426,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
|
|||||||
const vector signed short v_2 = vec_splat_s16(2);
|
const vector signed short v_2 = vec_splat_s16(2);
|
||||||
const vector signed short v_5 = vec_splat_s16(5);
|
const vector signed short v_5 = vec_splat_s16(5);
|
||||||
const vector signed short v_32 = vec_sl(v_1,
|
const vector signed short v_32 = vec_sl(v_1,
|
||||||
(vector unsigned short)v_5);
|
(vector unsigned short)v_5);
|
||||||
/* middle energy */
|
/* middle energy */
|
||||||
const vector signed short l3minusl6 = vec_sub(vb3, vb6);
|
const vector signed short l3minusl6 = vec_sub(vb3, vb6);
|
||||||
const vector signed short l5minusl4 = vec_sub(vb5, vb4);
|
const vector signed short l5minusl4 = vec_sub(vb5, vb4);
|
||||||
@ -483,22 +483,22 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
|
|||||||
|
|
||||||
const vector signed char neg1 = vec_splat_s8(-1);
|
const vector signed char neg1 = vec_splat_s8(-1);
|
||||||
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||||
|
|
||||||
#define STORE(i) \
|
#define STORE(i) \
|
||||||
const vector unsigned char perms##i = \
|
const vector unsigned char perms##i = \
|
||||||
vec_lvsr(i * stride, src2); \
|
vec_lvsr(i * stride, src2); \
|
||||||
const vector unsigned char vg##i = \
|
const vector unsigned char vg##i = \
|
||||||
vec_perm(st##i, vbT##i, permHH); \
|
vec_perm(st##i, vbT##i, permHH); \
|
||||||
const vector unsigned char mask##i = \
|
const vector unsigned char mask##i = \
|
||||||
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
|
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
|
||||||
const vector unsigned char vg2##i = \
|
const vector unsigned char vg2##i = \
|
||||||
vec_perm(vg##i, vg##i, perms##i); \
|
vec_perm(vg##i, vg##i, perms##i); \
|
||||||
const vector unsigned char svA##i = \
|
const vector unsigned char svA##i = \
|
||||||
vec_sel(vbA##i, vg2##i, mask##i); \
|
vec_sel(vbA##i, vg2##i, mask##i); \
|
||||||
const vector unsigned char svB##i = \
|
const vector unsigned char svB##i = \
|
||||||
vec_sel(vg2##i, vbB##i, mask##i); \
|
vec_sel(vg2##i, vbB##i, mask##i); \
|
||||||
vec_st(svA##i, i * stride, src2); \
|
vec_st(svA##i, i * stride, src2); \
|
||||||
vec_st(svB##i, i * stride + 16, src2)
|
vec_st(svB##i, i * stride + 16, src2)
|
||||||
|
|
||||||
STORE(4);
|
STORE(4);
|
||||||
@ -522,11 +522,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
dt[0] = deringThreshold;
|
dt[0] = deringThreshold;
|
||||||
v_dt = vec_splat(vec_ld(0, dt), 0);
|
v_dt = vec_splat(vec_ld(0, dt), 0);
|
||||||
|
|
||||||
#define LOAD_LINE(i) \
|
#define LOAD_LINE(i) \
|
||||||
const vector unsigned char perm##i = \
|
const vector unsigned char perm##i = \
|
||||||
vec_lvsl(i * stride, srcCopy); \
|
vec_lvsl(i * stride, srcCopy); \
|
||||||
vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \
|
vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \
|
||||||
vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \
|
vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \
|
||||||
vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)
|
vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)
|
||||||
|
|
||||||
LOAD_LINE(0);
|
LOAD_LINE(0);
|
||||||
@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
{
|
{
|
||||||
const vector unsigned char trunc_perm = (vector unsigned char)
|
const vector unsigned char trunc_perm = (vector unsigned char)
|
||||||
AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
|
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
|
||||||
const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
|
const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
|
||||||
const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
|
const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
|
||||||
const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
|
const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
|
||||||
const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
|
const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
|
||||||
|
|
||||||
#define EXTRACT(op) do { \
|
#define EXTRACT(op) do { \
|
||||||
const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \
|
const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \
|
||||||
const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \
|
const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \
|
||||||
const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \
|
const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \
|
||||||
@ -584,29 +584,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
{
|
{
|
||||||
const vector unsigned short mask1 = (vector unsigned short)
|
const vector unsigned short mask1 = (vector unsigned short)
|
||||||
AVV(0x0001, 0x0002, 0x0004, 0x0008,
|
AVV(0x0001, 0x0002, 0x0004, 0x0008,
|
||||||
0x0010, 0x0020, 0x0040, 0x0080);
|
0x0010, 0x0020, 0x0040, 0x0080);
|
||||||
const vector unsigned short mask2 = (vector unsigned short)
|
const vector unsigned short mask2 = (vector unsigned short)
|
||||||
AVV(0x0100, 0x0200, 0x0000, 0x0000,
|
AVV(0x0100, 0x0200, 0x0000, 0x0000,
|
||||||
0x0000, 0x0000, 0x0000, 0x0000);
|
0x0000, 0x0000, 0x0000, 0x0000);
|
||||||
|
|
||||||
const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
|
const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
|
||||||
const vector unsigned int vuint32_1 = vec_splat_u32(1);
|
const vector unsigned int vuint32_1 = vec_splat_u32(1);
|
||||||
|
|
||||||
#define COMPARE(i) \
|
#define COMPARE(i) \
|
||||||
vector signed int sum##i; \
|
vector signed int sum##i; \
|
||||||
do { \
|
do { \
|
||||||
const vector unsigned char cmp##i = \
|
const vector unsigned char cmp##i = \
|
||||||
(vector unsigned char)vec_cmpgt(src##i, v_avg); \
|
(vector unsigned char)vec_cmpgt(src##i, v_avg); \
|
||||||
const vector unsigned short cmpHi##i = \
|
const vector unsigned short cmpHi##i = \
|
||||||
(vector unsigned short)vec_mergeh(cmp##i, cmp##i); \
|
(vector unsigned short)vec_mergeh(cmp##i, cmp##i); \
|
||||||
const vector unsigned short cmpLi##i = \
|
const vector unsigned short cmpLi##i = \
|
||||||
(vector unsigned short)vec_mergel(cmp##i, cmp##i); \
|
(vector unsigned short)vec_mergel(cmp##i, cmp##i); \
|
||||||
const vector signed short cmpHf##i = \
|
const vector signed short cmpHf##i = \
|
||||||
(vector signed short)vec_and(cmpHi##i, mask1); \
|
(vector signed short)vec_and(cmpHi##i, mask1); \
|
||||||
const vector signed short cmpLf##i = \
|
const vector signed short cmpLf##i = \
|
||||||
(vector signed short)vec_and(cmpLi##i, mask2); \
|
(vector signed short)vec_and(cmpLi##i, mask2); \
|
||||||
const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \
|
const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \
|
||||||
const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
|
const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
|
||||||
sum##i = vec_sums(sumq##i, zero); } while (0)
|
sum##i = vec_sums(sumq##i, zero); } while (0)
|
||||||
|
|
||||||
COMPARE(0);
|
COMPARE(0);
|
||||||
@ -643,11 +643,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
const vector signed int t2B = vec_or(sumB, tB);
|
const vector signed int t2B = vec_or(sumB, tB);
|
||||||
const vector signed int t2C = vec_or(sumC, tC);
|
const vector signed int t2C = vec_or(sumC, tC);
|
||||||
const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
|
const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
|
||||||
vec_sl(t2A, vuint32_1));
|
vec_sl(t2A, vuint32_1));
|
||||||
const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
|
const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
|
||||||
vec_sl(t2B, vuint32_1));
|
vec_sl(t2B, vuint32_1));
|
||||||
const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
|
const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
|
||||||
vec_sl(t2C, vuint32_1));
|
vec_sl(t2C, vuint32_1));
|
||||||
const vector signed int yA = vec_and(t2A, t3A);
|
const vector signed int yA = vec_and(t2A, t3A);
|
||||||
const vector signed int yB = vec_and(t2B, t3B);
|
const vector signed int yB = vec_and(t2B, t3B);
|
||||||
const vector signed int yC = vec_and(t2C, t3C);
|
const vector signed int yC = vec_and(t2C, t3C);
|
||||||
@ -659,15 +659,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
|
const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
|
||||||
const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
|
const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
|
||||||
const vector signed int sumAp = vec_and(yA,
|
const vector signed int sumAp = vec_and(yA,
|
||||||
vec_and(sumAd4,sumAd8));
|
vec_and(sumAd4,sumAd8));
|
||||||
const vector signed int sumBp = vec_and(yB,
|
const vector signed int sumBp = vec_and(yB,
|
||||||
vec_and(sumBd4,sumBd8));
|
vec_and(sumBd4,sumBd8));
|
||||||
sumA2 = vec_or(sumAp,
|
sumA2 = vec_or(sumAp,
|
||||||
vec_sra(sumAp,
|
vec_sra(sumAp,
|
||||||
vuint32_16));
|
vuint32_16));
|
||||||
sumB2 = vec_or(sumBp,
|
sumB2 = vec_or(sumBp,
|
||||||
vec_sra(sumBp,
|
vec_sra(sumBp,
|
||||||
vuint32_16));
|
vuint32_16));
|
||||||
}
|
}
|
||||||
vec_st(sumA2, 0, S);
|
vec_st(sumA2, 0, S);
|
||||||
vec_st(sumB2, 16, S);
|
vec_st(sumB2, 16, S);
|
||||||
@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
|
|
||||||
const vector unsigned char permA1 = (vector unsigned char)
|
const vector unsigned char permA1 = (vector unsigned char)
|
||||||
AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
|
AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
|
||||||
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
|
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
|
||||||
const vector unsigned char permA2 = (vector unsigned char)
|
const vector unsigned char permA2 = (vector unsigned char)
|
||||||
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
|
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
|
||||||
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
|
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
|
||||||
const vector unsigned char permA1inc = (vector unsigned char)
|
const vector unsigned char permA1inc = (vector unsigned char)
|
||||||
AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
|
AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||||
const vector unsigned char permA2inc = (vector unsigned char)
|
const vector unsigned char permA2inc = (vector unsigned char)
|
||||||
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
|
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
|
||||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||||
const vector unsigned char magic = (vector unsigned char)
|
const vector unsigned char magic = (vector unsigned char)
|
||||||
AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
|
AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
|
||||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||||
const vector unsigned char extractPerm = (vector unsigned char)
|
const vector unsigned char extractPerm = (vector unsigned char)
|
||||||
AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
|
AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
|
||||||
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
|
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
|
||||||
const vector unsigned char extractPermInc = (vector unsigned char)
|
const vector unsigned char extractPermInc = (vector unsigned char)
|
||||||
AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
|
AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
|
||||||
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
|
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
|
||||||
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
|
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
|
||||||
const vector unsigned char tenRight = (vector unsigned char)
|
const vector unsigned char tenRight = (vector unsigned char)
|
||||||
AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||||
const vector unsigned char eightLeft = (vector unsigned char)
|
const vector unsigned char eightLeft = (vector unsigned char)
|
||||||
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
|
||||||
|
|
||||||
|
|
||||||
#define F_INIT(i) \
|
#define F_INIT(i) \
|
||||||
vector unsigned char tenRightM##i = tenRight; \
|
vector unsigned char tenRightM##i = tenRight; \
|
||||||
vector unsigned char permA1M##i = permA1; \
|
vector unsigned char permA1M##i = permA1; \
|
||||||
vector unsigned char permA2M##i = permA2; \
|
vector unsigned char permA2M##i = permA2; \
|
||||||
vector unsigned char extractPermM##i = extractPerm
|
vector unsigned char extractPermM##i = extractPerm
|
||||||
|
|
||||||
#define F2(i, j, k, l) \
|
#define F2(i, j, k, l) \
|
||||||
if (S[i] & (1 << (l+1))) { \
|
if (S[i] & (1 << (l+1))) { \
|
||||||
const vector unsigned char a_##j##_A##l = \
|
const vector unsigned char a_##j##_A##l = \
|
||||||
vec_perm(src##i, src##j, permA1M##i); \
|
vec_perm(src##i, src##j, permA1M##i); \
|
||||||
const vector unsigned char a_##j##_B##l = \
|
const vector unsigned char a_##j##_B##l = \
|
||||||
vec_perm(a_##j##_A##l, src##k, permA2M##i); \
|
vec_perm(a_##j##_A##l, src##k, permA2M##i); \
|
||||||
const vector signed int a_##j##_sump##l = \
|
const vector signed int a_##j##_sump##l = \
|
||||||
(vector signed int)vec_msum(a_##j##_B##l, magic, \
|
(vector signed int)vec_msum(a_##j##_B##l, magic, \
|
||||||
(vector unsigned int)zero); \
|
(vector unsigned int)zero); \
|
||||||
vector signed int F_##j##_##l = \
|
vector signed int F_##j##_##l = \
|
||||||
vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \
|
vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \
|
||||||
F_##j##_##l = vec_splat(F_##j##_##l, 3); \
|
F_##j##_##l = vec_splat(F_##j##_##l, 3); \
|
||||||
const vector signed int p_##j##_##l = \
|
const vector signed int p_##j##_##l = \
|
||||||
(vector signed int)vec_perm(src##j, \
|
(vector signed int)vec_perm(src##j, \
|
||||||
(vector unsigned char)zero, \
|
(vector unsigned char)zero, \
|
||||||
extractPermM##i); \
|
extractPermM##i); \
|
||||||
const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \
|
const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\
|
||||||
const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
|
const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\
|
||||||
vector signed int newpm_##j##_##l; \
|
vector signed int newpm_##j##_##l; \
|
||||||
if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \
|
if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \
|
||||||
newpm_##j##_##l = sum_##j##_##l; \
|
newpm_##j##_##l = sum_##j##_##l; \
|
||||||
else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \
|
else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \
|
||||||
newpm_##j##_##l = diff_##j##_##l; \
|
newpm_##j##_##l = diff_##j##_##l; \
|
||||||
else newpm_##j##_##l = F_##j##_##l; \
|
else newpm_##j##_##l = F_##j##_##l; \
|
||||||
const vector unsigned char newpm2_##j##_##l = \
|
const vector unsigned char newpm2_##j##_##l = \
|
||||||
vec_splat((vector unsigned char)newpm_##j##_##l, 15); \
|
vec_splat((vector unsigned char)newpm_##j##_##l, 15); \
|
||||||
const vector unsigned char mask##j##l = vec_add(identity, \
|
const vector unsigned char mask##j##l = vec_add(identity, \
|
||||||
tenRightM##i); \
|
tenRightM##i); \
|
||||||
src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \
|
src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \
|
||||||
} \
|
} \
|
||||||
permA1M##i = vec_add(permA1M##i, permA1inc); \
|
permA1M##i = vec_add(permA1M##i, permA1inc); \
|
||||||
permA2M##i = vec_add(permA2M##i, permA2inc); \
|
permA2M##i = vec_add(permA2M##i, permA2inc); \
|
||||||
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \
|
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \
|
||||||
extractPermM##i = vec_add(extractPermM##i, extractPermInc)
|
extractPermM##i = vec_add(extractPermM##i, extractPermInc)
|
||||||
|
|
||||||
#define ITER(i, j, k) \
|
#define ITER(i, j, k) \
|
||||||
F_INIT(i); \
|
F_INIT(i); \
|
||||||
F2(i, j, k, 0); \
|
F2(i, j, k, 0); \
|
||||||
F2(i, j, k, 1); \
|
F2(i, j, k, 1); \
|
||||||
F2(i, j, k, 2); \
|
F2(i, j, k, 2); \
|
||||||
F2(i, j, k, 3); \
|
F2(i, j, k, 3); \
|
||||||
F2(i, j, k, 4); \
|
F2(i, j, k, 4); \
|
||||||
F2(i, j, k, 5); \
|
F2(i, j, k, 5); \
|
||||||
F2(i, j, k, 6); \
|
F2(i, j, k, 6); \
|
||||||
F2(i, j, k, 7)
|
F2(i, j, k, 7)
|
||||||
|
|
||||||
ITER(0, 1, 2);
|
ITER(0, 1, 2);
|
||||||
@ -777,16 +777,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
|
|
||||||
const vector signed char neg1 = vec_splat_s8(-1);
|
const vector signed char neg1 = vec_splat_s8(-1);
|
||||||
|
|
||||||
#define STORE_LINE(i) \
|
#define STORE_LINE(i) \
|
||||||
const vector unsigned char permST##i = \
|
const vector unsigned char permST##i = \
|
||||||
vec_lvsr(i * stride, srcCopy); \
|
vec_lvsr(i * stride, srcCopy); \
|
||||||
const vector unsigned char maskST##i = \
|
const vector unsigned char maskST##i = \
|
||||||
vec_perm((vector unsigned char)zero, \
|
vec_perm((vector unsigned char)zero, \
|
||||||
(vector unsigned char)neg1, permST##i); \
|
(vector unsigned char)neg1, permST##i); \
|
||||||
src##i = vec_perm(src##i ,src##i, permST##i); \
|
src##i = vec_perm(src##i ,src##i, permST##i); \
|
||||||
sA##i= vec_sel(sA##i, src##i, maskST##i); \
|
sA##i= vec_sel(sA##i, src##i, maskST##i); \
|
||||||
sB##i= vec_sel(src##i, sB##i, maskST##i); \
|
sB##i= vec_sel(src##i, sB##i, maskST##i); \
|
||||||
vec_st(sA##i, i * stride, srcCopy); \
|
vec_st(sA##i, i * stride, srcCopy); \
|
||||||
vec_st(sB##i, i * stride + 16, srcCopy)
|
vec_st(sB##i, i * stride + 16, srcCopy)
|
||||||
|
|
||||||
STORE_LINE(1);
|
STORE_LINE(1);
|
||||||
@ -808,7 +808,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
|||||||
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
|
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
|
||||||
|
|
||||||
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
||||||
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
|
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
|
||||||
{
|
{
|
||||||
const vector signed int zero = vec_splat_s32(0);
|
const vector signed int zero = vec_splat_s32(0);
|
||||||
const vector signed short vsint16_1 = vec_splat_s16(1);
|
const vector signed short vsint16_1 = vec_splat_s16(1);
|
||||||
@ -820,16 +820,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
tempBluredPast[128]= maxNoise[1];
|
tempBluredPast[128]= maxNoise[1];
|
||||||
tempBluredPast[129]= maxNoise[2];
|
tempBluredPast[129]= maxNoise[2];
|
||||||
|
|
||||||
#define LOAD_LINE(src, i) \
|
#define LOAD_LINE(src, i) \
|
||||||
register int j##src##i = i * stride; \
|
register int j##src##i = i * stride; \
|
||||||
vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
|
vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
|
||||||
const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
|
const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
|
||||||
const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
|
const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
|
||||||
const vector unsigned char v_##src##A##i = \
|
const vector unsigned char v_##src##A##i = \
|
||||||
vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \
|
vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \
|
||||||
vector signed short v_##src##Ass##i = \
|
vector signed short v_##src##Ass##i = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)zero, \
|
(vector signed short)vec_mergeh((vector signed char)zero, \
|
||||||
(vector signed char)v_##src##A##i)
|
(vector signed char)v_##src##A##i)
|
||||||
|
|
||||||
LOAD_LINE(src, 0);
|
LOAD_LINE(src, 0);
|
||||||
LOAD_LINE(src, 1);
|
LOAD_LINE(src, 1);
|
||||||
@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
LOAD_LINE(tempBlured, 7);
|
LOAD_LINE(tempBlured, 7);
|
||||||
#undef LOAD_LINE
|
#undef LOAD_LINE
|
||||||
|
|
||||||
#define ACCUMULATE_DIFFS(i) \
|
#define ACCUMULATE_DIFFS(i) \
|
||||||
vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \
|
vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \
|
||||||
v_srcAss##i); \
|
v_srcAss##i); \
|
||||||
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
|
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
|
||||||
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
|
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
|
||||||
|
|
||||||
ACCUMULATE_DIFFS(0);
|
ACCUMULATE_DIFFS(0);
|
||||||
@ -916,12 +916,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
const vector signed short vsint16_4 = vec_splat_s16(4);
|
const vector signed short vsint16_4 = vec_splat_s16(4);
|
||||||
const vector unsigned short vuint16_3 = vec_splat_u16(3);
|
const vector unsigned short vuint16_3 = vec_splat_u16(3);
|
||||||
|
|
||||||
#define OP(i) \
|
#define OP(i) \
|
||||||
const vector signed short v_temp##i = \
|
const vector signed short v_temp##i = \
|
||||||
vec_mladd(v_tempBluredAss##i, \
|
vec_mladd(v_tempBluredAss##i, \
|
||||||
vsint16_7, v_srcAss##i); \
|
vsint16_7, v_srcAss##i); \
|
||||||
const vector signed short v_temp2##i = \
|
const vector signed short v_temp2##i = \
|
||||||
vec_add(v_temp##i, vsint16_4); \
|
vec_add(v_temp##i, vsint16_4); \
|
||||||
v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)
|
v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)
|
||||||
|
|
||||||
OP(0);
|
OP(0);
|
||||||
@ -937,12 +937,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
const vector signed short vsint16_3 = vec_splat_s16(3);
|
const vector signed short vsint16_3 = vec_splat_s16(3);
|
||||||
const vector signed short vsint16_2 = vec_splat_s16(2);
|
const vector signed short vsint16_2 = vec_splat_s16(2);
|
||||||
|
|
||||||
#define OP(i) \
|
#define OP(i) \
|
||||||
const vector signed short v_temp##i = \
|
const vector signed short v_temp##i = \
|
||||||
vec_mladd(v_tempBluredAss##i, \
|
vec_mladd(v_tempBluredAss##i, \
|
||||||
vsint16_3, v_srcAss##i); \
|
vsint16_3, v_srcAss##i); \
|
||||||
const vector signed short v_temp2##i = \
|
const vector signed short v_temp2##i = \
|
||||||
vec_add(v_temp##i, vsint16_2); \
|
vec_add(v_temp##i, vsint16_2); \
|
||||||
v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
|
v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
|
||||||
|
|
||||||
OP(0);
|
OP(0);
|
||||||
@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
|
|
||||||
const vector signed char neg1 = vec_splat_s8(-1);
|
const vector signed char neg1 = vec_splat_s8(-1);
|
||||||
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||||
|
|
||||||
#define PACK_AND_STORE(src, i) \
|
#define PACK_AND_STORE(src, i) \
|
||||||
const vector unsigned char perms##src##i = \
|
const vector unsigned char perms##src##i = \
|
||||||
vec_lvsr(i * stride, src); \
|
vec_lvsr(i * stride, src); \
|
||||||
const vector unsigned char vf##src##i = \
|
const vector unsigned char vf##src##i = \
|
||||||
vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \
|
vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \
|
||||||
const vector unsigned char vg##src##i = \
|
const vector unsigned char vg##src##i = \
|
||||||
vec_perm(vf##src##i, v_##src##A##i, permHH); \
|
vec_perm(vf##src##i, v_##src##A##i, permHH); \
|
||||||
const vector unsigned char mask##src##i = \
|
const vector unsigned char mask##src##i = \
|
||||||
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
|
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
|
||||||
const vector unsigned char vg2##src##i = \
|
const vector unsigned char vg2##src##i = \
|
||||||
vec_perm(vg##src##i, vg##src##i, perms##src##i); \
|
vec_perm(vg##src##i, vg##src##i, perms##src##i); \
|
||||||
const vector unsigned char svA##src##i = \
|
const vector unsigned char svA##src##i = \
|
||||||
vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \
|
vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \
|
||||||
const vector unsigned char svB##src##i = \
|
const vector unsigned char svB##src##i = \
|
||||||
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
|
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
|
||||||
vec_st(svA##src##i, i * stride, src); \
|
vec_st(svA##src##i, i * stride, src); \
|
||||||
vec_st(svB##src##i, i * stride + 16, src)
|
vec_st(svB##src##i, i * stride + 16, src)
|
||||||
|
|
||||||
PACK_AND_STORE(src, 0);
|
PACK_AND_STORE(src, 0);
|
||||||
@ -1001,14 +1001,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
|||||||
static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
|
static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
|
||||||
const vector unsigned char zero = vec_splat_u8(0);
|
const vector unsigned char zero = vec_splat_u8(0);
|
||||||
|
|
||||||
#define LOAD_DOUBLE_LINE(i, j) \
|
#define LOAD_DOUBLE_LINE(i, j) \
|
||||||
vector unsigned char perm1##i = vec_lvsl(i * stride, src); \
|
vector unsigned char perm1##i = vec_lvsl(i * stride, src); \
|
||||||
vector unsigned char perm2##i = vec_lvsl(j * stride, src); \
|
vector unsigned char perm2##i = vec_lvsl(j * stride, src); \
|
||||||
vector unsigned char srcA##i = vec_ld(i * stride, src); \
|
vector unsigned char srcA##i = vec_ld(i * stride, src); \
|
||||||
vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \
|
vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \
|
||||||
vector unsigned char srcC##i = vec_ld(j * stride, src); \
|
vector unsigned char srcC##i = vec_ld(j * stride, src); \
|
||||||
vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \
|
vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \
|
||||||
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
|
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
|
||||||
vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)
|
vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)
|
||||||
|
|
||||||
LOAD_DOUBLE_LINE(0, 1);
|
LOAD_DOUBLE_LINE(0, 1);
|
||||||
@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
|
|||||||
const vector unsigned char zero = vec_splat_u8(0);
|
const vector unsigned char zero = vec_splat_u8(0);
|
||||||
const vector unsigned char magic_perm = (const vector unsigned char)
|
const vector unsigned char magic_perm = (const vector unsigned char)
|
||||||
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||||
|
|
||||||
#define LOAD_DOUBLE_LINE(i, j) \
|
#define LOAD_DOUBLE_LINE(i, j) \
|
||||||
vector unsigned char src##i = vec_ld(i * 16, src); \
|
vector unsigned char src##i = vec_ld(i * 16, src); \
|
||||||
vector unsigned char src##j = vec_ld(j * 16, src)
|
vector unsigned char src##j = vec_ld(j * 16, src)
|
||||||
|
|
||||||
LOAD_DOUBLE_LINE(0, 1);
|
LOAD_DOUBLE_LINE(0, 1);
|
||||||
@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
|
|||||||
|
|
||||||
|
|
||||||
const vector signed char neg1 = vec_splat_s8(-1);
|
const vector signed char neg1 = vec_splat_s8(-1);
|
||||||
#define STORE_DOUBLE_LINE(i, j) \
|
#define STORE_DOUBLE_LINE(i, j) \
|
||||||
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
|
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
|
||||||
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
|
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
|
||||||
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
|
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
|
||||||
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
|
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
|
||||||
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
|
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
|
||||||
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
|
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
|
||||||
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
|
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
|
||||||
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
|
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
|
||||||
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
|
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
|
||||||
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
|
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
|
||||||
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
|
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
|
||||||
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
|
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
|
||||||
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
|
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
|
||||||
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
|
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
|
||||||
vec_st(dstAF##i, i * stride, dst); \
|
vec_st(dstAF##i, i * stride, dst); \
|
||||||
vec_st(dstBF##i, i * stride + 16, dst); \
|
vec_st(dstBF##i, i * stride + 16, dst); \
|
||||||
vec_st(dstAF##j, j * stride, dst); \
|
vec_st(dstAF##j, j * stride, dst); \
|
||||||
vec_st(dstBF##j, j * stride + 16, dst)
|
vec_st(dstBF##j, j * stride + 16, dst)
|
||||||
|
|
||||||
STORE_DOUBLE_LINE(0,1);
|
STORE_DOUBLE_LINE(0,1);
|
||||||
|
@ -21,42 +21,42 @@
|
|||||||
* internal api header.
|
* internal api header.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define V_DEBLOCK 0x01
|
#define V_DEBLOCK 0x01
|
||||||
#define H_DEBLOCK 0x02
|
#define H_DEBLOCK 0x02
|
||||||
#define DERING 0x04
|
#define DERING 0x04
|
||||||
#define LEVEL_FIX 0x08 ///< Brightness & Contrast
|
#define LEVEL_FIX 0x08 ///< Brightness & Contrast
|
||||||
|
|
||||||
#define LUM_V_DEBLOCK V_DEBLOCK // 1
|
#define LUM_V_DEBLOCK V_DEBLOCK // 1
|
||||||
#define LUM_H_DEBLOCK H_DEBLOCK // 2
|
#define LUM_H_DEBLOCK H_DEBLOCK // 2
|
||||||
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16
|
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16
|
||||||
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32
|
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32
|
||||||
#define LUM_DERING DERING // 4
|
#define LUM_DERING DERING // 4
|
||||||
#define CHROM_DERING (DERING<<4) // 64
|
#define CHROM_DERING (DERING<<4) // 64
|
||||||
#define LUM_LEVEL_FIX LEVEL_FIX // 8
|
#define LUM_LEVEL_FIX LEVEL_FIX // 8
|
||||||
#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet)
|
#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet)
|
||||||
|
|
||||||
// Experimental vertical filters
|
// Experimental vertical filters
|
||||||
#define V_X1_FILTER 0x0200 // 512
|
#define V_X1_FILTER 0x0200 // 512
|
||||||
#define V_A_DEBLOCK 0x0400
|
#define V_A_DEBLOCK 0x0400
|
||||||
|
|
||||||
// Experimental horizontal filters
|
// Experimental horizontal filters
|
||||||
#define H_X1_FILTER 0x2000 // 8192
|
#define H_X1_FILTER 0x2000 // 8192
|
||||||
#define H_A_DEBLOCK 0x4000
|
#define H_A_DEBLOCK 0x4000
|
||||||
|
|
||||||
/// select between full y range (255-0) or standart one (234-16)
|
/// select between full y range (255-0) or standart one (234-16)
|
||||||
#define FULL_Y_RANGE 0x8000 // 32768
|
#define FULL_Y_RANGE 0x8000 // 32768
|
||||||
|
|
||||||
//Deinterlacing Filters
|
//Deinterlacing Filters
|
||||||
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
|
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
|
||||||
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
|
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
|
||||||
#define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet)
|
#define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet)
|
||||||
#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144
|
#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144
|
||||||
#define MEDIAN_DEINT_FILTER 0x80000 // 524288
|
#define MEDIAN_DEINT_FILTER 0x80000 // 524288
|
||||||
#define FFMPEG_DEINT_FILTER 0x400000
|
#define FFMPEG_DEINT_FILTER 0x400000
|
||||||
#define LOWPASS5_DEINT_FILTER 0x800000
|
#define LOWPASS5_DEINT_FILTER 0x800000
|
||||||
|
|
||||||
#define TEMP_NOISE_FILTER 0x100000
|
#define TEMP_NOISE_FILTER 0x100000
|
||||||
#define FORCE_QUANT 0x200000
|
#define FORCE_QUANT 0x200000
|
||||||
|
|
||||||
//use if u want a faster postprocessing code
|
//use if u want a faster postprocessing code
|
||||||
//cant differentiate between chroma & luma filters (both on or both off)
|
//cant differentiate between chroma & luma filters (both on or both off)
|
||||||
@ -66,8 +66,8 @@
|
|||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
static inline int CLIP(int a){
|
static inline int CLIP(int a){
|
||||||
if(a&256) return ((a)>>31)^(-1);
|
if(a&256) return ((a)>>31)^(-1);
|
||||||
else return a;
|
else return a;
|
||||||
}
|
}
|
||||||
//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
|
//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
|
||||||
#elif 0
|
#elif 0
|
||||||
@ -79,92 +79,92 @@ static inline int CLIP(int a){
|
|||||||
* Postprocessng filter.
|
* Postprocessng filter.
|
||||||
*/
|
*/
|
||||||
struct PPFilter{
|
struct PPFilter{
|
||||||
char *shortName;
|
char *shortName;
|
||||||
char *longName;
|
char *longName;
|
||||||
int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated
|
int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated
|
||||||
int minLumQuality; ///< minimum quality to turn luminance filtering on
|
int minLumQuality; ///< minimum quality to turn luminance filtering on
|
||||||
int minChromQuality; ///< minimum quality to turn chrominance filtering on
|
int minChromQuality; ///< minimum quality to turn chrominance filtering on
|
||||||
int mask; ///< Bitmask to turn this filter on
|
int mask; ///< Bitmask to turn this filter on
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Postprocessng mode.
|
* Postprocessng mode.
|
||||||
*/
|
*/
|
||||||
typedef struct PPMode{
|
typedef struct PPMode{
|
||||||
int lumMode; ///< acivates filters for luminance
|
int lumMode; ///< acivates filters for luminance
|
||||||
int chromMode; ///< acivates filters for chrominance
|
int chromMode; ///< acivates filters for chrominance
|
||||||
int error; ///< non zero on error
|
int error; ///< non zero on error
|
||||||
|
|
||||||
int minAllowedY; ///< for brigtness correction
|
int minAllowedY; ///< for brigtness correction
|
||||||
int maxAllowedY; ///< for brihtness correction
|
int maxAllowedY; ///< for brihtness correction
|
||||||
float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture
|
float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture
|
||||||
|
|
||||||
int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
|
int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
|
||||||
|
|
||||||
int baseDcDiff;
|
int baseDcDiff;
|
||||||
int flatnessThreshold;
|
int flatnessThreshold;
|
||||||
|
|
||||||
int forcedQuant; ///< quantizer if FORCE_QUANT is used
|
int forcedQuant; ///< quantizer if FORCE_QUANT is used
|
||||||
} PPMode;
|
} PPMode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* postprocess context.
|
* postprocess context.
|
||||||
*/
|
*/
|
||||||
typedef struct PPContext{
|
typedef struct PPContext{
|
||||||
uint8_t *tempBlocks; ///<used for the horizontal code
|
uint8_t *tempBlocks; ///<used for the horizontal code
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* luma histogram.
|
* luma histogram.
|
||||||
* we need 64bit here otherwise we'll going to have a problem
|
* we need 64bit here otherwise we'll going to have a problem
|
||||||
* after watching a black picture for 5 hours
|
* after watching a black picture for 5 hours
|
||||||
*/
|
*/
|
||||||
uint64_t *yHistogram;
|
uint64_t *yHistogram;
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) packedYOffset;
|
uint64_t __attribute__((aligned(8))) packedYOffset;
|
||||||
uint64_t __attribute__((aligned(8))) packedYScale;
|
uint64_t __attribute__((aligned(8))) packedYScale;
|
||||||
|
|
||||||
/** Temporal noise reducing buffers */
|
/** Temporal noise reducing buffers */
|
||||||
uint8_t *tempBlured[3];
|
uint8_t *tempBlured[3];
|
||||||
int32_t *tempBluredPast[3];
|
int32_t *tempBluredPast[3];
|
||||||
|
|
||||||
/** Temporary buffers for handling the last row(s) */
|
/** Temporary buffers for handling the last row(s) */
|
||||||
uint8_t *tempDst;
|
uint8_t *tempDst;
|
||||||
uint8_t *tempSrc;
|
uint8_t *tempSrc;
|
||||||
|
|
||||||
uint8_t *deintTemp;
|
uint8_t *deintTemp;
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) pQPb;
|
uint64_t __attribute__((aligned(8))) pQPb;
|
||||||
uint64_t __attribute__((aligned(8))) pQPb2;
|
uint64_t __attribute__((aligned(8))) pQPb2;
|
||||||
|
|
||||||
uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
|
uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
|
||||||
uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
|
uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
|
||||||
|
|
||||||
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
|
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
|
||||||
QP_STORE_T *nonBQPTable;
|
QP_STORE_T *nonBQPTable;
|
||||||
QP_STORE_T *forcedQPTable;
|
QP_STORE_T *forcedQPTable;
|
||||||
|
|
||||||
int QP;
|
int QP;
|
||||||
int nonBQP;
|
int nonBQP;
|
||||||
|
|
||||||
int frameNum;
|
int frameNum;
|
||||||
|
|
||||||
int cpuCaps;
|
int cpuCaps;
|
||||||
|
|
||||||
int qpStride; ///<size of qp buffers (needed to realloc them if needed)
|
int qpStride; ///<size of qp buffers (needed to realloc them if needed)
|
||||||
int stride; ///<size of some buffers (needed to realloc them if needed)
|
int stride; ///<size of some buffers (needed to realloc them if needed)
|
||||||
|
|
||||||
int hChromaSubSample;
|
int hChromaSubSample;
|
||||||
int vChromaSubSample;
|
int vChromaSubSample;
|
||||||
|
|
||||||
PPMode ppMode;
|
PPMode ppMode;
|
||||||
} PPContext;
|
} PPContext;
|
||||||
|
|
||||||
|
|
||||||
static inline void linecpy(void *dest, void *src, int lines, int stride)
|
static inline void linecpy(void *dest, void *src, int lines, int stride)
|
||||||
{
|
{
|
||||||
if (stride > 0) {
|
if (stride > 0) {
|
||||||
memcpy(dest, src, lines*stride);
|
memcpy(dest, src, lines*stride);
|
||||||
} else {
|
} else {
|
||||||
memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
|
memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -64,8 +64,8 @@ void *av_malloc(unsigned int size)
|
|||||||
Indeed, we should align it:
|
Indeed, we should align it:
|
||||||
on 4 for 386
|
on 4 for 386
|
||||||
on 16 for 486
|
on 16 for 486
|
||||||
on 32 for 586, PPro - k6-III
|
on 32 for 586, PPro - k6-III
|
||||||
on 64 for K7 (maybe for P3 too).
|
on 64 for K7 (maybe for P3 too).
|
||||||
Because L1 and L2 caches are aligned on those values.
|
Because L1 and L2 caches are aligned on those values.
|
||||||
But I don't want to code such logic here!
|
But I don't want to code such logic here!
|
||||||
*/
|
*/
|
||||||
@ -76,13 +76,13 @@ void *av_malloc(unsigned int size)
|
|||||||
Why not larger? because i didnt see a difference in benchmarks ...
|
Why not larger? because i didnt see a difference in benchmarks ...
|
||||||
*/
|
*/
|
||||||
/* benchmarks with p3
|
/* benchmarks with p3
|
||||||
memalign(64)+1 3071,3051,3032
|
memalign(64)+1 3071,3051,3032
|
||||||
memalign(64)+2 3051,3032,3041
|
memalign(64)+2 3051,3032,3041
|
||||||
memalign(64)+4 2911,2896,2915
|
memalign(64)+4 2911,2896,2915
|
||||||
memalign(64)+8 2545,2554,2550
|
memalign(64)+8 2545,2554,2550
|
||||||
memalign(64)+16 2543,2572,2563
|
memalign(64)+16 2543,2572,2563
|
||||||
memalign(64)+32 2546,2545,2571
|
memalign(64)+32 2546,2545,2571
|
||||||
memalign(64)+64 2570,2533,2558
|
memalign(64)+64 2570,2533,2558
|
||||||
|
|
||||||
btw, malloc seems to do 8 byte alignment by default here
|
btw, malloc seems to do 8 byte alignment by default here
|
||||||
*/
|
*/
|
||||||
|
@ -54,26 +54,26 @@ typedef struct MJpegContext {
|
|||||||
/* JPEG marker codes */
|
/* JPEG marker codes */
|
||||||
typedef enum {
|
typedef enum {
|
||||||
/* start of frame */
|
/* start of frame */
|
||||||
SOF0 = 0xc0, /* baseline */
|
SOF0 = 0xc0, /* baseline */
|
||||||
SOF1 = 0xc1, /* extended sequential, huffman */
|
SOF1 = 0xc1, /* extended sequential, huffman */
|
||||||
SOF2 = 0xc2, /* progressive, huffman */
|
SOF2 = 0xc2, /* progressive, huffman */
|
||||||
SOF3 = 0xc3, /* lossless, huffman */
|
SOF3 = 0xc3, /* lossless, huffman */
|
||||||
|
|
||||||
SOF5 = 0xc5, /* differential sequential, huffman */
|
SOF5 = 0xc5, /* differential sequential, huffman */
|
||||||
SOF6 = 0xc6, /* differential progressive, huffman */
|
SOF6 = 0xc6, /* differential progressive, huffman */
|
||||||
SOF7 = 0xc7, /* differential lossless, huffman */
|
SOF7 = 0xc7, /* differential lossless, huffman */
|
||||||
JPG = 0xc8, /* reserved for JPEG extension */
|
JPG = 0xc8, /* reserved for JPEG extension */
|
||||||
SOF9 = 0xc9, /* extended sequential, arithmetic */
|
SOF9 = 0xc9, /* extended sequential, arithmetic */
|
||||||
SOF10 = 0xca, /* progressive, arithmetic */
|
SOF10 = 0xca, /* progressive, arithmetic */
|
||||||
SOF11 = 0xcb, /* lossless, arithmetic */
|
SOF11 = 0xcb, /* lossless, arithmetic */
|
||||||
|
|
||||||
SOF13 = 0xcd, /* differential sequential, arithmetic */
|
SOF13 = 0xcd, /* differential sequential, arithmetic */
|
||||||
SOF14 = 0xce, /* differential progressive, arithmetic */
|
SOF14 = 0xce, /* differential progressive, arithmetic */
|
||||||
SOF15 = 0xcf, /* differential lossless, arithmetic */
|
SOF15 = 0xcf, /* differential lossless, arithmetic */
|
||||||
|
|
||||||
DHT = 0xc4, /* define huffman tables */
|
DHT = 0xc4, /* define huffman tables */
|
||||||
|
|
||||||
DAC = 0xcc, /* define arithmetic-coding conditioning */
|
DAC = 0xcc, /* define arithmetic-coding conditioning */
|
||||||
|
|
||||||
/* restart with modulo 8 count "m" */
|
/* restart with modulo 8 count "m" */
|
||||||
RST0 = 0xd0,
|
RST0 = 0xd0,
|
||||||
@ -85,14 +85,14 @@ typedef enum {
|
|||||||
RST6 = 0xd6,
|
RST6 = 0xd6,
|
||||||
RST7 = 0xd7,
|
RST7 = 0xd7,
|
||||||
|
|
||||||
SOI = 0xd8, /* start of image */
|
SOI = 0xd8, /* start of image */
|
||||||
EOI = 0xd9, /* end of image */
|
EOI = 0xd9, /* end of image */
|
||||||
SOS = 0xda, /* start of scan */
|
SOS = 0xda, /* start of scan */
|
||||||
DQT = 0xdb, /* define quantization tables */
|
DQT = 0xdb, /* define quantization tables */
|
||||||
DNL = 0xdc, /* define number of lines */
|
DNL = 0xdc, /* define number of lines */
|
||||||
DRI = 0xdd, /* define restart interval */
|
DRI = 0xdd, /* define restart interval */
|
||||||
DHP = 0xde, /* define hierarchical progression */
|
DHP = 0xde, /* define hierarchical progression */
|
||||||
EXP = 0xdf, /* expand reference components */
|
EXP = 0xdf, /* expand reference components */
|
||||||
|
|
||||||
APP0 = 0xe0,
|
APP0 = 0xe0,
|
||||||
APP1 = 0xe1,
|
APP1 = 0xe1,
|
||||||
@ -118,17 +118,17 @@ typedef enum {
|
|||||||
JPG4 = 0xf4,
|
JPG4 = 0xf4,
|
||||||
JPG5 = 0xf5,
|
JPG5 = 0xf5,
|
||||||
JPG6 = 0xf6,
|
JPG6 = 0xf6,
|
||||||
SOF48 = 0xf7, ///< JPEG-LS
|
SOF48 = 0xf7, ///< JPEG-LS
|
||||||
LSE = 0xf8, ///< JPEG-LS extension parameters
|
LSE = 0xf8, ///< JPEG-LS extension parameters
|
||||||
JPG9 = 0xf9,
|
JPG9 = 0xf9,
|
||||||
JPG10 = 0xfa,
|
JPG10 = 0xfa,
|
||||||
JPG11 = 0xfb,
|
JPG11 = 0xfb,
|
||||||
JPG12 = 0xfc,
|
JPG12 = 0xfc,
|
||||||
JPG13 = 0xfd,
|
JPG13 = 0xfd,
|
||||||
|
|
||||||
COM = 0xfe, /* comment */
|
COM = 0xfe, /* comment */
|
||||||
|
|
||||||
TEM = 0x01, /* temporary private use for arithmetic coding */
|
TEM = 0x01, /* temporary private use for arithmetic coding */
|
||||||
|
|
||||||
/* 0x02 -> 0xbf reserved */
|
/* 0x02 -> 0xbf reserved */
|
||||||
} JPEG_MARKER;
|
} JPEG_MARKER;
|
||||||
@ -583,7 +583,7 @@ void mjpeg_picture_trailer(MpegEncContext *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
|
static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
|
||||||
uint8_t *huff_size, uint16_t *huff_code)
|
uint8_t *huff_size, uint16_t *huff_code)
|
||||||
{
|
{
|
||||||
int mant, nbits;
|
int mant, nbits;
|
||||||
|
|
||||||
@ -935,10 +935,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
|
if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
|
||||||
{
|
{
|
||||||
av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
|
av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
|
||||||
init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
|
init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
|
||||||
mjpeg_decode_dht(s);
|
mjpeg_decode_dht(s);
|
||||||
/* should check for error - but dunno */
|
/* should check for error - but dunno */
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1017,10 +1017,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
|
|||||||
while (len >= 65) {
|
while (len >= 65) {
|
||||||
/* only 8 bit precision handled */
|
/* only 8 bit precision handled */
|
||||||
if (get_bits(&s->gb, 4) != 0)
|
if (get_bits(&s->gb, 4) != 0)
|
||||||
{
|
{
|
||||||
dprintf("dqt: 16bit precision\n");
|
dprintf("dqt: 16bit precision\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
index = get_bits(&s->gb, 4);
|
index = get_bits(&s->gb, 4);
|
||||||
if (index >= 4)
|
if (index >= 4)
|
||||||
return -1;
|
return -1;
|
||||||
@ -1028,14 +1028,14 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
|
|||||||
/* read quant table */
|
/* read quant table */
|
||||||
for(i=0;i<64;i++) {
|
for(i=0;i<64;i++) {
|
||||||
j = s->scantable.permutated[i];
|
j = s->scantable.permutated[i];
|
||||||
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
|
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
//XXX FIXME finetune, and perhaps add dc too
|
//XXX FIXME finetune, and perhaps add dc too
|
||||||
s->qscale[index]= FFMAX(
|
s->qscale[index]= FFMAX(
|
||||||
s->quant_matrixes[index][s->scantable.permutated[1]],
|
s->quant_matrixes[index][s->scantable.permutated[1]],
|
||||||
s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
|
s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
|
||||||
dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
|
dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
|
||||||
len -= 65;
|
len -= 65;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1132,7 +1132,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
|
|||||||
if (s->quant_index[i] >= 4)
|
if (s->quant_index[i] >= 4)
|
||||||
return -1;
|
return -1;
|
||||||
dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
|
dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
|
||||||
s->v_count[i], s->component_id[i], s->quant_index[i]);
|
s->v_count[i], s->component_id[i], s->quant_index[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
|
if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
|
||||||
@ -1151,7 +1151,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
|
|||||||
s->org_height != 0 &&
|
s->org_height != 0 &&
|
||||||
s->height < ((s->org_height * 3) / 4)) {
|
s->height < ((s->org_height * 3) / 4)) {
|
||||||
s->interlaced = 1;
|
s->interlaced = 1;
|
||||||
// s->bottom_field = (s->interlace_polarity) ? 1 : 0;
|
// s->bottom_field = (s->interlace_polarity) ? 1 : 0;
|
||||||
s->bottom_field = 0;
|
s->bottom_field = 0;
|
||||||
s->avctx->height *= 2;
|
s->avctx->height *= 2;
|
||||||
}
|
}
|
||||||
@ -1202,7 +1202,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
|
|||||||
|
|
||||||
if (len != (8+(3*nb_components)))
|
if (len != (8+(3*nb_components)))
|
||||||
{
|
{
|
||||||
dprintf("decode_sof0: error, len(%d) mismatch\n", len);
|
dprintf("decode_sof0: error, len(%d) mismatch\n", len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1214,7 +1214,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
|
|||||||
code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
|
code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
|
||||||
if (code < 0)
|
if (code < 0)
|
||||||
{
|
{
|
||||||
dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
|
dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
|
||||||
&s->vlcs[0][dc_index]);
|
&s->vlcs[0][dc_index]);
|
||||||
return 0xffff;
|
return 0xffff;
|
||||||
}
|
}
|
||||||
@ -1247,7 +1247,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
|
|||||||
ac_vlc = &s->vlcs[1][ac_index];
|
ac_vlc = &s->vlcs[1][ac_index];
|
||||||
i = 1;
|
i = 1;
|
||||||
for(;;) {
|
for(;;) {
|
||||||
code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
|
code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
|
||||||
|
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
dprintf("error ac\n");
|
dprintf("error ac\n");
|
||||||
@ -1452,7 +1452,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){
|
|||||||
dprintf("error y=%d x=%d\n", mb_y, mb_x);
|
dprintf("error y=%d x=%d\n", mb_y, mb_x);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// dprintf("mb: %d %d processed\n", mb_y, mb_x);
|
// dprintf("mb: %d %d processed\n", mb_y, mb_x);
|
||||||
ptr = s->picture.data[c] +
|
ptr = s->picture.data[c] +
|
||||||
(((s->linesize[c] * (v * mb_y + y) * 8) +
|
(((s->linesize[c] * (v * mb_y + y) * 8) +
|
||||||
(h * mb_x + x) * 8) >> s->avctx->lowres);
|
(h * mb_x + x) * 8) >> s->avctx->lowres);
|
||||||
@ -1491,29 +1491,29 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
|
|||||||
nb_components = get_bits(&s->gb, 8);
|
nb_components = get_bits(&s->gb, 8);
|
||||||
if (len != 6+2*nb_components)
|
if (len != 6+2*nb_components)
|
||||||
{
|
{
|
||||||
dprintf("decode_sos: invalid len (%d)\n", len);
|
dprintf("decode_sos: invalid len (%d)\n", len);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
/* XXX: only interleaved scan accepted */
|
/* XXX: only interleaved scan accepted */
|
||||||
if (nb_components != s->nb_components)
|
if (nb_components != s->nb_components)
|
||||||
{
|
{
|
||||||
dprintf("decode_sos: components(%d) mismatch\n", nb_components);
|
dprintf("decode_sos: components(%d) mismatch\n", nb_components);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
vmax = 0;
|
vmax = 0;
|
||||||
hmax = 0;
|
hmax = 0;
|
||||||
for(i=0;i<nb_components;i++) {
|
for(i=0;i<nb_components;i++) {
|
||||||
id = get_bits(&s->gb, 8) - 1;
|
id = get_bits(&s->gb, 8) - 1;
|
||||||
dprintf("component: %d\n", id);
|
dprintf("component: %d\n", id);
|
||||||
/* find component index */
|
/* find component index */
|
||||||
for(index=0;index<s->nb_components;index++)
|
for(index=0;index<s->nb_components;index++)
|
||||||
if (id == s->component_id[index])
|
if (id == s->component_id[index])
|
||||||
break;
|
break;
|
||||||
if (index == s->nb_components)
|
if (index == s->nb_components)
|
||||||
{
|
{
|
||||||
dprintf("decode_sos: index(%d) out of components\n", index);
|
dprintf("decode_sos: index(%d) out of components\n", index);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->comp_index[i] = index;
|
s->comp_index[i] = index;
|
||||||
|
|
||||||
@ -1524,26 +1524,26 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
|
|||||||
s->dc_index[i] = get_bits(&s->gb, 4);
|
s->dc_index[i] = get_bits(&s->gb, 4);
|
||||||
s->ac_index[i] = get_bits(&s->gb, 4);
|
s->ac_index[i] = get_bits(&s->gb, 4);
|
||||||
|
|
||||||
if (s->dc_index[i] < 0 || s->ac_index[i] < 0 ||
|
if (s->dc_index[i] < 0 || s->ac_index[i] < 0 ||
|
||||||
s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
|
s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
|
||||||
goto out_of_range;
|
goto out_of_range;
|
||||||
#if 0 //buggy
|
#if 0 //buggy
|
||||||
switch(s->start_code)
|
switch(s->start_code)
|
||||||
{
|
{
|
||||||
case SOF0:
|
case SOF0:
|
||||||
if (dc_index[i] > 1 || ac_index[i] > 1)
|
if (dc_index[i] > 1 || ac_index[i] > 1)
|
||||||
goto out_of_range;
|
goto out_of_range;
|
||||||
break;
|
break;
|
||||||
case SOF1:
|
case SOF1:
|
||||||
case SOF2:
|
case SOF2:
|
||||||
if (dc_index[i] > 3 || ac_index[i] > 3)
|
if (dc_index[i] > 3 || ac_index[i] > 3)
|
||||||
goto out_of_range;
|
goto out_of_range;
|
||||||
break;
|
break;
|
||||||
case SOF3:
|
case SOF3:
|
||||||
if (dc_index[i] > 3 || ac_index[i] != 0)
|
if (dc_index[i] > 3 || ac_index[i] != 0)
|
||||||
goto out_of_range;
|
goto out_of_range;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1605,7 +1605,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
|
|||||||
static int mjpeg_decode_dri(MJpegDecodeContext *s)
|
static int mjpeg_decode_dri(MJpegDecodeContext *s)
|
||||||
{
|
{
|
||||||
if (get_bits(&s->gb, 16) != 4)
|
if (get_bits(&s->gb, 16) != 4)
|
||||||
return -1;
|
return -1;
|
||||||
s->restart_interval = get_bits(&s->gb, 16);
|
s->restart_interval = get_bits(&s->gb, 16);
|
||||||
s->restart_count = 0;
|
s->restart_count = 0;
|
||||||
dprintf("restart interval: %d\n", s->restart_interval);
|
dprintf("restart interval: %d\n", s->restart_interval);
|
||||||
@ -1619,7 +1619,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
|
|||||||
|
|
||||||
len = get_bits(&s->gb, 16);
|
len = get_bits(&s->gb, 16);
|
||||||
if (len < 5)
|
if (len < 5)
|
||||||
return -1;
|
return -1;
|
||||||
if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
|
if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -1636,35 +1636,35 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
|
|||||||
informations, but it's always present in AVID creates files */
|
informations, but it's always present in AVID creates files */
|
||||||
if (id == ff_get_fourcc("AVI1"))
|
if (id == ff_get_fourcc("AVI1"))
|
||||||
{
|
{
|
||||||
/* structure:
|
/* structure:
|
||||||
4bytes AVI1
|
4bytes AVI1
|
||||||
1bytes polarity
|
1bytes polarity
|
||||||
1bytes always zero
|
1bytes always zero
|
||||||
4bytes field_size
|
4bytes field_size
|
||||||
4bytes field_size_less_padding
|
4bytes field_size_less_padding
|
||||||
*/
|
*/
|
||||||
s->buggy_avid = 1;
|
s->buggy_avid = 1;
|
||||||
// if (s->first_picture)
|
// if (s->first_picture)
|
||||||
// printf("mjpeg: workarounding buggy AVID\n");
|
// printf("mjpeg: workarounding buggy AVID\n");
|
||||||
s->interlace_polarity = get_bits(&s->gb, 8);
|
s->interlace_polarity = get_bits(&s->gb, 8);
|
||||||
#if 0
|
#if 0
|
||||||
skip_bits(&s->gb, 8);
|
skip_bits(&s->gb, 8);
|
||||||
skip_bits(&s->gb, 32);
|
skip_bits(&s->gb, 32);
|
||||||
skip_bits(&s->gb, 32);
|
skip_bits(&s->gb, 32);
|
||||||
len -= 10;
|
len -= 10;
|
||||||
#endif
|
#endif
|
||||||
// if (s->interlace_polarity)
|
// if (s->interlace_polarity)
|
||||||
// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
|
// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
// len -= 2;
|
// len -= 2;
|
||||||
|
|
||||||
if (id == ff_get_fourcc("JFIF"))
|
if (id == ff_get_fourcc("JFIF"))
|
||||||
{
|
{
|
||||||
int t_w, t_h, v1, v2;
|
int t_w, t_h, v1, v2;
|
||||||
skip_bits(&s->gb, 8); /* the trailing zero-byte */
|
skip_bits(&s->gb, 8); /* the trailing zero-byte */
|
||||||
v1= get_bits(&s->gb, 8);
|
v1= get_bits(&s->gb, 8);
|
||||||
v2= get_bits(&s->gb, 8);
|
v2= get_bits(&s->gb, 8);
|
||||||
skip_bits(&s->gb, 8);
|
skip_bits(&s->gb, 8);
|
||||||
|
|
||||||
@ -1678,37 +1678,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
|
|||||||
s->avctx->sample_aspect_ratio.den
|
s->avctx->sample_aspect_ratio.den
|
||||||
);
|
);
|
||||||
|
|
||||||
t_w = get_bits(&s->gb, 8);
|
t_w = get_bits(&s->gb, 8);
|
||||||
t_h = get_bits(&s->gb, 8);
|
t_h = get_bits(&s->gb, 8);
|
||||||
if (t_w && t_h)
|
if (t_w && t_h)
|
||||||
{
|
{
|
||||||
/* skip thumbnail */
|
/* skip thumbnail */
|
||||||
if (len-10-(t_w*t_h*3) > 0)
|
if (len-10-(t_w*t_h*3) > 0)
|
||||||
len -= t_w*t_h*3;
|
len -= t_w*t_h*3;
|
||||||
}
|
}
|
||||||
len -= 10;
|
len -= 10;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
|
if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
|
||||||
{
|
{
|
||||||
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
|
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
|
||||||
skip_bits(&s->gb, 16); /* version */
|
skip_bits(&s->gb, 16); /* version */
|
||||||
skip_bits(&s->gb, 16); /* flags0 */
|
skip_bits(&s->gb, 16); /* flags0 */
|
||||||
skip_bits(&s->gb, 16); /* flags1 */
|
skip_bits(&s->gb, 16); /* flags1 */
|
||||||
skip_bits(&s->gb, 8); /* transform */
|
skip_bits(&s->gb, 8); /* transform */
|
||||||
len -= 7;
|
len -= 7;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (id == ff_get_fourcc("LJIF")){
|
if (id == ff_get_fourcc("LJIF")){
|
||||||
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
|
av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
|
||||||
skip_bits(&s->gb, 16); /* version ? */
|
skip_bits(&s->gb, 16); /* version ? */
|
||||||
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
||||||
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
||||||
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
skip_bits(&s->gb, 16); /* unknwon always 0? */
|
||||||
switch( get_bits(&s->gb, 8)){
|
switch( get_bits(&s->gb, 8)){
|
||||||
case 1:
|
case 1:
|
||||||
s->rgb= 1;
|
s->rgb= 1;
|
||||||
@ -1728,32 +1728,32 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
|
|||||||
/* Apple MJPEG-A */
|
/* Apple MJPEG-A */
|
||||||
if ((s->start_code == APP1) && (len > (0x28 - 8)))
|
if ((s->start_code == APP1) && (len > (0x28 - 8)))
|
||||||
{
|
{
|
||||||
id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
|
id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
|
||||||
id = be2me_32(id);
|
id = be2me_32(id);
|
||||||
len -= 4;
|
len -= 4;
|
||||||
if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
|
if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
skip_bits(&s->gb, 32); /* field size */
|
skip_bits(&s->gb, 32); /* field size */
|
||||||
skip_bits(&s->gb, 32); /* pad field size */
|
skip_bits(&s->gb, 32); /* pad field size */
|
||||||
skip_bits(&s->gb, 32); /* next off */
|
skip_bits(&s->gb, 32); /* next off */
|
||||||
skip_bits(&s->gb, 32); /* quant off */
|
skip_bits(&s->gb, 32); /* quant off */
|
||||||
skip_bits(&s->gb, 32); /* huff off */
|
skip_bits(&s->gb, 32); /* huff off */
|
||||||
skip_bits(&s->gb, 32); /* image off */
|
skip_bits(&s->gb, 32); /* image off */
|
||||||
skip_bits(&s->gb, 32); /* scan off */
|
skip_bits(&s->gb, 32); /* scan off */
|
||||||
skip_bits(&s->gb, 32); /* data off */
|
skip_bits(&s->gb, 32); /* data off */
|
||||||
#endif
|
#endif
|
||||||
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
|
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
/* slow but needed for extreme adobe jpegs */
|
/* slow but needed for extreme adobe jpegs */
|
||||||
if (len < 0)
|
if (len < 0)
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
|
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
|
||||||
while(--len > 0)
|
while(--len > 0)
|
||||||
skip_bits(&s->gb, 8);
|
skip_bits(&s->gb, 8);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1762,32 +1762,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
|
|||||||
{
|
{
|
||||||
int len = get_bits(&s->gb, 16);
|
int len = get_bits(&s->gb, 16);
|
||||||
if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
|
if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
|
||||||
uint8_t *cbuf = av_malloc(len - 1);
|
uint8_t *cbuf = av_malloc(len - 1);
|
||||||
if (cbuf) {
|
if (cbuf) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < len - 2; i++)
|
for (i = 0; i < len - 2; i++)
|
||||||
cbuf[i] = get_bits(&s->gb, 8);
|
cbuf[i] = get_bits(&s->gb, 8);
|
||||||
if (i > 0 && cbuf[i-1] == '\n')
|
if (i > 0 && cbuf[i-1] == '\n')
|
||||||
cbuf[i-1] = 0;
|
cbuf[i-1] = 0;
|
||||||
else
|
else
|
||||||
cbuf[i] = 0;
|
cbuf[i] = 0;
|
||||||
|
|
||||||
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
|
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
|
av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
|
||||||
|
|
||||||
/* buggy avid, it puts EOI only at every 10th frame */
|
/* buggy avid, it puts EOI only at every 10th frame */
|
||||||
if (!strcmp(cbuf, "AVID"))
|
if (!strcmp(cbuf, "AVID"))
|
||||||
{
|
{
|
||||||
s->buggy_avid = 1;
|
s->buggy_avid = 1;
|
||||||
// if (s->first_picture)
|
// if (s->first_picture)
|
||||||
// printf("mjpeg: workarounding buggy AVID\n");
|
// printf("mjpeg: workarounding buggy AVID\n");
|
||||||
}
|
}
|
||||||
else if(!strcmp(cbuf, "CS=ITU601")){
|
else if(!strcmp(cbuf, "CS=ITU601")){
|
||||||
s->cs_itu601= 1;
|
s->cs_itu601= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
av_free(cbuf);
|
av_free(cbuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1830,13 +1830,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
|
|||||||
buf_ptr = *pbuf_ptr;
|
buf_ptr = *pbuf_ptr;
|
||||||
while (buf_ptr < buf_end) {
|
while (buf_ptr < buf_end) {
|
||||||
v = *buf_ptr++;
|
v = *buf_ptr++;
|
||||||
v2 = *buf_ptr;
|
v2 = *buf_ptr;
|
||||||
if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
|
if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
|
||||||
val = *buf_ptr++;
|
val = *buf_ptr++;
|
||||||
goto found;
|
goto found;
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
skipped++;
|
skipped++;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
val = -1;
|
val = -1;
|
||||||
@ -1862,74 +1862,74 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
|
|||||||
while (buf_ptr < buf_end) {
|
while (buf_ptr < buf_end) {
|
||||||
/* find start next marker */
|
/* find start next marker */
|
||||||
start_code = find_marker(&buf_ptr, buf_end);
|
start_code = find_marker(&buf_ptr, buf_end);
|
||||||
{
|
{
|
||||||
/* EOF */
|
/* EOF */
|
||||||
if (start_code < 0) {
|
if (start_code < 0) {
|
||||||
goto the_end;
|
goto the_end;
|
||||||
} else {
|
} else {
|
||||||
dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);
|
dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);
|
||||||
|
|
||||||
if ((buf_end - buf_ptr) > s->buffer_size)
|
if ((buf_end - buf_ptr) > s->buffer_size)
|
||||||
{
|
{
|
||||||
av_free(s->buffer);
|
av_free(s->buffer);
|
||||||
s->buffer_size = buf_end-buf_ptr;
|
s->buffer_size = buf_end-buf_ptr;
|
||||||
s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
|
s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
|
||||||
dprintf("buffer too small, expanding to %d bytes\n",
|
dprintf("buffer too small, expanding to %d bytes\n",
|
||||||
s->buffer_size);
|
s->buffer_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* unescape buffer of SOS */
|
/* unescape buffer of SOS */
|
||||||
if (start_code == SOS)
|
if (start_code == SOS)
|
||||||
{
|
{
|
||||||
uint8_t *src = buf_ptr;
|
uint8_t *src = buf_ptr;
|
||||||
uint8_t *dst = s->buffer;
|
uint8_t *dst = s->buffer;
|
||||||
|
|
||||||
while (src<buf_end)
|
while (src<buf_end)
|
||||||
{
|
{
|
||||||
uint8_t x = *(src++);
|
uint8_t x = *(src++);
|
||||||
|
|
||||||
*(dst++) = x;
|
*(dst++) = x;
|
||||||
if (x == 0xff)
|
if (x == 0xff)
|
||||||
{
|
{
|
||||||
while(src<buf_end && x == 0xff)
|
while(src<buf_end && x == 0xff)
|
||||||
x = *(src++);
|
x = *(src++);
|
||||||
|
|
||||||
if (x >= 0xd0 && x <= 0xd7)
|
if (x >= 0xd0 && x <= 0xd7)
|
||||||
*(dst++) = x;
|
*(dst++) = x;
|
||||||
else if (x)
|
else if (x)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
|
init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
|
||||||
|
|
||||||
dprintf("escaping removed %d bytes\n",
|
dprintf("escaping removed %d bytes\n",
|
||||||
(buf_end - buf_ptr) - (dst - s->buffer));
|
(buf_end - buf_ptr) - (dst - s->buffer));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
|
init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
|
||||||
|
|
||||||
s->start_code = start_code;
|
s->start_code = start_code;
|
||||||
if(s->avctx->debug & FF_DEBUG_STARTCODE){
|
if(s->avctx->debug & FF_DEBUG_STARTCODE){
|
||||||
av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
|
av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* process markers */
|
/* process markers */
|
||||||
if (start_code >= 0xd0 && start_code <= 0xd7) {
|
if (start_code >= 0xd0 && start_code <= 0xd7) {
|
||||||
dprintf("restart marker: %d\n", start_code&0x0f);
|
dprintf("restart marker: %d\n", start_code&0x0f);
|
||||||
/* APP fields */
|
/* APP fields */
|
||||||
} else if (start_code >= APP0 && start_code <= APP15) {
|
} else if (start_code >= APP0 && start_code <= APP15) {
|
||||||
mjpeg_decode_app(s);
|
mjpeg_decode_app(s);
|
||||||
/* Comment */
|
/* Comment */
|
||||||
} else if (start_code == COM){
|
} else if (start_code == COM){
|
||||||
mjpeg_decode_com(s);
|
mjpeg_decode_com(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch(start_code) {
|
switch(start_code) {
|
||||||
case SOI:
|
case SOI:
|
||||||
s->restart_interval = 0;
|
s->restart_interval = 0;
|
||||||
reset_ls_coding_parameters(s, 1);
|
reset_ls_coding_parameters(s, 1);
|
||||||
|
|
||||||
s->restart_count = 0;
|
s->restart_count = 0;
|
||||||
/* nothing to do on SOI */
|
/* nothing to do on SOI */
|
||||||
break;
|
break;
|
||||||
case DQT:
|
case DQT:
|
||||||
@ -1944,12 +1944,12 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
|
|||||||
case SOF0:
|
case SOF0:
|
||||||
s->lossless=0;
|
s->lossless=0;
|
||||||
if (mjpeg_decode_sof(s) < 0)
|
if (mjpeg_decode_sof(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
break;
|
break;
|
||||||
case SOF3:
|
case SOF3:
|
||||||
s->lossless=1;
|
s->lossless=1;
|
||||||
if (mjpeg_decode_sof(s) < 0)
|
if (mjpeg_decode_sof(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
break;
|
break;
|
||||||
case SOF48:
|
case SOF48:
|
||||||
s->lossless=1;
|
s->lossless=1;
|
||||||
@ -1961,11 +1961,11 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
|
|||||||
if (decode_lse(s) < 0)
|
if (decode_lse(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
break;
|
break;
|
||||||
case EOI:
|
case EOI:
|
||||||
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
|
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
|
||||||
break;
|
break;
|
||||||
eoi_parser:
|
eoi_parser:
|
||||||
{
|
{
|
||||||
if (s->interlaced) {
|
if (s->interlaced) {
|
||||||
s->bottom_field ^= 1;
|
s->bottom_field ^= 1;
|
||||||
/* if not bottom field, do not output image yet */
|
/* if not bottom field, do not output image yet */
|
||||||
@ -1987,41 +1987,41 @@ eoi_parser:
|
|||||||
|
|
||||||
goto the_end;
|
goto the_end;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SOS:
|
case SOS:
|
||||||
mjpeg_decode_sos(s);
|
mjpeg_decode_sos(s);
|
||||||
/* buggy avid puts EOI every 10-20th frame */
|
/* buggy avid puts EOI every 10-20th frame */
|
||||||
/* if restart period is over process EOI */
|
/* if restart period is over process EOI */
|
||||||
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
|
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
|
||||||
goto eoi_parser;
|
goto eoi_parser;
|
||||||
break;
|
break;
|
||||||
case DRI:
|
case DRI:
|
||||||
mjpeg_decode_dri(s);
|
mjpeg_decode_dri(s);
|
||||||
break;
|
break;
|
||||||
case SOF1:
|
case SOF1:
|
||||||
case SOF2:
|
case SOF2:
|
||||||
case SOF5:
|
case SOF5:
|
||||||
case SOF6:
|
case SOF6:
|
||||||
case SOF7:
|
case SOF7:
|
||||||
case SOF9:
|
case SOF9:
|
||||||
case SOF10:
|
case SOF10:
|
||||||
case SOF11:
|
case SOF11:
|
||||||
case SOF13:
|
case SOF13:
|
||||||
case SOF14:
|
case SOF14:
|
||||||
case SOF15:
|
case SOF15:
|
||||||
case JPG:
|
case JPG:
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
|
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
|
||||||
break;
|
break;
|
||||||
// default:
|
// default:
|
||||||
// printf("mjpeg: unsupported marker (%x)\n", start_code);
|
// printf("mjpeg: unsupported marker (%x)\n", start_code);
|
||||||
// break;
|
// break;
|
||||||
}
|
}
|
||||||
|
|
||||||
not_the_end:
|
not_the_end:
|
||||||
/* eof process start code */
|
/* eof process start code */
|
||||||
buf_ptr += (get_bits_count(&s->gb)+7)/8;
|
buf_ptr += (get_bits_count(&s->gb)+7)/8;
|
||||||
dprintf("marker parser used %d bytes (%d bits)\n",
|
dprintf("marker parser used %d bytes (%d bits)\n",
|
||||||
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
|
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2057,8 +2057,8 @@ read_header:
|
|||||||
|
|
||||||
if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
|
if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
|
||||||
{
|
{
|
||||||
dprintf("not mjpeg-b (bad fourcc)\n");
|
dprintf("not mjpeg-b (bad fourcc)\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
field_size = get_bits_long(&hgb, 32); /* field size */
|
field_size = get_bits_long(&hgb, 32); /* field size */
|
||||||
@ -2067,34 +2067,34 @@ read_header:
|
|||||||
second_field_offs = get_bits_long(&hgb, 32);
|
second_field_offs = get_bits_long(&hgb, 32);
|
||||||
dprintf("second field offs: 0x%x\n", second_field_offs);
|
dprintf("second field offs: 0x%x\n", second_field_offs);
|
||||||
if (second_field_offs)
|
if (second_field_offs)
|
||||||
s->interlaced = 1;
|
s->interlaced = 1;
|
||||||
|
|
||||||
dqt_offs = get_bits_long(&hgb, 32);
|
dqt_offs = get_bits_long(&hgb, 32);
|
||||||
dprintf("dqt offs: 0x%x\n", dqt_offs);
|
dprintf("dqt offs: 0x%x\n", dqt_offs);
|
||||||
if (dqt_offs)
|
if (dqt_offs)
|
||||||
{
|
{
|
||||||
init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
|
init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
|
||||||
s->start_code = DQT;
|
s->start_code = DQT;
|
||||||
mjpeg_decode_dqt(s);
|
mjpeg_decode_dqt(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
dht_offs = get_bits_long(&hgb, 32);
|
dht_offs = get_bits_long(&hgb, 32);
|
||||||
dprintf("dht offs: 0x%x\n", dht_offs);
|
dprintf("dht offs: 0x%x\n", dht_offs);
|
||||||
if (dht_offs)
|
if (dht_offs)
|
||||||
{
|
{
|
||||||
init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
|
init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
|
||||||
s->start_code = DHT;
|
s->start_code = DHT;
|
||||||
mjpeg_decode_dht(s);
|
mjpeg_decode_dht(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
sof_offs = get_bits_long(&hgb, 32);
|
sof_offs = get_bits_long(&hgb, 32);
|
||||||
dprintf("sof offs: 0x%x\n", sof_offs);
|
dprintf("sof offs: 0x%x\n", sof_offs);
|
||||||
if (sof_offs)
|
if (sof_offs)
|
||||||
{
|
{
|
||||||
init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
|
init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
|
||||||
s->start_code = SOF0;
|
s->start_code = SOF0;
|
||||||
if (mjpeg_decode_sof(s) < 0)
|
if (mjpeg_decode_sof(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sos_offs = get_bits_long(&hgb, 32);
|
sos_offs = get_bits_long(&hgb, 32);
|
||||||
@ -2103,22 +2103,22 @@ read_header:
|
|||||||
dprintf("sod offs: 0x%x\n", sod_offs);
|
dprintf("sod offs: 0x%x\n", sod_offs);
|
||||||
if (sos_offs)
|
if (sos_offs)
|
||||||
{
|
{
|
||||||
// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
|
// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
|
||||||
init_get_bits(&s->gb, buf+sos_offs, field_size*8);
|
init_get_bits(&s->gb, buf+sos_offs, field_size*8);
|
||||||
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
|
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
|
||||||
s->start_code = SOS;
|
s->start_code = SOS;
|
||||||
mjpeg_decode_sos(s);
|
mjpeg_decode_sos(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s->interlaced) {
|
if (s->interlaced) {
|
||||||
s->bottom_field ^= 1;
|
s->bottom_field ^= 1;
|
||||||
/* if not bottom field, do not output image yet */
|
/* if not bottom field, do not output image yet */
|
||||||
if (s->bottom_field && second_field_offs)
|
if (s->bottom_field && second_field_offs)
|
||||||
{
|
{
|
||||||
buf_ptr = buf + second_field_offs;
|
buf_ptr = buf + second_field_offs;
|
||||||
second_field_offs = 0;
|
second_field_offs = 0;
|
||||||
goto read_header;
|
goto read_header;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//XXX FIXME factorize, this looks very similar to the EOI code
|
//XXX FIXME factorize, this looks very similar to the EOI code
|
||||||
@ -2153,7 +2153,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
|
|||||||
int i = 0, j = 0;
|
int i = 0, j = 0;
|
||||||
|
|
||||||
if (!avctx->width || !avctx->height)
|
if (!avctx->width || !avctx->height)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
buf_ptr = buf;
|
buf_ptr = buf;
|
||||||
buf_end = buf + buf_size;
|
buf_end = buf + buf_size;
|
||||||
@ -2161,7 +2161,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
|
|||||||
#if 1
|
#if 1
|
||||||
recoded = av_mallocz(buf_size + 1024);
|
recoded = av_mallocz(buf_size + 1024);
|
||||||
if (!recoded)
|
if (!recoded)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/* SOI */
|
/* SOI */
|
||||||
recoded[j++] = 0xFF;
|
recoded[j++] = 0xFF;
|
||||||
@ -2187,9 +2187,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
|
for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
|
||||||
{
|
{
|
||||||
recoded[j++] = buf[i];
|
recoded[j++] = buf[i];
|
||||||
if (buf[i] == 0xff)
|
if (buf[i] == 0xff)
|
||||||
recoded[j++] = 0;
|
recoded[j++] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* EOI */
|
/* EOI */
|
||||||
@ -2229,33 +2229,33 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
|
|||||||
if (avctx->get_buffer(avctx, &s->picture) < 0)
|
if (avctx->get_buffer(avctx, &s->picture) < 0)
|
||||||
{
|
{
|
||||||
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->picture.pict_type = I_TYPE;
|
s->picture.pict_type = I_TYPE;
|
||||||
s->picture.key_frame = 1;
|
s->picture.key_frame = 1;
|
||||||
|
|
||||||
for (i = 0; i < 3; i++)
|
for (i = 0; i < 3; i++)
|
||||||
s->linesize[i] = s->picture.linesize[i] << s->interlaced;
|
s->linesize[i] = s->picture.linesize[i] << s->interlaced;
|
||||||
|
|
||||||
/* DQT */
|
/* DQT */
|
||||||
for (i = 0; i < 64; i++)
|
for (i = 0; i < 64; i++)
|
||||||
{
|
{
|
||||||
j = s->scantable.permutated[i];
|
j = s->scantable.permutated[i];
|
||||||
s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
|
s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
|
||||||
}
|
}
|
||||||
s->qscale[0] = FFMAX(
|
s->qscale[0] = FFMAX(
|
||||||
s->quant_matrixes[0][s->scantable.permutated[1]],
|
s->quant_matrixes[0][s->scantable.permutated[1]],
|
||||||
s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
|
s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
|
||||||
|
|
||||||
for (i = 0; i < 64; i++)
|
for (i = 0; i < 64; i++)
|
||||||
{
|
{
|
||||||
j = s->scantable.permutated[i];
|
j = s->scantable.permutated[i];
|
||||||
s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
|
s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
|
||||||
}
|
}
|
||||||
s->qscale[1] = FFMAX(
|
s->qscale[1] = FFMAX(
|
||||||
s->quant_matrixes[1][s->scantable.permutated[1]],
|
s->quant_matrixes[1][s->scantable.permutated[1]],
|
||||||
s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
|
s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
|
||||||
|
|
||||||
/* DHT */
|
/* DHT */
|
||||||
|
|
||||||
@ -2282,7 +2282,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
|
|||||||
s->ac_index[2] = 1;
|
s->ac_index[2] = 1;
|
||||||
|
|
||||||
for (i = 0; i < 3; i++)
|
for (i = 0; i < 3; i++)
|
||||||
s->last_dc[i] = 1024;
|
s->last_dc[i] = 1024;
|
||||||
|
|
||||||
s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
|
s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
|
||||||
s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
|
s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
|
||||||
|
@ -61,7 +61,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l
|
|||||||
/* put block, width 16 pixel, height 8/16 */
|
/* put block, width 16 pixel, height 8/16 */
|
||||||
|
|
||||||
static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -78,7 +78,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -95,7 +95,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -112,7 +112,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -131,7 +131,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
|||||||
/* put block, width 8 pixel, height 4/8/16 */
|
/* put block, width 8 pixel, height 4/8/16 */
|
||||||
|
|
||||||
static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -152,7 +152,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -173,7 +173,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -194,7 +194,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -217,7 +217,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
|||||||
/* average block, width 16 pixel, height 8/16 */
|
/* average block, width 16 pixel, height 8/16 */
|
||||||
|
|
||||||
static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -234,7 +234,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -251,7 +251,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -268,7 +268,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 8:
|
case 8:
|
||||||
@ -287,7 +287,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
|||||||
/* average block, width 8 pixel, height 4/8/16 */
|
/* average block, width 8 pixel, height 4/8/16 */
|
||||||
|
|
||||||
static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -308,7 +308,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -329,7 +329,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -350,7 +350,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
|
||||||
int stride, int height)
|
int stride, int height)
|
||||||
{
|
{
|
||||||
switch (height) {
|
switch (height) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -450,7 +450,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
|
|||||||
void MPV_common_init_mlib(MpegEncContext *s)
|
void MPV_common_init_mlib(MpegEncContext *s)
|
||||||
{
|
{
|
||||||
if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
|
if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
|
||||||
s->dsp.fdct = ff_fdct_mlib;
|
s->dsp.fdct = ff_fdct_mlib;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
|
if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
#define P_MV1 P[9]
|
#define P_MV1 P[9]
|
||||||
|
|
||||||
static inline int sad_hpel_motion_search(MpegEncContext * s,
|
static inline int sad_hpel_motion_search(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
int src_index, int ref_index,
|
int src_index, int ref_index,
|
||||||
int size, int h);
|
int size, int h);
|
||||||
|
|
||||||
@ -293,25 +293,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean)
|
|||||||
|
|
||||||
s = 0;
|
s = 0;
|
||||||
for (i = 0; i < 16; i++) {
|
for (i = 0; i < 16; i++) {
|
||||||
for (j = 0; j < 16; j += 8) {
|
for (j = 0; j < 16; j += 8) {
|
||||||
s += ABS(pix[0]-mean);
|
s += ABS(pix[0]-mean);
|
||||||
s += ABS(pix[1]-mean);
|
s += ABS(pix[1]-mean);
|
||||||
s += ABS(pix[2]-mean);
|
s += ABS(pix[2]-mean);
|
||||||
s += ABS(pix[3]-mean);
|
s += ABS(pix[3]-mean);
|
||||||
s += ABS(pix[4]-mean);
|
s += ABS(pix[4]-mean);
|
||||||
s += ABS(pix[5]-mean);
|
s += ABS(pix[5]-mean);
|
||||||
s += ABS(pix[6]-mean);
|
s += ABS(pix[6]-mean);
|
||||||
s += ABS(pix[7]-mean);
|
s += ABS(pix[7]-mean);
|
||||||
pix += 8;
|
pix += 8;
|
||||||
}
|
}
|
||||||
pix += line_size - 16;
|
pix += line_size - 16;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void no_motion_search(MpegEncContext * s,
|
static inline void no_motion_search(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr)
|
int *mx_ptr, int *my_ptr)
|
||||||
{
|
{
|
||||||
*mx_ptr = 16 * s->mb_x;
|
*mx_ptr = 16 * s->mb_x;
|
||||||
*my_ptr = 16 * s->mb_y;
|
*my_ptr = 16 * s->mb_y;
|
||||||
@ -328,35 +328,35 @@ static int full_motion_search(MpegEncContext * s,
|
|||||||
|
|
||||||
xx = 16 * s->mb_x;
|
xx = 16 * s->mb_x;
|
||||||
yy = 16 * s->mb_y;
|
yy = 16 * s->mb_y;
|
||||||
x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
|
x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
|
||||||
if (x1 < xmin)
|
if (x1 < xmin)
|
||||||
x1 = xmin;
|
x1 = xmin;
|
||||||
x2 = xx + range - 1;
|
x2 = xx + range - 1;
|
||||||
if (x2 > xmax)
|
if (x2 > xmax)
|
||||||
x2 = xmax;
|
x2 = xmax;
|
||||||
y1 = yy - range + 1;
|
y1 = yy - range + 1;
|
||||||
if (y1 < ymin)
|
if (y1 < ymin)
|
||||||
y1 = ymin;
|
y1 = ymin;
|
||||||
y2 = yy + range - 1;
|
y2 = yy + range - 1;
|
||||||
if (y2 > ymax)
|
if (y2 > ymax)
|
||||||
y2 = ymax;
|
y2 = ymax;
|
||||||
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
||||||
dmin = 0x7fffffff;
|
dmin = 0x7fffffff;
|
||||||
mx = 0;
|
mx = 0;
|
||||||
my = 0;
|
my = 0;
|
||||||
for (y = y1; y <= y2; y++) {
|
for (y = y1; y <= y2; y++) {
|
||||||
for (x = x1; x <= x2; x++) {
|
for (x = x1; x <= x2; x++) {
|
||||||
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
|
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
|
||||||
s->linesize, 16);
|
s->linesize, 16);
|
||||||
if (d < dmin ||
|
if (d < dmin ||
|
||||||
(d == dmin &&
|
(d == dmin &&
|
||||||
(abs(x - xx) + abs(y - yy)) <
|
(abs(x - xx) + abs(y - yy)) <
|
||||||
(abs(mx - xx) + abs(my - yy)))) {
|
(abs(mx - xx) + abs(my - yy)))) {
|
||||||
dmin = d;
|
dmin = d;
|
||||||
mx = x;
|
mx = x;
|
||||||
my = y;
|
my = y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*mx_ptr = mx;
|
*mx_ptr = mx;
|
||||||
@ -364,8 +364,8 @@ static int full_motion_search(MpegEncContext * s,
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
|
if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
|
||||||
*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
|
*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
|
||||||
fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
|
fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return dmin;
|
return dmin;
|
||||||
@ -386,22 +386,22 @@ static int log_motion_search(MpegEncContext * s,
|
|||||||
/* Left limit */
|
/* Left limit */
|
||||||
x1 = xx - range;
|
x1 = xx - range;
|
||||||
if (x1 < xmin)
|
if (x1 < xmin)
|
||||||
x1 = xmin;
|
x1 = xmin;
|
||||||
|
|
||||||
/* Right limit */
|
/* Right limit */
|
||||||
x2 = xx + range;
|
x2 = xx + range;
|
||||||
if (x2 > xmax)
|
if (x2 > xmax)
|
||||||
x2 = xmax;
|
x2 = xmax;
|
||||||
|
|
||||||
/* Upper limit */
|
/* Upper limit */
|
||||||
y1 = yy - range;
|
y1 = yy - range;
|
||||||
if (y1 < ymin)
|
if (y1 < ymin)
|
||||||
y1 = ymin;
|
y1 = ymin;
|
||||||
|
|
||||||
/* Lower limit */
|
/* Lower limit */
|
||||||
y2 = yy + range;
|
y2 = yy + range;
|
||||||
if (y2 > ymax)
|
if (y2 > ymax)
|
||||||
y2 = ymax;
|
y2 = ymax;
|
||||||
|
|
||||||
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
||||||
dmin = 0x7fffffff;
|
dmin = 0x7fffffff;
|
||||||
@ -409,34 +409,34 @@ static int log_motion_search(MpegEncContext * s,
|
|||||||
my = 0;
|
my = 0;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
for (y = y1; y <= y2; y += range) {
|
for (y = y1; y <= y2; y += range) {
|
||||||
for (x = x1; x <= x2; x += range) {
|
for (x = x1; x <= x2; x += range) {
|
||||||
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
||||||
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||||
dmin = d;
|
dmin = d;
|
||||||
mx = x;
|
mx = x;
|
||||||
my = y;
|
my = y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
range = range >> 1;
|
range = range >> 1;
|
||||||
|
|
||||||
x1 = mx - range;
|
x1 = mx - range;
|
||||||
if (x1 < xmin)
|
if (x1 < xmin)
|
||||||
x1 = xmin;
|
x1 = xmin;
|
||||||
|
|
||||||
x2 = mx + range;
|
x2 = mx + range;
|
||||||
if (x2 > xmax)
|
if (x2 > xmax)
|
||||||
x2 = xmax;
|
x2 = xmax;
|
||||||
|
|
||||||
y1 = my - range;
|
y1 = my - range;
|
||||||
if (y1 < ymin)
|
if (y1 < ymin)
|
||||||
y1 = ymin;
|
y1 = ymin;
|
||||||
|
|
||||||
y2 = my + range;
|
y2 = my + range;
|
||||||
if (y2 > ymax)
|
if (y2 > ymax)
|
||||||
y2 = ymax;
|
y2 = ymax;
|
||||||
|
|
||||||
} while (range >= 1);
|
} while (range >= 1);
|
||||||
|
|
||||||
@ -462,22 +462,22 @@ static int phods_motion_search(MpegEncContext * s,
|
|||||||
/* Left limit */
|
/* Left limit */
|
||||||
x1 = xx - range;
|
x1 = xx - range;
|
||||||
if (x1 < xmin)
|
if (x1 < xmin)
|
||||||
x1 = xmin;
|
x1 = xmin;
|
||||||
|
|
||||||
/* Right limit */
|
/* Right limit */
|
||||||
x2 = xx + range;
|
x2 = xx + range;
|
||||||
if (x2 > xmax)
|
if (x2 > xmax)
|
||||||
x2 = xmax;
|
x2 = xmax;
|
||||||
|
|
||||||
/* Upper limit */
|
/* Upper limit */
|
||||||
y1 = yy - range;
|
y1 = yy - range;
|
||||||
if (y1 < ymin)
|
if (y1 < ymin)
|
||||||
y1 = ymin;
|
y1 = ymin;
|
||||||
|
|
||||||
/* Lower limit */
|
/* Lower limit */
|
||||||
y2 = yy + range;
|
y2 = yy + range;
|
||||||
if (y2 > ymax)
|
if (y2 > ymax)
|
||||||
y2 = ymax;
|
y2 = ymax;
|
||||||
|
|
||||||
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
|
||||||
mx = 0;
|
mx = 0;
|
||||||
@ -489,43 +489,43 @@ static int phods_motion_search(MpegEncContext * s,
|
|||||||
dminx = 0x7fffffff;
|
dminx = 0x7fffffff;
|
||||||
dminy = 0x7fffffff;
|
dminy = 0x7fffffff;
|
||||||
|
|
||||||
lastx = x;
|
lastx = x;
|
||||||
for (x = x1; x <= x2; x += range) {
|
for (x = x1; x <= x2; x += range) {
|
||||||
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
||||||
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||||
dminx = d;
|
dminx = d;
|
||||||
mx = x;
|
mx = x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
x = lastx;
|
x = lastx;
|
||||||
for (y = y1; y <= y2; y += range) {
|
for (y = y1; y <= y2; y += range) {
|
||||||
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
||||||
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||||
dminy = d;
|
dminy = d;
|
||||||
my = y;
|
my = y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
range = range >> 1;
|
range = range >> 1;
|
||||||
|
|
||||||
x = mx;
|
x = mx;
|
||||||
y = my;
|
y = my;
|
||||||
x1 = mx - range;
|
x1 = mx - range;
|
||||||
if (x1 < xmin)
|
if (x1 < xmin)
|
||||||
x1 = xmin;
|
x1 = xmin;
|
||||||
|
|
||||||
x2 = mx + range;
|
x2 = mx + range;
|
||||||
if (x2 > xmax)
|
if (x2 > xmax)
|
||||||
x2 = xmax;
|
x2 = xmax;
|
||||||
|
|
||||||
y1 = my - range;
|
y1 = my - range;
|
||||||
if (y1 < ymin)
|
if (y1 < ymin)
|
||||||
y1 = ymin;
|
y1 = ymin;
|
||||||
|
|
||||||
y2 = my + range;
|
y2 = my + range;
|
||||||
if (y2 > ymax)
|
if (y2 > ymax)
|
||||||
y2 = ymax;
|
y2 = ymax;
|
||||||
|
|
||||||
} while (range >= 1);
|
} while (range >= 1);
|
||||||
|
|
||||||
@ -550,7 +550,7 @@ static int phods_motion_search(MpegEncContext * s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int sad_hpel_motion_search(MpegEncContext * s,
|
static inline int sad_hpel_motion_search(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
int src_index, int ref_index,
|
int src_index, int ref_index,
|
||||||
int size, int h)
|
int size, int h)
|
||||||
{
|
{
|
||||||
@ -1190,24 +1190,24 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
|||||||
switch(s->me_method) {
|
switch(s->me_method) {
|
||||||
case ME_ZERO:
|
case ME_ZERO:
|
||||||
default:
|
default:
|
||||||
no_motion_search(s, &mx, &my);
|
no_motion_search(s, &mx, &my);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
dmin = 0;
|
dmin = 0;
|
||||||
break;
|
break;
|
||||||
#if 0
|
#if 0
|
||||||
case ME_FULL:
|
case ME_FULL:
|
||||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
case ME_LOG:
|
case ME_LOG:
|
||||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
case ME_PHODS:
|
case ME_PHODS:
|
||||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
@ -1264,7 +1264,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
|
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
|
||||||
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
|
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
|
||||||
#endif
|
#endif
|
||||||
if(mb_type){
|
if(mb_type){
|
||||||
if (vard <= 64 || vard < varc)
|
if (vard <= 64 || vard < varc)
|
||||||
@ -1479,24 +1479,24 @@ static int ff_estimate_motion_b(MpegEncContext * s,
|
|||||||
switch(s->me_method) {
|
switch(s->me_method) {
|
||||||
case ME_ZERO:
|
case ME_ZERO:
|
||||||
default:
|
default:
|
||||||
no_motion_search(s, &mx, &my);
|
no_motion_search(s, &mx, &my);
|
||||||
dmin = 0;
|
dmin = 0;
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
#if 0
|
#if 0
|
||||||
case ME_FULL:
|
case ME_FULL:
|
||||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
case ME_LOG:
|
case ME_LOG:
|
||||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
case ME_PHODS:
|
case ME_PHODS:
|
||||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||||
mx-= mb_x*16;
|
mx-= mb_x*16;
|
||||||
my-= mb_y*16;
|
my-= mb_y*16;
|
||||||
break;
|
break;
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static int hpel_motion_search)(MpegEncContext * s,
|
static int hpel_motion_search)(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
uint8_t *ref_data[3],
|
uint8_t *ref_data[3],
|
||||||
int size)
|
int size)
|
||||||
{
|
{
|
||||||
@ -113,7 +113,7 @@ static int hpel_motion_search)(MpegEncContext * s,
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
static int hpel_motion_search(MpegEncContext * s,
|
static int hpel_motion_search(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
int src_index, int ref_index,
|
int src_index, int ref_index,
|
||||||
int size, int h)
|
int size, int h)
|
||||||
{
|
{
|
||||||
@ -271,7 +271,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int qpel_motion_search(MpegEncContext * s,
|
static int qpel_motion_search(MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
int src_index, int ref_index,
|
int src_index, int ref_index,
|
||||||
int size, int h)
|
int size, int h)
|
||||||
{
|
{
|
||||||
@ -1005,7 +1005,7 @@ static int epzs_motion_search4(MpegEncContext * s,
|
|||||||
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
||||||
/* first line */
|
/* first line */
|
||||||
if (s->first_slice_line) {
|
if (s->first_slice_line) {
|
||||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||||
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
||||||
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
||||||
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
||||||
@ -1067,7 +1067,7 @@ static int epzs_motion_search2(MpegEncContext * s,
|
|||||||
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
||||||
/* first line */
|
/* first line */
|
||||||
if (s->first_slice_line) {
|
if (s->first_slice_line) {
|
||||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||||
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
||||||
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
||||||
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
||||||
|
@ -28,51 +28,51 @@
|
|||||||
|
|
||||||
#define BUFFER_SIZE (2*MPA_FRAME_SIZE)
|
#define BUFFER_SIZE (2*MPA_FRAME_SIZE)
|
||||||
typedef struct Mp3AudioContext {
|
typedef struct Mp3AudioContext {
|
||||||
lame_global_flags *gfp;
|
lame_global_flags *gfp;
|
||||||
int stereo;
|
int stereo;
|
||||||
uint8_t buffer[BUFFER_SIZE];
|
uint8_t buffer[BUFFER_SIZE];
|
||||||
int buffer_index;
|
int buffer_index;
|
||||||
} Mp3AudioContext;
|
} Mp3AudioContext;
|
||||||
|
|
||||||
static int MP3lame_encode_init(AVCodecContext *avctx)
|
static int MP3lame_encode_init(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
Mp3AudioContext *s = avctx->priv_data;
|
Mp3AudioContext *s = avctx->priv_data;
|
||||||
|
|
||||||
if (avctx->channels > 2)
|
if (avctx->channels > 2)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
s->stereo = avctx->channels > 1 ? 1 : 0;
|
s->stereo = avctx->channels > 1 ? 1 : 0;
|
||||||
|
|
||||||
if ((s->gfp = lame_init()) == NULL)
|
if ((s->gfp = lame_init()) == NULL)
|
||||||
goto err;
|
goto err;
|
||||||
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
|
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
|
||||||
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
|
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
|
||||||
lame_set_num_channels(s->gfp, avctx->channels);
|
lame_set_num_channels(s->gfp, avctx->channels);
|
||||||
/* lame 3.91 dies on quality != 5 */
|
/* lame 3.91 dies on quality != 5 */
|
||||||
lame_set_quality(s->gfp, 5);
|
lame_set_quality(s->gfp, 5);
|
||||||
/* lame 3.91 doesn't work in mono */
|
/* lame 3.91 doesn't work in mono */
|
||||||
lame_set_mode(s->gfp, JOINT_STEREO);
|
lame_set_mode(s->gfp, JOINT_STEREO);
|
||||||
lame_set_brate(s->gfp, avctx->bit_rate/1000);
|
lame_set_brate(s->gfp, avctx->bit_rate/1000);
|
||||||
if(avctx->flags & CODEC_FLAG_QSCALE) {
|
if(avctx->flags & CODEC_FLAG_QSCALE) {
|
||||||
lame_set_brate(s->gfp, 0);
|
lame_set_brate(s->gfp, 0);
|
||||||
lame_set_VBR(s->gfp, vbr_default);
|
lame_set_VBR(s->gfp, vbr_default);
|
||||||
lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
|
lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
|
||||||
}
|
}
|
||||||
lame_set_bWriteVbrTag(s->gfp,0);
|
lame_set_bWriteVbrTag(s->gfp,0);
|
||||||
if (lame_init_params(s->gfp) < 0)
|
if (lame_init_params(s->gfp) < 0)
|
||||||
goto err_close;
|
goto err_close;
|
||||||
|
|
||||||
avctx->frame_size = lame_get_framesize(s->gfp);
|
avctx->frame_size = lame_get_framesize(s->gfp);
|
||||||
|
|
||||||
avctx->coded_frame= avcodec_alloc_frame();
|
avctx->coded_frame= avcodec_alloc_frame();
|
||||||
avctx->coded_frame->key_frame= 1;
|
avctx->coded_frame->key_frame= 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
err_close:
|
err_close:
|
||||||
lame_close(s->gfp);
|
lame_close(s->gfp);
|
||||||
err:
|
err:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const int sSampleRates[3] = {
|
static const int sSampleRates[3] = {
|
||||||
@ -136,11 +136,11 @@ static int mp3len(void *data, int *samplesPerFrame, int *sampleRate)
|
|||||||
int MP3lame_encode_frame(AVCodecContext *avctx,
|
int MP3lame_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame, int buf_size, void *data)
|
unsigned char *frame, int buf_size, void *data)
|
||||||
{
|
{
|
||||||
Mp3AudioContext *s = avctx->priv_data;
|
Mp3AudioContext *s = avctx->priv_data;
|
||||||
int len;
|
int len;
|
||||||
int lame_result;
|
int lame_result;
|
||||||
|
|
||||||
/* lame 3.91 dies on '1-channel interleaved' data */
|
/* lame 3.91 dies on '1-channel interleaved' data */
|
||||||
|
|
||||||
if(data){
|
if(data){
|
||||||
if (s->stereo) {
|
if (s->stereo) {
|
||||||
@ -198,12 +198,12 @@ int MP3lame_encode_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
int MP3lame_encode_close(AVCodecContext *avctx)
|
int MP3lame_encode_close(AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
Mp3AudioContext *s = avctx->priv_data;
|
Mp3AudioContext *s = avctx->priv_data;
|
||||||
|
|
||||||
av_freep(&avctx->coded_frame);
|
av_freep(&avctx->coded_frame);
|
||||||
|
|
||||||
lame_close(s->gfp);
|
lame_close(s->gfp);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,14 +35,14 @@
|
|||||||
|
|
||||||
|
|
||||||
/* Start codes. */
|
/* Start codes. */
|
||||||
#define SEQ_END_CODE 0x000001b7
|
#define SEQ_END_CODE 0x000001b7
|
||||||
#define SEQ_START_CODE 0x000001b3
|
#define SEQ_START_CODE 0x000001b3
|
||||||
#define GOP_START_CODE 0x000001b8
|
#define GOP_START_CODE 0x000001b8
|
||||||
#define PICTURE_START_CODE 0x00000100
|
#define PICTURE_START_CODE 0x00000100
|
||||||
#define SLICE_MIN_START_CODE 0x00000101
|
#define SLICE_MIN_START_CODE 0x00000101
|
||||||
#define SLICE_MAX_START_CODE 0x000001af
|
#define SLICE_MAX_START_CODE 0x000001af
|
||||||
#define EXT_START_CODE 0x000001b5
|
#define EXT_START_CODE 0x000001b5
|
||||||
#define USER_START_CODE 0x000001b2
|
#define USER_START_CODE 0x000001b2
|
||||||
|
|
||||||
#define DC_VLC_BITS 9
|
#define DC_VLC_BITS 9
|
||||||
#define MV_VLC_BITS 9
|
#define MV_VLC_BITS 9
|
||||||
@ -89,7 +89,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1};
|
|||||||
const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
|
const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
|
||||||
PIX_FMT_XVMC_MPEG2_IDCT,
|
PIX_FMT_XVMC_MPEG2_IDCT,
|
||||||
PIX_FMT_XVMC_MPEG2_MC,
|
PIX_FMT_XVMC_MPEG2_MC,
|
||||||
-1};
|
-1};
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
|
static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
|
||||||
static uint8_t fcode_tab[MAX_MV*2+1];
|
static uint8_t fcode_tab[MAX_MV*2+1];
|
||||||
@ -166,7 +166,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni
|
|||||||
code= rl->index_run[0][run] + alevel - 1;
|
code= rl->index_run[0][run] + alevel - 1;
|
||||||
|
|
||||||
if (code < 111 /* rl->n */) {
|
if (code < 111 /* rl->n */) {
|
||||||
/* store the vlc & sign at once */
|
/* store the vlc & sign at once */
|
||||||
len= mpeg1_vlc[code][1]+1;
|
len= mpeg1_vlc[code][1]+1;
|
||||||
bits= (mpeg1_vlc[code][0]<<1) + sign;
|
bits= (mpeg1_vlc[code][0]<<1) + sign;
|
||||||
} else {
|
} else {
|
||||||
@ -764,38 +764,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
|
|||||||
if(!done){
|
if(!done){
|
||||||
int f_code;
|
int f_code;
|
||||||
int mv;
|
int mv;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
done=1;
|
done=1;
|
||||||
init_rl(&rl_mpeg1, 1);
|
init_rl(&rl_mpeg1, 1);
|
||||||
|
|
||||||
for(i=0; i<64; i++)
|
for(i=0; i<64; i++)
|
||||||
{
|
{
|
||||||
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
|
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
|
||||||
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
|
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
|
||||||
}
|
}
|
||||||
|
|
||||||
init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len);
|
init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len);
|
||||||
|
|
||||||
/* build unified dc encoding tables */
|
/* build unified dc encoding tables */
|
||||||
for(i=-255; i<256; i++)
|
for(i=-255; i<256; i++)
|
||||||
{
|
{
|
||||||
int adiff, index;
|
int adiff, index;
|
||||||
int bits, code;
|
int bits, code;
|
||||||
int diff=i;
|
int diff=i;
|
||||||
|
|
||||||
adiff = ABS(diff);
|
adiff = ABS(diff);
|
||||||
if(diff<0) diff--;
|
if(diff<0) diff--;
|
||||||
index = av_log2(2*adiff);
|
index = av_log2(2*adiff);
|
||||||
|
|
||||||
bits= vlc_dc_lum_bits[index] + index;
|
bits= vlc_dc_lum_bits[index] + index;
|
||||||
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
|
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||||
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
|
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
|
||||||
|
|
||||||
bits= vlc_dc_chroma_bits[index] + index;
|
bits= vlc_dc_chroma_bits[index] + index;
|
||||||
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
|
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||||
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
|
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
|
||||||
}
|
}
|
||||||
|
|
||||||
mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
|
mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
|
||||||
|
|
||||||
@ -873,14 +873,14 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component)
|
|||||||
}else{
|
}else{
|
||||||
if (component == 0) {
|
if (component == 0) {
|
||||||
put_bits(
|
put_bits(
|
||||||
&s->pb,
|
&s->pb,
|
||||||
mpeg1_lum_dc_uni[diff+255]&0xFF,
|
mpeg1_lum_dc_uni[diff+255]&0xFF,
|
||||||
mpeg1_lum_dc_uni[diff+255]>>8);
|
mpeg1_lum_dc_uni[diff+255]>>8);
|
||||||
} else {
|
} else {
|
||||||
put_bits(
|
put_bits(
|
||||||
&s->pb,
|
&s->pb,
|
||||||
mpeg1_chr_dc_uni[diff+255]&0xFF,
|
mpeg1_chr_dc_uni[diff+255]&0xFF,
|
||||||
mpeg1_chr_dc_uni[diff+255]>>8);
|
mpeg1_chr_dc_uni[diff+255]>>8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -946,10 +946,10 @@ static void mpeg1_encode_block(MpegEncContext *s,
|
|||||||
// code = get_rl_index(rl, 0, run, alevel);
|
// code = get_rl_index(rl, 0, run, alevel);
|
||||||
if (alevel <= mpeg1_max_level[0][run]){
|
if (alevel <= mpeg1_max_level[0][run]){
|
||||||
code= mpeg1_index_run[0][run] + alevel - 1;
|
code= mpeg1_index_run[0][run] + alevel - 1;
|
||||||
/* store the vlc & sign at once */
|
/* store the vlc & sign at once */
|
||||||
put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign);
|
put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign);
|
||||||
} else {
|
} else {
|
||||||
/* escape seems to be pretty rare <5% so i dont optimize it */
|
/* escape seems to be pretty rare <5% so i dont optimize it */
|
||||||
put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]);
|
put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]);
|
||||||
/* escape: only clip in this case */
|
/* escape: only clip in this case */
|
||||||
put_bits(&s->pb, 6, run);
|
put_bits(&s->pb, 6, run);
|
||||||
@ -1376,8 +1376,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if(mb_block_count > 6){
|
if(mb_block_count > 6){
|
||||||
cbp<<= mb_block_count-6;
|
cbp<<= mb_block_count-6;
|
||||||
cbp |= get_bits(&s->gb, mb_block_count-6);
|
cbp |= get_bits(&s->gb, mb_block_count-6);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_XVMC
|
#ifdef HAVE_XVMC
|
||||||
@ -2074,7 +2074,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
|||||||
uint8_t old_permutation[64];
|
uint8_t old_permutation[64];
|
||||||
|
|
||||||
if (
|
if (
|
||||||
(s1->mpeg_enc_ctx_allocated == 0)||
|
(s1->mpeg_enc_ctx_allocated == 0)||
|
||||||
avctx->coded_width != s->width ||
|
avctx->coded_width != s->width ||
|
||||||
avctx->coded_height != s->height||
|
avctx->coded_height != s->height||
|
||||||
s1->save_aspect_info != s->aspect_ratio_info||
|
s1->save_aspect_info != s->aspect_ratio_info||
|
||||||
@ -2088,8 +2088,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
|||||||
s->parse_context= pc;
|
s->parse_context= pc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( (s->width == 0 )||(s->height == 0))
|
if( (s->width == 0 )||(s->height == 0))
|
||||||
return -2;
|
return -2;
|
||||||
|
|
||||||
avcodec_set_dimensions(avctx, s->width, s->height);
|
avcodec_set_dimensions(avctx, s->width, s->height);
|
||||||
avctx->bit_rate = s->bit_rate;
|
avctx->bit_rate = s->bit_rate;
|
||||||
@ -2129,7 +2129,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
|||||||
mpeg2_aspect[s->aspect_ratio_info],
|
mpeg2_aspect[s->aspect_ratio_info],
|
||||||
(AVRational){s1->pan_scan.width, s1->pan_scan.height}
|
(AVRational){s1->pan_scan.width, s1->pan_scan.height}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
s->avctx->sample_aspect_ratio=
|
s->avctx->sample_aspect_ratio=
|
||||||
mpeg2_aspect[s->aspect_ratio_info];
|
mpeg2_aspect[s->aspect_ratio_info];
|
||||||
@ -2312,16 +2312,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
|
|||||||
nofco = 1;
|
nofco = 1;
|
||||||
if(s->progressive_sequence){
|
if(s->progressive_sequence){
|
||||||
if(s->repeat_first_field){
|
if(s->repeat_first_field){
|
||||||
nofco++;
|
nofco++;
|
||||||
if(s->top_field_first)
|
if(s->top_field_first)
|
||||||
nofco++;
|
nofco++;
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
if(s->picture_structure == PICT_FRAME){
|
if(s->picture_structure == PICT_FRAME){
|
||||||
nofco++;
|
nofco++;
|
||||||
if(s->repeat_first_field)
|
if(s->repeat_first_field)
|
||||||
nofco++;
|
nofco++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(i=0; i<nofco; i++){
|
for(i=0; i<nofco; i++){
|
||||||
s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
|
s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
|
||||||
@ -2985,8 +2985,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
|
|||||||
|
|
||||||
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
|
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
|
av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
|
||||||
time_code_hours, time_code_minutes, time_code_seconds,
|
time_code_hours, time_code_minutes, time_code_seconds,
|
||||||
time_code_pictures, broken_link);
|
time_code_pictures, broken_link);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* finds the end of the current frame in the bitstream.
|
* finds the end of the current frame in the bitstream.
|
||||||
@ -3044,13 +3044,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
|
|||||||
dprintf("fill_buffer\n");
|
dprintf("fill_buffer\n");
|
||||||
|
|
||||||
if (buf_size == 0) {
|
if (buf_size == 0) {
|
||||||
/* special case for last picture */
|
/* special case for last picture */
|
||||||
if (s2->low_delay==0 && s2->next_picture_ptr) {
|
if (s2->low_delay==0 && s2->next_picture_ptr) {
|
||||||
*picture= *(AVFrame*)s2->next_picture_ptr;
|
*picture= *(AVFrame*)s2->next_picture_ptr;
|
||||||
s2->next_picture_ptr= NULL;
|
s2->next_picture_ptr= NULL;
|
||||||
|
|
||||||
*data_size = sizeof(AVFrame);
|
*data_size = sizeof(AVFrame);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3111,13 +3111,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
|
|||||||
switch(start_code) {
|
switch(start_code) {
|
||||||
case SEQ_START_CODE:
|
case SEQ_START_CODE:
|
||||||
mpeg1_decode_sequence(avctx, buf_ptr,
|
mpeg1_decode_sequence(avctx, buf_ptr,
|
||||||
input_size);
|
input_size);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PICTURE_START_CODE:
|
case PICTURE_START_CODE:
|
||||||
/* we have a complete image : we try to decompress it */
|
/* we have a complete image : we try to decompress it */
|
||||||
mpeg1_decode_picture(avctx,
|
mpeg1_decode_picture(avctx,
|
||||||
buf_ptr, input_size);
|
buf_ptr, input_size);
|
||||||
break;
|
break;
|
||||||
case EXT_START_CODE:
|
case EXT_START_CODE:
|
||||||
mpeg_decode_extension(avctx,
|
mpeg_decode_extension(avctx,
|
||||||
|
@ -4,14 +4,14 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
const int16_t ff_mpeg1_default_intra_matrix[64] = {
|
const int16_t ff_mpeg1_default_intra_matrix[64] = {
|
||||||
8, 16, 19, 22, 26, 27, 29, 34,
|
8, 16, 19, 22, 26, 27, 29, 34,
|
||||||
16, 16, 22, 24, 27, 29, 34, 37,
|
16, 16, 22, 24, 27, 29, 34, 37,
|
||||||
19, 22, 26, 27, 29, 34, 34, 38,
|
19, 22, 26, 27, 29, 34, 34, 38,
|
||||||
22, 22, 26, 27, 29, 34, 37, 40,
|
22, 22, 26, 27, 29, 34, 37, 40,
|
||||||
22, 26, 27, 29, 32, 35, 40, 48,
|
22, 26, 27, 29, 32, 35, 40, 48,
|
||||||
26, 27, 29, 32, 35, 40, 48, 58,
|
26, 27, 29, 32, 35, 40, 48, 58,
|
||||||
26, 27, 29, 34, 38, 46, 56, 69,
|
26, 27, 29, 34, 38, 46, 56, 69,
|
||||||
27, 29, 35, 38, 46, 56, 69, 83
|
27, 29, 35, 38, 46, 56, 69, 83
|
||||||
};
|
};
|
||||||
|
|
||||||
const int16_t ff_mpeg1_default_non_intra_matrix[64] = {
|
const int16_t ff_mpeg1_default_non_intra_matrix[64] = {
|
||||||
|
@ -748,7 +748,7 @@ static void encode_frame(MpegAudioContext *s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int MPA_encode_frame(AVCodecContext *avctx,
|
static int MPA_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame, int buf_size, void *data)
|
unsigned char *frame, int buf_size, void *data)
|
||||||
{
|
{
|
||||||
MpegAudioContext *s = avctx->priv_data;
|
MpegAudioContext *s = avctx->priv_data;
|
||||||
short *samples = data;
|
short *samples = data;
|
||||||
|
@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
|
|||||||
int mpa_decode_header(AVCodecContext *avctx, uint32_t head);
|
int mpa_decode_header(AVCodecContext *avctx, uint32_t head);
|
||||||
void ff_mpa_synth_init(MPA_INT *window);
|
void ff_mpa_synth_init(MPA_INT *window);
|
||||||
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
||||||
MPA_INT *window, int *dither_state,
|
MPA_INT *window, int *dither_state,
|
||||||
OUT_INT *samples, int incr,
|
OUT_INT *samples, int incr,
|
||||||
int32_t sb_samples[SBLIMIT]);
|
int32_t sb_samples[SBLIMIT]);
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ static always_inline int MULH(int a, int b){
|
|||||||
struct GranuleDef;
|
struct GranuleDef;
|
||||||
|
|
||||||
typedef struct MPADecodeContext {
|
typedef struct MPADecodeContext {
|
||||||
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
||||||
int inbuf_index;
|
int inbuf_index;
|
||||||
uint8_t *inbuf_ptr, *inbuf;
|
uint8_t *inbuf_ptr, *inbuf;
|
||||||
int frame_size;
|
int frame_size;
|
||||||
@ -340,13 +340,13 @@ static int decode_init(AVCodecContext * avctx)
|
|||||||
scale_factor_mult[i][2]);
|
scale_factor_mult[i][2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ff_mpa_synth_init(window);
|
ff_mpa_synth_init(window);
|
||||||
|
|
||||||
/* huffman decode tables */
|
/* huffman decode tables */
|
||||||
huff_code_table[0] = NULL;
|
huff_code_table[0] = NULL;
|
||||||
for(i=1;i<16;i++) {
|
for(i=1;i<16;i++) {
|
||||||
const HuffTable *h = &mpa_huff_tables[i];
|
const HuffTable *h = &mpa_huff_tables[i];
|
||||||
int xsize, x, y;
|
int xsize, x, y;
|
||||||
unsigned int n;
|
unsigned int n;
|
||||||
uint8_t *code_table;
|
uint8_t *code_table;
|
||||||
|
|
||||||
@ -378,11 +378,11 @@ static int decode_init(AVCodecContext * avctx)
|
|||||||
band_index_long[i][22] = k;
|
band_index_long[i][22] = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* compute n ^ (4/3) and store it in mantissa/exp format */
|
/* compute n ^ (4/3) and store it in mantissa/exp format */
|
||||||
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
|
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
|
||||||
if(!table_4_3_exp)
|
if(!table_4_3_exp)
|
||||||
return -1;
|
return -1;
|
||||||
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
|
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
|
||||||
if(!table_4_3_value)
|
if(!table_4_3_value)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -844,7 +844,7 @@ void ff_mpa_synth_init(MPA_INT *window)
|
|||||||
32 samples. */
|
32 samples. */
|
||||||
/* XXX: optimize by avoiding ring buffer usage */
|
/* XXX: optimize by avoiding ring buffer usage */
|
||||||
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
||||||
MPA_INT *window, int *dither_state,
|
MPA_INT *window, int *dither_state,
|
||||||
OUT_INT *samples, int incr,
|
OUT_INT *samples, int incr,
|
||||||
int32_t sb_samples[SBLIMIT])
|
int32_t sb_samples[SBLIMIT])
|
||||||
{
|
{
|
||||||
@ -2440,8 +2440,8 @@ static int mp_decode_frame(MPADecodeContext *s,
|
|||||||
samples_ptr = samples + ch;
|
samples_ptr = samples + ch;
|
||||||
for(i=0;i<nb_frames;i++) {
|
for(i=0;i<nb_frames;i++) {
|
||||||
ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
|
ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
|
||||||
window, &s->dither_state,
|
window, &s->dither_state,
|
||||||
samples_ptr, s->nb_channels,
|
samples_ptr, s->nb_channels,
|
||||||
s->sb_samples[ch][i]);
|
s->sb_samples[ch][i]);
|
||||||
samples_ptr += 32 * s->nb_channels;
|
samples_ptr += 32 * s->nb_channels;
|
||||||
}
|
}
|
||||||
@ -2453,8 +2453,8 @@ static int mp_decode_frame(MPADecodeContext *s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int decode_frame(AVCodecContext * avctx,
|
static int decode_frame(AVCodecContext * avctx,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
uint8_t * buf, int buf_size)
|
uint8_t * buf, int buf_size)
|
||||||
{
|
{
|
||||||
MPADecodeContext *s = avctx->priv_data;
|
MPADecodeContext *s = avctx->priv_data;
|
||||||
uint32_t header;
|
uint32_t header;
|
||||||
@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx,
|
|||||||
|
|
||||||
buf_ptr = buf;
|
buf_ptr = buf;
|
||||||
while (buf_size > 0) {
|
while (buf_size > 0) {
|
||||||
len = s->inbuf_ptr - s->inbuf;
|
len = s->inbuf_ptr - s->inbuf;
|
||||||
if (s->frame_size == 0) {
|
if (s->frame_size == 0) {
|
||||||
/* special case for next header for first frame in free
|
/* special case for next header for first frame in free
|
||||||
format case (XXX: find a simpler method) */
|
format case (XXX: find a simpler method) */
|
||||||
if (s->free_format_next_header != 0) {
|
if (s->free_format_next_header != 0) {
|
||||||
@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx,
|
|||||||
s->free_format_next_header = 0;
|
s->free_format_next_header = 0;
|
||||||
goto got_header;
|
goto got_header;
|
||||||
}
|
}
|
||||||
/* no header seen : find one. We need at least HEADER_SIZE
|
/* no header seen : find one. We need at least HEADER_SIZE
|
||||||
bytes to parse it */
|
bytes to parse it */
|
||||||
len = HEADER_SIZE - len;
|
len = HEADER_SIZE - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||||
buf_ptr += len;
|
buf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
}
|
}
|
||||||
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
|
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
|
||||||
got_header:
|
got_header:
|
||||||
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
||||||
(s->inbuf[2] << 8) | s->inbuf[3];
|
(s->inbuf[2] << 8) | s->inbuf[3];
|
||||||
|
|
||||||
if (ff_mpa_check_header(header) < 0) {
|
if (ff_mpa_check_header(header) < 0) {
|
||||||
/* no sync found : move by one byte (inefficient, but simple!) */
|
/* no sync found : move by one byte (inefficient, but simple!) */
|
||||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
dprintf("skip %x\n", header);
|
dprintf("skip %x\n", header);
|
||||||
/* reset free format frame size to give a chance
|
/* reset free format frame size to give a chance
|
||||||
to get a new bitrate */
|
to get a new bitrate */
|
||||||
s->free_format_frame_size = 0;
|
s->free_format_frame_size = 0;
|
||||||
} else {
|
} else {
|
||||||
if (decode_header(s, header) == 1) {
|
if (decode_header(s, header) == 1) {
|
||||||
/* free format: prepare to compute frame size */
|
/* free format: prepare to compute frame size */
|
||||||
s->frame_size = -1;
|
s->frame_size = -1;
|
||||||
}
|
}
|
||||||
/* update codec info */
|
/* update codec info */
|
||||||
avctx->sample_rate = s->sample_rate;
|
avctx->sample_rate = s->sample_rate;
|
||||||
@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx,
|
|||||||
avctx->frame_size = 1152;
|
avctx->frame_size = 1152;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (s->frame_size == -1) {
|
} else if (s->frame_size == -1) {
|
||||||
/* free format : find next sync to compute frame size */
|
/* free format : find next sync to compute frame size */
|
||||||
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
/* frame too long: resync */
|
/* frame too long: resync */
|
||||||
s->frame_size = 0;
|
s->frame_size = 0;
|
||||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
} else {
|
} else {
|
||||||
uint8_t *p, *pend;
|
uint8_t *p, *pend;
|
||||||
uint32_t header1;
|
uint32_t header1;
|
||||||
@ -2580,17 +2580,17 @@ static int decode_frame(AVCodecContext * avctx,
|
|||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
}
|
}
|
||||||
} else if (len < s->frame_size) {
|
} else if (len < s->frame_size) {
|
||||||
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
|
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
|
||||||
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
|
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
|
||||||
len = s->frame_size - len;
|
len = s->frame_size - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||||
buf_ptr += len;
|
buf_ptr += len;
|
||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
}
|
}
|
||||||
next_data:
|
next_data:
|
||||||
if (s->frame_size > 0 &&
|
if (s->frame_size > 0 &&
|
||||||
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
|
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
|
||||||
@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx,
|
|||||||
} else {
|
} else {
|
||||||
out_size = mp_decode_frame(s, out_samples);
|
out_size = mp_decode_frame(s, out_samples);
|
||||||
}
|
}
|
||||||
s->inbuf_ptr = s->inbuf;
|
s->inbuf_ptr = s->inbuf;
|
||||||
s->frame_size = 0;
|
s->frame_size = 0;
|
||||||
if(out_size>=0)
|
if(out_size>=0)
|
||||||
*data_size = out_size;
|
*data_size = out_size;
|
||||||
else
|
else
|
||||||
av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
|
av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf_ptr - buf;
|
return buf_ptr - buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int decode_frame_adu(AVCodecContext * avctx,
|
static int decode_frame_adu(AVCodecContext * avctx,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
uint8_t * buf, int buf_size)
|
uint8_t * buf, int buf_size)
|
||||||
{
|
{
|
||||||
MPADecodeContext *s = avctx->priv_data;
|
MPADecodeContext *s = avctx->priv_data;
|
||||||
uint32_t header;
|
uint32_t header;
|
||||||
@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx)
|
|||||||
|
|
||||||
|
|
||||||
static int decode_frame_mp3on4(AVCodecContext * avctx,
|
static int decode_frame_mp3on4(AVCodecContext * avctx,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
uint8_t * buf, int buf_size)
|
uint8_t * buf, int buf_size)
|
||||||
{
|
{
|
||||||
MP3On4DecodeContext *s = avctx->priv_data;
|
MP3On4DecodeContext *s = avctx->priv_data;
|
||||||
MPADecodeContext *m;
|
MPADecodeContext *m;
|
||||||
|
@ -354,7 +354,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
|
|||||||
r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
|
r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
|
||||||
|
|
||||||
if(r<0 || !pic->age || !pic->type || !pic->data[0]){
|
if(r<0 || !pic->age || !pic->type || !pic->data[0]){
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
|
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -913,7 +913,7 @@ int MPV_encode_init(AVCodecContext *avctx)
|
|||||||
s->width = avctx->width;
|
s->width = avctx->width;
|
||||||
s->height = avctx->height;
|
s->height = avctx->height;
|
||||||
if(avctx->gop_size > 600){
|
if(avctx->gop_size > 600){
|
||||||
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
|
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
|
||||||
avctx->gop_size=600;
|
avctx->gop_size=600;
|
||||||
}
|
}
|
||||||
s->gop_size = avctx->gop_size;
|
s->gop_size = avctx->gop_size;
|
||||||
@ -1120,7 +1120,7 @@ int MPV_encode_init(AVCodecContext *avctx)
|
|||||||
s->out_format = FMT_MJPEG;
|
s->out_format = FMT_MJPEG;
|
||||||
s->intra_only = 1; /* force intra only for jpeg */
|
s->intra_only = 1; /* force intra only for jpeg */
|
||||||
s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
|
s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
|
||||||
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
|
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
|
||||||
s->mjpeg_vsample[0] = 1<<chroma_v_shift;
|
s->mjpeg_vsample[0] = 1<<chroma_v_shift;
|
||||||
s->mjpeg_vsample[1] = 1;
|
s->mjpeg_vsample[1] = 1;
|
||||||
s->mjpeg_vsample[2] = 1;
|
s->mjpeg_vsample[2] = 1;
|
||||||
@ -1143,24 +1143,24 @@ int MPV_encode_init(AVCodecContext *avctx)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
s->out_format = FMT_H263;
|
s->out_format = FMT_H263;
|
||||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||||
avctx->delay=0;
|
avctx->delay=0;
|
||||||
s->low_delay=1;
|
s->low_delay=1;
|
||||||
break;
|
break;
|
||||||
case CODEC_ID_H263P:
|
case CODEC_ID_H263P:
|
||||||
s->out_format = FMT_H263;
|
s->out_format = FMT_H263;
|
||||||
s->h263_plus = 1;
|
s->h263_plus = 1;
|
||||||
/* Fx */
|
/* Fx */
|
||||||
s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
|
s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
|
||||||
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
|
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
|
||||||
s->modified_quant= s->h263_aic;
|
s->modified_quant= s->h263_aic;
|
||||||
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
|
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
|
||||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||||
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
|
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
|
||||||
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
|
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
|
||||||
s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
|
s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
|
||||||
|
|
||||||
/* /Fx */
|
/* /Fx */
|
||||||
/* These are just to be sure */
|
/* These are just to be sure */
|
||||||
avctx->delay=0;
|
avctx->delay=0;
|
||||||
s->low_delay=1;
|
s->low_delay=1;
|
||||||
@ -2473,7 +2473,7 @@ static inline void gmc1_motion(MpegEncContext *s,
|
|||||||
|
|
||||||
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
|
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
|
||||||
if (s->no_rounding){
|
if (s->no_rounding){
|
||||||
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
||||||
}else{
|
}else{
|
||||||
s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
|
s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
|
||||||
}
|
}
|
||||||
@ -4148,7 +4148,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
|
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
|
||||||
s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
|
s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
|
||||||
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
|
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
|
||||||
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
|
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
|
||||||
@ -4157,7 +4157,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
|||||||
skip_dct[4]= 1;
|
skip_dct[4]= 1;
|
||||||
skip_dct[5]= 1;
|
skip_dct[5]= 1;
|
||||||
}else{
|
}else{
|
||||||
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
||||||
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
|
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
@ -4170,7 +4170,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
|||||||
dest_cr = s->dest[2];
|
dest_cr = s->dest[2];
|
||||||
|
|
||||||
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
||||||
op_pix = s->dsp.put_pixels_tab;
|
op_pix = s->dsp.put_pixels_tab;
|
||||||
op_qpix= s->dsp.put_qpel_pixels_tab;
|
op_qpix= s->dsp.put_qpel_pixels_tab;
|
||||||
}else{
|
}else{
|
||||||
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
||||||
@ -4208,7 +4208,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
|
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
|
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
|
||||||
@ -4223,7 +4223,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
|||||||
/* pre quantization */
|
/* pre quantization */
|
||||||
if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
|
if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
|
||||||
//FIXME optimize
|
//FIXME optimize
|
||||||
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
|
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
|
||||||
if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
|
if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
|
||||||
if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
|
if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
|
||||||
if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
|
if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
|
||||||
@ -6265,7 +6265,7 @@ static int dct_quantize_c(MpegEncContext *s,
|
|||||||
|
|
||||||
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
|
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
|
||||||
if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
|
if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
|
||||||
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
|
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
|
||||||
|
|
||||||
return last_non_zero;
|
return last_non_zero;
|
||||||
}
|
}
|
||||||
|
@ -126,7 +126,7 @@ typedef struct ScanTable{
|
|||||||
uint8_t permutated[64];
|
uint8_t permutated[64];
|
||||||
uint8_t raster_end[64];
|
uint8_t raster_end[64];
|
||||||
#ifdef ARCH_POWERPC
|
#ifdef ARCH_POWERPC
|
||||||
/** Used by dct_quantise_alitvec to find last-non-zero */
|
/** Used by dct_quantise_alitvec to find last-non-zero */
|
||||||
uint8_t __align8 inverse[64];
|
uint8_t __align8 inverse[64];
|
||||||
#endif
|
#endif
|
||||||
} ScanTable;
|
} ScanTable;
|
||||||
@ -181,7 +181,7 @@ typedef struct Picture{
|
|||||||
uint16_t *mb_var; ///< Table for MB variances
|
uint16_t *mb_var; ///< Table for MB variances
|
||||||
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
|
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
|
||||||
uint8_t *mb_mean; ///< Table for MB luminance
|
uint8_t *mb_mean; ///< Table for MB luminance
|
||||||
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
|
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
|
||||||
int b_frame_score; /* */
|
int b_frame_score; /* */
|
||||||
} Picture;
|
} Picture;
|
||||||
|
|
||||||
@ -245,7 +245,7 @@ typedef struct MotionEstContext{
|
|||||||
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
|
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
|
||||||
uint8_t *current_mv_penalty;
|
uint8_t *current_mv_penalty;
|
||||||
int (*sub_motion_search)(struct MpegEncContext * s,
|
int (*sub_motion_search)(struct MpegEncContext * s,
|
||||||
int *mx_ptr, int *my_ptr, int dmin,
|
int *mx_ptr, int *my_ptr, int dmin,
|
||||||
int src_index, int ref_index,
|
int src_index, int ref_index,
|
||||||
int size, int h);
|
int size, int h);
|
||||||
}MotionEstContext;
|
}MotionEstContext;
|
||||||
|
@ -544,24 +544,24 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
|||||||
handle_slices(s);
|
handle_slices(s);
|
||||||
|
|
||||||
if (!s->mb_intra) {
|
if (!s->mb_intra) {
|
||||||
/* compute cbp */
|
/* compute cbp */
|
||||||
set_stat(ST_INTER_MB);
|
set_stat(ST_INTER_MB);
|
||||||
cbp = 0;
|
cbp = 0;
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
if (s->block_last_index[i] >= 0)
|
if (s->block_last_index[i] >= 0)
|
||||||
cbp |= 1 << (5 - i);
|
cbp |= 1 << (5 - i);
|
||||||
}
|
}
|
||||||
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
|
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
|
||||||
/* skip macroblock */
|
/* skip macroblock */
|
||||||
put_bits(&s->pb, 1, 1);
|
put_bits(&s->pb, 1, 1);
|
||||||
s->last_bits++;
|
s->last_bits++;
|
||||||
s->misc_bits++;
|
s->misc_bits++;
|
||||||
s->skip_count++;
|
s->skip_count++;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (s->use_skip_mb_code)
|
if (s->use_skip_mb_code)
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
|
|
||||||
if(s->msmpeg4_version<=2){
|
if(s->msmpeg4_version<=2){
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
|||||||
}
|
}
|
||||||
s->p_tex_bits += get_bits_diff(s);
|
s->p_tex_bits += get_bits_diff(s);
|
||||||
} else {
|
} else {
|
||||||
/* compute cbp */
|
/* compute cbp */
|
||||||
cbp = 0;
|
cbp = 0;
|
||||||
coded_cbp = 0;
|
coded_cbp = 0;
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
int val, pred;
|
int val, pred;
|
||||||
val = (s->block_last_index[i] >= 1);
|
val = (s->block_last_index[i] >= 1);
|
||||||
cbp |= val << (5 - i);
|
cbp |= val << (5 - i);
|
||||||
@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
|||||||
val = val ^ pred;
|
val = val ^ pred;
|
||||||
}
|
}
|
||||||
coded_cbp |= val << (5 - i);
|
coded_cbp |= val << (5 - i);
|
||||||
}
|
}
|
||||||
#if 0
|
#if 0
|
||||||
if (coded_cbp)
|
if (coded_cbp)
|
||||||
printf("cbp=%x %x\n", cbp, coded_cbp);
|
printf("cbp=%x %x\n", cbp, coded_cbp);
|
||||||
@ -625,12 +625,12 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
|||||||
v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
|
v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
|
||||||
} else {
|
} else {
|
||||||
if (s->use_skip_mb_code)
|
if (s->use_skip_mb_code)
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
v2_mb_type[(cbp&3) + 4][1],
|
v2_mb_type[(cbp&3) + 4][1],
|
||||||
v2_mb_type[(cbp&3) + 4][0]);
|
v2_mb_type[(cbp&3) + 4][0]);
|
||||||
}
|
}
|
||||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
cbpy_tab[cbp>>2][1],
|
cbpy_tab[cbp>>2][1],
|
||||||
cbpy_tab[cbp>>2][0]);
|
cbpy_tab[cbp>>2][0]);
|
||||||
@ -641,13 +641,13 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
|||||||
ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
|
ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
|
||||||
} else {
|
} else {
|
||||||
if (s->use_skip_mb_code)
|
if (s->use_skip_mb_code)
|
||||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||||
put_bits(&s->pb,
|
put_bits(&s->pb,
|
||||||
table_mb_non_intra[cbp][1],
|
table_mb_non_intra[cbp][1],
|
||||||
table_mb_non_intra[cbp][0]);
|
table_mb_non_intra[cbp][0]);
|
||||||
}
|
}
|
||||||
set_stat(ST_INTRA_MB);
|
set_stat(ST_INTRA_MB);
|
||||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||||
if(s->inter_intra_pred){
|
if(s->inter_intra_pred){
|
||||||
s->h263_aic_dir=0;
|
s->h263_aic_dir=0;
|
||||||
put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
|
put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
|
||||||
@ -702,9 +702,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
|||||||
|
|
||||||
/* find prediction */
|
/* find prediction */
|
||||||
if (n < 4) {
|
if (n < 4) {
|
||||||
scale = s->y_dc_scale;
|
scale = s->y_dc_scale;
|
||||||
} else {
|
} else {
|
||||||
scale = s->c_dc_scale;
|
scale = s->c_dc_scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
wrap = s->block_wrap[n];
|
wrap = s->block_wrap[n];
|
||||||
@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
|||||||
to problems if Q could vary !) */
|
to problems if Q could vary !) */
|
||||||
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC
|
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movl %3, %%eax \n\t"
|
"movl %3, %%eax \n\t"
|
||||||
"shrl $1, %%eax \n\t"
|
"shrl $1, %%eax \n\t"
|
||||||
"addl %%eax, %2 \n\t"
|
"addl %%eax, %2 \n\t"
|
||||||
"addl %%eax, %1 \n\t"
|
"addl %%eax, %1 \n\t"
|
||||||
"addl %0, %%eax \n\t"
|
"addl %0, %%eax \n\t"
|
||||||
"mull %4 \n\t"
|
"mull %4 \n\t"
|
||||||
"movl %%edx, %0 \n\t"
|
"movl %%edx, %0 \n\t"
|
||||||
"movl %1, %%eax \n\t"
|
"movl %1, %%eax \n\t"
|
||||||
"mull %4 \n\t"
|
"mull %4 \n\t"
|
||||||
"movl %%edx, %1 \n\t"
|
"movl %%edx, %1 \n\t"
|
||||||
"movl %2, %%eax \n\t"
|
"movl %2, %%eax \n\t"
|
||||||
"mull %4 \n\t"
|
"mull %4 \n\t"
|
||||||
"movl %%edx, %2 \n\t"
|
"movl %%edx, %2 \n\t"
|
||||||
: "+b" (a), "+c" (b), "+D" (c)
|
: "+b" (a), "+c" (b), "+D" (c)
|
||||||
: "g" (scale), "S" (inverse[scale])
|
: "g" (scale), "S" (inverse[scale])
|
||||||
: "%eax", "%edx"
|
: "%eax", "%edx"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
/* #elif defined (ARCH_ALPHA) */
|
/* #elif defined (ARCH_ALPHA) */
|
||||||
@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
|||||||
common case. But they are costly everywhere...
|
common case. But they are costly everywhere...
|
||||||
*/
|
*/
|
||||||
if (scale == 8) {
|
if (scale == 8) {
|
||||||
a = (a + (8 >> 1)) / 8;
|
a = (a + (8 >> 1)) / 8;
|
||||||
b = (b + (8 >> 1)) / 8;
|
b = (b + (8 >> 1)) / 8;
|
||||||
c = (c + (8 >> 1)) / 8;
|
c = (c + (8 >> 1)) / 8;
|
||||||
} else {
|
} else {
|
||||||
a = FASTDIV((a + (scale >> 1)), scale);
|
a = FASTDIV((a + (scale >> 1)), scale);
|
||||||
b = FASTDIV((b + (scale >> 1)), scale);
|
b = FASTDIV((b + (scale >> 1)), scale);
|
||||||
c = FASTDIV((c + (scale >> 1)), scale);
|
c = FASTDIV((c + (scale >> 1)), scale);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* XXX: WARNING: they did not choose the same test as MPEG4. This
|
/* XXX: WARNING: they did not choose the same test as MPEG4. This
|
||||||
@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
|
|||||||
/* AC coefs */
|
/* AC coefs */
|
||||||
last_non_zero = i - 1;
|
last_non_zero = i - 1;
|
||||||
for (; i <= last_index; i++) {
|
for (; i <= last_index; i++) {
|
||||||
j = scantable[i];
|
j = scantable[i];
|
||||||
level = block[j];
|
level = block[j];
|
||||||
if (level) {
|
if (level) {
|
||||||
run = i - last_non_zero - 1;
|
run = i - last_non_zero - 1;
|
||||||
last = (i == last_index);
|
last = (i == last_index);
|
||||||
sign = 0;
|
sign = 0;
|
||||||
slevel = level;
|
slevel = level;
|
||||||
if (level < 0) {
|
if (level < 0) {
|
||||||
sign = 1;
|
sign = 1;
|
||||||
level = -level;
|
level = -level;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(level<=MAX_LEVEL && run<=MAX_RUN){
|
if(level<=MAX_LEVEL && run<=MAX_RUN){
|
||||||
s->ac_stats[s->mb_intra][n>3][level][run][last]++;
|
s->ac_stats[s->mb_intra][n>3][level][run][last]++;
|
||||||
@ -1030,8 +1030,8 @@ else
|
|||||||
} else {
|
} else {
|
||||||
put_bits(&s->pb, 1, sign);
|
put_bits(&s->pb, 1, sign);
|
||||||
}
|
}
|
||||||
last_non_zero = i;
|
last_non_zero = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void)
|
|||||||
v = abs(level);
|
v = abs(level);
|
||||||
while (v) {
|
while (v) {
|
||||||
v >>= 1;
|
v >>= 1;
|
||||||
size++;
|
size++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (level < 0)
|
if (level < 0)
|
||||||
@ -1301,11 +1301,11 @@ return -1;
|
|||||||
}
|
}
|
||||||
s->no_rounding = 1;
|
s->no_rounding = 1;
|
||||||
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
|
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
|
||||||
s->qscale,
|
s->qscale,
|
||||||
s->rl_chroma_table_index,
|
s->rl_chroma_table_index,
|
||||||
s->rl_table_index,
|
s->rl_table_index,
|
||||||
s->dc_table_index,
|
s->dc_table_index,
|
||||||
s->per_mb_rl_table,
|
s->per_mb_rl_table,
|
||||||
s->slice_height);
|
s->slice_height);
|
||||||
} else {
|
} else {
|
||||||
@ -1349,20 +1349,20 @@ return -1;
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
||||||
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
|
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
|
||||||
s->use_skip_mb_code,
|
s->use_skip_mb_code,
|
||||||
s->rl_table_index,
|
s->rl_table_index,
|
||||||
s->rl_chroma_table_index,
|
s->rl_chroma_table_index,
|
||||||
s->dc_table_index,
|
s->dc_table_index,
|
||||||
s->mv_table_index,
|
s->mv_table_index,
|
||||||
s->per_mb_rl_table,
|
s->per_mb_rl_table,
|
||||||
s->qscale);
|
s->qscale);
|
||||||
|
|
||||||
if(s->flipflop_rounding){
|
if(s->flipflop_rounding){
|
||||||
s->no_rounding ^= 1;
|
s->no_rounding ^= 1;
|
||||||
}else{
|
}else{
|
||||||
s->no_rounding = 0;
|
s->no_rounding = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
|
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
|
||||||
|
|
||||||
@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
|||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
||||||
{
|
{
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1593,8 +1593,8 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
|||||||
code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
|
code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
|
||||||
if (code < 0)
|
if (code < 0)
|
||||||
return -1;
|
return -1;
|
||||||
//s->mb_intra = (code & 0x40) ? 0 : 1;
|
//s->mb_intra = (code & 0x40) ? 0 : 1;
|
||||||
s->mb_intra = (~code & 0x40) >> 6;
|
s->mb_intra = (~code & 0x40) >> 6;
|
||||||
|
|
||||||
cbp = code & 0x3f;
|
cbp = code & 0x3f;
|
||||||
} else {
|
} else {
|
||||||
@ -1650,10 +1650,10 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
|||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
||||||
{
|
{
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1672,7 +1672,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
|||||||
qmul=1;
|
qmul=1;
|
||||||
qadd=0;
|
qadd=0;
|
||||||
|
|
||||||
/* DC coef */
|
/* DC coef */
|
||||||
set_stat(ST_DC);
|
set_stat(ST_DC);
|
||||||
level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
|
level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
|
||||||
|
|
||||||
@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
|
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
|
||||||
if (level>0) level= level * qmul + qadd;
|
if (level>0) level= level * qmul + qadd;
|
||||||
else level= level * qmul - qadd;
|
else level= level * qmul - qadd;
|
||||||
#if 0 // waste of time too :(
|
#if 0 // waste of time too :(
|
||||||
if(level>2048 || level<-2048){
|
if(level>2048 || level<-2048){
|
||||||
|
@ -45,7 +45,7 @@ Theora_decode_frame(AVCodecContext *ctx, void *outdata, int *outdata_size,
|
|||||||
thc->op.bytes = buf_size;
|
thc->op.bytes = buf_size;
|
||||||
|
|
||||||
if(theora_decode_packetin(&thc->state, &thc->op))
|
if(theora_decode_packetin(&thc->state, &thc->op))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
theora_decode_YUVout(&thc->state, &yuv);
|
theora_decode_YUVout(&thc->state, &yuv);
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ Theora_decode_init(AVCodecContext *ctx)
|
|||||||
uint8_t *cdp;
|
uint8_t *cdp;
|
||||||
|
|
||||||
if(ctx->extradata_size < 6)
|
if(ctx->extradata_size < 6)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
theora_info_init(&thc->info);
|
theora_info_init(&thc->info);
|
||||||
|
|
||||||
@ -87,25 +87,25 @@ Theora_decode_init(AVCodecContext *ctx)
|
|||||||
size = ctx->extradata_size;
|
size = ctx->extradata_size;
|
||||||
|
|
||||||
for(i = 0; i < 3; i++){
|
for(i = 0; i < 3; i++){
|
||||||
hs = *cdp++ << 8;
|
hs = *cdp++ << 8;
|
||||||
hs += *cdp++;
|
hs += *cdp++;
|
||||||
size -= 2;
|
size -= 2;
|
||||||
|
|
||||||
if(hs > size){
|
if(hs > size){
|
||||||
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
|
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
|
||||||
hs, size);
|
hs, size);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
op.packet = cdp;
|
op.packet = cdp;
|
||||||
op.bytes = hs;
|
op.bytes = hs;
|
||||||
op.b_o_s = !i;
|
op.b_o_s = !i;
|
||||||
if(theora_decode_header(&thc->info, &thc->comment, &op))
|
if(theora_decode_header(&thc->info, &thc->comment, &op))
|
||||||
return -1;
|
return -1;
|
||||||
op.packetno++;
|
op.packetno++;
|
||||||
|
|
||||||
cdp += hs;
|
cdp += hs;
|
||||||
size -= hs;
|
size -= hs;
|
||||||
}
|
}
|
||||||
|
|
||||||
theora_decode_init(&thc->state, &thc->info);
|
theora_decode_init(&thc->state, &thc->info);
|
||||||
|
@ -40,13 +40,13 @@ static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) {
|
|||||||
|
|
||||||
return (vorbis_encode_setup_managed(vi, avccontext->channels,
|
return (vorbis_encode_setup_managed(vi, avccontext->channels,
|
||||||
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ||
|
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ||
|
||||||
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
|
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
|
||||||
vorbis_encode_setup_init(vi)) ;
|
vorbis_encode_setup_init(vi)) ;
|
||||||
#else
|
#else
|
||||||
/* constant bitrate */
|
/* constant bitrate */
|
||||||
|
|
||||||
return vorbis_encode_init(vi, avccontext->channels,
|
return vorbis_encode_init(vi, avccontext->channels,
|
||||||
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
|
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,8 +58,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
|
|||||||
|
|
||||||
vorbis_info_init(&context->vi) ;
|
vorbis_info_init(&context->vi) ;
|
||||||
if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) {
|
if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) {
|
||||||
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
|
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
|
||||||
return -1 ;
|
return -1 ;
|
||||||
}
|
}
|
||||||
vorbis_analysis_init(&context->vd, &context->vi) ;
|
vorbis_analysis_init(&context->vd, &context->vi) ;
|
||||||
vorbis_block_init(&context->vd, &context->vb) ;
|
vorbis_block_init(&context->vd, &context->vb) ;
|
||||||
@ -101,8 +101,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
|
|||||||
|
|
||||||
|
|
||||||
static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
||||||
unsigned char *packets,
|
unsigned char *packets,
|
||||||
int buf_size, void *data)
|
int buf_size, void *data)
|
||||||
{
|
{
|
||||||
OggVorbisContext *context = avccontext->priv_data ;
|
OggVorbisContext *context = avccontext->priv_data ;
|
||||||
float **buffer ;
|
float **buffer ;
|
||||||
@ -113,22 +113,22 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
|||||||
buffer = vorbis_analysis_buffer(&context->vd, samples) ;
|
buffer = vorbis_analysis_buffer(&context->vd, samples) ;
|
||||||
|
|
||||||
if(context->vi.channels == 1) {
|
if(context->vi.channels == 1) {
|
||||||
for(l = 0 ; l < samples ; l++)
|
for(l = 0 ; l < samples ; l++)
|
||||||
buffer[0][l]=audio[l]/32768.f;
|
buffer[0][l]=audio[l]/32768.f;
|
||||||
} else {
|
} else {
|
||||||
for(l = 0 ; l < samples ; l++){
|
for(l = 0 ; l < samples ; l++){
|
||||||
buffer[0][l]=audio[l*2]/32768.f;
|
buffer[0][l]=audio[l*2]/32768.f;
|
||||||
buffer[1][l]=audio[l*2+1]/32768.f;
|
buffer[1][l]=audio[l*2+1]/32768.f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vorbis_analysis_wrote(&context->vd, samples) ;
|
vorbis_analysis_wrote(&context->vd, samples) ;
|
||||||
|
|
||||||
while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) {
|
while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) {
|
||||||
vorbis_analysis(&context->vb, NULL);
|
vorbis_analysis(&context->vb, NULL);
|
||||||
vorbis_bitrate_addblock(&context->vb) ;
|
vorbis_bitrate_addblock(&context->vb) ;
|
||||||
|
|
||||||
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
|
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
|
||||||
if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg
|
if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg
|
||||||
continue;
|
continue;
|
||||||
memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet));
|
memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet));
|
||||||
@ -136,7 +136,7 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
|||||||
memcpy(context->buffer + context->buffer_index, op.packet, op.bytes);
|
memcpy(context->buffer + context->buffer_index, op.packet, op.bytes);
|
||||||
context->buffer_index += op.bytes;
|
context->buffer_index += op.bytes;
|
||||||
// av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes);
|
// av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
l=0;
|
l=0;
|
||||||
@ -268,19 +268,19 @@ static inline int conv(int samples, float **pcm, char *buf, int channels) {
|
|||||||
float *mono ;
|
float *mono ;
|
||||||
|
|
||||||
for(i = 0 ; i < channels ; i++){
|
for(i = 0 ; i < channels ; i++){
|
||||||
ptr = &data[i];
|
ptr = &data[i];
|
||||||
mono = pcm[i] ;
|
mono = pcm[i] ;
|
||||||
|
|
||||||
for(j = 0 ; j < samples ; j++) {
|
for(j = 0 ; j < samples ; j++) {
|
||||||
|
|
||||||
val = mono[j] * 32767.f;
|
val = mono[j] * 32767.f;
|
||||||
|
|
||||||
if(val > 32767) val = 32767 ;
|
if(val > 32767) val = 32767 ;
|
||||||
if(val < -32768) val = -32768 ;
|
if(val < -32768) val = -32768 ;
|
||||||
|
|
||||||
*ptr = val ;
|
*ptr = val ;
|
||||||
ptr += channels;
|
ptr += channels;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0 ;
|
return 0 ;
|
||||||
@ -311,15 +311,15 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext,
|
|||||||
av_log(avccontext, AV_LOG_DEBUG, "\n");*/
|
av_log(avccontext, AV_LOG_DEBUG, "\n");*/
|
||||||
|
|
||||||
if(vorbis_synthesis(&context->vb, op) == 0)
|
if(vorbis_synthesis(&context->vb, op) == 0)
|
||||||
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
|
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
|
||||||
|
|
||||||
total_samples = 0 ;
|
total_samples = 0 ;
|
||||||
total_bytes = 0 ;
|
total_bytes = 0 ;
|
||||||
|
|
||||||
while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
|
while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
|
||||||
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
|
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
|
||||||
total_bytes += samples * 2 * context->vi.channels ;
|
total_bytes += samples * 2 * context->vi.channels ;
|
||||||
total_samples += samples ;
|
total_samples += samples ;
|
||||||
vorbis_synthesis_read(&context->vd, samples) ;
|
vorbis_synthesis_read(&context->vd, samples) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s)
|
|||||||
|
|
||||||
//#define END_NOT_FOUND (-100)
|
//#define END_NOT_FOUND (-100)
|
||||||
|
|
||||||
#define PICTURE_START_CODE 0x00000100
|
#define PICTURE_START_CODE 0x00000100
|
||||||
#define SEQ_START_CODE 0x000001b3
|
#define SEQ_START_CODE 0x000001b3
|
||||||
#define EXT_START_CODE 0x000001b5
|
#define EXT_START_CODE 0x000001b5
|
||||||
#define SLICE_MIN_START_CODE 0x00000101
|
#define SLICE_MIN_START_CODE 0x00000101
|
||||||
#define SLICE_MAX_START_CODE 0x000001af
|
#define SLICE_MAX_START_CODE 0x000001af
|
||||||
|
|
||||||
typedef struct ParseContext1{
|
typedef struct ParseContext1{
|
||||||
ParseContext pc;
|
ParseContext pc;
|
||||||
@ -571,7 +571,7 @@ static int mpeg4video_split(AVCodecContext *avctx,
|
|||||||
/*************************/
|
/*************************/
|
||||||
|
|
||||||
typedef struct MpegAudioParseContext {
|
typedef struct MpegAudioParseContext {
|
||||||
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
|
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
|
||||||
uint8_t *inbuf_ptr;
|
uint8_t *inbuf_ptr;
|
||||||
int frame_size;
|
int frame_size;
|
||||||
int free_format_frame_size;
|
int free_format_frame_size;
|
||||||
@ -608,8 +608,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
|||||||
*poutbuf_size = 0;
|
*poutbuf_size = 0;
|
||||||
buf_ptr = buf;
|
buf_ptr = buf;
|
||||||
while (buf_size > 0) {
|
while (buf_size > 0) {
|
||||||
len = s->inbuf_ptr - s->inbuf;
|
len = s->inbuf_ptr - s->inbuf;
|
||||||
if (s->frame_size == 0) {
|
if (s->frame_size == 0) {
|
||||||
/* special case for next header for first frame in free
|
/* special case for next header for first frame in free
|
||||||
format case (XXX: find a simpler method) */
|
format case (XXX: find a simpler method) */
|
||||||
if (s->free_format_next_header != 0) {
|
if (s->free_format_next_header != 0) {
|
||||||
@ -621,34 +621,34 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
|||||||
s->free_format_next_header = 0;
|
s->free_format_next_header = 0;
|
||||||
goto got_header;
|
goto got_header;
|
||||||
}
|
}
|
||||||
/* no header seen : find one. We need at least MPA_HEADER_SIZE
|
/* no header seen : find one. We need at least MPA_HEADER_SIZE
|
||||||
bytes to parse it */
|
bytes to parse it */
|
||||||
len = MPA_HEADER_SIZE - len;
|
len = MPA_HEADER_SIZE - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||||
buf_ptr += len;
|
buf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
}
|
}
|
||||||
if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
|
if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
|
||||||
got_header:
|
got_header:
|
||||||
sr= avctx->sample_rate;
|
sr= avctx->sample_rate;
|
||||||
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
||||||
(s->inbuf[2] << 8) | s->inbuf[3];
|
(s->inbuf[2] << 8) | s->inbuf[3];
|
||||||
|
|
||||||
ret = mpa_decode_header(avctx, header);
|
ret = mpa_decode_header(avctx, header);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
s->header_count= -2;
|
s->header_count= -2;
|
||||||
/* no sync found : move by one byte (inefficient, but simple!) */
|
/* no sync found : move by one byte (inefficient, but simple!) */
|
||||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
dprintf("skip %x\n", header);
|
dprintf("skip %x\n", header);
|
||||||
/* reset free format frame size to give a chance
|
/* reset free format frame size to give a chance
|
||||||
to get a new bitrate */
|
to get a new bitrate */
|
||||||
s->free_format_frame_size = 0;
|
s->free_format_frame_size = 0;
|
||||||
} else {
|
} else {
|
||||||
if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
|
if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
|
||||||
s->header_count= -3;
|
s->header_count= -3;
|
||||||
s->header= header;
|
s->header= header;
|
||||||
@ -657,26 +657,26 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
|||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
/* free format: prepare to compute frame size */
|
/* free format: prepare to compute frame size */
|
||||||
if (decode_header(s, header) == 1) {
|
if (decode_header(s, header) == 1) {
|
||||||
s->frame_size = -1;
|
s->frame_size = -1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if(s->header_count <= 0)
|
if(s->header_count <= 0)
|
||||||
avctx->sample_rate= sr; //FIXME ugly
|
avctx->sample_rate= sr; //FIXME ugly
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
#if 0
|
#if 0
|
||||||
if (s->frame_size == -1) {
|
if (s->frame_size == -1) {
|
||||||
/* free format : find next sync to compute frame size */
|
/* free format : find next sync to compute frame size */
|
||||||
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
/* frame too long: resync */
|
/* frame too long: resync */
|
||||||
s->frame_size = 0;
|
s->frame_size = 0;
|
||||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
} else {
|
} else {
|
||||||
uint8_t *p, *pend;
|
uint8_t *p, *pend;
|
||||||
uint32_t header1;
|
uint32_t header1;
|
||||||
@ -720,19 +720,19 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
|||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
if (len < s->frame_size) {
|
if (len < s->frame_size) {
|
||||||
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
|
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
|
||||||
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
|
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
|
||||||
len = s->frame_size - len;
|
len = s->frame_size - len;
|
||||||
if (len > buf_size)
|
if (len > buf_size)
|
||||||
len = buf_size;
|
len = buf_size;
|
||||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||||
buf_ptr += len;
|
buf_ptr += len;
|
||||||
s->inbuf_ptr += len;
|
s->inbuf_ptr += len;
|
||||||
buf_size -= len;
|
buf_size -= len;
|
||||||
}
|
}
|
||||||
// next_data:
|
// next_data:
|
||||||
if (s->frame_size > 0 &&
|
if (s->frame_size > 0 &&
|
||||||
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
|
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
|
||||||
@ -740,10 +740,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
|||||||
*poutbuf = s->inbuf;
|
*poutbuf = s->inbuf;
|
||||||
*poutbuf_size = s->inbuf_ptr - s->inbuf;
|
*poutbuf_size = s->inbuf_ptr - s->inbuf;
|
||||||
}
|
}
|
||||||
s->inbuf_ptr = s->inbuf;
|
s->inbuf_ptr = s->inbuf;
|
||||||
s->frame_size = 0;
|
s->frame_size = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buf_ptr - buf;
|
return buf_ptr - buf;
|
||||||
}
|
}
|
||||||
@ -783,7 +783,7 @@ static int ac3_parse(AVCodecParserContext *s1,
|
|||||||
const uint8_t *buf_ptr;
|
const uint8_t *buf_ptr;
|
||||||
int len, sample_rate, bit_rate;
|
int len, sample_rate, bit_rate;
|
||||||
static const int ac3_channels[8] = {
|
static const int ac3_channels[8] = {
|
||||||
2, 1, 2, 3, 3, 4, 4, 5
|
2, 1, 2, 3, 3, 4, 4, 5
|
||||||
};
|
};
|
||||||
|
|
||||||
*poutbuf = NULL;
|
*poutbuf = NULL;
|
||||||
@ -812,7 +812,7 @@ static int ac3_parse(AVCodecParserContext *s1,
|
|||||||
memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
|
memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
|
||||||
s->inbuf_ptr--;
|
s->inbuf_ptr--;
|
||||||
} else {
|
} else {
|
||||||
s->frame_size = len;
|
s->frame_size = len;
|
||||||
/* update codec info */
|
/* update codec info */
|
||||||
avctx->sample_rate = sample_rate;
|
avctx->sample_rate = sample_rate;
|
||||||
/* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
|
/* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
|
||||||
@ -821,7 +821,7 @@ static int ac3_parse(AVCodecParserContext *s1,
|
|||||||
if (s->flags & A52_LFE)
|
if (s->flags & A52_LFE)
|
||||||
avctx->channels++;
|
avctx->channels++;
|
||||||
}
|
}
|
||||||
avctx->bit_rate = bit_rate;
|
avctx->bit_rate = bit_rate;
|
||||||
avctx->frame_size = 6 * 256;
|
avctx->frame_size = 6 * 256;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,48 +27,48 @@
|
|||||||
|
|
||||||
/* from g711.c by SUN microsystems (unrestricted use) */
|
/* from g711.c by SUN microsystems (unrestricted use) */
|
||||||
|
|
||||||
#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
|
#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
|
||||||
#define QUANT_MASK (0xf) /* Quantization field mask. */
|
#define QUANT_MASK (0xf) /* Quantization field mask. */
|
||||||
#define NSEGS (8) /* Number of A-law segments. */
|
#define NSEGS (8) /* Number of A-law segments. */
|
||||||
#define SEG_SHIFT (4) /* Left shift for segment number. */
|
#define SEG_SHIFT (4) /* Left shift for segment number. */
|
||||||
#define SEG_MASK (0x70) /* Segment field mask. */
|
#define SEG_MASK (0x70) /* Segment field mask. */
|
||||||
|
|
||||||
#define BIAS (0x84) /* Bias for linear code. */
|
#define BIAS (0x84) /* Bias for linear code. */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* alaw2linear() - Convert an A-law value to 16-bit linear PCM
|
* alaw2linear() - Convert an A-law value to 16-bit linear PCM
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static int alaw2linear(unsigned char a_val)
|
static int alaw2linear(unsigned char a_val)
|
||||||
{
|
{
|
||||||
int t;
|
int t;
|
||||||
int seg;
|
int seg;
|
||||||
|
|
||||||
a_val ^= 0x55;
|
a_val ^= 0x55;
|
||||||
|
|
||||||
t = a_val & QUANT_MASK;
|
t = a_val & QUANT_MASK;
|
||||||
seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
|
seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
|
||||||
if(seg) t= (t + t + 1 + 32) << (seg + 2);
|
if(seg) t= (t + t + 1 + 32) << (seg + 2);
|
||||||
else t= (t + t + 1 ) << 3;
|
else t= (t + t + 1 ) << 3;
|
||||||
|
|
||||||
return ((a_val & SIGN_BIT) ? t : -t);
|
return ((a_val & SIGN_BIT) ? t : -t);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ulaw2linear(unsigned char u_val)
|
static int ulaw2linear(unsigned char u_val)
|
||||||
{
|
{
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
/* Complement to obtain normal u-law value. */
|
/* Complement to obtain normal u-law value. */
|
||||||
u_val = ~u_val;
|
u_val = ~u_val;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract and bias the quantization bits. Then
|
* Extract and bias the quantization bits. Then
|
||||||
* shift up by the segment number and subtract out the bias.
|
* shift up by the segment number and subtract out the bias.
|
||||||
*/
|
*/
|
||||||
t = ((u_val & QUANT_MASK) << 3) + BIAS;
|
t = ((u_val & QUANT_MASK) << 3) + BIAS;
|
||||||
t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
|
t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
|
||||||
|
|
||||||
return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
|
return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 16384 entries per table */
|
/* 16384 entries per table */
|
||||||
@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int pcm_encode_frame(AVCodecContext *avctx,
|
static int pcm_encode_frame(AVCodecContext *avctx,
|
||||||
unsigned char *frame, int buf_size, void *data)
|
unsigned char *frame, int buf_size, void *data)
|
||||||
{
|
{
|
||||||
int n, sample_size, v;
|
int n, sample_size, v;
|
||||||
short *samples;
|
short *samples;
|
||||||
@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int pcm_decode_frame(AVCodecContext *avctx,
|
static int pcm_decode_frame(AVCodecContext *avctx,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
uint8_t *buf, int buf_size)
|
uint8_t *buf, int buf_size)
|
||||||
{
|
{
|
||||||
PCMDecode *s = avctx->priv_data;
|
PCMDecode *s = avctx->priv_data;
|
||||||
int n;
|
int n;
|
||||||
@ -509,9 +509,9 @@ AVCodec name ## _encoder = { \
|
|||||||
CODEC_TYPE_AUDIO, \
|
CODEC_TYPE_AUDIO, \
|
||||||
id, \
|
id, \
|
||||||
0, \
|
0, \
|
||||||
pcm_encode_init, \
|
pcm_encode_init, \
|
||||||
pcm_encode_frame, \
|
pcm_encode_frame, \
|
||||||
pcm_encode_close, \
|
pcm_encode_close, \
|
||||||
NULL, \
|
NULL, \
|
||||||
}; \
|
}; \
|
||||||
AVCodec name ## _decoder = { \
|
AVCodec name ## _decoder = { \
|
||||||
@ -519,7 +519,7 @@ AVCodec name ## _decoder = { \
|
|||||||
CODEC_TYPE_AUDIO, \
|
CODEC_TYPE_AUDIO, \
|
||||||
id, \
|
id, \
|
||||||
sizeof(PCMDecode), \
|
sizeof(PCMDecode), \
|
||||||
pcm_decode_init, \
|
pcm_decode_init, \
|
||||||
NULL, \
|
NULL, \
|
||||||
NULL, \
|
NULL, \
|
||||||
pcm_decode_frame, \
|
pcm_decode_frame, \
|
||||||
|
@ -67,7 +67,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
|
|||||||
/*
|
/*
|
||||||
Read unaligned pixels into our vectors. The vectors are as follows:
|
Read unaligned pixels into our vectors. The vectors are as follows:
|
||||||
pix1v: pix1[0]-pix1[15]
|
pix1v: pix1[0]-pix1[15]
|
||||||
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
|
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
|
||||||
*/
|
*/
|
||||||
tv = (vector unsigned char *) pix1;
|
tv = (vector unsigned char *) pix1;
|
||||||
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
|
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
|
||||||
@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
|
|||||||
fact to avoid a potentially expensive unaligned read, as well
|
fact to avoid a potentially expensive unaligned read, as well
|
||||||
as some splitting, and vector addition each time around the loop.
|
as some splitting, and vector addition each time around the loop.
|
||||||
Read unaligned pixels into our vectors. The vectors are as follows:
|
Read unaligned pixels into our vectors. The vectors are as follows:
|
||||||
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
|
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
|
||||||
Split the pixel vectors into shorts
|
Split the pixel vectors into shorts
|
||||||
*/
|
*/
|
||||||
tv = (vector unsigned char *) &pix2[0];
|
tv = (vector unsigned char *) &pix2[0];
|
||||||
@ -204,7 +204,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
|
|||||||
/*
|
/*
|
||||||
Read unaligned pixels into our vectors. The vectors are as follows:
|
Read unaligned pixels into our vectors. The vectors are as follows:
|
||||||
pix1v: pix1[0]-pix1[15]
|
pix1v: pix1[0]-pix1[15]
|
||||||
pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
|
pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
|
||||||
*/
|
*/
|
||||||
tv = (vector unsigned char *) pix1;
|
tv = (vector unsigned char *) pix1;
|
||||||
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
|
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
|
||||||
@ -273,7 +273,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
|
|
||||||
|
|
||||||
for(i=0;i<h;i++) {
|
for(i=0;i<h;i++) {
|
||||||
/* Read potentially unaligned pixels into t1 and t2 */
|
/* Read potentially unaligned pixels into t1 and t2 */
|
||||||
perm1 = vec_lvsl(0, pix1);
|
perm1 = vec_lvsl(0, pix1);
|
||||||
pix1v = (vector unsigned char *) pix1;
|
pix1v = (vector unsigned char *) pix1;
|
||||||
perm2 = vec_lvsl(0, pix2);
|
perm2 = vec_lvsl(0, pix2);
|
||||||
@ -281,12 +281,12 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
t1 = vec_perm(pix1v[0], pix1v[1], perm1);
|
t1 = vec_perm(pix1v[0], pix1v[1], perm1);
|
||||||
t2 = vec_perm(pix2v[0], pix2v[1], perm2);
|
t2 = vec_perm(pix2v[0], pix2v[1], perm2);
|
||||||
|
|
||||||
/* Calculate a sum of abs differences vector */
|
/* Calculate a sum of abs differences vector */
|
||||||
t3 = vec_max(t1, t2);
|
t3 = vec_max(t1, t2);
|
||||||
t4 = vec_min(t1, t2);
|
t4 = vec_min(t1, t2);
|
||||||
t5 = vec_sub(t3, t4);
|
t5 = vec_sub(t3, t4);
|
||||||
|
|
||||||
/* Add each 4 pixel group together and put 4 results into sad */
|
/* Add each 4 pixel group together and put 4 results into sad */
|
||||||
sad = vec_sum4s(t5, sad);
|
sad = vec_sum4s(t5, sad);
|
||||||
|
|
||||||
pix1 += line_size;
|
pix1 += line_size;
|
||||||
@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
|
permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
|
||||||
|
|
||||||
for(i=0;i<h;i++) {
|
for(i=0;i<h;i++) {
|
||||||
/* Read potentially unaligned pixels into t1 and t2
|
/* Read potentially unaligned pixels into t1 and t2
|
||||||
Since we're reading 16 pixels, and actually only want 8,
|
Since we're reading 16 pixels, and actually only want 8,
|
||||||
mask out the last 8 pixels. The 0s don't change the sum. */
|
mask out the last 8 pixels. The 0s don't change the sum. */
|
||||||
perm1 = vec_lvsl(0, pix1);
|
perm1 = vec_lvsl(0, pix1);
|
||||||
pix1v = (vector unsigned char *) pix1;
|
pix1v = (vector unsigned char *) pix1;
|
||||||
perm2 = vec_lvsl(0, pix2);
|
perm2 = vec_lvsl(0, pix2);
|
||||||
@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
|
t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
|
||||||
t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
|
t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
|
||||||
|
|
||||||
/* Calculate a sum of abs differences vector */
|
/* Calculate a sum of abs differences vector */
|
||||||
t3 = vec_max(t1, t2);
|
t3 = vec_max(t1, t2);
|
||||||
t4 = vec_min(t1, t2);
|
t4 = vec_min(t1, t2);
|
||||||
t5 = vec_sub(t3, t4);
|
t5 = vec_sub(t3, t4);
|
||||||
|
|
||||||
/* Add each 4 pixel group together and put 4 results into sad */
|
/* Add each 4 pixel group together and put 4 results into sad */
|
||||||
sad = vec_sum4s(t5, sad);
|
sad = vec_sum4s(t5, sad);
|
||||||
|
|
||||||
pix1 += line_size;
|
pix1 += line_size;
|
||||||
@ -398,9 +398,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
|
|
||||||
|
|
||||||
for(i=0;i<h;i++) {
|
for(i=0;i<h;i++) {
|
||||||
/* Read potentially unaligned pixels into t1 and t2
|
/* Read potentially unaligned pixels into t1 and t2
|
||||||
Since we're reading 16 pixels, and actually only want 8,
|
Since we're reading 16 pixels, and actually only want 8,
|
||||||
mask out the last 8 pixels. The 0s don't change the sum. */
|
mask out the last 8 pixels. The 0s don't change the sum. */
|
||||||
perm1 = vec_lvsl(0, pix1);
|
perm1 = vec_lvsl(0, pix1);
|
||||||
pix1v = (vector unsigned char *) pix1;
|
pix1v = (vector unsigned char *) pix1;
|
||||||
perm2 = vec_lvsl(0, pix2);
|
perm2 = vec_lvsl(0, pix2);
|
||||||
@ -413,7 +413,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
of the fact that abs(a-b)^2 = (a-b)^2.
|
of the fact that abs(a-b)^2 = (a-b)^2.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Calculate abs differences vector */
|
/* Calculate abs differences vector */
|
||||||
t3 = vec_max(t1, t2);
|
t3 = vec_max(t1, t2);
|
||||||
t4 = vec_min(t1, t2);
|
t4 = vec_min(t1, t2);
|
||||||
t5 = vec_sub(t3, t4);
|
t5 = vec_sub(t3, t4);
|
||||||
@ -451,7 +451,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
sum = (vector unsigned int)vec_splat_u32(0);
|
sum = (vector unsigned int)vec_splat_u32(0);
|
||||||
|
|
||||||
for(i=0;i<h;i++) {
|
for(i=0;i<h;i++) {
|
||||||
/* Read potentially unaligned pixels into t1 and t2 */
|
/* Read potentially unaligned pixels into t1 and t2 */
|
||||||
perm1 = vec_lvsl(0, pix1);
|
perm1 = vec_lvsl(0, pix1);
|
||||||
pix1v = (vector unsigned char *) pix1;
|
pix1v = (vector unsigned char *) pix1;
|
||||||
perm2 = vec_lvsl(0, pix2);
|
perm2 = vec_lvsl(0, pix2);
|
||||||
@ -464,7 +464,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|||||||
of the fact that abs(a-b)^2 = (a-b)^2.
|
of the fact that abs(a-b)^2 = (a-b)^2.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Calculate abs differences vector */
|
/* Calculate abs differences vector */
|
||||||
t3 = vec_max(t1, t2);
|
t3 = vec_max(t1, t2);
|
||||||
t4 = vec_min(t1, t2);
|
t4 = vec_min(t1, t2);
|
||||||
t5 = vec_sub(t3, t4);
|
t5 = vec_sub(t3, t4);
|
||||||
@ -498,12 +498,12 @@ int pix_sum_altivec(uint8_t * pix, int line_size)
|
|||||||
sad = (vector unsigned int)vec_splat_u32(0);
|
sad = (vector unsigned int)vec_splat_u32(0);
|
||||||
|
|
||||||
for (i = 0; i < 16; i++) {
|
for (i = 0; i < 16; i++) {
|
||||||
/* Read the potentially unaligned 16 pixels into t1 */
|
/* Read the potentially unaligned 16 pixels into t1 */
|
||||||
perm = vec_lvsl(0, pix);
|
perm = vec_lvsl(0, pix);
|
||||||
pixv = (vector unsigned char *) pix;
|
pixv = (vector unsigned char *) pix;
|
||||||
t1 = vec_perm(pixv[0], pixv[1], perm);
|
t1 = vec_perm(pixv[0], pixv[1], perm);
|
||||||
|
|
||||||
/* Add each 4 pixel group together and put 4 results into sad */
|
/* Add each 4 pixel group together and put 4 results into sad */
|
||||||
sad = vec_sum4s(t1, sad);
|
sad = vec_sum4s(t1, sad);
|
||||||
|
|
||||||
pix += line_size;
|
pix += line_size;
|
||||||
@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
|||||||
0x00, 0x01, 0x02, 0x03,
|
0x00, 0x01, 0x02, 0x03,
|
||||||
0x04, 0x05, 0x06, 0x07);
|
0x04, 0x05, 0x06, 0x07);
|
||||||
|
|
||||||
#define ONEITERBUTTERFLY(i, res) \
|
#define ONEITERBUTTERFLY(i, res) \
|
||||||
{ \
|
{ \
|
||||||
register vector unsigned char src1, src2, srcO; \
|
register vector unsigned char src1, src2, srcO; \
|
||||||
register vector unsigned char dst1, dst2, dstO; \
|
register vector unsigned char dst1, dst2, dstO; \
|
||||||
src1 = vec_ld(stride * i, src); \
|
src1 = vec_ld(stride * i, src); \
|
||||||
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
|
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
|
||||||
src2 = vec_ld((stride * i) + 16, src); \
|
src2 = vec_ld((stride * i) + 16, src); \
|
||||||
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
||||||
dst1 = vec_ld(stride * i, dst); \
|
dst1 = vec_ld(stride * i, dst); \
|
||||||
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
|
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
|
||||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||||
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
||||||
/* promote the unsigned chars to signed shorts */ \
|
/* promote the unsigned chars to signed shorts */ \
|
||||||
/* we're in the 8x8 function, we only care for the first 8 */ \
|
/* we're in the 8x8 function, we only care for the first 8 */ \
|
||||||
register vector signed short srcV = \
|
register vector signed short srcV = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||||
register vector signed short dstV = \
|
register vector signed short dstV = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||||
/* substractions inside the first butterfly */ \
|
/* substractions inside the first butterfly */ \
|
||||||
register vector signed short but0 = vec_sub(srcV, dstV); \
|
register vector signed short but0 = vec_sub(srcV, dstV); \
|
||||||
register vector signed short op1 = vec_perm(but0, but0, perm1); \
|
register vector signed short op1 = vec_perm(but0, but0, perm1); \
|
||||||
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
|
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
|
||||||
register vector signed short op2 = vec_perm(but1, but1, perm2); \
|
register vector signed short op2 = vec_perm(but1, but1, perm2); \
|
||||||
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
|
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
|
||||||
register vector signed short op3 = vec_perm(but2, but2, perm3); \
|
register vector signed short op3 = vec_perm(but2, but2, perm3); \
|
||||||
res = vec_mladd(but2, vprod3, op3); \
|
res = vec_mladd(but2, vprod3, op3); \
|
||||||
}
|
}
|
||||||
ONEITERBUTTERFLY(0, temp0);
|
ONEITERBUTTERFLY(0, temp0);
|
||||||
ONEITERBUTTERFLY(1, temp1);
|
ONEITERBUTTERFLY(1, temp1);
|
||||||
@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
|||||||
0x00, 0x01, 0x02, 0x03,
|
0x00, 0x01, 0x02, 0x03,
|
||||||
0x04, 0x05, 0x06, 0x07);
|
0x04, 0x05, 0x06, 0x07);
|
||||||
|
|
||||||
#define ONEITERBUTTERFLY(i, res1, res2) \
|
#define ONEITERBUTTERFLY(i, res1, res2) \
|
||||||
{ \
|
{ \
|
||||||
register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
|
register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
|
||||||
register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
|
register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
|
||||||
src1 = vec_ld(stride * i, src); \
|
src1 = vec_ld(stride * i, src); \
|
||||||
src2 = vec_ld((stride * i) + 16, src); \
|
src2 = vec_ld((stride * i) + 16, src); \
|
||||||
register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
||||||
dst1 = vec_ld(stride * i, dst); \
|
dst1 = vec_ld(stride * i, dst); \
|
||||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||||
register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
||||||
/* promote the unsigned chars to signed shorts */ \
|
/* promote the unsigned chars to signed shorts */ \
|
||||||
register vector signed short srcV asm ("v24") = \
|
register vector signed short srcV asm ("v24") = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||||
register vector signed short dstV asm ("v25") = \
|
register vector signed short dstV asm ("v25") = \
|
||||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||||
register vector signed short srcW asm ("v26") = \
|
register vector signed short srcW asm ("v26") = \
|
||||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
|
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
|
||||||
register vector signed short dstW asm ("v27") = \
|
register vector signed short dstW asm ("v27") = \
|
||||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
|
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
|
||||||
/* substractions inside the first butterfly */ \
|
/* substractions inside the first butterfly */ \
|
||||||
register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
|
register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
|
||||||
register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
|
register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
|
||||||
register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
|
register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
|
||||||
@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
|||||||
register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
|
register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
|
||||||
register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
|
register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
|
||||||
register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
|
register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
|
||||||
res1 = vec_mladd(but2, vprod3, op3); \
|
res1 = vec_mladd(but2, vprod3, op3); \
|
||||||
register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
|
register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
|
||||||
res2 = vec_mladd(but2S, vprod3, op3S); \
|
res2 = vec_mladd(but2S, vprod3, op3S); \
|
||||||
}
|
}
|
||||||
ONEITERBUTTERFLY(0, temp0, temp0S);
|
ONEITERBUTTERFLY(0, temp0, temp0S);
|
||||||
ONEITERBUTTERFLY(1, temp1, temp1S);
|
ONEITERBUTTERFLY(1, temp1, temp1S);
|
||||||
@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
|
|||||||
int has_altivec(void)
|
int has_altivec(void)
|
||||||
{
|
{
|
||||||
#ifdef __AMIGAOS4__
|
#ifdef __AMIGAOS4__
|
||||||
ULONG result = 0;
|
ULONG result = 0;
|
||||||
extern struct ExecIFace *IExec;
|
extern struct ExecIFace *IExec;
|
||||||
|
|
||||||
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
|
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
|
||||||
if (result == VECTORTYPE_ALTIVEC) return 1;
|
if (result == VECTORTYPE_ALTIVEC) return 1;
|
||||||
return 0;
|
return 0;
|
||||||
#else /* __AMIGAOS4__ */
|
#else /* __AMIGAOS4__ */
|
||||||
|
|
||||||
#ifdef CONFIG_DARWIN
|
#ifdef CONFIG_DARWIN
|
||||||
|
@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
|
|||||||
|
|
||||||
/* from dsputil.c */
|
/* from dsputil.c */
|
||||||
static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < h; i++) {
|
for (i = 0; i < h; i++) {
|
||||||
uint32_t a, b;
|
uint32_t a, b;
|
||||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
|
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
|
||||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
|
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
|
||||||
}
|
}
|
||||||
} static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
} static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < h; i++) {
|
for (i = 0; i < h; i++) {
|
||||||
uint32_t a, b;
|
uint32_t a, b;
|
||||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
|
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
|
||||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
|
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
|
||||||
}
|
}
|
||||||
} static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
} static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||||
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||||
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||||
} static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
} static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||||
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||||
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* UNIMPLEMENTED YET !! */
|
/* UNIMPLEMENTED YET !! */
|
||||||
|
@ -87,16 +87,16 @@ void powerpc_display_perf_report(void)
|
|||||||
{
|
{
|
||||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||||
{
|
{
|
||||||
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
||||||
av_log(NULL, AV_LOG_INFO,
|
av_log(NULL, AV_LOG_INFO,
|
||||||
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||||
perfname[i],
|
perfname[i],
|
||||||
j+1,
|
j+1,
|
||||||
perfdata[j][i][powerpc_data_min],
|
perfdata[j][i][powerpc_data_min],
|
||||||
perfdata[j][i][powerpc_data_max],
|
perfdata[j][i][powerpc_data_max],
|
||||||
(double)perfdata[j][i][powerpc_data_sum] /
|
(double)perfdata[j][i][powerpc_data_sum] /
|
||||||
(double)perfdata[j][i][powerpc_data_num],
|
(double)perfdata[j][i][powerpc_data_num],
|
||||||
perfdata[j][i][powerpc_data_num]);
|
perfdata[j][i][powerpc_data_num]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
|
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
|
||||||
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||||
@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
||||||
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
||||||
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
||||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
||||||
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
|
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
|
||||||
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
|
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
|
||||||
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
|
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
|
||||||
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
|
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
|
||||||
|
|
||||||
c->gmc1 = gmc1_altivec;
|
c->gmc1 = gmc1_altivec;
|
||||||
|
|
||||||
#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
|
#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
|
||||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
||||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
if (avctx->dct_algo == FF_DCT_AUTO ||
|
if (avctx->dct_algo == FF_DCT_AUTO ||
|
||||||
avctx->dct_algo == FF_DCT_ALTIVEC)
|
avctx->dct_algo == FF_DCT_ALTIVEC)
|
||||||
{
|
{
|
||||||
c->fdct = fdct_altivec;
|
c->fdct = fdct_altivec;
|
||||||
}
|
}
|
||||||
#endif //CONFIG_ENCODERS
|
#endif //CONFIG_ENCODERS
|
||||||
|
|
||||||
if (avctx->lowres==0)
|
if (avctx->lowres==0)
|
||||||
@ -325,14 +325,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
|||||||
int i, j;
|
int i, j;
|
||||||
for (i = 0 ; i < powerpc_perf_total ; i++)
|
for (i = 0 ; i < powerpc_perf_total ; i++)
|
||||||
{
|
{
|
||||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||||
{
|
{
|
||||||
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
|
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
|
||||||
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
|
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
|
||||||
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
|
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
|
||||||
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
|
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||||
} else
|
} else
|
||||||
|
@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
|
|||||||
#define POWERPC_GET_PMC6(a) do {} while (0)
|
#define POWERPC_GET_PMC6(a) do {} while (0)
|
||||||
#endif
|
#endif
|
||||||
#endif /* POWERPC_MODE_64BITS */
|
#endif /* POWERPC_MODE_64BITS */
|
||||||
#define POWERPC_PERF_DECLARE(a, cond) \
|
#define POWERPC_PERF_DECLARE(a, cond) \
|
||||||
POWERP_PMC_DATATYPE \
|
POWERP_PMC_DATATYPE \
|
||||||
pmc_start[POWERPC_NUM_PMC_ENABLED], \
|
pmc_start[POWERPC_NUM_PMC_ENABLED], \
|
||||||
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
|
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
|
||||||
pmc_loop_index;
|
pmc_loop_index;
|
||||||
#define POWERPC_PERF_START_COUNT(a, cond) do { \
|
#define POWERPC_PERF_START_COUNT(a, cond) do { \
|
||||||
POWERPC_GET_PMC6(pmc_start[5]); \
|
POWERPC_GET_PMC6(pmc_start[5]); \
|
||||||
@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
|
|||||||
pmc_loop_index++) \
|
pmc_loop_index++) \
|
||||||
{ \
|
{ \
|
||||||
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
|
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
|
||||||
{ \
|
{ \
|
||||||
POWERP_PMC_DATATYPE diff = \
|
POWERP_PMC_DATATYPE diff = \
|
||||||
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
|
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
|
||||||
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
|
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
|
||||||
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
|
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
|
||||||
|
@ -65,8 +65,8 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
|
|||||||
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
||||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||||
int ln = s->nbits;
|
int ln = s->nbits;
|
||||||
int j, np, np2;
|
int j, np, np2;
|
||||||
int nblocks, nloops;
|
int nblocks, nloops;
|
||||||
register FFTComplex *p, *q;
|
register FFTComplex *p, *q;
|
||||||
FFTComplex *exptab = s->exptab;
|
FFTComplex *exptab = s->exptab;
|
||||||
int l;
|
int l;
|
||||||
@ -147,8 +147,8 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
int ln = s->nbits;
|
int ln = s->nbits;
|
||||||
int j, np, np2;
|
int j, np, np2;
|
||||||
int nblocks, nloops;
|
int nblocks, nloops;
|
||||||
register FFTComplex *p, *q;
|
register FFTComplex *p, *q;
|
||||||
FFTComplex *cptr, *cptr1;
|
FFTComplex *cptr, *cptr1;
|
||||||
int k;
|
int k;
|
||||||
|
@ -30,31 +30,31 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static inline vector signed char ff_vmrglb (vector signed char const A,
|
static inline vector signed char ff_vmrglb (vector signed char const A,
|
||||||
vector signed char const B)
|
vector signed char const B)
|
||||||
{
|
{
|
||||||
static const vector unsigned char lowbyte = {
|
static const vector unsigned char lowbyte = {
|
||||||
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
|
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
|
||||||
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
|
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
|
||||||
};
|
};
|
||||||
return vec_perm (A, B, lowbyte);
|
return vec_perm (A, B, lowbyte);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline vector signed short ff_vmrglh (vector signed short const A,
|
static inline vector signed short ff_vmrglh (vector signed short const A,
|
||||||
vector signed short const B)
|
vector signed short const B)
|
||||||
{
|
{
|
||||||
static const vector unsigned char lowhalf = {
|
static const vector unsigned char lowhalf = {
|
||||||
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
|
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
|
||||||
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
|
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
|
||||||
};
|
};
|
||||||
return vec_perm (A, B, lowhalf);
|
return vec_perm (A, B, lowhalf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline vector signed int ff_vmrglw (vector signed int const A,
|
static inline vector signed int ff_vmrglw (vector signed int const A,
|
||||||
vector signed int const B)
|
vector signed int const B)
|
||||||
{
|
{
|
||||||
static const vector unsigned char lowword = {
|
static const vector unsigned char lowword = {
|
||||||
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
|
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
|
||||||
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
|
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
|
||||||
};
|
};
|
||||||
return vec_perm (A, B, lowword);
|
return vec_perm (A, B, lowword);
|
||||||
}
|
}
|
||||||
|
@ -51,108 +51,108 @@
|
|||||||
#define vector_s32_t vector signed int
|
#define vector_s32_t vector signed int
|
||||||
#define vector_u32_t vector unsigned int
|
#define vector_u32_t vector unsigned int
|
||||||
|
|
||||||
#define IDCT_HALF \
|
#define IDCT_HALF \
|
||||||
/* 1st stage */ \
|
/* 1st stage */ \
|
||||||
t1 = vec_mradds (a1, vx7, vx1 ); \
|
t1 = vec_mradds (a1, vx7, vx1 ); \
|
||||||
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
|
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
|
||||||
t7 = vec_mradds (a2, vx5, vx3); \
|
t7 = vec_mradds (a2, vx5, vx3); \
|
||||||
t3 = vec_mradds (ma2, vx3, vx5); \
|
t3 = vec_mradds (ma2, vx3, vx5); \
|
||||||
\
|
\
|
||||||
/* 2nd stage */ \
|
/* 2nd stage */ \
|
||||||
t5 = vec_adds (vx0, vx4); \
|
t5 = vec_adds (vx0, vx4); \
|
||||||
t0 = vec_subs (vx0, vx4); \
|
t0 = vec_subs (vx0, vx4); \
|
||||||
t2 = vec_mradds (a0, vx6, vx2); \
|
t2 = vec_mradds (a0, vx6, vx2); \
|
||||||
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
|
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
|
||||||
t6 = vec_adds (t8, t3); \
|
t6 = vec_adds (t8, t3); \
|
||||||
t3 = vec_subs (t8, t3); \
|
t3 = vec_subs (t8, t3); \
|
||||||
t8 = vec_subs (t1, t7); \
|
t8 = vec_subs (t1, t7); \
|
||||||
t1 = vec_adds (t1, t7); \
|
t1 = vec_adds (t1, t7); \
|
||||||
\
|
\
|
||||||
/* 3rd stage */ \
|
/* 3rd stage */ \
|
||||||
t7 = vec_adds (t5, t2); \
|
t7 = vec_adds (t5, t2); \
|
||||||
t2 = vec_subs (t5, t2); \
|
t2 = vec_subs (t5, t2); \
|
||||||
t5 = vec_adds (t0, t4); \
|
t5 = vec_adds (t0, t4); \
|
||||||
t0 = vec_subs (t0, t4); \
|
t0 = vec_subs (t0, t4); \
|
||||||
t4 = vec_subs (t8, t3); \
|
t4 = vec_subs (t8, t3); \
|
||||||
t3 = vec_adds (t8, t3); \
|
t3 = vec_adds (t8, t3); \
|
||||||
\
|
\
|
||||||
/* 4th stage */ \
|
/* 4th stage */ \
|
||||||
vy0 = vec_adds (t7, t1); \
|
vy0 = vec_adds (t7, t1); \
|
||||||
vy7 = vec_subs (t7, t1); \
|
vy7 = vec_subs (t7, t1); \
|
||||||
vy1 = vec_mradds (c4, t3, t5); \
|
vy1 = vec_mradds (c4, t3, t5); \
|
||||||
vy6 = vec_mradds (mc4, t3, t5); \
|
vy6 = vec_mradds (mc4, t3, t5); \
|
||||||
vy2 = vec_mradds (c4, t4, t0); \
|
vy2 = vec_mradds (c4, t4, t0); \
|
||||||
vy5 = vec_mradds (mc4, t4, t0); \
|
vy5 = vec_mradds (mc4, t4, t0); \
|
||||||
vy3 = vec_adds (t2, t6); \
|
vy3 = vec_adds (t2, t6); \
|
||||||
vy4 = vec_subs (t2, t6);
|
vy4 = vec_subs (t2, t6);
|
||||||
|
|
||||||
|
|
||||||
#define IDCT \
|
#define IDCT \
|
||||||
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
|
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
|
||||||
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
|
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
|
||||||
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
|
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
|
||||||
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
|
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
|
||||||
vector_u16_t shift; \
|
vector_u16_t shift; \
|
||||||
\
|
\
|
||||||
c4 = vec_splat (constants[0], 0); \
|
c4 = vec_splat (constants[0], 0); \
|
||||||
a0 = vec_splat (constants[0], 1); \
|
a0 = vec_splat (constants[0], 1); \
|
||||||
a1 = vec_splat (constants[0], 2); \
|
a1 = vec_splat (constants[0], 2); \
|
||||||
a2 = vec_splat (constants[0], 3); \
|
a2 = vec_splat (constants[0], 3); \
|
||||||
mc4 = vec_splat (constants[0], 4); \
|
mc4 = vec_splat (constants[0], 4); \
|
||||||
ma2 = vec_splat (constants[0], 5); \
|
ma2 = vec_splat (constants[0], 5); \
|
||||||
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
|
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
|
||||||
\
|
\
|
||||||
zero = vec_splat_s16 (0); \
|
zero = vec_splat_s16 (0); \
|
||||||
shift = vec_splat_u16 (4); \
|
shift = vec_splat_u16 (4); \
|
||||||
\
|
\
|
||||||
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
|
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
|
||||||
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
|
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
|
||||||
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
|
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
|
||||||
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
|
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
|
||||||
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
|
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
|
||||||
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
|
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
|
||||||
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
|
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
|
||||||
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
|
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
|
||||||
\
|
\
|
||||||
IDCT_HALF \
|
IDCT_HALF \
|
||||||
\
|
\
|
||||||
vx0 = vec_mergeh (vy0, vy4); \
|
vx0 = vec_mergeh (vy0, vy4); \
|
||||||
vx1 = vec_mergel (vy0, vy4); \
|
vx1 = vec_mergel (vy0, vy4); \
|
||||||
vx2 = vec_mergeh (vy1, vy5); \
|
vx2 = vec_mergeh (vy1, vy5); \
|
||||||
vx3 = vec_mergel (vy1, vy5); \
|
vx3 = vec_mergel (vy1, vy5); \
|
||||||
vx4 = vec_mergeh (vy2, vy6); \
|
vx4 = vec_mergeh (vy2, vy6); \
|
||||||
vx5 = vec_mergel (vy2, vy6); \
|
vx5 = vec_mergel (vy2, vy6); \
|
||||||
vx6 = vec_mergeh (vy3, vy7); \
|
vx6 = vec_mergeh (vy3, vy7); \
|
||||||
vx7 = vec_mergel (vy3, vy7); \
|
vx7 = vec_mergel (vy3, vy7); \
|
||||||
\
|
\
|
||||||
vy0 = vec_mergeh (vx0, vx4); \
|
vy0 = vec_mergeh (vx0, vx4); \
|
||||||
vy1 = vec_mergel (vx0, vx4); \
|
vy1 = vec_mergel (vx0, vx4); \
|
||||||
vy2 = vec_mergeh (vx1, vx5); \
|
vy2 = vec_mergeh (vx1, vx5); \
|
||||||
vy3 = vec_mergel (vx1, vx5); \
|
vy3 = vec_mergel (vx1, vx5); \
|
||||||
vy4 = vec_mergeh (vx2, vx6); \
|
vy4 = vec_mergeh (vx2, vx6); \
|
||||||
vy5 = vec_mergel (vx2, vx6); \
|
vy5 = vec_mergel (vx2, vx6); \
|
||||||
vy6 = vec_mergeh (vx3, vx7); \
|
vy6 = vec_mergeh (vx3, vx7); \
|
||||||
vy7 = vec_mergel (vx3, vx7); \
|
vy7 = vec_mergel (vx3, vx7); \
|
||||||
\
|
\
|
||||||
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
|
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
|
||||||
vx1 = vec_mergel (vy0, vy4); \
|
vx1 = vec_mergel (vy0, vy4); \
|
||||||
vx2 = vec_mergeh (vy1, vy5); \
|
vx2 = vec_mergeh (vy1, vy5); \
|
||||||
vx3 = vec_mergel (vy1, vy5); \
|
vx3 = vec_mergel (vy1, vy5); \
|
||||||
vx4 = vec_mergeh (vy2, vy6); \
|
vx4 = vec_mergeh (vy2, vy6); \
|
||||||
vx5 = vec_mergel (vy2, vy6); \
|
vx5 = vec_mergel (vy2, vy6); \
|
||||||
vx6 = vec_mergeh (vy3, vy7); \
|
vx6 = vec_mergeh (vy3, vy7); \
|
||||||
vx7 = vec_mergel (vy3, vy7); \
|
vx7 = vec_mergel (vy3, vy7); \
|
||||||
\
|
\
|
||||||
IDCT_HALF \
|
IDCT_HALF \
|
||||||
\
|
\
|
||||||
shift = vec_splat_u16 (6); \
|
shift = vec_splat_u16 (6); \
|
||||||
vx0 = vec_sra (vy0, shift); \
|
vx0 = vec_sra (vy0, shift); \
|
||||||
vx1 = vec_sra (vy1, shift); \
|
vx1 = vec_sra (vy1, shift); \
|
||||||
vx2 = vec_sra (vy2, shift); \
|
vx2 = vec_sra (vy2, shift); \
|
||||||
vx3 = vec_sra (vy3, shift); \
|
vx3 = vec_sra (vy3, shift); \
|
||||||
vx4 = vec_sra (vy4, shift); \
|
vx4 = vec_sra (vy4, shift); \
|
||||||
vx5 = vec_sra (vy5, shift); \
|
vx5 = vec_sra (vy5, shift); \
|
||||||
vx6 = vec_sra (vy6, shift); \
|
vx6 = vec_sra (vy6, shift); \
|
||||||
vx7 = vec_sra (vy7, shift);
|
vx7 = vec_sra (vy7, shift);
|
||||||
|
|
||||||
|
|
||||||
@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
|||||||
#endif
|
#endif
|
||||||
IDCT
|
IDCT
|
||||||
|
|
||||||
#define COPY(dest,src) \
|
#define COPY(dest,src) \
|
||||||
tmp = vec_packsu (src, src); \
|
tmp = vec_packsu (src, src); \
|
||||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||||
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
||||||
|
|
||||||
COPY (dest, vx0) dest += stride;
|
COPY (dest, vx0) dest += stride;
|
||||||
COPY (dest, vx1) dest += stride;
|
COPY (dest, vx1) dest += stride;
|
||||||
COPY (dest, vx2) dest += stride;
|
COPY (dest, vx2) dest += stride;
|
||||||
COPY (dest, vx3) dest += stride;
|
COPY (dest, vx3) dest += stride;
|
||||||
COPY (dest, vx4) dest += stride;
|
COPY (dest, vx4) dest += stride;
|
||||||
COPY (dest, vx5) dest += stride;
|
COPY (dest, vx5) dest += stride;
|
||||||
COPY (dest, vx6) dest += stride;
|
COPY (dest, vx6) dest += stride;
|
||||||
COPY (dest, vx7)
|
COPY (dest, vx7)
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
||||||
@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
|||||||
perm0 = vec_mergeh (p, p0);
|
perm0 = vec_mergeh (p, p0);
|
||||||
perm1 = vec_mergeh (p, p1);
|
perm1 = vec_mergeh (p, p1);
|
||||||
|
|
||||||
#define ADD(dest,src,perm) \
|
#define ADD(dest,src,perm) \
|
||||||
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
|
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
|
||||||
tmp = vec_ld (0, dest); \
|
tmp = vec_ld (0, dest); \
|
||||||
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
|
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
|
||||||
tmp3 = vec_adds (tmp2, src); \
|
tmp3 = vec_adds (tmp2, src); \
|
||||||
tmp = vec_packsu (tmp3, tmp3); \
|
tmp = vec_packsu (tmp3, tmp3); \
|
||||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||||
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
||||||
|
|
||||||
ADD (dest, vx0, perm0) dest += stride;
|
ADD (dest, vx0, perm0) dest += stride;
|
||||||
ADD (dest, vx1, perm1) dest += stride;
|
ADD (dest, vx1, perm1) dest += stride;
|
||||||
ADD (dest, vx2, perm0) dest += stride;
|
ADD (dest, vx2, perm0) dest += stride;
|
||||||
ADD (dest, vx3, perm1) dest += stride;
|
ADD (dest, vx3, perm1) dest += stride;
|
||||||
ADD (dest, vx4, perm0) dest += stride;
|
ADD (dest, vx4, perm0) dest += stride;
|
||||||
ADD (dest, vx5, perm1) dest += stride;
|
ADD (dest, vx5, perm1) dest += stride;
|
||||||
ADD (dest, vx6, perm0) dest += stride;
|
ADD (dest, vx6, perm0) dest += stride;
|
||||||
ADD (dest, vx7, perm1)
|
ADD (dest, vx7, perm1)
|
||||||
|
|
||||||
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
||||||
|
@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The following block could exist as a separate an altivec dct
|
// The following block could exist as a separate an altivec dct
|
||||||
// function. However, if we put it inline, the DCT data can remain
|
// function. However, if we put it inline, the DCT data can remain
|
||||||
// in the vector local variables, as floats, which we'll use during the
|
// in the vector local variables, as floats, which we'll use during the
|
||||||
// quantize step...
|
// quantize step...
|
||||||
{
|
{
|
||||||
const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
|
const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
|
||||||
const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
|
const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
|
||||||
@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s,
|
|||||||
z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
|
z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
|
||||||
|
|
||||||
// dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
// dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||||
// CONST_BITS-PASS1_BITS);
|
// CONST_BITS-PASS1_BITS);
|
||||||
row2 = vec_madd(tmp13, vec_0_765366865, z1);
|
row2 = vec_madd(tmp13, vec_0_765366865, z1);
|
||||||
|
|
||||||
// dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
// dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||||
// CONST_BITS-PASS1_BITS);
|
// CONST_BITS-PASS1_BITS);
|
||||||
row6 = vec_madd(tmp12, vec_1_847759065, z1);
|
row6 = vec_madd(tmp12, vec_1_847759065, z1);
|
||||||
|
|
||||||
z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
|
z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
|
||||||
@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Load the bias vector (We add 0.5 to the bias so that we're
|
// Load the bias vector (We add 0.5 to the bias so that we're
|
||||||
// rounding when we convert to int, instead of flooring.)
|
// rounding when we convert to int, instead of flooring.)
|
||||||
{
|
{
|
||||||
vector signed int biasInt;
|
vector signed int biasInt;
|
||||||
const vector float negOneFloat = (vector float)FOUROF(-1.0f);
|
const vector float negOneFloat = (vector float)FOUROF(-1.0f);
|
||||||
|
@ -80,7 +80,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
|
|||||||
"pextlb $10, $0, $10 \n\t"
|
"pextlb $10, $0, $10 \n\t"
|
||||||
"sq $10, 80(%1) \n\t"
|
"sq $10, 80(%1) \n\t"
|
||||||
"pextlb $8, $0, $8 \n\t"
|
"pextlb $8, $0, $8 \n\t"
|
||||||
"sq $8, 96(%1) \n\t"
|
"sq $8, 96(%1) \n\t"
|
||||||
"pextlb $9, $0, $9 \n\t"
|
"pextlb $9, $0, $9 \n\t"
|
||||||
"sq $9, 112(%1) \n\t"
|
"sq $9, 112(%1) \n\t"
|
||||||
".set pop \n\t"
|
".set pop \n\t"
|
||||||
@ -112,7 +112,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
|
|||||||
asm volatile (
|
asm volatile (
|
||||||
".set push \n\t"
|
".set push \n\t"
|
||||||
".set mips3 \n\t"
|
".set mips3 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"ldr $8, 0(%1) \n\t"
|
"ldr $8, 0(%1) \n\t"
|
||||||
"add $11, %1, %3 \n\t"
|
"add $11, %1, %3 \n\t"
|
||||||
"ldl $8, 7(%1) \n\t"
|
"ldl $8, 7(%1) \n\t"
|
||||||
@ -133,7 +133,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
|
|||||||
"bgtz %2, 1b \n\t"
|
"bgtz %2, 1b \n\t"
|
||||||
".set pop \n\t"
|
".set pop \n\t"
|
||||||
: "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
|
: "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
|
||||||
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
|
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,32 +15,32 @@
|
|||||||
#include "../dsputil.h"
|
#include "../dsputil.h"
|
||||||
#include "mmi.h"
|
#include "mmi.h"
|
||||||
|
|
||||||
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
|
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
|
||||||
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
|
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
|
||||||
#define SHIFT_INV_COL (1 + BITS_INV_ACC)
|
#define SHIFT_INV_COL (1 + BITS_INV_ACC)
|
||||||
|
|
||||||
#define TG1 6518
|
#define TG1 6518
|
||||||
#define TG2 13573
|
#define TG2 13573
|
||||||
#define TG3 21895
|
#define TG3 21895
|
||||||
#define CS4 23170
|
#define CS4 23170
|
||||||
|
|
||||||
#define ROUNDER_0 0
|
#define ROUNDER_0 0
|
||||||
#define ROUNDER_1 16
|
#define ROUNDER_1 16
|
||||||
|
|
||||||
#define TAB_i_04 (32+0)
|
#define TAB_i_04 (32+0)
|
||||||
#define TAB_i_17 (32+64)
|
#define TAB_i_17 (32+64)
|
||||||
#define TAB_i_26 (32+128)
|
#define TAB_i_26 (32+128)
|
||||||
#define TAB_i_35 (32+192)
|
#define TAB_i_35 (32+192)
|
||||||
|
|
||||||
#define TG_1_16 (32+256+0)
|
#define TG_1_16 (32+256+0)
|
||||||
#define TG_2_16 (32+256+16)
|
#define TG_2_16 (32+256+16)
|
||||||
#define TG_3_16 (32+256+32)
|
#define TG_3_16 (32+256+32)
|
||||||
#define COS_4_16 (32+256+48)
|
#define COS_4_16 (32+256+48)
|
||||||
|
|
||||||
#define CLIPMAX (32+256+64+0)
|
#define CLIPMAX (32+256+64+0)
|
||||||
|
|
||||||
static short consttable[] align16 = {
|
static short consttable[] align16 = {
|
||||||
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
|
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
|
||||||
0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
|
0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
|
||||||
/* rounder 1*/
|
/* rounder 1*/
|
||||||
0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
|
0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
|
||||||
@ -75,274 +75,274 @@ static short consttable[] align16 = {
|
|||||||
|
|
||||||
|
|
||||||
#define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
|
#define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
|
||||||
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
|
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
|
||||||
/*slot*/ \
|
/*slot*/ \
|
||||||
lq($24, 0+taboff, $17); /* r17 = w */ \
|
lq($24, 0+taboff, $17); /* r17 = w */ \
|
||||||
/*delay slot $16*/ \
|
/*delay slot $16*/ \
|
||||||
lq($24, 16+taboff, $18);/* r18 = w */ \
|
lq($24, 16+taboff, $18);/* r18 = w */ \
|
||||||
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
|
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
|
||||||
lq($24, 32+taboff, $19);/* r19 = w */ \
|
lq($24, 32+taboff, $19);/* r19 = w */ \
|
||||||
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
|
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
|
||||||
lq($24, 48+taboff, $20);/* r20 = w */ \
|
lq($24, 48+taboff, $20);/* r20 = w */ \
|
||||||
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
|
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
|
||||||
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
|
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
|
||||||
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
|
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
|
||||||
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
|
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
|
||||||
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
|
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
|
||||||
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
|
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
|
||||||
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
|
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
|
||||||
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
|
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
|
||||||
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
|
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
|
||||||
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
|
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
|
||||||
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
|
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
|
||||||
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
|
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
|
||||||
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
|
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
|
||||||
\
|
\
|
||||||
prevh(outreg, $2); \
|
prevh(outreg, $2); \
|
||||||
pcpyud($2, $2, $2); \
|
pcpyud($2, $2, $2); \
|
||||||
pcpyld($2, outreg, outreg); \
|
pcpyld($2, outreg, outreg); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define DCT_8_INV_COL8() \
|
#define DCT_8_INV_COL8() \
|
||||||
\
|
\
|
||||||
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
|
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
|
||||||
\
|
\
|
||||||
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
|
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
|
||||||
psraw($17, 15, $17); \
|
psraw($17, 15, $17); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||||
psubh($17, $13, $17); /* r17 = tm35 */ \
|
psubh($17, $13, $17); /* r17 = tm35 */ \
|
||||||
\
|
\
|
||||||
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
|
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
|
||||||
psraw($18, 15, $18); \
|
psraw($18, 15, $18); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
|
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
|
||||||
paddh($18, $11, $18); /* r18 = tp35 */ \
|
paddh($18, $11, $18); /* r18 = tp35 */ \
|
||||||
\
|
\
|
||||||
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
|
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
|
||||||
\
|
\
|
||||||
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
|
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
|
||||||
psraw($19, 15, $19); \
|
psraw($19, 15, $19); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
|
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
|
||||||
paddh($19, $9, $19); /* r19 = tp17 */ \
|
paddh($19, $9, $19); /* r19 = tp17 */ \
|
||||||
\
|
\
|
||||||
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
|
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
|
||||||
psraw($20, 15, $20); \
|
psraw($20, 15, $20); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
|
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
|
||||||
psubh($20, $15, $20); /* r20 = tm17 */ \
|
psubh($20, $15, $20); /* r20 = tm17 */ \
|
||||||
\
|
\
|
||||||
psubh($19, $18, $3); /* r3 = t1 */ \
|
psubh($19, $18, $3); /* r3 = t1 */ \
|
||||||
paddh($20, $17, $16); /* r16 = t2 */ \
|
paddh($20, $17, $16); /* r16 = t2 */ \
|
||||||
psubh($20, $17, $23); /* r23 = b3 */ \
|
psubh($20, $17, $23); /* r23 = b3 */ \
|
||||||
paddh($19, $18, $20); /* r20 = b0 */ \
|
paddh($19, $18, $20); /* r20 = b0 */ \
|
||||||
\
|
\
|
||||||
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
|
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
|
||||||
\
|
\
|
||||||
paddh($3, $16, $21); /* r21 = t1+t2 */ \
|
paddh($3, $16, $21); /* r21 = t1+t2 */ \
|
||||||
psubh($3, $16, $22); /* r22 = t1-t2 */ \
|
psubh($3, $16, $22); /* r22 = t1-t2 */ \
|
||||||
\
|
\
|
||||||
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
|
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
|
||||||
psraw($21, 15, $21); \
|
psraw($21, 15, $21); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $21, $21); /* r21 = b1 */ \
|
pinteh($3, $21, $21); /* r21 = b1 */ \
|
||||||
\
|
\
|
||||||
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
|
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
|
||||||
psraw($22, 15, $22); \
|
psraw($22, 15, $22); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $22, $22); /* r22 = b2 */ \
|
pinteh($3, $22, $22); /* r22 = b2 */ \
|
||||||
\
|
\
|
||||||
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
|
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
|
||||||
\
|
\
|
||||||
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
|
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
|
||||||
psraw($17, 15, $17); \
|
psraw($17, 15, $17); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||||
psubh($17, $14, $17); /* r17 = tm26 */ \
|
psubh($17, $14, $17); /* r17 = tm26 */ \
|
||||||
\
|
\
|
||||||
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
|
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
|
||||||
psraw($18, 15, $18); \
|
psraw($18, 15, $18); \
|
||||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||||
psraw($3, 15, $3); \
|
psraw($3, 15, $3); \
|
||||||
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
|
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
|
||||||
paddh($18, $10, $18); /* r18 = tp26 */ \
|
paddh($18, $10, $18); /* r18 = tp26 */ \
|
||||||
\
|
\
|
||||||
paddh($8, $12, $2); /* r2 = tp04 */ \
|
paddh($8, $12, $2); /* r2 = tp04 */ \
|
||||||
psubh($8, $12, $3); /* r3 = tm04 */ \
|
psubh($8, $12, $3); /* r3 = tm04 */ \
|
||||||
\
|
\
|
||||||
paddh($2, $18, $16); /* r16 = a0 */ \
|
paddh($2, $18, $16); /* r16 = a0 */ \
|
||||||
psubh($2, $18, $19); /* r19 = a3 */ \
|
psubh($2, $18, $19); /* r19 = a3 */ \
|
||||||
psubh($3, $17, $18); /* r18 = a2 */ \
|
psubh($3, $17, $18); /* r18 = a2 */ \
|
||||||
paddh($3, $17, $17); /* r17 = a1 */
|
paddh($3, $17, $17); /* r17 = a1 */
|
||||||
|
|
||||||
|
|
||||||
#define DCT_8_INV_COL8_STORE(blk) \
|
#define DCT_8_INV_COL8_STORE(blk) \
|
||||||
\
|
\
|
||||||
paddh($16, $20, $2); /* y0 a0+b0 */ \
|
paddh($16, $20, $2); /* y0 a0+b0 */ \
|
||||||
psubh($16, $20, $16); /* y7 a0-b0 */ \
|
psubh($16, $20, $16); /* y7 a0-b0 */ \
|
||||||
psrah($2, SHIFT_INV_COL, $2); \
|
psrah($2, SHIFT_INV_COL, $2); \
|
||||||
psrah($16, SHIFT_INV_COL, $16); \
|
psrah($16, SHIFT_INV_COL, $16); \
|
||||||
sq($2, 0, blk); \
|
sq($2, 0, blk); \
|
||||||
sq($16, 112, blk); \
|
sq($16, 112, blk); \
|
||||||
\
|
\
|
||||||
paddh($17, $21, $3); /* y1 a1+b1 */ \
|
paddh($17, $21, $3); /* y1 a1+b1 */ \
|
||||||
psubh($17, $21, $17); /* y6 a1-b1 */ \
|
psubh($17, $21, $17); /* y6 a1-b1 */ \
|
||||||
psrah($3, SHIFT_INV_COL, $3); \
|
psrah($3, SHIFT_INV_COL, $3); \
|
||||||
psrah($17, SHIFT_INV_COL, $17); \
|
psrah($17, SHIFT_INV_COL, $17); \
|
||||||
sq($3, 16, blk); \
|
sq($3, 16, blk); \
|
||||||
sq($17, 96, blk); \
|
sq($17, 96, blk); \
|
||||||
\
|
\
|
||||||
paddh($18, $22, $2); /* y2 a2+b2 */ \
|
paddh($18, $22, $2); /* y2 a2+b2 */ \
|
||||||
psubh($18, $22, $18); /* y5 a2-b2 */ \
|
psubh($18, $22, $18); /* y5 a2-b2 */ \
|
||||||
psrah($2, SHIFT_INV_COL, $2); \
|
psrah($2, SHIFT_INV_COL, $2); \
|
||||||
psrah($18, SHIFT_INV_COL, $18); \
|
psrah($18, SHIFT_INV_COL, $18); \
|
||||||
sq($2, 32, blk); \
|
sq($2, 32, blk); \
|
||||||
sq($18, 80, blk); \
|
sq($18, 80, blk); \
|
||||||
\
|
\
|
||||||
paddh($19, $23, $3); /* y3 a3+b3 */ \
|
paddh($19, $23, $3); /* y3 a3+b3 */ \
|
||||||
psubh($19, $23, $19); /* y4 a3-b3 */ \
|
psubh($19, $23, $19); /* y4 a3-b3 */ \
|
||||||
psrah($3, SHIFT_INV_COL, $3); \
|
psrah($3, SHIFT_INV_COL, $3); \
|
||||||
psrah($19, SHIFT_INV_COL, $19); \
|
psrah($19, SHIFT_INV_COL, $19); \
|
||||||
sq($3, 48, blk); \
|
sq($3, 48, blk); \
|
||||||
sq($19, 64, blk);
|
sq($19, 64, blk);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define DCT_8_INV_COL8_PMS() \
|
#define DCT_8_INV_COL8_PMS() \
|
||||||
paddh($16, $20, $2); /* y0 a0+b0 */ \
|
paddh($16, $20, $2); /* y0 a0+b0 */ \
|
||||||
psubh($16, $20, $20); /* y7 a0-b0 */ \
|
psubh($16, $20, $20); /* y7 a0-b0 */ \
|
||||||
psrah($2, SHIFT_INV_COL, $16); \
|
psrah($2, SHIFT_INV_COL, $16); \
|
||||||
psrah($20, SHIFT_INV_COL, $20); \
|
psrah($20, SHIFT_INV_COL, $20); \
|
||||||
\
|
\
|
||||||
paddh($17, $21, $3); /* y1 a1+b1 */ \
|
paddh($17, $21, $3); /* y1 a1+b1 */ \
|
||||||
psubh($17, $21, $21); /* y6 a1-b1 */ \
|
psubh($17, $21, $21); /* y6 a1-b1 */ \
|
||||||
psrah($3, SHIFT_INV_COL, $17); \
|
psrah($3, SHIFT_INV_COL, $17); \
|
||||||
psrah($21, SHIFT_INV_COL, $21); \
|
psrah($21, SHIFT_INV_COL, $21); \
|
||||||
\
|
\
|
||||||
paddh($18, $22, $2); /* y2 a2+b2 */ \
|
paddh($18, $22, $2); /* y2 a2+b2 */ \
|
||||||
psubh($18, $22, $22); /* y5 a2-b2 */ \
|
psubh($18, $22, $22); /* y5 a2-b2 */ \
|
||||||
psrah($2, SHIFT_INV_COL, $18); \
|
psrah($2, SHIFT_INV_COL, $18); \
|
||||||
psrah($22, SHIFT_INV_COL, $22); \
|
psrah($22, SHIFT_INV_COL, $22); \
|
||||||
\
|
\
|
||||||
paddh($19, $23, $3); /* y3 a3+b3 */ \
|
paddh($19, $23, $3); /* y3 a3+b3 */ \
|
||||||
psubh($19, $23, $23); /* y4 a3-b3 */ \
|
psubh($19, $23, $23); /* y4 a3-b3 */ \
|
||||||
psrah($3, SHIFT_INV_COL, $19); \
|
psrah($3, SHIFT_INV_COL, $19); \
|
||||||
psrah($23, SHIFT_INV_COL, $23);
|
psrah($23, SHIFT_INV_COL, $23);
|
||||||
|
|
||||||
#define PUT(rs) \
|
#define PUT(rs) \
|
||||||
pminh(rs, $11, $2); \
|
pminh(rs, $11, $2); \
|
||||||
pmaxh($2, $0, $2); \
|
pmaxh($2, $0, $2); \
|
||||||
ppacb($0, $2, $2); \
|
ppacb($0, $2, $2); \
|
||||||
sd3(2, 0, 4); \
|
sd3(2, 0, 4); \
|
||||||
__asm__ __volatile__ ("add $4, $5, $4");
|
__asm__ __volatile__ ("add $4, $5, $4");
|
||||||
|
|
||||||
#define DCT_8_INV_COL8_PUT() \
|
#define DCT_8_INV_COL8_PUT() \
|
||||||
PUT($16); \
|
PUT($16); \
|
||||||
PUT($17); \
|
PUT($17); \
|
||||||
PUT($18); \
|
PUT($18); \
|
||||||
PUT($19); \
|
PUT($19); \
|
||||||
PUT($23); \
|
PUT($23); \
|
||||||
PUT($22); \
|
PUT($22); \
|
||||||
PUT($21); \
|
PUT($21); \
|
||||||
PUT($20);
|
PUT($20);
|
||||||
|
|
||||||
#define ADD(rs) \
|
#define ADD(rs) \
|
||||||
ld3(4, 0, 2); \
|
ld3(4, 0, 2); \
|
||||||
pextlb($0, $2, $2); \
|
pextlb($0, $2, $2); \
|
||||||
paddh($2, rs, $2); \
|
paddh($2, rs, $2); \
|
||||||
pminh($2, $11, $2); \
|
pminh($2, $11, $2); \
|
||||||
pmaxh($2, $0, $2); \
|
pmaxh($2, $0, $2); \
|
||||||
ppacb($0, $2, $2); \
|
ppacb($0, $2, $2); \
|
||||||
sd3(2, 0, 4); \
|
sd3(2, 0, 4); \
|
||||||
__asm__ __volatile__ ("add $4, $5, $4");
|
__asm__ __volatile__ ("add $4, $5, $4");
|
||||||
|
|
||||||
/*fixme: schedule*/
|
/*fixme: schedule*/
|
||||||
#define DCT_8_INV_COL8_ADD() \
|
#define DCT_8_INV_COL8_ADD() \
|
||||||
ADD($16); \
|
ADD($16); \
|
||||||
ADD($17); \
|
ADD($17); \
|
||||||
ADD($18); \
|
ADD($18); \
|
||||||
ADD($19); \
|
ADD($19); \
|
||||||
ADD($23); \
|
ADD($23); \
|
||||||
ADD($22); \
|
ADD($22); \
|
||||||
ADD($21); \
|
ADD($21); \
|
||||||
ADD($20);
|
ADD($20);
|
||||||
|
|
||||||
|
|
||||||
void ff_mmi_idct(int16_t * block)
|
void ff_mmi_idct(int16_t * block)
|
||||||
{
|
{
|
||||||
/* $4 = block */
|
/* $4 = block */
|
||||||
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
||||||
lq($24, ROUNDER_0, $8);
|
lq($24, ROUNDER_0, $8);
|
||||||
lq($24, ROUNDER_1, $7);
|
lq($24, ROUNDER_1, $7);
|
||||||
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
|
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
|
||||||
DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
|
DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
|
||||||
DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
|
DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
|
||||||
DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
|
DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
|
||||||
DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
|
DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
|
||||||
DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
|
DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
|
||||||
DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
|
DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
|
||||||
DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
|
DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
|
||||||
DCT_8_INV_COL8();
|
DCT_8_INV_COL8();
|
||||||
DCT_8_INV_COL8_STORE($4);
|
DCT_8_INV_COL8_STORE($4);
|
||||||
|
|
||||||
//let savedtemp regs be saved
|
//let savedtemp regs be saved
|
||||||
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
|
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
/* $4 = dest, $5 = line_size, $6 = block */
|
/* $4 = dest, $5 = line_size, $6 = block */
|
||||||
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
||||||
lq($24, ROUNDER_0, $8);
|
lq($24, ROUNDER_0, $8);
|
||||||
lq($24, ROUNDER_1, $7);
|
lq($24, ROUNDER_1, $7);
|
||||||
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
|
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
|
||||||
DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
|
DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
|
||||||
DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
|
DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
|
||||||
DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
|
DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
|
||||||
DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
|
DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
|
||||||
DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
|
DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
|
||||||
DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
|
DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
|
||||||
DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
|
DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
|
||||||
DCT_8_INV_COL8();
|
DCT_8_INV_COL8();
|
||||||
lq($24, CLIPMAX, $11);
|
lq($24, CLIPMAX, $11);
|
||||||
DCT_8_INV_COL8_PMS();
|
DCT_8_INV_COL8_PMS();
|
||||||
DCT_8_INV_COL8_PUT();
|
DCT_8_INV_COL8_PUT();
|
||||||
|
|
||||||
//let savedtemp regs be saved
|
//let savedtemp regs be saved
|
||||||
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
|
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
/* $4 = dest, $5 = line_size, $6 = block */
|
/* $4 = dest, $5 = line_size, $6 = block */
|
||||||
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
|
||||||
lq($24, ROUNDER_0, $8);
|
lq($24, ROUNDER_0, $8);
|
||||||
lq($24, ROUNDER_1, $7);
|
lq($24, ROUNDER_1, $7);
|
||||||
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
|
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
|
||||||
DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
|
DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
|
||||||
DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
|
DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
|
||||||
DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
|
DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
|
||||||
DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
|
DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
|
||||||
DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
|
DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
|
||||||
DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
|
DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
|
||||||
DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
|
DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
|
||||||
DCT_8_INV_COL8();
|
DCT_8_INV_COL8();
|
||||||
lq($24, CLIPMAX, $11);
|
lq($24, CLIPMAX, $11);
|
||||||
DCT_8_INV_COL8_PMS();
|
DCT_8_INV_COL8_PMS();
|
||||||
DCT_8_INV_COL8_ADD();
|
DCT_8_INV_COL8_ADD();
|
||||||
|
|
||||||
//let savedtemp regs be saved
|
//let savedtemp regs be saved
|
||||||
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,148 +5,148 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
#define r0 $zero
|
#define r0 $zero
|
||||||
#define r1 $at //assembler!
|
#define r1 $at //assembler!
|
||||||
#define r2 $v0 //return
|
#define r2 $v0 //return
|
||||||
#define r3 $v1 //return
|
#define r3 $v1 //return
|
||||||
#define r4 $a0 //arg
|
#define r4 $a0 //arg
|
||||||
#define r5 $a1 //arg
|
#define r5 $a1 //arg
|
||||||
#define r6 $a2 //arg
|
#define r6 $a2 //arg
|
||||||
#define r7 $a3 //arg
|
#define r7 $a3 //arg
|
||||||
#define r8 $t0 //temp
|
#define r8 $t0 //temp
|
||||||
#define r9 $t1 //temp
|
#define r9 $t1 //temp
|
||||||
#define r10 $t2 //temp
|
#define r10 $t2 //temp
|
||||||
#define r11 $t3 //temp
|
#define r11 $t3 //temp
|
||||||
#define r12 $t4 //temp
|
#define r12 $t4 //temp
|
||||||
#define r13 $t5 //temp
|
#define r13 $t5 //temp
|
||||||
#define r14 $t6 //temp
|
#define r14 $t6 //temp
|
||||||
#define r15 $t7 //temp
|
#define r15 $t7 //temp
|
||||||
#define r16 $s0 //saved temp
|
#define r16 $s0 //saved temp
|
||||||
#define r17 $s1 //saved temp
|
#define r17 $s1 //saved temp
|
||||||
#define r18 $s2 //saved temp
|
#define r18 $s2 //saved temp
|
||||||
#define r19 $s3 //saved temp
|
#define r19 $s3 //saved temp
|
||||||
#define r20 $s4 //saved temp
|
#define r20 $s4 //saved temp
|
||||||
#define r21 $s5 //saved temp
|
#define r21 $s5 //saved temp
|
||||||
#define r22 $s6 //saved temp
|
#define r22 $s6 //saved temp
|
||||||
#define r23 $s7 //saved temp
|
#define r23 $s7 //saved temp
|
||||||
#define r24 $t8 //temp
|
#define r24 $t8 //temp
|
||||||
#define r25 $t9 //temp
|
#define r25 $t9 //temp
|
||||||
#define r26 $k0 //kernel
|
#define r26 $k0 //kernel
|
||||||
#define r27 $k1 //kernel
|
#define r27 $k1 //kernel
|
||||||
#define r28 $gp //global ptr
|
#define r28 $gp //global ptr
|
||||||
#define r29 $sp //stack ptr
|
#define r29 $sp //stack ptr
|
||||||
#define r30 $fp //frame ptr
|
#define r30 $fp //frame ptr
|
||||||
#define r31 $ra //return addr
|
#define r31 $ra //return addr
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#define lq(base, off, reg) \
|
#define lq(base, off, reg) \
|
||||||
__asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) )
|
__asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) )
|
||||||
|
|
||||||
#define lq2(mem, reg) \
|
#define lq2(mem, reg) \
|
||||||
__asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem))
|
__asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem))
|
||||||
|
|
||||||
#define sq(reg, off, base) \
|
#define sq(reg, off, base) \
|
||||||
__asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) )
|
__asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) )
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#define ld(base, off, reg) \
|
#define ld(base, off, reg) \
|
||||||
__asm__ __volatile__ ("ld " #reg ", " #off "("#base ")")
|
__asm__ __volatile__ ("ld " #reg ", " #off "("#base ")")
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define ld3(base, off, reg) \
|
#define ld3(base, off, reg) \
|
||||||
__asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
|
__asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
|
||||||
|
|
||||||
#define ldr3(base, off, reg) \
|
#define ldr3(base, off, reg) \
|
||||||
__asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
|
__asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
|
||||||
|
|
||||||
#define ldl3(base, off, reg) \
|
#define ldl3(base, off, reg) \
|
||||||
__asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
|
__asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#define sd(reg, off, base) \
|
#define sd(reg, off, base) \
|
||||||
__asm__ __volatile__ ("sd " #reg ", " #off "("#base ")")
|
__asm__ __volatile__ ("sd " #reg ", " #off "("#base ")")
|
||||||
*/
|
*/
|
||||||
//seems assembler has bug encoding mnemonic 'sd', so DIY
|
//seems assembler has bug encoding mnemonic 'sd', so DIY
|
||||||
#define sd3(reg, off, base) \
|
#define sd3(reg, off, base) \
|
||||||
__asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
|
__asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
|
||||||
|
|
||||||
#define sw(reg, off, base) \
|
#define sw(reg, off, base) \
|
||||||
__asm__ __volatile__ ("sw " #reg ", " #off "("#base ")")
|
__asm__ __volatile__ ("sw " #reg ", " #off "("#base ")")
|
||||||
|
|
||||||
#define sq2(reg, mem) \
|
#define sq2(reg, mem) \
|
||||||
__asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem)))
|
__asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem)))
|
||||||
|
|
||||||
#define pinth(rs, rt, rd) \
|
#define pinth(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define phmadh(rs, rt, rd) \
|
#define phmadh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pcpyud(rs, rt, rd) \
|
#define pcpyud(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pcpyld(rs, rt, rd) \
|
#define pcpyld(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pcpyh(rt, rd) \
|
#define pcpyh(rt, rd) \
|
||||||
__asm__ __volatile__ ("pcpyh " #rd ", " #rt )
|
__asm__ __volatile__ ("pcpyh " #rd ", " #rt )
|
||||||
|
|
||||||
#define paddw(rs, rt, rd) \
|
#define paddw(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pextlw(rs, rt, rd) \
|
#define pextlw(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pextuw(rs, rt, rd) \
|
#define pextuw(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pextlh(rs, rt, rd) \
|
#define pextlh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pextuh(rs, rt, rd) \
|
#define pextuh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define psubw(rs, rt, rd) \
|
#define psubw(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define psraw(rt, sa, rd) \
|
#define psraw(rt, sa, rd) \
|
||||||
__asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )
|
__asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )
|
||||||
|
|
||||||
#define ppach(rs, rt, rd) \
|
#define ppach(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define ppacb(rs, rt, rd) \
|
#define ppacb(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define prevh(rt, rd) \
|
#define prevh(rt, rd) \
|
||||||
__asm__ __volatile__ ("prevh " #rd ", " #rt )
|
__asm__ __volatile__ ("prevh " #rd ", " #rt )
|
||||||
|
|
||||||
#define pmulth(rs, rt, rd) \
|
#define pmulth(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pmaxh(rs, rt, rd) \
|
#define pmaxh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pminh(rs, rt, rd) \
|
#define pminh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define pinteh(rs, rt, rd) \
|
#define pinteh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define paddh(rs, rt, rd) \
|
#define paddh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define psubh(rs, rt, rd) \
|
#define psubh(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#define psrah(rt, sa, rd) \
|
#define psrah(rt, sa, rd) \
|
||||||
__asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )
|
__asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )
|
||||||
|
|
||||||
#define pmfhl_uw(rd) \
|
#define pmfhl_uw(rd) \
|
||||||
__asm__ __volatile__ ("pmfhl.uw " #rd)
|
__asm__ __volatile__ ("pmfhl.uw " #rd)
|
||||||
|
|
||||||
#define pextlb(rs, rt, rd) \
|
#define pextlb(rs, rt, rd) \
|
||||||
__asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt )
|
__asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt )
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
|
|||||||
level = block[0] * s->c_dc_scale;
|
level = block[0] * s->c_dc_scale;
|
||||||
}else {
|
}else {
|
||||||
qadd = 0;
|
qadd = 0;
|
||||||
level = block[0];
|
level = block[0];
|
||||||
}
|
}
|
||||||
nCoeffs= 63; //does not allways use zigzag table
|
nCoeffs= 63; //does not allways use zigzag table
|
||||||
} else {
|
} else {
|
||||||
@ -49,29 +49,29 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"add $14, $0, %3 \n\t"
|
"add $14, $0, %3 \n\t"
|
||||||
"pcpyld $8, %0, %0 \n\t"
|
"pcpyld $8, %0, %0 \n\t"
|
||||||
"pcpyh $8, $8 \n\t" //r8 = qmul
|
"pcpyh $8, $8 \n\t" //r8 = qmul
|
||||||
"pcpyld $9, %1, %1 \n\t"
|
"pcpyld $9, %1, %1 \n\t"
|
||||||
"pcpyh $9, $9 \n\t" //r9 = qadd
|
"pcpyh $9, $9 \n\t" //r9 = qadd
|
||||||
".p2align 2 \n\t"
|
".p2align 2 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"lq $10, 0($14) \n\t" //r10 = level
|
"lq $10, 0($14) \n\t" //r10 = level
|
||||||
"addi $14, $14, 16 \n\t" //block+=8
|
"addi $14, $14, 16 \n\t" //block+=8
|
||||||
"addi %2, %2, -8 \n\t"
|
"addi %2, %2, -8 \n\t"
|
||||||
"pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0
|
"pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0
|
||||||
"pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0
|
"pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0
|
||||||
"por $12, $11, $12 \n\t"
|
"por $12, $11, $12 \n\t"
|
||||||
"pmulth $10, $10, $8 \n\t"
|
"pmulth $10, $10, $8 \n\t"
|
||||||
"paddh $13, $9, $11 \n\t"
|
"paddh $13, $9, $11 \n\t"
|
||||||
"pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd
|
"pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd
|
||||||
"pmfhl.uw $11 \n\t"
|
"pmfhl.uw $11 \n\t"
|
||||||
"pinteh $10, $11, $10 \n\t" //r10 = level * qmul
|
"pinteh $10, $11, $10 \n\t" //r10 = level * qmul
|
||||||
"paddh $10, $10, $13 \n\t"
|
"paddh $10, $10, $13 \n\t"
|
||||||
"pand $10, $10, $12 \n\t"
|
"pand $10, $10, $12 \n\t"
|
||||||
"sq $10, -16($14) \n\t"
|
"sq $10, -16($14) \n\t"
|
||||||
"bgez %2, 1b \n\t"
|
"bgez %2, 1b \n\t"
|
||||||
:: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
|
:: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
|
||||||
|
|
||||||
if(s->mb_intra)
|
if(s->mb_intra)
|
||||||
block[0]= level;
|
block[0]= level;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user