COSMETICS: tabs --> spaces, some prettyprinting

Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk

commit bb270c0896
parent 50827fcf44

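The change itself is purely mechanical: leading tabs in the affected files are replaced by spaces, with some re-indentation of continuation lines. As a rough illustration only (not part of this commit), a whitespace conversion of this kind can be performed by a small filter like the sketch below; the tab stop of 8 is an assumption, and in practice a standard tool such as expand(1) with an explicit -t option does the same job.

/* Minimal sketch of a tab-to-space filter (illustration, not this commit's tool):
 * reads stdin, writes stdout, expanding each tab up to the next tab stop.
 * TABSTOP = 8 is an assumed value; adjust to the project's indentation width. */
#include <stdio.h>

#define TABSTOP 8

int main(void)
{
    int c, col = 0;

    while ((c = getchar()) != EOF) {
        if (c == '\t') {
            /* emit spaces until the next multiple of TABSTOP */
            do {
                putchar(' ');
                col++;
            } while (col % TABSTOP);
        } else {
            putchar(c);
            col = (c == '\n') ? 0 : col + 1;
        }
    }
    return 0;
}
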
@@ -96,7 +96,7 @@ unknown_opt:
if(po->u.func2_arg(opt+1, arg)<0)
goto unknown_opt;
} else {
po->u.func_arg(arg);
}
} else {
parse_arg_file(opt);

@@ -122,8 +122,8 @@ void print_error(const char *filename, int err)
break;
case AVERROR_IO:
fprintf(stderr, "%s: I/O error occured\n"
"Usually that means that input file is truncated and/or corrupted.\n",
filename);
break;
case AVERROR_NOMEM:
fprintf(stderr, "%s: memory allocation error occured\n", filename);

configure (174 changed lines)

@@ -688,26 +688,26 @@ fi
needmdynamicnopic="no"
if test $targetos = Darwin; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
else
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
case "$gcc_version" in
*2.95*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
;;
*[34].*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
esac
fi
fi

@@ -725,62 +725,62 @@ TUNECPU="generic"
POWERPCMODE="32bits"
if test $tune != "generic"; then
case $tune in
601|ppc601|PowerPC601)
CFLAGS="$CFLAGS -mcpu=601"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
fi
TUNECPU=ppc601
;;
603*|ppc603*|PowerPC603*)
CFLAGS="$CFLAGS -mcpu=603"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
fi
TUNECPU=ppc603
;;
604*|ppc604*|PowerPC604*)
CFLAGS="$CFLAGS -mcpu=604"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
fi
TUNECPU=ppc604
;;
G3|g3|75*|ppc75*|PowerPC75*)
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
fi
TUNECPU=ppc750
;;
G4|g4|745*|ppc745*|PowerPC745*)
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
fi
TUNECPU=ppc7450
;;
74*|ppc74*|PowerPC74*)
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
fi
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
fi
TUNECPU=ppc970
POWERPCMODE="64bits"
;;
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
CFLAGS="$CFLAGS -march=$tune"
;;
*)
echo "WARNING: Unknown CPU \"$tune\", ignored."
;;
esac
fi

@@ -876,8 +876,8 @@ if test -z "$cross_prefix" ; then
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint32_t i=0x01234567;
return (*((uint8_t*)(&i))) == 0x67;
}
EOF

@@ -912,8 +912,8 @@ $cc -o $TMPE $TMPC 2>/dev/null || inttypes="no"
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint_fast64_t i=0x01234567;
return 0;
}
EOF

@@ -1152,10 +1152,10 @@ fi

case "`$cc -v 2>&1 | grep version`" in
*gcc*)
CFLAGS="-Wall -Wno-switch $CFLAGS"
;;
*)
;;
esac

if test "$sdl" = "no" ; then

@@ -1163,7 +1163,7 @@ if test "$sdl" = "no" ; then
fi

if test "$debug" = "yes"; then
CFLAGS="-g $CFLAGS"
fi

if test "$optimize" = "small"; then

@@ -1173,10 +1173,10 @@ fi

if test "$optimize" = "yes"; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -O5"
LDFLAGS="$LDFLAGS -O5"
else
CFLAGS="-O3 $CFLAGS"
fi
fi

@@ -1793,9 +1793,9 @@ done

diff $TMPH config.h >/dev/null 2>&1
if test $? -ne 0 ; then
mv -f $TMPH config.h
else
echo "config.h is unchanged"
fi

rm -f $TMPO $TMPC $TMPE $TMPS $TMPH

cws2fws.c (84 changed lines)

@@ -25,37 +25,37 @@ main(int argc, char *argv[])

if (argc < 3)
{
printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
exit(1);
}

fd_in = open(argv[1], O_RDONLY);
if (fd_in < 0)
{
perror("Error while opening: ");
exit(1);
}

fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644);
if (fd_out < 0)
{
perror("Error while opening: ");
close(fd_in);
exit(1);
}

if (read(fd_in, &buf_in, 8) != 8)
{
printf("Header error\n");
close(fd_in);
close(fd_out);
exit(1);
}

if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S')
{
printf("Not a compressed flash file\n");
exit(1);
}

fstat(fd_in, &statbuf);

@@ -75,48 +75,48 @@ main(int argc, char *argv[])

for (i = 0; i < comp_len-4;)
{
int ret, len = read(fd_in, &buf_in, 1024);

dbgprintf("read %d bytes\n", len);

last_out = zstream.total_out;

zstream.next_in = &buf_in[0];
zstream.avail_in = len;
zstream.next_out = &buf_out[0];
zstream.avail_out = 1024;

ret = inflate(&zstream, Z_SYNC_FLUSH);
if (ret == Z_STREAM_END || ret == Z_BUF_ERROR)
break;
if (ret != Z_OK)
{
printf("Error while decompressing: %d\n", ret);
inflateEnd(&zstream);
exit(1);
}

dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n",
zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
zstream.total_out-last_out);

write(fd_out, &buf_out, zstream.total_out-last_out);

i += len;
}

if (zstream.total_out != uncomp_len-8)
{
printf("Size mismatch (%d != %d), updating header...\n",
zstream.total_out, uncomp_len-8);

buf_in[0] = (zstream.total_out+8) & 0xff;
buf_in[1] = (zstream.total_out+8 >> 8) & 0xff;
buf_in[2] = (zstream.total_out+8 >> 16) & 0xff;
buf_in[3] = (zstream.total_out+8 >> 24) & 0xff;

lseek(fd_out, 4, SEEK_SET);
write(fd_out, &buf_in, 4);
}

inflateEnd(&zstream);

doc/texi2pod.pl (286 changed lines)

@@ -39,24 +39,24 @@ $ibase = "";

while ($_ = shift) {
if (/^-D(.*)$/) {
if ($1 ne "") {
$flag = $1;
} else {
$flag = shift;
}
$value = "";
($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/);
die "no flag specified for -D\n"
unless $flag ne "";
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
$defs{$flag} = $value;
} elsif (/^-/) {
usage();
} else {
$in = $_, next unless defined $in;
$out = $_, next unless defined $out;
usage();
}
}

@@ -76,13 +76,13 @@ while(defined $inf) {
while(<$inf>) {
# Certain commands are discarded without further processing.
/^\@(?:
[a-z]+index # @*index: useful only in complete manual
|need # @need: useful only in printed manual
|(?:end\s+)?group # @group .. @end group: ditto
|page # @page: ditto
|node # @node: useful only in .info file
|(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents
)\b/x and next;

chomp;

@@ -92,38 +92,38 @@ while(<$inf>) {

# Identify a man title but keep only the one we are interested in.
/^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
if (exists $defs{$1}) {
$fn = $1;
$tl = postprocess($2);
}
next;
};

# Look for blocks surrounded by @c man begin SECTION ... @c man end.
# This really oughta be @ifman ... @end ifman and the like, but such
# would require rev'ing all other Texinfo translators.
/^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do {
$output = 1 if exists $defs{$2};
$sect = $1;
next;
};
/^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next;
/^\@c\s+man\s+end/ and do {
$sects{$sect} = "" unless exists $sects{$sect};
$sects{$sect} .= postprocess($section);
$section = "";
$output = 0;
next;
};

# handle variables
/^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do {
$defs{$1} = $2;
next;
};
/^\@clear\s+([a-zA-Z0-9_-]+)/ and do {
delete $defs{$1};
next;
};

next unless $output;

@@ -135,55 +135,55 @@ while(<$inf>) {
# End-block handler goes up here because it needs to operate even
# if we are skipping.
/^\@end\s+([a-z]+)/ and do {
# Ignore @end foo, where foo is not an operation which may
# cause us to skip, if we are presently skipping.
my $ended = $1;
next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/;

die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;

$endw = pop @endwstack;

if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) {
$skipping = pop @skstack;
next;
} elsif ($ended =~ /^(?:example|smallexample|display)$/) {
$shift = "";
$_ = ""; # need a paragraph break
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
$_ = "\n=back\n";
$ic = pop @icstack;
} else {
die "unknown command \@end $ended at line $.\n";
}
};

# We must handle commands which can cause skipping even while we
# are skipping, otherwise we will not process nested conditionals
# correctly.
/^\@ifset\s+([a-zA-Z0-9_-]+)/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifset";
$skipping = 1 unless exists $defs{$1};
next;
};

/^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifclear";
$skipping = 1 if exists $defs{$1};
next;
};

/^\@(ignore|menu|iftex)\b/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = $1;
$skipping = 1;
next;
};

next if $skipping;

@@ -210,85 +210,85 @@ while(<$inf>) {

# Inside a verbatim block, handle @var specially.
if ($shift ne "") {
s/\@var\{([^\}]*)\}/<$1>/g;
}

# POD doesn't interpret E<> inside a verbatim block.
if ($shift eq "") {
s/</</g;
s/>/>/g;
} else {
s/</</g;
s/>/>/g;
}

# Single line command handlers.

/^\@include\s+(.+)$/ and do {
push @instack, $inf;
$inf = gensym();

# Try cwd and $ibase.
open($inf, "<" . $1)
or open($inf, "<" . $ibase . "/" . $1)
or die "cannot open $1 or $ibase/$1: $!\n";
next;
};

/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
and $_ = "\n=head2 $1\n";
/^\@subsection\s+(.+)$/
and $_ = "\n=head3 $1\n";

# Block command handlers:
/^\@itemize\s+(\@[a-z]+|\*|-)/ and do {
push @endwstack, $endw;
push @icstack, $ic;
$ic = $1;
$_ = "\n=over 4\n";
$endw = "itemize";
};

/^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do {
push @endwstack, $endw;
push @icstack, $ic;
if (defined $1) {
$ic = $1 . ".";
} else {
$ic = "1.";
}
$_ = "\n=over 4\n";
$endw = "enumerate";
};

/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
push @endwstack, $endw;
push @icstack, $ic;
$endw = $1;
$ic = $2;
$ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/;
$ic =~ s/\@(?:code|kbd)/C/;
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
$ic =~ s/\@(?:file)/F/;
$_ = "\n=over 4\n";
};

/^\@((?:small)?example|display)/ and do {
push @endwstack, $endw;
$endw = $1;
$shift = "\t";
$_ = ""; # need a paragraph break
};

/^\@itemx?\s*(.+)?$/ and do {
if (defined $1) {
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\<$1\>\n";
} else {
$_ = "\n=item $ic\n";
$ic =~ y/A-Ya-y/B-Zb-z/;
$ic =~ s/(\d+)/$1 + 1/eg;
}
};

$section .= $shift.$_."\n";

@@ -304,13 +304,13 @@ $sects{NAME} = "$fn \- $tl\n";
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};

for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
if(exists $sects{$sect}) {
$head = $sect;
$head =~ s/SEEALSO/SEE ALSO/;
print "=head1 $head\n\n";
print scalar unmunge ($sects{$sect});
print "\n";
}
}

@@ -325,13 +325,13 @@ sub postprocess

# @value{foo} is replaced by whatever 'foo' is defined as.
while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) {
if (! exists $defs{$2}) {
print STDERR "Option $2 not defined\n";
s/\Q$1\E//;
} else {
$value = $defs{$2};
s/\Q$1\E/$value/;
}
}

# Formatting commands.

@@ -381,9 +381,9 @@ sub postprocess
# processing because otherwise the regexp will choke on formatting
# inside @footnote.
while (/\@footnote/g) {
s/\@footnote\{([^\}]+)\}/[$fnno]/;
add_footnote($1, $fnno);
$fnno++;
}

return $_;

@@ -406,7 +406,7 @@ sub unmunge
sub add_footnote
{
unless (exists $sects{FOOTNOTES}) {
$sects{FOOTNOTES} = "\n=over 4\n\n";
}

$sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++;

@@ -419,9 +419,9 @@ sub add_footnote
my $genseq = 0;
sub gensym
{
my $name = "GEN" . $genseq++;
my $ref = \*{$name};
delete $::{$name};
return $ref;
}
}

ffmpeg.c (72 changed lines)

@@ -579,7 +579,7 @@ static void do_audio_out(AVFormatContext *s,
break;
}
ret = avcodec_encode_audio(enc, audio_out, size_out,
(short *)buftmp);
audio_size += ret;
pkt.stream_index= ost->index;
pkt.data= audio_out;

@@ -821,10 +821,10 @@ static void do_video_out(AVFormatContext *s,
padcolor);
}

if (enc->pix_fmt != PIX_FMT_YUV420P) {
int size;

av_free(buf);
/* create temporary picture */
size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height);
buf = av_malloc(size);

@@ -842,7 +842,7 @@ static void do_video_out(AVFormatContext *s,

goto the_end;
}
}
} else if (ost->video_crop) {
picture_crop_temp.data[0] = formatted_picture->data[0] +
(ost->topBand * formatted_picture->linesize[0]) + ost->leftBand;

@@ -921,7 +921,7 @@ static void do_video_out(AVFormatContext *s,
avoid any copies. We support temorarily the older
method. */
AVFrame* old_frame = enc->coded_frame;
enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack
pkt.data= (uint8_t *)final_picture;
pkt.size= sizeof(AVPicture);
if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)

@@ -930,7 +930,7 @@ static void do_video_out(AVFormatContext *s,
pkt.flags |= PKT_FLAG_KEY;

av_interleaved_write_frame(s, &pkt);
enc->coded_frame = old_frame;
} else {
AVFrame big_picture;

@@ -1044,8 +1044,8 @@ static void do_video_stats(AVFormatContext *os, AVOutputStream *ost,
}

static void print_report(AVFormatContext **output_files,
AVOutputStream **ost_table, int nb_ostreams,
int is_last_report)
{
char buf[1024];
AVOutputStream *ost;

@@ -1138,9 +1138,9 @@ static void print_report(AVFormatContext **output_files,
"size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s",
(double)total_size / 1024, ti1, bitrate);

if (verbose > 1)
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
nb_frames_dup, nb_frames_drop);

if (verbose >= 0)
fprintf(stderr, "%s \r", buf);

@@ -1323,7 +1323,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
}
#endif
/* if output time reached then transcode raw format,
encode packets and output them */
if (start_time == 0 || ist->pts >= start_time)
for(i=0;i<nb_ostreams;i++) {
int frame_size;

@@ -1898,7 +1898,7 @@ static int av_encode(AVFormatContext **output_files,
/* init pts */
for(i=0;i<nb_istreams;i++) {
ist = ist_table[i];
is = input_files[ist->file_index];
ist->pts = 0;
ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q);
if(ist->st->start_time == AV_NOPTS_VALUE)

@@ -2273,7 +2273,7 @@ static void opt_frame_rate(const char *arg)
{
if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) {
fprintf(stderr, "Incorrect frame rate\n");
exit(1);
}
}

@@ -2289,7 +2289,7 @@ static void opt_frame_crop_top(const char *arg)
exit(1);
}
if ((frame_topBand) >= frame_height){
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_height -= frame_topBand;

@@ -2307,7 +2307,7 @@ static void opt_frame_crop_bottom(const char *arg)
exit(1);
}
if ((frame_bottomBand) >= frame_height){
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_height -= frame_bottomBand;

@@ -2325,7 +2325,7 @@ static void opt_frame_crop_left(const char *arg)
exit(1);
}
if ((frame_leftBand) >= frame_width){
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_width -= frame_leftBand;

@@ -2343,7 +2343,7 @@ static void opt_frame_crop_right(const char *arg)
exit(1);
}
if ((frame_rightBand) >= frame_width){
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_width -= frame_rightBand;

@@ -2364,7 +2364,7 @@ static void opt_frame_size(const char *arg)

#define SCALEBITS 10
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))

#define RGB_TO_Y(r, g, b) \
((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \

@@ -2462,16 +2462,16 @@ static void opt_frame_aspect_ratio(const char *arg)
p = strchr(arg, ':');
if (p) {
x = strtol(arg, (char **)&arg, 10);
if (arg == p)
y = strtol(arg+1, (char **)&arg, 10);
if (x > 0 && y > 0)
ar = (double)x / (double)y;
} else
ar = strtod(arg, (char **)&arg);

if (!ar) {
fprintf(stderr, "Incorrect aspect ratio specification.\n");
exit(1);
}
frame_aspect_ratio = ar;
}

@@ -2957,8 +2957,8 @@ static void opt_input_file(const char *filename)
}
frame_height = enc->height;
frame_width = enc->width;
frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height;
frame_pix_fmt = enc->pix_fmt;
rfps = ic->streams[i]->r_frame_rate.num;
rfps_base = ic->streams[i]->r_frame_rate.den;
enc->workaround_bugs = workaround_bugs;

@@ -3454,7 +3454,7 @@ static void opt_output_file(const char *filename)

oc->timestamp = rec_timestamp;

if (str_title)
pstrcpy(oc->title, sizeof(oc->title), str_title);
if (str_author)
pstrcpy(oc->author, sizeof(oc->author), str_author);

@@ -3490,11 +3490,11 @@ static void opt_output_file(const char *filename)
fprintf(stderr, "Not overwriting - exiting\n");
exit(1);
}
}
else {
fprintf(stderr,"File '%s' already exists. Exiting.\n", filename);
exit(1);
}
}
}
}

@@ -3579,14 +3579,14 @@ static void prepare_grab(void)
fmt1 = av_find_input_format(video_grab_format);
vp->device = video_device;
vp->channel = video_channel;
vp->standard = video_standard;
if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) {
fprintf(stderr, "Could not find video grab device\n");
exit(1);
}
/* If not enough info to get the stream parameters, we decode the
first frames to get it. */
if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) {
fprintf(stderr, "Could not find video grab parameters\n");
exit(1);
}

@@ -4276,11 +4276,11 @@ int main(int argc, char **argv)
for(i=0;i<nb_output_files;i++) {
/* maybe av_close_output_file ??? */
AVFormatContext *s = output_files[i];
int j;
if (!(s->oformat->flags & AVFMT_NOFILE))
url_fclose(&s->pb);
for(j=0;j<s->nb_streams;j++)
av_free(s->streams[j]);
av_free(s);
}
for(i=0;i<nb_input_files;i++)

ffplay.c (46 changed lines)

@@ -1649,7 +1649,7 @@ static int stream_component_open(VideoState *is, int stream_index)

memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
packet_queue_init(&is->audioq);
SDL_PauseAudio(0);
break;
case CODEC_TYPE_VIDEO:
is->video_stream = stream_index;

@@ -1937,11 +1937,11 @@ static int decode_thread(void *arg)
}
ret = av_read_frame(ic, pkt);
if (ret < 0) {
if (url_ferror(&ic->pb) == 0) {
SDL_Delay(100); /* wait for user event */
continue;
} else
break;
}
if (pkt->stream_index == is->audio_stream) {
packet_queue_put(&is->audioq, pkt);

@@ -2224,23 +2224,23 @@ void event_loop(void)
}
break;
case SDL_MOUSEBUTTONDOWN:
if (cur_stream) {
int ns, hh, mm, ss;
int tns, thh, tmm, tss;
tns = cur_stream->ic->duration/1000000LL;
thh = tns/3600;
tmm = (tns%3600)/60;
tss = (tns%60);
frac = (double)event.button.x/(double)cur_stream->width;
ns = frac*tns;
hh = ns/3600;
mm = (ns%3600)/60;
ss = (ns%60);
fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100,
hh, mm, ss, thh, tmm, tss);
stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0);
}
break;
case SDL_VIDEORESIZE:
if (cur_stream) {
screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0,

@@ -2452,7 +2452,7 @@ int main(int argc, char **argv)
if (dpy) {
fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy));
fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy));
XCloseDisplay(dpy);
}
}
#endif

ffserver.c (42 changed lines)

@@ -1204,7 +1204,7 @@ static int http_parse_request(HTTPContext *c)
pstrcpy(c->protocol, sizeof(c->protocol), protocol);

if (ffserver_debug)
http_log("New connection: %s %s\n", cmd, url);

/* find the filename and the optional info string in the request */
p = url;

@@ -2001,7 +2001,7 @@ static int http_prepare_data(HTTPContext *c)
c->fmt_ctx.nb_streams = c->stream->nb_streams;
for(i=0;i<c->fmt_ctx.nb_streams;i++) {
AVStream *st;
AVStream *src;
st = av_mallocz(sizeof(AVStream));
st->codec= avcodec_alloc_context();
c->fmt_ctx.streams[i] = st;

@@ -2012,8 +2012,8 @@ static int http_prepare_data(HTTPContext *c)
else
src = c->stream->feed->streams[c->stream->feed_streams[i]];

*st = *src;
st->priv_data = 0;
st->codec->frame_number = 0; /* XXX: should be done in
AVStream, not in codec */
/* I'm pretty sure that this is not correct...

@@ -2452,8 +2452,8 @@ static int http_receive_data(HTTPContext *c)
s.priv_data = av_mallocz(fmt_in->priv_data_size);
if (!s.priv_data)
goto fail;
} else
s.priv_data = NULL;

if (fmt_in->read_header(&s, 0) < 0) {
av_freep(&s.priv_data);

@@ -3868,20 +3868,20 @@ static int parse_ffconfig(const char *filename)

feed->child_argv[i] = av_malloc(30 + strlen(feed->filename));

snprintf(feed->child_argv[i], 30+strlen(feed->filename),
"http://%s:%d/%s",
(my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" :
inet_ntoa(my_http_addr.sin_addr),
ntohs(my_http_addr.sin_port), feed->filename);

if (ffserver_debug)
{
int j;
fprintf(stdout, "Launch commandline: ");
for (j = 0; j <= i; j++)
fprintf(stdout, "%s ", feed->child_argv[j]);
fprintf(stdout, "\n");
}
}
} else if (!strcasecmp(cmd, "ReadOnlyFile")) {
if (feed) {

@@ -4074,8 +4074,8 @@ static int parse_ffconfig(const char *filename)
if (stream) {
audio_enc.sample_rate = atoi(arg);
}
} else if (!strcasecmp(cmd, "AudioQuality")) {
get_arg(arg, sizeof(arg), &p);
if (stream) {
// audio_enc.quality = atof(arg) * 1000;
}

@@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1};
*/
typedef struct EightBpsContext {

AVCodecContext *avctx;
AVFrame pic;

unsigned char planes;
unsigned char planemap[4];
} EightBpsContext;


@@ -59,87 +59,87 @@ typedef struct EightBpsContext {
*/
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
{
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
unsigned char *encoded = (unsigned char *)buf;
unsigned char *pixptr, *pixptr_end;
unsigned int height = avctx->height; // Real image height
unsigned int dlen, p, row;
unsigned char *lp, *dp;
unsigned char count;
unsigned int px_inc;
unsigned int planes = c->planes;
unsigned char *planemap = c->planemap;

if(c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);

c->pic.reference = 0;
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
if(avctx->get_buffer(avctx, &c->pic) < 0){
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
}

/* Set data pointer after line lengths */
dp = encoded + planes * (height << 1);

/* Ignore alpha plane, don't know what to do with it */
if (planes == 4)
planes--;

px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);

for (p = 0; p < planes; p++) {
/* Lines length pointer for this plane */
lp = encoded + p * (height << 1);

/* Decode a plane */
for(row = 0; row < height; row++) {
pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
pixptr_end = pixptr + c->pic.linesize[0];
dlen = be2me_16(*(unsigned short *)(lp+row*2));
/* Decode a row of this plane */
while(dlen > 0) {
if(dp + 1 >= buf+buf_size) return -1;
if ((count = *dp++) <= 127) {
count++;
dlen -= count + 1;
if (pixptr + count * px_inc > pixptr_end)
break;
if(dp + count > buf+buf_size) return -1;
while(count--) {
*pixptr = *dp++;
pixptr += px_inc;
}
} else {
count = 257 - count;
if (pixptr + count * px_inc > pixptr_end)
break;
while(count--) {
*pixptr = *dp;
pixptr += px_inc;
}
dp++;
dlen -= 2;
}
}
}
}

if (avctx->palctrl) {
memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
if (avctx->palctrl->palette_changed) {
c->pic.palette_has_changed = 1;
avctx->palctrl->palette_changed = 0;
} else
c->pic.palette_has_changed = 0;
}

*data_size = sizeof(AVFrame);
*(AVFrame*)data = c->pic;

/* always report that the buffer was completely consumed */
return buf_size;
}


@@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
*/
static int decode_init(AVCodecContext *avctx)
{
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;

c->avctx = avctx;
avctx->has_b_frames = 0;

c->pic.data[0] = NULL;

if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
return 1;
}

switch (avctx->bits_per_sample) {
case 8:
avctx->pix_fmt = PIX_FMT_PAL8;
c->planes = 1;
c->planemap[0] = 0; // 1st plane is palette indexes
if (avctx->palctrl == NULL) {
av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
return -1;
}
break;
case 24:
avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
c->planes = 3;
c->planemap[0] = 2; // 1st plane is red
c->planemap[1] = 1; // 2nd plane is green
c->planemap[2] = 0; // 3rd plane is blue
break;
case 32:
avctx->pix_fmt = PIX_FMT_RGBA32;
c->planes = 4;
#ifdef WORDS_BIGENDIAN
c->planemap[0] = 1; // 1st plane is red
c->planemap[1] = 2; // 2nd plane is green
c->planemap[2] = 3; // 3rd plane is blue
c->planemap[3] = 0; // 4th plane is alpha???
#else
c->planemap[0] = 2; // 1st plane is red
c->planemap[1] = 1; // 2nd plane is green
c->planemap[2] = 0; // 3rd plane is blue
c->planemap[3] = 3; // 4th plane is alpha???
#endif
break;
default:
av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
return -1;
}

return 0;
}

@@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx)
*/
static int decode_end(AVCodecContext *avctx)
{
EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;

if (c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);

return 0;
}


AVCodec eightbps_decoder = {
"8bps",
CODEC_TYPE_VIDEO,
CODEC_ID_8BPS,
sizeof(EightBpsContext),
decode_init,
NULL,
decode_end,
decode_frame,
CODEC_CAP_DR1,
};

|
@ -58,11 +58,11 @@ typedef struct AC3DecodeState {
|
||||
a52_state_t* (*a52_init)(uint32_t mm_accel);
|
||||
sample_t* (*a52_samples)(a52_state_t * state);
|
||||
int (*a52_syncinfo)(uint8_t * buf, int * flags,
|
||||
int * sample_rate, int * bit_rate);
|
||||
int * sample_rate, int * bit_rate);
|
||||
int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags,
|
||||
sample_t * level, sample_t bias);
|
||||
sample_t * level, sample_t bias);
|
||||
void (*a52_dynrng)(a52_state_t * state,
|
||||
sample_t (* call) (sample_t, void *), void * data);
|
||||
sample_t (* call) (sample_t, void *), void * data);
|
||||
int (*a52_block)(a52_state_t * state);
|
||||
void (*a52_free)(a52_state_t * state);
|
||||
|
||||
@ -105,7 +105,7 @@ static int a52_decode_init(AVCodecContext *avctx)
|
||||
if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo
|
||||
|| !s->a52_frame || !s->a52_block || !s->a52_free)
|
||||
{
|
||||
dlclose(s->handle);
|
||||
dlclose(s->handle);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
@ -130,22 +130,22 @@ static int a52_decode_init(AVCodecContext *avctx)
|
||||
static inline int blah (int32_t i)
|
||||
{
|
||||
if (i > 0x43c07fff)
|
||||
return 32767;
|
||||
return 32767;
|
||||
else if (i < 0x43bf8000)
|
||||
return -32768;
|
||||
return -32768;
|
||||
return i - 0x43c00000;
|
||||
}
|
||||
|
||||
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
|
||||
{
|
||||
int i, j, c;
|
||||
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
||||
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format
|
||||
|
||||
j = 0;
|
||||
nchannels *= 256;
|
||||
for (i = 0; i < 256; i++) {
|
||||
for (c = 0; c < nchannels; c += 256)
|
||||
s16[j++] = blah (f[i + c]);
|
||||
for (c = 0; c < nchannels; c += 256)
|
||||
s16[j++] = blah (f[i + c]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -164,7 +164,7 @@ static int a52_decode_frame(AVCodecContext *avctx,
|
||||
short *out_samples = data;
|
||||
float level;
|
||||
static const int ac3_channels[8] = {
|
||||
2, 1, 2, 3, 3, 4, 4, 5
|
||||
2, 1, 2, 3, 3, 4, 4, 5
|
||||
};
|
||||
|
||||
buf_ptr = buf;
|
||||
@ -186,20 +186,20 @@ static int a52_decode_frame(AVCodecContext *avctx,
|
||||
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
|
||||
s->inbuf_ptr--;
|
||||
} else {
|
||||
s->frame_size = len;
|
||||
s->frame_size = len;
|
||||
/* update codec info */
|
||||
avctx->sample_rate = sample_rate;
|
||||
s->channels = ac3_channels[s->flags & 7];
|
||||
if (s->flags & A52_LFE)
|
||||
s->channels++;
|
||||
if (avctx->channels == 0)
|
||||
/* No specific number of channel requested */
|
||||
avctx->channels = s->channels;
|
||||
else if (s->channels < avctx->channels) {
|
||||
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
||||
avctx->channels = s->channels;
|
||||
}
|
||||
avctx->bit_rate = bit_rate;
|
||||
s->channels++;
|
||||
if (avctx->channels == 0)
|
||||
/* No specific number of channel requested */
|
||||
avctx->channels = s->channels;
|
||||
else if (s->channels < avctx->channels) {
|
||||
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
|
||||
avctx->channels = s->channels;
|
||||
}
|
||||
avctx->bit_rate = bit_rate;
|
||||
}
|
||||
}
|
||||
} else if (len < s->frame_size) {
|
||||
|
@@ -54,23 +54,23 @@ stream_samples_t samples;
static inline int blah (int32_t i)
{
if (i > 0x43c07fff)
return 32767;
else if (i < 0x43bf8000)
return -32768;
else
return i - 0x43c00000;
}

static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
{
int i, j, c;
int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format

j = 0;
nchannels *= 256;
for (i = 0; i < 256; i++) {
for (c = 0; c < nchannels; c += 256)
s16[j++] = blah (f[i + c]);
}
}

@@ -89,7 +89,7 @@ static int ac3_decode_frame(AVCodecContext *avctx,
short *out_samples = data;
float level;
static const int ac3_channels[8] = {
2, 1, 2, 3, 3, 4, 4, 5
};

buf_ptr = buf;

@@ -111,20 +111,20 @@ static int ac3_decode_frame(AVCodecContext *avctx,
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
s->inbuf_ptr--;
} else {
s->frame_size = len;
/* update codec info */
avctx->sample_rate = sample_rate;
s->channels = ac3_channels[s->flags & 7];
if (s->flags & AC3_LFE)
s->channels++;
if (avctx->channels == 0)
/* No specific number of channel requested */
avctx->channels = s->channels;
else if (s->channels < avctx->channels) {
av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
avctx->channels = s->channels;
}
avctx->bit_rate = bit_rate;
}
}
} else if (len < s->frame_size) {

@ -337,8 +337,8 @@ static void fft_init(int ln)
|
||||
/* do a 2^n point complex fft on 2^ln points. */
|
||||
static void fft(IComplex *z, int ln)
|
||||
{
|
||||
int j, l, np, np2;
|
||||
int nblocks, nloops;
|
||||
int j, l, np, np2;
|
||||
int nblocks, nloops;
|
||||
register IComplex *p,*q;
|
||||
int tmp_re, tmp_im;
|
||||
|
||||
@ -472,7 +472,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
|
||||
exp_strategy[i][ch] = EXP_REUSE;
|
||||
}
|
||||
if (is_lfe)
|
||||
return;
|
||||
return;
|
||||
|
||||
/* now select the encoding strategy type : if exponents are often
|
||||
recoded, we use a coarse encoding */
|
||||
@ -493,7 +493,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
|
||||
exp_strategy[i][ch] = EXP_D15;
|
||||
break;
|
||||
}
|
||||
i = j;
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
|
||||
@ -553,9 +553,9 @@ static int encode_exp(uint8_t encoded_exp[N/2],
|
||||
/* Decrease the delta between each groups to within 2
|
||||
* so that they can be differentially encoded */
|
||||
for (i=1;i<=nb_groups;i++)
|
||||
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
|
||||
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
|
||||
for (i=nb_groups-1;i>=0;i--)
|
||||
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
|
||||
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
|
||||
|
||||
/* now we have the exponent values the decoder will see */
|
||||
encoded_exp[0] = exp1[0];
|
||||
@ -708,8 +708,8 @@ static int compute_bit_allocation(AC3EncodeContext *s,
            if(i==0) frame_bits += 4;
        }
        frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */
        if (s->lfe)
            frame_bits++; /* lfeexpstr */
        for(ch=0;ch<s->nb_channels;ch++) {
            if (exp_strategy[i][ch] != EXP_REUSE)
                frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */
@ -736,11 +736,11 @@ static int compute_bit_allocation(AC3EncodeContext *s,

    csnroffst = s->csnroffst;
    while (csnroffst >= 0 &&
           bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
        csnroffst -= SNR_INC1;
    if (csnroffst < 0) {
        av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n");
        return -1;
    }
    while ((csnroffst + SNR_INC1) <= 63 &&
           bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
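The two while loops above search for the largest SNR offset whose bit allocation still fits in the frame: back off in SNR_INC1 steps until it fits, report an error if even zero does not, then climb back up while it keeps fitting. A generic sketch of that pattern, with fits() standing in for the real bit_alloc() feasibility check:

    /* Sketch only: find the largest quality value not exceeding max for
     * which fits(quality) succeeds, stepping by inc.  Returns -1 when
     * nothing fits, mirroring the error path above. */
    static int coarse_search(int start, int max, int inc, int (*fits)(int quality))
    {
        int q = start;
        while (q >= 0 && !fits(q))              /* back off until it fits */
            q -= inc;
        if (q < 0)
            return -1;                          /* caller reports the error */
        while (q + inc <= max && fits(q + inc)) /* climb while it still fits */
            q += inc;
        return q;
    }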
@ -815,19 +815,19 @@ static int AC3_encode_init(AVCodecContext *avctx)
    int i, j, ch;
    float alpha;
    static const uint8_t acmod_defs[6] = {
        0x01, /* C */
        0x02, /* L R */
        0x03, /* L C R */
        0x06, /* L R SL SR */
        0x07, /* L C R SL SR */
        0x07, /* L C R SL SR (+LFE) */
    };

    avctx->frame_size = AC3_FRAME_SIZE;

    /* number of channels */
    if (channels < 1 || channels > 6)
        return -1;
    s->acmod = acmod_defs[channels - 1];
    s->lfe = (channels == 6) ? 1 : 0;
    s->nb_all_channels = channels;
@ -871,7 +871,7 @@ static int AC3_encode_init(AVCodecContext *avctx)
        s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
    }
    if (s->lfe) {
        s->nb_coefs[s->lfe_channel] = 7; /* fixed */
    }
    /* initial snr offset */
    s->csnroffst = 40;
@ -907,9 +907,9 @@ static void output_frame_header(AC3EncodeContext *s, unsigned char *frame)
    put_bits(&s->pb, 3, s->bsmod);
    put_bits(&s->pb, 3, s->acmod);
    if ((s->acmod & 0x01) && s->acmod != 0x01)
        put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
    if (s->acmod & 0x04)
        put_bits(&s->pb, 2, 1); /* XXX -6 dB */
    if (s->acmod == 0x02)
        put_bits(&s->pb, 2, 0); /* surround not indicated */
    put_bits(&s->pb, 1, s->lfe); /* LFE */
@ -995,20 +995,20 @@ static void output_audio_block(AC3EncodeContext *s,

    if (s->acmod == 2)
    {
        if(block_num==0)
        {
            /* first block must define rematrixing (rematstr) */
            put_bits(&s->pb, 1, 1);

            /* dummy rematrixing rematflg(1:4)=0 */
            for (rbnd=0;rbnd<4;rbnd++)
                put_bits(&s->pb, 1, 0);
        }
        else
        {
            /* no matrixing (but should be used in the future) */
            put_bits(&s->pb, 1, 0);
        }
    }

#if defined(DEBUG)
@ -1023,7 +1023,7 @@ static void output_audio_block(AC3EncodeContext *s,
    }

    if (s->lfe) {
        put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
    }

    for(ch=0;ch<s->nb_channels;ch++) {
@ -1047,7 +1047,7 @@ static void output_audio_block(AC3EncodeContext *s,
            group_size = 4;
            break;
        }
        nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
        p = encoded_exp[ch];

        /* first exponent */
@ -1075,8 +1075,8 @@ static void output_audio_block(AC3EncodeContext *s,
            put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
        }

        if (ch != s->lfe_channel)
            put_bits(&s->pb, 2, 0); /* no gain range info */
    }

    /* bit allocation info */
@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c,
}

static int adpcm_encode_frame(AVCodecContext *avctx,
                              unsigned char *frame, int buf_size, void *data)
{
    int n, i, st;
    short *samples;
@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx)

    switch(avctx->codec->id) {
    case CODEC_ID_ADPCM_CT:
        c->status[0].step = c->status[1].step = 511;
        break;
    default:
        break;
    }
@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
    predictor = c->predictor;
    /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
    if(sign)
        predictor = ((predictor * 254) >> 8) - diff;
    else
        predictor = ((predictor * 254) >> 8) + diff;
    /* calculate new step and clamp it to range 511..32767 */
    new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
    c->step = new_step;
    if(c->step < 511)
        c->step = 511;
    if(c->step > 32767)
        c->step = 32767;

    CLAMP_TO_SHORT(predictor);
    c->predictor = predictor;
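The comment in the hunk above flags the non-obvious part of the CT decoder: the predictor decays by 254/256 before the difference is applied, and the step is rescaled through ct_adpcm_table and clamped to 511..32767. A compact sketch of just that update; the table values and function name are stand-ins, not taken from the commit:

    #include <stdint.h>

    /* Stand-in values for illustration; the committed decoder uses the
     * ct_adpcm_table defined in adpcm.c. */
    static const int demo_ct_table[8] = { 230, 230, 230, 230, 307, 409, 512, 614 };

    /* One CT ADPCM update: leaky predictor plus multiplicative step
     * adaptation, clamped to the same ranges as the code above. */
    static int16_t ct_update(int *predictor, int *step, int sign, int diff, int nibble)
    {
        if (sign)
            *predictor = ((*predictor * 254) >> 8) - diff;   /* decay, then subtract */
        else
            *predictor = ((*predictor * 254) >> 8) + diff;   /* decay, then add */

        *step = (demo_ct_table[nibble & 7] * *step) >> 8;    /* rescale step */
        if (*step < 511)   *step = 511;
        if (*step > 32767) *step = 32767;

        if (*predictor < -32768) *predictor = -32768;        /* CLAMP_TO_SHORT effect */
        if (*predictor >  32767) *predictor =  32767;
        return (int16_t)*predictor;
    }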
@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in,
}

static int adpcm_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
{
    ADPCMContext *c = avctx->priv_data;
    ADPCMChannelStatus *cs;
@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
            cs->predictor -= 0x10000;
        CLAMP_TO_SHORT(cs->predictor);

        // XXX: is this correct ??: *samples++ = cs->predictor;

        cs->step_index = *src++;
        if (cs->step_index < 0) cs->step_index = 0;
@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
        }

        for(m=4; src < (buf + buf_size);) {
            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
            if (st)
                *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3);
            *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3);
            if (st) {
                *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3);
                if (!--m) {
                    m=4;
                    src+=4;
                }
            }
            src++;
        }
        break;
    case CODEC_ID_ADPCM_4XM:
        cs = &(c->status[0]);
@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,

        m= (buf_size - (src - buf))>>st;
        for(i=0; i<m; i++) {
            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
            if (st)
                *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
            if (st)
                *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
        }

        src += m<<st;

@ -958,7 +958,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
        }
        break;
    case CODEC_ID_ADPCM_CT:
        while (src < buf + buf_size) {
            if (st) {
                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
                    (src[0] >> 4) & 0x0F);
@ -970,78 +970,78 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
                    src[0] & 0x0F);
            }
            src++;
        }
        break;
    case CODEC_ID_ADPCM_SWF:
    {
        GetBitContext gb;
        const int *table;
        int k0, signmask;
        int size = buf_size*8;

        init_get_bits(&gb, buf, size);

        // first frame, read bits & inital values
        if (!c->nb_bits)
        {
            c->nb_bits = get_bits(&gb, 2)+2;
            // av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
        }

        table = swf_index_tables[c->nb_bits-2];
        k0 = 1 << (c->nb_bits-2);
        signmask = 1 << (c->nb_bits-1);

        while (get_bits_count(&gb) <= size)
        {
            int i;

            c->nb_samples++;
            // wrap around at every 4096 samples...
            if ((c->nb_samples & 0xfff) == 1)
            {
                for (i = 0; i <= st; i++)
                {
                    *samples++ = c->status[i].predictor = get_sbits(&gb, 16);
                    c->status[i].step_index = get_bits(&gb, 6);
                }
            }

            // similar to IMA adpcm
            for (i = 0; i <= st; i++)
            {
                int delta = get_bits(&gb, c->nb_bits);
                int step = step_table[c->status[i].step_index];
                long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
                int k = k0;

                do {
                    if (delta & k)
                        vpdiff += step;
                    step >>= 1;
                    k >>= 1;
                } while(k);
                vpdiff += step;

                if (delta & signmask)
                    c->status[i].predictor -= vpdiff;
                else
                    c->status[i].predictor += vpdiff;

                c->status[i].step_index += table[delta & (~signmask)];

                c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
                c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);

                *samples++ = c->status[i].predictor;
            }
        }

        // src += get_bits_count(&gb)*8;
        src += size;

        break;
    }
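The do/while loop in the ADPCM_SWF case above adds step>>n for every set magnitude bit and finishes with the fully shifted step, which is the "(delta+0.5)*step/4" of the inline comment for the 4-bit case. A stand-alone restatement of that loop for an arbitrary nb_bits, names invented:

    /* Rough illustration, not the committed code: with k0 = 1 << (nb_bits - 2),
     * the loop below yields roughly (magnitude + 0.5) * step / (1 << (nb_bits - 2))
     * up to shift truncation, i.e. "(delta+0.5)*step/4" when nb_bits == 4. */
    static int swf_vpdiff(int delta, int step, int nb_bits)
    {
        int k = 1 << (nb_bits - 2);
        int vpdiff = 0;

        do {
            if (delta & k)
                vpdiff += step;
            step >>= 1;
            k >>= 1;
        } while (k);
        vpdiff += step;        /* the trailing half-step term */
        return vpdiff;
    }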
    case CODEC_ID_ADPCM_YAMAHA:
        while (src < buf + buf_size) {
@ -35,7 +35,7 @@ void avcodec_register_all(void)
    static int inited = 0;

    if (inited != 0)
        return;
    inited = 1;

    /* encoders */
@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x)
|
||||
} *) (p))->__l) = l; \
|
||||
} while (0)
|
||||
struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
||||
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
||||
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
||||
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
||||
|
||||
#if GNUC_PREREQ(3,3)
|
||||
#define prefetch(p) __builtin_prefetch((p), 0, 1)
|
||||
#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
|
||||
#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
|
||||
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
|
||||
#define cmpbge __builtin_alpha_cmpbge
|
||||
#define cmpbge __builtin_alpha_cmpbge
|
||||
/* Avoid warnings. */
|
||||
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
|
||||
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
|
||||
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
|
||||
#define zap __builtin_alpha_zap
|
||||
#define zapnot __builtin_alpha_zapnot
|
||||
#define amask __builtin_alpha_amask
|
||||
#define implver __builtin_alpha_implver
|
||||
#define rpcc __builtin_alpha_rpcc
|
||||
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
|
||||
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
|
||||
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
|
||||
#define zap __builtin_alpha_zap
|
||||
#define zapnot __builtin_alpha_zapnot
|
||||
#define amask __builtin_alpha_amask
|
||||
#define implver __builtin_alpha_implver
|
||||
#define rpcc __builtin_alpha_rpcc
|
||||
#else
|
||||
#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
||||
#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
|
||||
@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||
#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||
#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
|
||||
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
|
||||
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
|
||||
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
|
||||
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
|
||||
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
|
||||
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
|
||||
#endif
|
||||
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
|
||||
|
||||
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
|
||||
#define minub8 __builtin_alpha_minub8
|
||||
#define minsb8 __builtin_alpha_minsb8
|
||||
#define minuw4 __builtin_alpha_minuw4
|
||||
#define minsw4 __builtin_alpha_minsw4
|
||||
#define maxub8 __builtin_alpha_maxub8
|
||||
#define maxsb8 __builtin_alpha_maxsb8
|
||||
#define maxuw4 __builtin_alpha_maxuw4
|
||||
#define maxsw4 __builtin_alpha_maxsw4
|
||||
#define perr __builtin_alpha_perr
|
||||
#define pklb __builtin_alpha_pklb
|
||||
#define pkwb __builtin_alpha_pkwb
|
||||
#define unpkbl __builtin_alpha_unpkbl
|
||||
#define unpkbw __builtin_alpha_unpkbw
|
||||
#define minub8 __builtin_alpha_minub8
|
||||
#define minsb8 __builtin_alpha_minsb8
|
||||
#define minuw4 __builtin_alpha_minuw4
|
||||
#define minsw4 __builtin_alpha_minsw4
|
||||
#define maxub8 __builtin_alpha_maxub8
|
||||
#define maxsb8 __builtin_alpha_maxsb8
|
||||
#define maxuw4 __builtin_alpha_maxuw4
|
||||
#define maxsw4 __builtin_alpha_maxsw4
|
||||
#define perr __builtin_alpha_perr
|
||||
#define pklb __builtin_alpha_pklb
|
||||
#define pkwb __builtin_alpha_pkwb
|
||||
#define unpkbl __builtin_alpha_unpkbl
|
||||
#define unpkbw __builtin_alpha_unpkbw
|
||||
#else
|
||||
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||
@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
|
||||
#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
|
||||
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
|
||||
#endif
|
||||
|
||||
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
||||
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
||||
|
||||
#include <c_asm.h>
|
||||
#define ldq(p) (*(const uint64_t *) (p))
|
||||
@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
|
||||
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
|
||||
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
|
||||
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
||||
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
||||
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
|
||||
#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
|
||||
#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
|
||||
@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
|
||||
#define amask(a) asm ("amask %a0,%v0", a)
|
||||
#define implver() asm ("implver %v0")
|
||||
#define rpcc() asm ("rpcc %v0")
|
||||
#define rpcc() asm ("rpcc %v0")
|
||||
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
|
||||
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
|
||||
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)
|
||||
|
@ -71,7 +71,7 @@ $unaligned:
|
||||
addq a1, a2, a1
|
||||
nop
|
||||
|
||||
ldq_u t4, 0(a1)
|
||||
ldq_u t4, 0(a1)
|
||||
ldq_u t5, 8(a1)
|
||||
addq a1, a2, a1
|
||||
nop
|
||||
@ -120,20 +120,20 @@ $aligned:
|
||||
addq a1, a2, a1
|
||||
ldq t3, 0(a1)
|
||||
|
||||
addq a0, a2, t4
|
||||
addq a1, a2, a1
|
||||
addq t4, a2, t5
|
||||
subq a3, 4, a3
|
||||
addq a0, a2, t4
|
||||
addq a1, a2, a1
|
||||
addq t4, a2, t5
|
||||
subq a3, 4, a3
|
||||
|
||||
stq t0, 0(a0)
|
||||
addq t5, a2, t6
|
||||
stq t1, 0(t4)
|
||||
addq t6, a2, a0
|
||||
stq t0, 0(a0)
|
||||
addq t5, a2, t6
|
||||
stq t1, 0(t4)
|
||||
addq t6, a2, a0
|
||||
|
||||
stq t2, 0(t5)
|
||||
stq t3, 0(t6)
|
||||
stq t2, 0(t5)
|
||||
stq t3, 0(t6)
|
||||
|
||||
bne a3, $aligned
|
||||
bne a3, $aligned
|
||||
ret
|
||||
.end put_pixels_axp_asm
|
||||
|
||||
|
@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
||||
return result;
|
||||
}
|
||||
|
||||
#if 0 /* now done in assembly */
|
||||
#if 0 /* now done in assembly */
|
||||
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
|
||||
{
|
||||
int result = 0;
|
||||
|
@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block)
|
||||
stq(v, block + 1 * 4);
|
||||
stq(w, block + 2 * 4);
|
||||
stq(w, block + 3 * 4);
|
||||
block += 4 * 4;
|
||||
block += 4 * 4;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < 8; i++)
|
||||
|
@ -301,7 +301,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
|
||||
|
||||
|
||||
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
{
|
||||
short serial_data[250] = {0};
|
||||
|
||||
@ -440,7 +440,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
|
||||
}
|
||||
|
||||
static int amr_nb_encode_frame(AVCodecContext *avctx,
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
{
|
||||
AMRContext *s = (AMRContext*)avctx->priv_data;
|
||||
int written;
|
||||
@ -584,7 +584,7 @@ static int amr_wb_encode_close(AVCodecContext * avctx)
|
||||
}
|
||||
|
||||
static int amr_wb_encode_frame(AVCodecContext *avctx,
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
|
||||
{
|
||||
AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
|
||||
int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
|
||||
|
@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
|
||||
#endif
|
||||
c->idct_put= j_rev_dct_ARM_put;
|
||||
c->idct_add= j_rev_dct_ARM_add;
|
||||
c->idct = j_rev_dct_ARM;
|
||||
c->idct = j_rev_dct_ARM;
|
||||
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
|
||||
} else if (idct_algo==FF_IDCT_SIMPLEARM){
|
||||
c->idct_put= simple_idct_ARM_put;
|
||||
c->idct_add= simple_idct_ARM_add;
|
||||
c->idct = simple_idct_ARM;
|
||||
c->idct_permutation_type= FF_NO_IDCT_PERM;
|
||||
c->idct_put= simple_idct_ARM_put;
|
||||
c->idct_add= simple_idct_ARM_add;
|
||||
c->idct = simple_idct_ARM;
|
||||
c->idct_permutation_type= FF_NO_IDCT_PERM;
|
||||
#ifdef HAVE_IPP
|
||||
} else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
|
||||
#else
|
||||
|
@ -138,10 +138,10 @@ void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
|
||||
mm_flags = mm_support();
|
||||
|
||||
if (avctx->dsp_mask) {
|
||||
if (avctx->dsp_mask & FF_MM_FORCE)
|
||||
mm_flags |= (avctx->dsp_mask & 0xffff);
|
||||
else
|
||||
mm_flags &= ~(avctx->dsp_mask & 0xffff);
|
||||
if (avctx->dsp_mask & FF_MM_FORCE)
|
||||
mm_flags |= (avctx->dsp_mask & 0xffff);
|
||||
else
|
||||
mm_flags &= ~(avctx->dsp_mask & 0xffff);
|
||||
}
|
||||
|
||||
if (!(mm_flags & MM_IWMMXT)) return;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
C-like prototype :
|
||||
void j_rev_dct_ARM(DCTBLOCK data)
|
||||
void j_rev_dct_ARM(DCTBLOCK data)
|
||||
|
||||
With DCTBLOCK being a pointer to an array of 64 'signed shorts'
|
||||
|
||||
@ -51,336 +51,336 @@
|
||||
#define FIX_M_1_961570560_ID 40
|
||||
#define FIX_M_2_562915447_ID 44
|
||||
#define FIX_0xFFFF_ID 48
|
||||
.text
|
||||
.align
|
||||
.text
|
||||
.align
|
||||
|
||||
.global j_rev_dct_ARM
|
||||
.global j_rev_dct_ARM
|
||||
j_rev_dct_ARM:
|
||||
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
|
||||
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
|
||||
|
||||
sub sp, sp, #4 @ reserve some space on the stack
|
||||
str r0, [ sp ] @ save the DCT pointer to the stack
|
||||
sub sp, sp, #4 @ reserve some space on the stack
|
||||
str r0, [ sp ] @ save the DCT pointer to the stack
|
||||
|
||||
mov lr, r0 @ lr = pointer to the current row
|
||||
mov r12, #8 @ r12 = row-counter
|
||||
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
|
||||
mov lr, r0 @ lr = pointer to the current row
|
||||
mov r12, #8 @ r12 = row-counter
|
||||
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
|
||||
row_loop:
|
||||
ldrsh r0, [lr, # 0] @ r0 = 'd0'
|
||||
ldrsh r1, [lr, # 8] @ r1 = 'd1'
|
||||
ldrsh r0, [lr, # 0] @ r0 = 'd0'
|
||||
ldrsh r1, [lr, # 8] @ r1 = 'd1'
|
||||
|
||||
@ Optimization for row that have all items except the first set to 0
|
||||
@ (this works as the DCTELEMS are always 4-byte aligned)
|
||||
ldr r5, [lr, # 0]
|
||||
ldr r2, [lr, # 4]
|
||||
ldr r3, [lr, # 8]
|
||||
ldr r4, [lr, #12]
|
||||
orr r3, r3, r4
|
||||
orr r3, r3, r2
|
||||
orrs r5, r3, r5
|
||||
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
|
||||
orrs r2, r3, r1
|
||||
beq empty_row
|
||||
@ Optimization for row that have all items except the first set to 0
|
||||
@ (this works as the DCTELEMS are always 4-byte aligned)
|
||||
ldr r5, [lr, # 0]
|
||||
ldr r2, [lr, # 4]
|
||||
ldr r3, [lr, # 8]
|
||||
ldr r4, [lr, #12]
|
||||
orr r3, r3, r4
|
||||
orr r3, r3, r2
|
||||
orrs r5, r3, r5
|
||||
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
|
||||
orrs r2, r3, r1
|
||||
beq empty_row
|
||||
|
||||
ldrsh r2, [lr, # 2] @ r2 = 'd2'
|
||||
ldrsh r4, [lr, # 4] @ r4 = 'd4'
|
||||
ldrsh r6, [lr, # 6] @ r6 = 'd6'
|
||||
ldrsh r2, [lr, # 2] @ r2 = 'd2'
|
||||
ldrsh r4, [lr, # 4] @ r4 = 'd4'
|
||||
ldrsh r6, [lr, # 6] @ r6 = 'd6'
|
||||
|
||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||
add r7, r2, r6
|
||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||
mul r7, r3, r7 @ r7 = z1
|
||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||
mla r6, r5, r6, r7 @ r6 = tmp2
|
||||
add r5, r0, r4 @ r5 = tmp0
|
||||
mla r2, r3, r2, r7 @ r2 = tmp3
|
||||
sub r3, r0, r4 @ r3 = tmp1
|
||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||
add r7, r2, r6
|
||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||
mul r7, r3, r7 @ r7 = z1
|
||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||
mla r6, r5, r6, r7 @ r6 = tmp2
|
||||
add r5, r0, r4 @ r5 = tmp0
|
||||
mla r2, r3, r2, r7 @ r2 = tmp3
|
||||
sub r3, r0, r4 @ r3 = tmp1
|
||||
|
||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
|
||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
|
||||
|
||||
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||
|
||||
ldrsh r3, [lr, #10] @ r3 = 'd3'
|
||||
ldrsh r5, [lr, #12] @ r5 = 'd5'
|
||||
ldrsh r7, [lr, #14] @ r7 = 'd7'
|
||||
ldrsh r3, [lr, #10] @ r3 = 'd3'
|
||||
ldrsh r5, [lr, #12] @ r5 = 'd5'
|
||||
ldrsh r7, [lr, #14] @ r7 = 'd7'
|
||||
|
||||
add r0, r3, r5 @ r0 = 'z2'
|
||||
add r2, r1, r7 @ r2 = 'z1'
|
||||
add r4, r3, r7 @ r4 = 'z3'
|
||||
add r6, r1, r5 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||
add r8, r4, r6 @ r8 = z3 + z4
|
||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||
mul r8, r9, r8 @ r8 = 'z5'
|
||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||
mul r2, r10, r2 @ r2 = 'z1'
|
||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||
mul r0, r9, r0 @ r0 = 'z2'
|
||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||
add r7, r7, r4 @ r7 = tmp0
|
||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||
add r5, r5, r6 @ r5 = tmp1
|
||||
add r3, r3, r4 @ r3 = tmp2
|
||||
add r1, r1, r6 @ r1 = tmp3
|
||||
add r0, r3, r5 @ r0 = 'z2'
|
||||
add r2, r1, r7 @ r2 = 'z1'
|
||||
add r4, r3, r7 @ r4 = 'z3'
|
||||
add r6, r1, r5 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||
add r8, r4, r6 @ r8 = z3 + z4
|
||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||
mul r8, r9, r8 @ r8 = 'z5'
|
||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||
mul r2, r10, r2 @ r2 = 'z1'
|
||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||
mul r0, r9, r0 @ r0 = 'z2'
|
||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||
add r7, r7, r4 @ r7 = tmp0
|
||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||
add r5, r5, r6 @ r5 = tmp1
|
||||
add r3, r3, r4 @ r3 = tmp2
|
||||
add r1, r1, r6 @ r1 = tmp3
|
||||
|
||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
|
||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
|
||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
|
||||
add r8, r0, r1
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 0]
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
|
||||
add r8, r0, r1
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 0]
|
||||
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r0, r1
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #14]
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r0, r1
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #14]
|
||||
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
|
||||
add r8, r6, r3
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 2]
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
|
||||
add r8, r6, r3
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 2]
|
||||
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r6, r3
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #12]
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r6, r3
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #12]
|
||||
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
|
||||
add r8, r4, r5
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 4]
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
|
||||
add r8, r4, r5
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 4]
|
||||
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r4, r5
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #10]
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r4, r5
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, #10]
|
||||
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
|
||||
add r8, r2, r7
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 6]
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
|
||||
add r8, r2, r7
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 6]
|
||||
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r2, r7
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 8]
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
|
||||
sub r8, r2, r7
|
||||
add r8, r8, #(1<<10)
|
||||
mov r8, r8, asr #11
|
||||
strh r8, [lr, # 8]
|
||||
|
||||
@ End of row loop
|
||||
add lr, lr, #16
|
||||
subs r12, r12, #1
|
||||
bne row_loop
|
||||
beq start_column_loop
|
||||
@ End of row loop
|
||||
add lr, lr, #16
|
||||
subs r12, r12, #1
|
||||
bne row_loop
|
||||
beq start_column_loop
|
||||
|
||||
empty_row:
|
||||
ldr r1, [r11, #FIX_0xFFFF_ID]
|
||||
mov r0, r0, lsl #2
|
||||
and r0, r0, r1
|
||||
add r0, r0, r0, lsl #16
|
||||
str r0, [lr, # 0]
|
||||
str r0, [lr, # 4]
|
||||
str r0, [lr, # 8]
|
||||
str r0, [lr, #12]
|
||||
ldr r1, [r11, #FIX_0xFFFF_ID]
|
||||
mov r0, r0, lsl #2
|
||||
and r0, r0, r1
|
||||
add r0, r0, r0, lsl #16
|
||||
str r0, [lr, # 0]
|
||||
str r0, [lr, # 4]
|
||||
str r0, [lr, # 8]
|
||||
str r0, [lr, #12]
|
||||
|
||||
end_of_row_loop:
|
||||
@ End of loop
|
||||
add lr, lr, #16
|
||||
subs r12, r12, #1
|
||||
bne row_loop
|
||||
@ End of loop
|
||||
add lr, lr, #16
|
||||
subs r12, r12, #1
|
||||
bne row_loop
|
||||
|
||||
start_column_loop:
|
||||
@ Start of column loop
|
||||
ldr lr, [ sp ]
|
||||
mov r12, #8
|
||||
@ Start of column loop
|
||||
ldr lr, [ sp ]
|
||||
mov r12, #8
|
||||
column_loop:
|
||||
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
|
||||
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
|
||||
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
|
||||
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
|
||||
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
|
||||
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
|
||||
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
|
||||
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
|
||||
|
||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||
add r1, r2, r6
|
||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||
mul r1, r3, r1 @ r1 = z1
|
||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||
mla r6, r5, r6, r1 @ r6 = tmp2
|
||||
add r5, r0, r4 @ r5 = tmp0
|
||||
mla r2, r3, r2, r1 @ r2 = tmp3
|
||||
sub r3, r0, r4 @ r3 = tmp1
|
||||
ldr r3, [r11, #FIX_0_541196100_ID]
|
||||
add r1, r2, r6
|
||||
ldr r5, [r11, #FIX_M_1_847759065_ID]
|
||||
mul r1, r3, r1 @ r1 = z1
|
||||
ldr r3, [r11, #FIX_0_765366865_ID]
|
||||
mla r6, r5, r6, r1 @ r6 = tmp2
|
||||
add r5, r0, r4 @ r5 = tmp0
|
||||
mla r2, r3, r2, r1 @ r2 = tmp3
|
||||
sub r3, r0, r4 @ r3 = tmp1
|
||||
|
||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
|
||||
add r0, r2, r5, lsl #13 @ r0 = tmp10
|
||||
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
|
||||
add r4, r6, r3, lsl #13 @ r4 = tmp11
|
||||
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
|
||||
|
||||
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
|
||||
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
|
||||
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
|
||||
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
|
||||
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
|
||||
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
|
||||
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
|
||||
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
|
||||
|
||||
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
|
||||
orr r9, r1, r3
|
||||
orr r10, r5, r7
|
||||
orrs r10, r9, r10
|
||||
beq empty_odd_column
|
||||
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
|
||||
orr r9, r1, r3
|
||||
orr r10, r5, r7
|
||||
orrs r10, r9, r10
|
||||
beq empty_odd_column
|
||||
|
||||
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
|
||||
|
||||
add r0, r3, r5 @ r0 = 'z2'
|
||||
add r2, r1, r7 @ r2 = 'z1'
|
||||
add r4, r3, r7 @ r4 = 'z3'
|
||||
add r6, r1, r5 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||
add r8, r4, r6
|
||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||
mul r8, r9, r8 @ r8 = 'z5'
|
||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||
mul r2, r10, r2 @ r2 = 'z1'
|
||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||
mul r0, r9, r0 @ r0 = 'z2'
|
||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||
add r7, r7, r4 @ r7 = tmp0
|
||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||
add r5, r5, r6 @ r5 = tmp1
|
||||
add r3, r3, r4 @ r3 = tmp2
|
||||
add r1, r1, r6 @ r1 = tmp3
|
||||
add r0, r3, r5 @ r0 = 'z2'
|
||||
add r2, r1, r7 @ r2 = 'z1'
|
||||
add r4, r3, r7 @ r4 = 'z3'
|
||||
add r6, r1, r5 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_1_175875602_ID]
|
||||
add r8, r4, r6
|
||||
ldr r10, [r11, #FIX_M_0_899976223_ID]
|
||||
mul r8, r9, r8 @ r8 = 'z5'
|
||||
ldr r9, [r11, #FIX_M_2_562915447_ID]
|
||||
mul r2, r10, r2 @ r2 = 'z1'
|
||||
ldr r10, [r11, #FIX_M_1_961570560_ID]
|
||||
mul r0, r9, r0 @ r0 = 'z2'
|
||||
ldr r9, [r11, #FIX_M_0_390180644_ID]
|
||||
mla r4, r10, r4, r8 @ r4 = 'z3'
|
||||
ldr r10, [r11, #FIX_0_298631336_ID]
|
||||
mla r6, r9, r6, r8 @ r6 = 'z4'
|
||||
ldr r9, [r11, #FIX_2_053119869_ID]
|
||||
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
|
||||
ldr r10, [r11, #FIX_3_072711026_ID]
|
||||
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
|
||||
ldr r9, [r11, #FIX_1_501321110_ID]
|
||||
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
|
||||
add r7, r7, r4 @ r7 = tmp0
|
||||
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
|
||||
add r5, r5, r6 @ r5 = tmp1
|
||||
add r3, r3, r4 @ r3 = tmp2
|
||||
add r1, r1, r6 @ r1 = tmp3
|
||||
|
||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
|
||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
|
||||
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
|
||||
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r0, r1
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 0*8)]
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r0, r1
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 0*8)]
|
||||
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r0, r1
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(14*8)]
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r0, r1
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(14*8)]
|
||||
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r4, r3
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 2*8)]
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r4, r3
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 2*8)]
|
||||
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r4, r3
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(12*8)]
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r4, r3
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(12*8)]
|
||||
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r6, r5
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 4*8)]
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r6, r5
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 4*8)]
|
||||
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r6, r5
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(10*8)]
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r6, r5
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #(10*8)]
|
||||
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r2, r7
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 6*8)]
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
add r8, r2, r7
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 6*8)]
|
||||
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r2, r7
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 8*8)]
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
sub r8, r2, r7
|
||||
add r8, r8, #(1<<17)
|
||||
mov r8, r8, asr #18
|
||||
strh r8, [lr, #( 8*8)]
|
||||
|
||||
@ End of row loop
|
||||
add lr, lr, #2
|
||||
subs r12, r12, #1
|
||||
bne column_loop
|
||||
beq the_end
|
||||
@ End of row loop
|
||||
add lr, lr, #2
|
||||
subs r12, r12, #1
|
||||
bne column_loop
|
||||
beq the_end
|
||||
|
||||
empty_odd_column:
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
add r0, r0, #(1<<17)
|
||||
mov r0, r0, asr #18
|
||||
strh r0, [lr, #( 0*8)]
|
||||
strh r0, [lr, #(14*8)]
|
||||
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
|
||||
add r0, r0, #(1<<17)
|
||||
mov r0, r0, asr #18
|
||||
strh r0, [lr, #( 0*8)]
|
||||
strh r0, [lr, #(14*8)]
|
||||
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
add r4, r4, #(1<<17)
|
||||
mov r4, r4, asr #18
|
||||
strh r4, [lr, #( 2*8)]
|
||||
strh r4, [lr, #(12*8)]
|
||||
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
|
||||
add r4, r4, #(1<<17)
|
||||
mov r4, r4, asr #18
|
||||
strh r4, [lr, #( 2*8)]
|
||||
strh r4, [lr, #(12*8)]
|
||||
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
add r6, r6, #(1<<17)
|
||||
mov r6, r6, asr #18
|
||||
strh r6, [lr, #( 4*8)]
|
||||
strh r6, [lr, #(10*8)]
|
||||
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
|
||||
add r6, r6, #(1<<17)
|
||||
mov r6, r6, asr #18
|
||||
strh r6, [lr, #( 4*8)]
|
||||
strh r6, [lr, #(10*8)]
|
||||
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
add r2, r2, #(1<<17)
|
||||
mov r2, r2, asr #18
|
||||
strh r2, [lr, #( 6*8)]
|
||||
strh r2, [lr, #( 8*8)]
|
||||
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
|
||||
add r2, r2, #(1<<17)
|
||||
mov r2, r2, asr #18
|
||||
strh r2, [lr, #( 6*8)]
|
||||
strh r2, [lr, #( 8*8)]
|
||||
|
||||
@ End of row loop
|
||||
add lr, lr, #2
|
||||
subs r12, r12, #1
|
||||
bne column_loop
|
||||
@ End of row loop
|
||||
add lr, lr, #2
|
||||
subs r12, r12, #1
|
||||
bne column_loop
|
||||
|
||||
the_end:
|
||||
@ The end....
|
||||
add sp, sp, #4
|
||||
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
|
||||
@ The end....
|
||||
add sp, sp, #4
|
||||
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
|
||||
|
||||
const_array:
|
||||
.align
|
||||
.word FIX_0_298631336
|
||||
.word FIX_0_541196100
|
||||
.word FIX_0_765366865
|
||||
.word FIX_1_175875602
|
||||
.word FIX_1_501321110
|
||||
.word FIX_2_053119869
|
||||
.word FIX_3_072711026
|
||||
.word FIX_M_0_390180644
|
||||
.word FIX_M_0_899976223
|
||||
.word FIX_M_1_847759065
|
||||
.word FIX_M_1_961570560
|
||||
.word FIX_M_2_562915447
|
||||
.word FIX_0xFFFF
|
||||
.align
|
||||
.word FIX_0_298631336
|
||||
.word FIX_0_541196100
|
||||
.word FIX_0_765366865
|
||||
.word FIX_1_175875602
|
||||
.word FIX_1_501321110
|
||||
.word FIX_2_053119869
|
||||
.word FIX_3_072711026
|
||||
.word FIX_M_0_390180644
|
||||
.word FIX_M_0_899976223
|
||||
.word FIX_M_1_847759065
|
||||
.word FIX_M_1_961570560
|
||||
.word FIX_M_2_562915447
|
||||
.word FIX_0xFFFF
|
||||
|
@ -51,9 +51,9 @@
|
||||
#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
|
||||
|
||||
|
||||
.text
|
||||
.align
|
||||
.global simple_idct_ARM
|
||||
.text
|
||||
.align
|
||||
.global simple_idct_ARM
|
||||
|
||||
simple_idct_ARM:
|
||||
@@ void simple_idct_ARM(int16_t *block)
|
||||
@ -120,8 +120,8 @@ __b_evaluation:
|
||||
ldr r11, [r12, #offW7] @ R11=W7
|
||||
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
teq r2, #0 @ if null avoid muls
|
||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
teq r2, #0 @ if null avoid muls
|
||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
||||
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
@ -147,7 +147,7 @@ __b_evaluation:
|
||||
@@ MAC16(b3, -W1, row[7]);
|
||||
@@ MAC16(b1, -W5, row[7]);
|
||||
mov r3, r3, asr #16 @ R3=ROWr16[5]
|
||||
teq r3, #0 @ if null avoid muls
|
||||
teq r3, #0 @ if null avoid muls
|
||||
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
|
||||
mov r4, r4, asr #16 @ R4=ROWr16[7]
|
||||
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
|
||||
@ -155,7 +155,7 @@ __b_evaluation:
|
||||
rsbne r3, r3, #0 @ R3=-ROWr16[5]
|
||||
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5]=b1
|
||||
@@ R3 is free now
|
||||
teq r4, #0 @ if null avoid muls
|
||||
teq r4, #0 @ if null avoid muls
|
||||
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
|
||||
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
|
||||
rsbne r4, r4, #0 @ R4=-ROWr16[7]
|
||||
@ -187,7 +187,7 @@ __a_evaluation:
|
||||
teq r2, #0
|
||||
beq __end_bef_a_evaluation
|
||||
|
||||
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
||||
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
|
||||
mul r11, r8, r4 @ R11=W2*ROWr16[2]
|
||||
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
|
||||
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
|
||||
@ -203,7 +203,7 @@ __a_evaluation:
|
||||
@@ a2 -= W4*row[4]
|
||||
@@ a3 += W4*row[4]
|
||||
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
|
||||
teq r11, #0 @ if null avoid muls
|
||||
teq r11, #0 @ if null avoid muls
|
||||
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
|
||||
@@ R9 is free now
|
||||
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
|
||||
@ -212,7 +212,7 @@ __a_evaluation:
|
||||
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
|
||||
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
|
||||
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
|
||||
teq r9, #0 @ if null avoid muls
|
||||
teq r9, #0 @ if null avoid muls
|
||||
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
|
||||
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
|
||||
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
|
||||
@ -294,165 +294,165 @@ __end_row_loop:
|
||||
|
||||
|
||||
|
||||
@@ at this point, R0=block, R1-R11 (free)
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
|
||||
@@ at this point, R0=block, R1-R11 (free)
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
|
||||
__col_loop:
|
||||
|
||||
__b_evaluation2:
|
||||
@@ at this point, R0=block (temp), R1-R11 (free)
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
@@ proceed with b0-b3 first, followed by a0-a3
|
||||
@@ MUL16(b0, W1, col[8x1]);
|
||||
@@ MUL16(b1, W3, col[8x1]);
|
||||
@@ MUL16(b2, W5, col[8x1]);
|
||||
@@ MUL16(b3, W7, col[8x1]);
|
||||
@@ MAC16(b0, W3, col[8x3]);
|
||||
@@ MAC16(b1, -W7, col[8x3]);
|
||||
@@ MAC16(b2, -W1, col[8x3]);
|
||||
@@ MAC16(b3, -W5, col[8x3]);
|
||||
ldr r8, [r12, #offW1] @ R8=W1
|
||||
ldrsh r7, [r14, #16]
|
||||
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldr r9, [r12, #offW3] @ R9=W3
|
||||
ldr r10, [r12, #offW5] @ R10=W5
|
||||
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldr r11, [r12, #offW7] @ R11=W7
|
||||
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldrsh r2, [r14, #48]
|
||||
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
teq r2, #0 @ if 0, then avoid muls
|
||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
||||
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
@@ at this point, R0=block (temp), R1-R11 (free)
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
@@ proceed with b0-b3 first, followed by a0-a3
|
||||
@@ MUL16(b0, W1, col[8x1]);
|
||||
@@ MUL16(b1, W3, col[8x1]);
|
||||
@@ MUL16(b2, W5, col[8x1]);
|
||||
@@ MUL16(b3, W7, col[8x1]);
|
||||
@@ MAC16(b0, W3, col[8x3]);
|
||||
@@ MAC16(b1, -W7, col[8x3]);
|
||||
@@ MAC16(b2, -W1, col[8x3]);
|
||||
@@ MAC16(b3, -W5, col[8x3]);
|
||||
ldr r8, [r12, #offW1] @ R8=W1
|
||||
ldrsh r7, [r14, #16]
|
||||
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldr r9, [r12, #offW3] @ R9=W3
|
||||
ldr r10, [r12, #offW5] @ R10=W5
|
||||
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldr r11, [r12, #offW7] @ R11=W7
|
||||
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
ldrsh r2, [r14, #48]
|
||||
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
|
||||
teq r2, #0 @ if 0, then avoid muls
|
||||
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
rsbne r2, r2, #0 @ R2=-ROWr16[3]
|
||||
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
|
||||
|
||||
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
||||
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
@@ MAC16(b0, W5, col[5x8]);
|
||||
@@ MAC16(b2, W7, col[5x8]);
|
||||
@@ MAC16(b3, W3, col[5x8]);
|
||||
@@ MAC16(b1, -W1, col[5x8]);
|
||||
@@ MAC16(b0, W7, col[7x8]);
|
||||
@@ MAC16(b2, W3, col[7x8]);
|
||||
@@ MAC16(b3, -W1, col[7x8]);
|
||||
@@ MAC16(b1, -W5, col[7x8]);
|
||||
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
|
||||
teq r3, #0 @ if 0 then avoid muls
|
||||
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
|
||||
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
|
||||
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
|
||||
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
|
||||
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
|
||||
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1
|
||||
@@ R3 is free now
|
||||
teq r4, #0 @ if 0 then avoid muls
|
||||
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
|
||||
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
|
||||
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
|
||||
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
|
||||
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
|
||||
@@ R4 is free now
|
||||
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
|
||||
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
|
||||
@@ R12=__const_ptr_, R14=&block[n]
|
||||
@@ MAC16(b0, W5, col[5x8]);
|
||||
@@ MAC16(b2, W7, col[5x8]);
|
||||
@@ MAC16(b3, W3, col[5x8]);
|
||||
@@ MAC16(b1, -W1, col[5x8]);
|
||||
@@ MAC16(b0, W7, col[7x8]);
|
||||
@@ MAC16(b2, W3, col[7x8]);
|
||||
@@ MAC16(b3, -W1, col[7x8]);
@@ MAC16(b1, -W5, col[7x8]);
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
teq r3, #0 @ if 0 then avoid muls
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
mlane r1, r8, r3, r1 @ R7-=W1*ROWr16[5x8]=b1
@@ R3 is free now
teq r4, #0 @ if 0 then avoid muls
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
__end_b_evaluation2:
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]

__a_evaluation2:
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
@@ a1 = a0 + W6 * row[2];
@@ a2 = a0 - W6 * row[2];
@@ a3 = a0 - W2 * row[2];
@@ a0 = a0 + W2 * row[2];
ldrsh r6, [r14, #0]
ldr r9, [r12, #offW4] @ R9=W4
mul r6, r9, r6 @ R6=W4*ROWr16[0]
ldr r10, [r12, #offW6] @ R10=W6
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
mul r11, r10, r4 @ R11=W6*ROWr16[2]
ldr r8, [r12, #offW2] @ R8=W2
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)

@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ a0 += W4*row[4]
@@ a1 -= W4*row[4]
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@@ a0 += W6*row[6];
@@ a3 -= W6*row[6];
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
__end_a_evaluation2:
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
@@ col[16] = ((a2 + b2) >> COL_SHIFT);
@@ col[24] = ((a3 + b3) >> COL_SHIFT);
@@ col[32] = ((a3 - b3) >> COL_SHIFT);
@@ col[40] = ((a2 - b2) >> COL_SHIFT);
@@ col[48] = ((a1 - b1) >> COL_SHIFT);
@@ col[56] = ((a0 - b0) >> COL_SHIFT);
@@@@@ no optimisation here @@@@@
add r8, r6, r0 @ R8=a0+b0
add r9, r2, r1 @ R9=a1+b1
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #0]
strh r9, [r14, #16]
add r8, r3, r5 @ R8=a2+b2
add r9, r4, r7 @ R9=a3+b3
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #32]
strh r9, [r14, #48]
sub r8, r4, r7 @ R8=a3-b3
sub r9, r3, r5 @ R9=a2-b2
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #64]
strh r9, [r14, #80]
sub r8, r2, r1 @ R8=a1-b1
sub r9, r6, r0 @ R9=a0-b0
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #96]
strh r9, [r14, #112]

__end_col_loop:
@@ at this point, R0-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
ldr r0, [sp, #0] @ R0=block
teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
sub r14, r14, #2
bne __col_loop

@ -466,15 +466,15 @@ __end_simple_idct_ARM:

@@ kind of sub-function, here not to overload the common case.
__end_bef_a_evaluation:
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
bal __end_a_evaluation

__constant_ptr__: @@ see #defines at the beginning of the source code for values.
.align
.word W1
.word W2
.word W3

@ -15,21 +15,21 @@ extern "C" {
#include <sys/types.h> /* size_t */

//FIXME the following 2 really dont belong in here
#define FFMPEG_VERSION_INT 0x000409
#define FFMPEG_VERSION "CVS"

#define AV_STRINGIFY(s) AV_TOSTRING(s)
#define AV_TOSTRING(s) #s

#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)
#define LIBAVCODEC_VERSION 51.0.0
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT

#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)

#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}

enum CodecID {
CODEC_ID_NONE,
@ -362,9 +362,9 @@ extern int motion_estimation_method;
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata

/* Unsupported options :
* Syntax Arithmetic coding (SAC)
* Reference Picture Selection
* Independant Segment Decoding */
/* /Fx */
/* codec capabilities */

@ -646,9 +646,9 @@ typedef struct AVPanScan{
*/\
int8_t *ref_index[2];

#define FF_QSCALE_TYPE_MPEG1 0
#define FF_QSCALE_TYPE_MPEG2 1
#define FF_QSCALE_TYPE_H264 2

#define FF_BUFFER_TYPE_INTERNAL 1
#define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user)
@ -684,9 +684,9 @@ typedef struct AVCLASS AVClass;
struct AVCLASS {
const char* class_name;
const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext
or AVFormatContext, which begin with an AVClass.
Needed because av_log is in libavcodec and has no visibility
of AVIn/OutputFormat */
struct AVOption *option;
};

@ -1252,18 +1252,18 @@ typedef struct AVCodecContext {
* result into program crash)
*/
unsigned dsp_mask;
#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */
/* lower 16 bits - CPU features */
#ifdef HAVE_MMX
#define FF_MM_MMX 0x0001 /* standard MMX */
#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */
#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define FF_MM_SSE 0x0008 /* SSE functions */
#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */
#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#endif /* HAVE_MMX */
#ifdef HAVE_IWMMXT
#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */
#endif /* HAVE_IWMMXT */

/**
@ -2223,7 +2223,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
#define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */
#define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */
int img_get_alpha_info(const AVPicture *src,
int pix_fmt, int width, int height);

/* convert among pixel formats */
int img_convert(AVPicture *dst, int dst_pix_fmt,

@ -35,20 +35,20 @@ typedef struct ThreadContext{

// it's odd Be never patented that :D
struct benaphore {
vint32 atom;
sem_id sem;
};
static inline int lock_ben(struct benaphore *ben)
{
if (atomic_add(&ben->atom, 1) > 0)
return acquire_sem(ben->sem);
return B_OK;
}
static inline int unlock_ben(struct benaphore *ben)
{
if (atomic_add(&ben->atom, -1) > 1)
return release_sem(ben->sem);
return B_OK;
}

static struct benaphore av_thread_lib_ben;
@ -155,25 +155,25 @@ fail:

int avcodec_thread_lock_lib(void)
{
return lock_ben(&av_thread_lib_ben);
}

int avcodec_thread_unlock_lib(void)
{
return unlock_ben(&av_thread_lib_ben);
}

/* our versions of _init and _fini (which are called by those actually from crt.o) */

void initialize_after(void)
{
av_thread_lib_ben.atom = 0;
av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
}

void uninitialize_before(void)
{
delete_sem(av_thread_lib_ben.sem);
}

@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg)
{
int bit= get_bits1(s);
if(!bit)
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);

return bit;
}

@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM {
# ifdef __GNUC__
static inline uint32_t unaligned32(const void *v) {
struct Unaligned {
uint32_t i;
} __attribute__((packed));

return ((const struct Unaligned *) v)->i;
@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
bit_buf = (bit_buf<<n) | value;
bit_left-=n;
} else {
bit_buf<<=bit_left;
bit_buf |= value >> (n - bit_left);
#ifdef UNALIGNED_STORES_ARE_BAD
if (3 & (intptr_t) s->buf_ptr) {
@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
//printf("bitbuf = %08x\n", bit_buf);
s->buf_ptr+=4;
bit_left+=32 - n;
bit_buf = value;
}

@ -212,21 +212,21 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
# ifdef ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) || defined(ARCH_X86_64)
asm volatile(
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
"shrl %%cl, %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"andl $0xFFFFFFFC, %%ecx \n\t"
"bswapl %1 \n\t"
"orl %1, (%2, %%ecx) \n\t"
"bswapl %%eax \n\t"
"addl %3, %0 \n\t"
"movl %%eax, 4(%2, %%ecx) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
: "%eax", "%ecx"
);
# else
int index= s->index;
@ -243,20 +243,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
# else //ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) || defined(ARCH_X86_64)
asm volatile(
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
"negl %%ecx \n\t"
"shll %%cl, %1 \n\t"
"bswapl %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"orl %1, (%%ecx, %2) \n\t"
"addl %3, %0 \n\t"
"movl $0, 4(%%ecx, %2) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
: "%ecx"
);
# else
int index= s->index;
@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
static inline uint8_t* pbBufPtr(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
return s->buf + (s->index>>3);
#else
return s->buf_ptr;
#endif
}

@ -290,10 +290,10 @@ static inline void skip_put_bytes(PutBitContext *s, int n){
assert((put_bits_count(s)&7)==0);
#ifdef ALT_BITSTREAM_WRITER
FIXME may need some cleaning of the buffer
s->index += n<<3;
#else
assert(s->bit_left==32);
s->buf_ptr += n;
#endif
}

@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
static inline int unaligned32_be(const void *v)
{
#ifdef CONFIG_ALIGN
const uint8_t *p=v;
return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
#else
return be2me_32( unaligned32(v)); //original
#endif
}

@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){
#if defined(ARCH_X86) || defined(ARCH_X86_64)
# define SKIP_CACHE(name, gb, num)\
asm(\
"shldl %2, %1, %0 \n\t"\
"shll %2, %1 \n\t"\
: "+r" (name##_cache0), "+r" (name##_cache1)\
: "Ic" ((uint8_t)num)\
);

@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx,
uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
for(x=0; x<avctx->width; x+=4){
luma[3] = get_bits(&a->gb, 5) << 3;
luma[2] = get_bits(&a->gb, 5) << 3;
luma[1] = get_bits(&a->gb, 5) << 3;
luma[0] = get_bits(&a->gb, 5) << 3;
luma+= 4;
*(cb++) = get_bits(&a->gb, 6) << 2;
*(cr++) = get_bits(&a->gb, 6) << 2;
}
}

@ -65,14 +65,14 @@ int64_t gettime(void)
static short idct_mmx_perm[64];

static short idct_simple_mmx_perm[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

void idct_mmx_init(void)
@ -81,8 +81,8 @@ void idct_mmx_init(void)

/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
for (i = 0; i < 64; i++) {
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
}
}

@ -151,7 +151,7 @@ void dct_error(const char *name, int is_idct,
for(i=0;i<64;i++)
block[idct_simple_mmx_perm[i]] = block1[i];

} else {
for(i=0; i<64; i++)
block[i]= block1[i];
}
@ -186,9 +186,9 @@ void dct_error(const char *name, int is_idct,
if (v > err_inf)
err_inf = v;
err2 += v * v;
sysErr[i] += block[i] - block1[i];
blockSumErr += v;
if( abs(block[i])>maxout) maxout=abs(block[i]);
}
if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
#if 0 // print different matrix pairs
@ -209,7 +209,7 @@ void dct_error(const char *name, int is_idct,

#if 1 // dump systematic errors
for(i=0; i<64; i++){
if(i%8==0) printf("\n");
printf("%5d ", (int)sysErr[i]);
}
printf("\n");
@ -503,7 +503,7 @@ int main(int argc, char **argv)
dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
// dct_error("ODIVX-C", 1, odivx_idct_c, idct);
//printf(" test against odivx idct\n");
// dct_error("REF", 1, idct, odivx_idct_c);
// dct_error("INT", 1, j_rev_dct, odivx_idct_c);
// dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
// dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);

@ -124,14 +124,14 @@ const uint32_t inverse[256]={
|
||||
|
||||
/* Input permutation for the simple_idct_mmx */
|
||||
static const uint8_t simple_mmx_permutation[64]={
|
||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
||||
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
||||
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
||||
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
|
||||
static int pix_sum_c(uint8_t * pix, int line_size)
|
||||
@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size)
|
||||
|
||||
s = 0;
|
||||
for (i = 0; i < 16; i++) {
|
||||
for (j = 0; j < 16; j += 8) {
|
||||
s += pix[0];
|
||||
s += pix[1];
|
||||
s += pix[2];
|
||||
s += pix[3];
|
||||
s += pix[4];
|
||||
s += pix[5];
|
||||
s += pix[6];
|
||||
s += pix[7];
|
||||
pix += 8;
|
||||
}
|
||||
pix += line_size - 16;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
|
||||
|
||||
s = 0;
|
||||
for (i = 0; i < 16; i++) {
|
||||
for (j = 0; j < 16; j += 8) {
|
||||
#if 0
|
||||
s += sq[pix[0]];
|
||||
s += sq[pix[1]];
|
||||
s += sq[pix[2]];
|
||||
s += sq[pix[3]];
|
||||
s += sq[pix[4]];
|
||||
s += sq[pix[5]];
|
||||
s += sq[pix[6]];
|
||||
s += sq[pix[7]];
|
||||
#else
|
||||
#if LONG_MAX > 2147483647
|
||||
register uint64_t x=*(uint64_t*)pix;
|
||||
s += sq[x&0xff];
|
||||
s += sq[(x>>8)&0xff];
|
||||
s += sq[(x>>16)&0xff];
|
||||
s += sq[(x>>24)&0xff];
|
||||
s += sq[(x>>32)&0xff];
|
||||
s += sq[(x>>40)&0xff];
|
||||
s += sq[(x>>48)&0xff];
|
||||
s += sq[(x>>56)&0xff];
|
||||
#else
|
||||
register uint32_t x=*(uint32_t*)pix;
|
||||
s += sq[x&0xff];
|
||||
s += sq[(x>>8)&0xff];
|
||||
s += sq[(x>>16)&0xff];
|
||||
s += sq[(x>>24)&0xff];
|
||||
x=*(uint32_t*)(pix+4);
|
||||
s += sq[x&0xff];
|
||||
s += sq[(x>>8)&0xff];
|
||||
@ -197,9 +197,9 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
|
||||
s += sq[(x>>24)&0xff];
|
||||
#endif
|
||||
#endif
|
||||
pix += 8;
|
||||
}
|
||||
pix += line_size - 16;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -410,7 +410,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin
|
||||
}
|
||||
|
||||
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
||||
const uint8_t *s2, int stride){
|
||||
const uint8_t *s2, int stride){
|
||||
int i;
|
||||
|
||||
/* read the pixels */
|
||||
@ -431,7 +431,7 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
||||
|
||||
|
||||
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||
int line_size)
|
||||
int line_size)
|
||||
{
|
||||
int i;
|
||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||
@ -453,7 +453,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||
}
|
||||
|
||||
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||
int line_size)
|
||||
int line_size)
|
||||
{
|
||||
int i;
|
||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||
@ -471,7 +471,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
|
||||
}
|
||||
|
||||
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
||||
int line_size)
|
||||
int line_size)
|
||||
{
|
||||
int i;
|
||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||
@ -1214,7 +1214,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
|
||||
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1225,7 +1225,7 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
|
||||
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1236,7 +1236,7 @@ static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
|
||||
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1247,7 +1247,7 @@ static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
|
||||
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1258,7 +1258,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1269,7 +1269,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
|
||||
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1280,7 +1280,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1291,7 +1291,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
|
||||
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1311,7 +1311,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1322,7 +1322,7 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1333,7 +1333,7 @@ static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1344,7 +1344,7 @@ static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1355,7 +1355,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1366,7 +1366,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1377,7 +1377,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -1388,7 +1388,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
|
||||
int i,j;
|
||||
for (i=0; i < height; i++) {
|
||||
for (j=0; j < width; j++) {
|
||||
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
|
||||
}
|
||||
src += stride;
|
||||
dst += stride;
|
||||
@ -3666,15 +3666,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
#ifdef CONFIG_ENCODERS
|
||||
if(avctx->dct_algo==FF_DCT_FASTINT) {
|
||||
c->fdct = fdct_ifast;
|
||||
c->fdct248 = fdct_ifast248;
|
||||
c->fdct248 = fdct_ifast248;
|
||||
}
|
||||
else if(avctx->dct_algo==FF_DCT_FAAN) {
|
||||
c->fdct = ff_faandct;
|
||||
c->fdct248 = ff_faandct248;
|
||||
c->fdct248 = ff_faandct248;
|
||||
}
|
||||
else {
|
||||
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
||||
c->fdct248 = ff_fdct248_islow;
|
||||
c->fdct248 = ff_fdct248_islow;
|
||||
}
|
||||
#endif //CONFIG_ENCODERS
|
||||
|
||||
|
@ -151,7 +151,7 @@ typedef struct DSPContext {
|
||||
* global motion compensation.
|
||||
*/
|
||||
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
||||
int (*pix_sum)(uint8_t * pix, int line_size);
|
||||
int (*pix_norm1)(uint8_t * pix, int line_size);
|
||||
@ -342,7 +342,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
|
||||
|
||||
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
|
||||
|
||||
#define BYTE_VEC32(c) ((c)*0x01010101UL)
|
||||
#define BYTE_VEC32(c) ((c)*0x01010101UL)
|
||||
|
||||
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
|
||||
{
|
||||
|
@ -194,7 +194,7 @@ channels_multi (int flags)
{
if (flags & DTS_LFE)
return 6;
else if (flags & 1) /* center channel */
return 5;
else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R)
return 4;

354 libavcodec/dv.c
@ -84,7 +84,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
|
||||
j = perm[i];
|
||||
s->dv_idct_shift[0][0][q][j] =
|
||||
dv_quant_shifts[q][dv_88_areas[i]] + 1;
|
||||
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
|
||||
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
|
||||
}
|
||||
|
||||
/* 248DCT */
|
||||
@ -92,7 +92,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
|
||||
/* 248 table */
|
||||
s->dv_idct_shift[0][1][q][i] =
|
||||
dv_quant_shifts[q][dv_248_areas[i]] + 1;
|
||||
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
|
||||
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -114,35 +114,35 @@ static int dvvideo_init(AVCodecContext *avctx)
|
||||
done = 1;
|
||||
|
||||
dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
|
||||
if (!dv_vlc_map)
|
||||
return -ENOMEM;
|
||||
if (!dv_vlc_map)
|
||||
return -ENOMEM;
|
||||
|
||||
/* dv_anchor lets each thread know its Id */
|
||||
dv_anchor = av_malloc(12*27*sizeof(void*));
|
||||
if (!dv_anchor) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (i=0; i<12*27; i++)
|
||||
dv_anchor[i] = (void*)(size_t)i;
|
||||
|
||||
/* it's faster to include sign bit in a generic VLC parsing scheme */
|
||||
for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
|
||||
new_dv_vlc_bits[j] = dv_vlc_bits[i];
|
||||
new_dv_vlc_len[j] = dv_vlc_len[i];
|
||||
new_dv_vlc_run[j] = dv_vlc_run[i];
|
||||
new_dv_vlc_level[j] = dv_vlc_level[i];
|
||||
|
||||
if (dv_vlc_level[i]) {
|
||||
new_dv_vlc_bits[j] <<= 1;
|
||||
new_dv_vlc_len[j]++;
|
||||
|
||||
j++;
|
||||
new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
|
||||
new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
|
||||
new_dv_vlc_run[j] = dv_vlc_run[i];
|
||||
new_dv_vlc_level[j] = -dv_vlc_level[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: as a trick, we use the fact the no codes are unused
|
||||
to accelerate the parsing of partial codes */
|
||||
@ -150,10 +150,10 @@ static int dvvideo_init(AVCodecContext *avctx)
|
||||
new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
|
||||
|
||||
dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
|
||||
if (!dv_rl_vlc) {
|
||||
av_free(dv_anchor);
|
||||
return -ENOMEM;
|
||||
}
|
||||
for(i = 0; i < dv_vlc.table_size; i++){
|
||||
int code= dv_vlc.table[i][0];
|
||||
int len = dv_vlc.table[i][1];
|
||||
@ -170,49 +170,49 @@ static int dvvideo_init(AVCodecContext *avctx)
|
||||
dv_rl_vlc[i].level = level;
|
||||
dv_rl_vlc[i].run = run;
|
||||
}
|
||||
free_vlc(&dv_vlc);
|
||||
free_vlc(&dv_vlc);
|
||||
|
||||
for (i = 0; i < NB_DV_VLC - 1; i++) {
|
||||
for (i = 0; i < NB_DV_VLC - 1; i++) {
|
||||
if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
|
||||
continue;
|
||||
continue;
|
||||
#ifdef DV_CODEC_TINY_TARGET
|
||||
if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
|
||||
continue;
|
||||
continue;
|
||||
#endif
|
||||
|
||||
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
|
||||
continue;
|
||||
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
|
||||
continue;
|
||||
|
||||
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
|
||||
(!!dv_vlc_level[i]);
|
||||
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
|
||||
(!!dv_vlc_level[i]);
|
||||
}
|
||||
for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
|
||||
#ifdef DV_CODEC_TINY_TARGET
|
||||
for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
|
||||
if (dv_vlc_map[i][j].size == 0) {
|
||||
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
||||
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
||||
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
||||
dv_vlc_map[0][j].size;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
|
||||
if (dv_vlc_map[i][j].size == 0) {
|
||||
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
|
||||
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
|
||||
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
|
||||
dv_vlc_map[0][j].size;
|
||||
}
|
||||
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
|
||||
dv_vlc_map[i][j].vlc | 1;
|
||||
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
|
||||
dv_vlc_map[i][j].size;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generic DSP setup */
|
||||
@ -241,7 +241,7 @@ static int dvvideo_init(AVCodecContext *avctx)
|
||||
|
||||
/* FIXME: I really don't think this should be here */
|
||||
if (dv_codec_profile(avctx))
|
||||
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
|
||||
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
|
||||
avctx->coded_frame = &s->picture;
|
||||
s->avctx= avctx;
|
||||
|
||||
@ -306,9 +306,9 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
|
||||
/* if we must parse a partial vlc, we do it here */
|
||||
if (partial_bit_count > 0) {
|
||||
re_cache = ((unsigned)re_cache >> partial_bit_count) |
|
||||
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
|
||||
re_index -= partial_bit_count;
|
||||
mb->partial_bit_count = 0;
|
||||
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
|
||||
re_index -= partial_bit_count;
|
||||
mb->partial_bit_count = 0;
|
||||
}
|
||||
|
||||
/* get the AC coefficients until last_index is reached */
|
||||
@ -318,30 +318,30 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
|
||||
#endif
|
||||
/* our own optimized GET_RL_VLC */
|
||||
index = NEG_USR32(re_cache, TEX_VLC_BITS);
|
||||
vlc_len = dv_rl_vlc[index].len;
|
||||
vlc_len = dv_rl_vlc[index].len;
|
||||
if (vlc_len < 0) {
|
||||
index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
|
||||
vlc_len = TEX_VLC_BITS - vlc_len;
|
||||
}
|
||||
level = dv_rl_vlc[index].level;
|
||||
run = dv_rl_vlc[index].run;
|
||||
run = dv_rl_vlc[index].run;
|
||||
|
||||
/* gotta check if we're still within gb boundaries */
|
||||
if (re_index + vlc_len > last_index) {
|
||||
/* should be < 16 bits otherwise a codeword could have been parsed */
|
||||
mb->partial_bit_count = last_index - re_index;
|
||||
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
|
||||
re_index = last_index;
|
||||
break;
|
||||
}
|
||||
re_index += vlc_len;
|
||||
/* gotta check if we're still within gb boundaries */
|
||||
if (re_index + vlc_len > last_index) {
|
||||
/* should be < 16 bits otherwise a codeword could have been parsed */
|
||||
mb->partial_bit_count = last_index - re_index;
|
||||
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
|
||||
re_index = last_index;
|
||||
break;
|
||||
}
|
||||
re_index += vlc_len;
|
||||
|
||||
#ifdef VLC_DEBUG
|
||||
printf("run=%d level=%d\n", run, level);
|
||||
printf("run=%d level=%d\n", run, level);
|
||||
#endif
|
||||
pos += run;
|
||||
if (pos >= 64)
|
||||
break;
|
||||
pos += run;
|
||||
if (pos >= 64)
|
||||
break;
|
||||
|
||||
assert(level);
|
||||
pos1 = scan_table[pos];
|
||||
@ -404,7 +404,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
||||
block = block1;
|
||||
for(j = 0;j < 6; j++) {
|
||||
last_index = block_sizes[j];
|
||||
init_get_bits(&gb, buf_ptr, last_index);
|
||||
init_get_bits(&gb, buf_ptr, last_index);
|
||||
|
||||
/* get the dc */
|
||||
dc = get_sbits(&gb, 9);
|
||||
@ -444,7 +444,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
||||
block = block1;
|
||||
mb = mb1;
|
||||
init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
|
||||
flush_put_bits(&pb);
|
||||
flush_put_bits(&pb);
|
||||
for(j = 0;j < 6; j++, block += 64, mb++) {
|
||||
if (mb->pos < 64 && get_bits_left(&gb) > 0) {
|
||||
dv_decode_ac(&gb, mb, block);
|
||||
@ -456,7 +456,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
||||
/* all blocks are finished, so the extra bytes can be used at
|
||||
the video segment level */
|
||||
if (j >= 6)
|
||||
bit_copy(&vs_pb, &gb);
|
||||
bit_copy(&vs_pb, &gb);
|
||||
}
|
||||
|
||||
/* we need a pass other the whole video segment */
|
||||
@ -475,8 +475,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
||||
#endif
|
||||
dv_decode_ac(&gb, mb, block);
|
||||
}
|
||||
if (mb->pos >= 64 && mb->pos < 127)
|
||||
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
|
||||
if (mb->pos >= 64 && mb->pos < 127)
|
||||
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
|
||||
block += 64;
|
||||
mb++;
|
||||
}
|
||||
@ -508,7 +508,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
|
||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
|
||||
uint64_t aligned_pixels[64/8];
|
||||
uint8_t *pixels= (uint8_t*)aligned_pixels;
|
||||
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
|
||||
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
|
||||
int x, y, linesize;
|
||||
/* NOTE: at end of line, the macroblock is handled as 420 */
|
||||
idct_put(pixels, 8, block);
|
||||
@ -543,21 +543,21 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
|
||||
int size;
|
||||
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
||||
*vlc = dv_vlc_map[run][level].vlc | sign;
|
||||
size = dv_vlc_map[run][level].size;
|
||||
size = dv_vlc_map[run][level].size;
|
||||
}
|
||||
else {
|
||||
if (level < DV_VLC_MAP_LEV_SIZE) {
|
||||
*vlc = dv_vlc_map[0][level].vlc | sign;
|
||||
size = dv_vlc_map[0][level].size;
|
||||
} else {
|
||||
*vlc = dv_vlc_map[0][level].vlc | sign;
|
||||
size = dv_vlc_map[0][level].size;
|
||||
} else {
|
||||
*vlc = 0xfe00 | (level << 1) | sign;
|
||||
size = 16;
|
||||
}
|
||||
if (run) {
|
||||
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
|
||||
(0x1f80 | (run - 1))) << size;
|
||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||
}
|
||||
size = 16;
|
||||
}
|
||||
if (run) {
|
||||
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
|
||||
(0x1f80 | (run - 1))) << size;
|
||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
@ -568,13 +568,13 @@ static always_inline int dv_rl2vlc_size(int run, int level)
|
||||
int size;
|
||||
|
||||
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
|
||||
size = dv_vlc_map[run][level].size;
|
||||
size = dv_vlc_map[run][level].size;
|
||||
}
|
||||
else {
|
||||
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
|
||||
if (run) {
|
||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||
}
|
||||
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
|
||||
if (run) {
|
||||
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -620,14 +620,14 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
|
||||
for (; size > (bits_left = put_bits_left(pb)); pb++) {
|
||||
if (bits_left) {
|
||||
size -= bits_left;
|
||||
put_bits(pb, bits_left, vlc >> size);
|
||||
vlc = vlc & ((1<<size)-1);
|
||||
}
|
||||
if (pb + 1 >= pb_end) {
|
||||
bi->partial_bit_count = size;
|
||||
bi->partial_bit_buffer = vlc;
|
||||
return pb;
|
||||
}
|
||||
put_bits(pb, bits_left, vlc >> size);
|
||||
vlc = vlc & ((1<<size)-1);
|
||||
}
|
||||
if (pb + 1 >= pb_end) {
|
||||
bi->partial_bit_count = size;
|
||||
bi->partial_bit_buffer = vlc;
|
||||
return pb;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store VLC */
|
||||
@ -712,14 +712,14 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
|
||||
s = blk;
|
||||
for(i=0; i<7; i++) {
|
||||
score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
|
||||
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
|
||||
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
|
||||
s += 8;
|
||||
}
|
||||
/* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
|
||||
s = blk;
|
||||
for(i=0; i<6; i++) {
|
||||
score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
|
||||
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
|
||||
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
|
||||
s += 8;
|
||||
}
|
||||
|
||||
@ -736,30 +736,30 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
|
||||
b = blks;
|
||||
for (i=0; i<5; i++) {
|
||||
if (!qnos[i])
|
||||
continue;
|
||||
continue;
|
||||
|
||||
qnos[i]--;
|
||||
size[i] = 0;
|
||||
qnos[i]--;
|
||||
size[i] = 0;
|
||||
for (j=0; j<6; j++, b++) {
|
||||
for (a=0; a<4; a++) {
|
||||
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
|
||||
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
|
||||
b->area_q[a]++;
|
||||
for (a=0; a<4; a++) {
|
||||
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
|
||||
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
|
||||
b->area_q[a]++;
|
||||
prev= b->prev[a];
|
||||
for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
|
||||
b->mb[k] >>= 1;
|
||||
if (b->mb[k]) {
|
||||
b->mb[k] >>= 1;
|
||||
if (b->mb[k]) {
|
||||
b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
|
||||
prev= k;
|
||||
prev= k;
|
||||
} else {
|
||||
b->next[prev] = b->next[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
b->prev[a+1]= prev;
|
||||
}
|
||||
size[i] += b->bit_size[a];
|
||||
}
|
||||
}
|
||||
}
|
||||
size[i] += b->bit_size[a];
|
||||
}
|
||||
}
|
||||
}
|
||||
} while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
|
||||
(qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
|
||||
@ -797,68 +797,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
|
||||
mb_x = v & 0xff;
|
||||
mb_y = v >> 8;
|
||||
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
|
||||
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
|
||||
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
|
||||
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
|
||||
do_edge_wrap = 0;
|
||||
qnos[mb_index] = 15; /* No quantization */
|
||||
ptr = dif + mb_index*80 + 4;
|
||||
for(j = 0;j < 6; j++) {
|
||||
if (j < 4) { /* Four Y blocks */
|
||||
/* NOTE: at end of line, the macroblock is handled as 420 */
|
||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
|
||||
/* NOTE: at end of line, the macroblock is handled as 420 */
|
||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
|
||||
data = y_ptr + (j * 8);
|
||||
} else {
|
||||
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
|
||||
}
|
||||
linesize = s->picture.linesize[0];
|
||||
linesize = s->picture.linesize[0];
|
||||
} else { /* Cr and Cb blocks */
|
||||
/* don't ask Fabrice why they inverted Cb and Cr ! */
|
||||
data = s->picture.data[6 - j] + c_offset;
|
||||
linesize = s->picture.linesize[6 - j];
|
||||
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
|
||||
do_edge_wrap = 1;
|
||||
}
|
||||
|
||||
/* Everything is set up -- now just copy data -> DCT block */
|
||||
if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
|
||||
uint8_t* d;
|
||||
DCTELEM *b = block;
|
||||
for (i=0;i<8;i++) {
|
||||
d = data + 8 * linesize;
|
||||
b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
|
||||
b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
|
||||
data += linesize;
|
||||
b += 8;
|
||||
}
|
||||
} else { /* Simple copy: 8x8 -> 8x8 */
|
||||
s->get_pixels(block, data, linesize);
|
||||
}
|
||||
|
||||
if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
|
||||
enc_blk->dct_mode = dv_guess_dct_mode(block);
|
||||
else
|
||||
enc_blk->dct_mode = 0;
|
||||
enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
|
||||
enc_blk->partial_bit_count = 0;
|
||||
enc_blk->partial_bit_buffer = 0;
|
||||
enc_blk->cur_ac = 0;
|
||||
|
||||
s->fdct[enc_blk->dct_mode](block);
|
||||
s->fdct[enc_blk->dct_mode](block);
|
||||
|
||||
dv_set_class_number(block, enc_blk,
|
||||
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
|
||||
dv_set_class_number(block, enc_blk,
|
||||
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
|
||||
|
||||
init_put_bits(pb, ptr, block_sizes[j]/8);
|
||||
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
|
||||
put_bits(pb, 1, enc_blk->dct_mode);
|
||||
put_bits(pb, 2, enc_blk->cno);
|
||||
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
|
||||
put_bits(pb, 1, enc_blk->dct_mode);
|
||||
put_bits(pb, 2, enc_blk->cno);
|
||||
|
||||
vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
|
||||
enc_blk->bit_size[2] + enc_blk->bit_size[3];
|
||||
++enc_blk;
|
||||
++pb;
|
||||
ptr += block_sizes[j]/8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -898,7 +898,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl)
|
||||
DVVideoContext *s = avctx->priv_data;
|
||||
int slice = (size_t)sl;
|
||||
dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
||||
&s->sys->video_place[slice*5]);
|
||||
&s->sys->video_place[slice*5]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -907,7 +907,7 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl)
|
||||
DVVideoContext *s = avctx->priv_data;
|
||||
int slice = (size_t)sl;
|
||||
dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
|
||||
&s->sys->video_place[slice*5]);
|
||||
&s->sys->video_place[slice*5]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -940,7 +940,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
|
||||
|
||||
s->buf = buf;
|
||||
avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
|
||||
s->sys->difseg_size * 27);
|
||||
s->sys->difseg_size * 27);
|
||||
|
||||
emms_c();
|
||||
|
||||
@ -958,7 +958,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
|
||||
|
||||
s->sys = dv_codec_profile(c);
|
||||
if (!s->sys)
|
||||
return -1;
|
||||
return -1;
|
||||
if(buf_size < s->sys->frame_size)
|
||||
return -1;
|
||||
|
||||
@ -969,7 +969,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
|
||||
|
||||
s->buf = buf;
|
||||
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
|
||||
s->sys->difseg_size * 27);
|
||||
s->sys->difseg_size * 27);
|
||||
|
||||
emms_c();
|
||||
return s->sys->frame_size;
|
||||
|
@ -192,7 +192,7 @@ static void dvb_encode_rle4(uint8_t **pq,

#define SCALEBITS 10
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))

#define RGB_TO_Y_CCIR(r, g, b) \
((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \

@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){

/**
* guess the dc of blocks which dont have a undamaged dc
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i

/**
* simple horizontal deblocking filter used for error resilience
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st

/**
* simple vertical deblocking filter used for error resilience
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){
s->mv_type = MV_TYPE_16X16;
s->mb_skipped=0;

s->dsp.clear_blocks(s->block[0]);

s->mb_x= mb_x;
s->mb_y= mb_y;
@ -476,7 +476,7 @@ int score_sum=0;
s->mv_type = MV_TYPE_16X16;
s->mb_skipped=0;

s->dsp.clear_blocks(s->block[0]);

s->mb_x= mb_x;
s->mb_y= mb_y;
@ -582,7 +582,7 @@ static int is_intra_more_likely(MpegEncContext *s){
uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize;

is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
}else{
if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
@ -873,7 +873,7 @@ void ff_er_frame_end(MpegEncContext *s){
s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
}

s->dsp.clear_blocks(s->block[0]);

s->mb_x= mb_x;
s->mb_y= mb_y;

@ -46,7 +46,7 @@ static int Faac_encode_init(AVCodecContext *avctx)
/* check faac version */
faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle);
if (faac_cfg->version != FAAC_CFG_VERSION) {
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
faacEncClose(s->faac_handle);
return -1;
}
@ -47,8 +47,8 @@ static const char* libfaadname = "libfaad.so.0";
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
void* handle; /* dlopen handle */
|
||||
void* faac_handle; /* FAAD library handle */
|
||||
void* handle; /* dlopen handle */
|
||||
void* faac_handle; /* FAAD library handle */
|
||||
int frame_size;
|
||||
int sample_size;
|
||||
int flags;
|
||||
@ -57,36 +57,36 @@ typedef struct {
|
||||
faacDecHandle FAADAPI (*faacDecOpen)(void);
|
||||
faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder);
|
||||
#ifndef FAAD2_VERSION
|
||||
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||
faacDecConfigurationPtr config);
|
||||
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long *samplerate,
|
||||
unsigned long *channels);
|
||||
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long *samplerate,
|
||||
unsigned long *channels);
|
||||
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||
unsigned long SizeOfDecoderSpecificInfo,
|
||||
unsigned long *samplerate, unsigned long *channels);
|
||||
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long *bytesconsumed,
|
||||
short *sample_buffer,
|
||||
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long *bytesconsumed,
|
||||
short *sample_buffer,
|
||||
unsigned long *samples);
|
||||
#else
|
||||
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
|
||||
faacDecConfigurationPtr config);
|
||||
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long buffer_size,
|
||||
unsigned long *samplerate,
|
||||
unsigned char *channels);
|
||||
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
|
||||
unsigned char *buffer,
|
||||
unsigned long buffer_size,
|
||||
unsigned long *samplerate,
|
||||
unsigned char *channels);
|
||||
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
|
||||
unsigned long SizeOfDecoderSpecificInfo,
|
||||
unsigned long *samplerate, unsigned char *channels);
|
||||
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||
faacDecFrameInfo *hInfo,
|
||||
unsigned char *buffer,
|
||||
unsigned long buffer_size);
|
||||
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
|
||||
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
|
||||
faacDecFrameInfo *hInfo,
|
||||
unsigned char *buffer,
|
||||
unsigned long buffer_size);
|
||||
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
|
||||
#endif
|
||||
|
||||
void FAADAPI (*faacDecClose)(faacDecHandle hDecoder);
|
||||
@ -112,14 +112,14 @@ static int faac_init_mp4(AVCodecContext *avctx)
|
||||
int r = 0;
|
||||
|
||||
if (avctx->extradata)
|
||||
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
|
||||
avctx->extradata_size,
|
||||
&samplerate, &channels);
|
||||
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
|
||||
avctx->extradata_size,
|
||||
&samplerate, &channels);
|
||||
// else r = s->faacDecInit(s->faac_handle ... );
|
||||
|
||||
if (r < 0)
|
||||
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
|
||||
r, samplerate, (long)channels, avctx->extradata_size);
|
||||
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
|
||||
r, samplerate, (long)channels, avctx->extradata_size);
|
||||
avctx->sample_rate = samplerate;
|
||||
avctx->channels = channels;
|
||||
|
||||
@ -141,7 +141,7 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
||||
void *out;
|
||||
#endif
|
||||
if(buf_size == 0)
|
||||
return 0;
|
||||
return 0;
|
||||
#ifndef FAAD2_VERSION
|
||||
out = s->faacDecDecode(s->faac_handle,
|
||||
(unsigned char*)buf,
|
||||
@ -150,16 +150,16 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
||||
&samples);
|
||||
samples *= s->sample_size;
|
||||
if (data_size)
|
||||
*data_size = samples;
|
||||
*data_size = samples;
|
||||
return (buf_size < (int)bytesconsumed)
|
||||
? buf_size : (int)bytesconsumed;
|
||||
? buf_size : (int)bytesconsumed;
|
||||
#else
|
||||
|
||||
out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size);
|
||||
|
||||
if (frame_info.error > 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
|
||||
s->faacDecGetErrorMessage(frame_info.error));
|
||||
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
|
||||
s->faacDecGetErrorMessage(frame_info.error));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -167,10 +167,10 @@ static int faac_decode_frame(AVCodecContext *avctx,
|
||||
memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one
|
||||
|
||||
if (data_size)
|
||||
*data_size = frame_info.samples;
|
||||
*data_size = frame_info.samples;
|
||||
|
||||
return (buf_size < (int)frame_info.bytesconsumed)
|
||||
? buf_size : (int)frame_info.bytesconsumed;
|
||||
? buf_size : (int)frame_info.bytesconsumed;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -196,8 +196,8 @@ static int faac_decode_init(AVCodecContext *avctx)
|
||||
s->handle = dlopen(libfaadname, RTLD_LAZY);
|
||||
if (!s->handle)
|
||||
{
|
||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
|
||||
libfaadname, dlerror());
|
||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
|
||||
libfaadname, dlerror());
|
||||
return -1;
|
||||
}
|
||||
#define dfaac(a, b) \
|
||||
@ -209,32 +209,32 @@ static int faac_decode_init(AVCodecContext *avctx)
|
||||
#endif /* CONFIG_FAADBIN */
|
||||
|
||||
// resolve all needed function calls
|
||||
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
|
||||
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
|
||||
FAADAPI (*)(faacDecHandle)));
|
||||
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
|
||||
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
|
||||
FAADAPI (*)(faacDecHandle)));
|
||||
#ifndef FAAD2_VERSION
|
||||
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
|
||||
faacDecConfigurationPtr)));
|
||||
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
|
||||
faacDecConfigurationPtr)));
|
||||
|
||||
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long*, unsigned long*)));
|
||||
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long*, unsigned long*)));
|
||||
dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long, unsigned long*,
|
||||
unsigned long*)));
|
||||
unsigned long, unsigned long*,
|
||||
unsigned long*)));
|
||||
dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
|
||||
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long*, short*, unsigned long*)));
|
||||
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long*, short*, unsigned long*)));
|
||||
#else
|
||||
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
|
||||
faacDecConfigurationPtr)));
|
||||
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long, unsigned long*, unsigned char*)));
|
||||
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long, unsigned long*,
|
||||
unsigned char*)));
|
||||
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
|
||||
unsigned char*, unsigned long)));
|
||||
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
|
||||
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
|
||||
faacDecConfigurationPtr)));
|
||||
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long, unsigned long*, unsigned char*)));
|
||||
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
|
||||
unsigned long, unsigned long*,
|
||||
unsigned char*)));
|
||||
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
|
||||
unsigned char*, unsigned long)));
|
||||
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
|
||||
#endif
|
||||
#undef dfacc
|
||||
|
||||
@ -243,8 +243,8 @@ static int faac_decode_init(AVCodecContext *avctx)
|
||||
}
|
||||
if (err) {
|
||||
dlclose(s->handle);
|
||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
|
||||
err, libfaadname);
|
||||
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
|
||||
err, libfaadname);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@ -260,31 +260,31 @@ static int faac_decode_init(AVCodecContext *avctx)
|
||||
faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle);
|
||||
|
||||
if (faac_cfg) {
|
||||
switch (avctx->bits_per_sample) {
|
||||
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
|
||||
default:
|
||||
case 16:
|
||||
switch (avctx->bits_per_sample) {
|
||||
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
|
||||
default:
|
||||
case 16:
|
||||
#ifdef FAAD2_VERSION
|
||||
faac_cfg->outputFormat = FAAD_FMT_16BIT;
|
||||
faac_cfg->outputFormat = FAAD_FMT_16BIT;
|
||||
#endif
|
||||
s->sample_size = 2;
|
||||
break;
|
||||
case 24:
|
||||
s->sample_size = 2;
|
||||
break;
|
||||
case 24:
|
||||
#ifdef FAAD2_VERSION
|
||||
faac_cfg->outputFormat = FAAD_FMT_24BIT;
|
||||
faac_cfg->outputFormat = FAAD_FMT_24BIT;
|
||||
#endif
|
||||
s->sample_size = 3;
|
||||
break;
|
||||
case 32:
|
||||
s->sample_size = 3;
|
||||
break;
|
||||
case 32:
|
||||
#ifdef FAAD2_VERSION
|
||||
faac_cfg->outputFormat = FAAD_FMT_32BIT;
|
||||
faac_cfg->outputFormat = FAAD_FMT_32BIT;
|
||||
#endif
|
||||
s->sample_size = 4;
|
||||
break;
|
||||
}
|
||||
s->sample_size = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
|
||||
faac_cfg->defObjectType = LC;
|
||||
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
|
||||
faac_cfg->defObjectType = LC;
|
||||
}
|
||||
|
||||
s->faacDecSetConfiguration(s->faac_handle, faac_cfg);
|
||||
|
@ -204,15 +204,15 @@ void ff_faandct248(DCTELEM * data)
|
||||
data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
||||
|
||||
tmp10 = tmp4 + tmp7;
|
||||
tmp11 = tmp5 + tmp6;
|
||||
tmp12 = tmp5 - tmp6;
|
||||
tmp13 = tmp4 - tmp7;
|
||||
tmp11 = tmp5 + tmp6;
|
||||
tmp12 = tmp5 - tmp6;
|
||||
tmp13 = tmp4 - tmp7;
|
||||
|
||||
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
|
||||
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
|
||||
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
|
||||
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
|
||||
|
||||
z1 = (tmp12 + tmp13)* A1;
|
||||
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
|
||||
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
||||
z1 = (tmp12 + tmp13)* A1;
|
||||
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
|
||||
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
|
||||
}
|
||||
}
|
||||
|
@ -64,51 +64,51 @@ void init_fdct()
|
||||
void fdct(block)
|
||||
short *block;
|
||||
{
|
||||
register int i, j;
|
||||
double s;
|
||||
double tmp[64];
|
||||
register int i, j;
|
||||
double s;
|
||||
double tmp[64];
|
||||
|
||||
for(i = 0; i < 8; i++)
|
||||
for(j = 0; j < 8; j++)
|
||||
{
|
||||
s = 0.0;
|
||||
for(i = 0; i < 8; i++)
|
||||
for(j = 0; j < 8; j++)
|
||||
{
|
||||
s = 0.0;
|
||||
|
||||
/*
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[j][k] * block[8 * i + k];
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[j][k] * block[8 * i + k];
|
||||
*/
|
||||
s += c[j][0] * block[8 * i + 0];
|
||||
s += c[j][1] * block[8 * i + 1];
|
||||
s += c[j][2] * block[8 * i + 2];
|
||||
s += c[j][3] * block[8 * i + 3];
|
||||
s += c[j][4] * block[8 * i + 4];
|
||||
s += c[j][5] * block[8 * i + 5];
|
||||
s += c[j][6] * block[8 * i + 6];
|
||||
s += c[j][7] * block[8 * i + 7];
|
||||
s += c[j][0] * block[8 * i + 0];
|
||||
s += c[j][1] * block[8 * i + 1];
|
||||
s += c[j][2] * block[8 * i + 2];
|
||||
s += c[j][3] * block[8 * i + 3];
|
||||
s += c[j][4] * block[8 * i + 4];
|
||||
s += c[j][5] * block[8 * i + 5];
|
||||
s += c[j][6] * block[8 * i + 6];
|
||||
s += c[j][7] * block[8 * i + 7];
|
||||
|
||||
tmp[8 * i + j] = s;
|
||||
}
|
||||
tmp[8 * i + j] = s;
|
||||
}
|
||||
|
||||
for(j = 0; j < 8; j++)
|
||||
for(i = 0; i < 8; i++)
|
||||
{
|
||||
s = 0.0;
|
||||
for(j = 0; j < 8; j++)
|
||||
for(i = 0; i < 8; i++)
|
||||
{
|
||||
s = 0.0;
|
||||
|
||||
/*
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[i][k] * tmp[8 * k + j];
|
||||
* for(k = 0; k < 8; k++)
|
||||
* s += c[i][k] * tmp[8 * k + j];
|
||||
*/
|
||||
s += c[i][0] * tmp[8 * 0 + j];
|
||||
s += c[i][1] * tmp[8 * 1 + j];
|
||||
s += c[i][2] * tmp[8 * 2 + j];
|
||||
s += c[i][3] * tmp[8 * 3 + j];
|
||||
s += c[i][4] * tmp[8 * 4 + j];
|
||||
s += c[i][5] * tmp[8 * 5 + j];
|
||||
s += c[i][6] * tmp[8 * 6 + j];
|
||||
s += c[i][7] * tmp[8 * 7 + j];
|
||||
s*=8.0;
|
||||
s += c[i][0] * tmp[8 * 0 + j];
|
||||
s += c[i][1] * tmp[8 * 1 + j];
|
||||
s += c[i][2] * tmp[8 * 2 + j];
|
||||
s += c[i][3] * tmp[8 * 3 + j];
|
||||
s += c[i][4] * tmp[8 * 4 + j];
|
||||
s += c[i][5] * tmp[8 * 5 + j];
|
||||
s += c[i][6] * tmp[8 * 6 + j];
|
||||
s += c[i][7] * tmp[8 * 7 + j];
|
||||
s*=8.0;
|
||||
|
||||
block[8 * i + j] = (short)floor(s + 0.499999);
|
||||
block[8 * i + j] = (short)floor(s + 0.499999);
|
||||
/*
|
||||
* reason for adding 0.499999 instead of 0.5:
|
||||
* s is quite often x.5 (at least for i and/or j = 0 or 4)
|
||||
|
@ -149,8 +149,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
|
||||
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
|
||||
{
|
||||
int ln = s->nbits;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
register FFTComplex *p, *q;
|
||||
FFTComplex *exptab = s->exptab;
|
||||
int l;
|
||||
|
@ -31,30 +31,30 @@
|
||||
* instead of simply using 32bit integer arithmetic.
|
||||
*/
|
||||
typedef struct Float11 {
|
||||
int sign; /**< 1bit sign */
|
||||
int exp; /**< 4bit exponent */
|
||||
int mant; /**< 6bit mantissa */
|
||||
int sign; /**< 1bit sign */
|
||||
int exp; /**< 4bit exponent */
|
||||
int mant; /**< 6bit mantissa */
|
||||
} Float11;
|
||||
|
||||
static inline Float11* i2f(int16_t i, Float11* f)
|
||||
{
|
||||
f->sign = (i < 0);
|
||||
if (f->sign)
|
||||
i = -i;
|
||||
f->exp = av_log2_16bit(i) + !!i;
|
||||
f->mant = i? (i<<6) >> f->exp :
|
||||
1<<5;
|
||||
return f;
|
||||
f->sign = (i < 0);
|
||||
if (f->sign)
|
||||
i = -i;
|
||||
f->exp = av_log2_16bit(i) + !!i;
|
||||
f->mant = i? (i<<6) >> f->exp :
|
||||
1<<5;
|
||||
return f;
|
||||
}
|
||||
|
||||
static inline int16_t mult(Float11* f1, Float11* f2)
|
||||
{
|
||||
int res, exp;
|
||||
int res, exp;
|
||||
|
||||
exp = f1->exp + f2->exp;
|
||||
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
|
||||
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
|
||||
return (f1->sign ^ f2->sign) ? -res : res;
|
||||
exp = f1->exp + f2->exp;
|
||||
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
|
||||
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
|
||||
return (f1->sign ^ f2->sign) ? -res : res;
|
||||
}
|
||||
|
||||
static inline int sgn(int value)
|
||||
@ -63,32 +63,32 @@ static inline int sgn(int value)
|
||||
}
|
||||
|
||||
typedef struct G726Tables {
|
||||
int bits; /**< bits per sample */
|
||||
int* quant; /**< quantization table */
|
||||
int* iquant; /**< inverse quantization table */
|
||||
int* W; /**< special table #1 ;-) */
|
||||
int* F; /**< special table #2 */
|
||||
int bits; /**< bits per sample */
|
||||
int* quant; /**< quantization table */
|
||||
int* iquant; /**< inverse quantization table */
|
||||
int* W; /**< special table #1 ;-) */
|
||||
int* F; /**< special table #2 */
|
||||
} G726Tables;
|
||||
|
||||
typedef struct G726Context {
|
||||
G726Tables* tbls; /**< static tables needed for computation */
|
||||
G726Tables* tbls; /**< static tables needed for computation */
|
||||
|
||||
Float11 sr[2]; /**< prev. reconstructed samples */
|
||||
Float11 dq[6]; /**< prev. difference */
|
||||
int a[2]; /**< second order predictor coeffs */
|
||||
int b[6]; /**< sixth order predictor coeffs */
|
||||
int pk[2]; /**< signs of prev. 2 sez + dq */
|
||||
Float11 sr[2]; /**< prev. reconstructed samples */
|
||||
Float11 dq[6]; /**< prev. difference */
|
||||
int a[2]; /**< second order predictor coeffs */
|
||||
int b[6]; /**< sixth order predictor coeffs */
|
||||
int pk[2]; /**< signs of prev. 2 sez + dq */
|
||||
|
||||
int ap; /**< scale factor control */
|
||||
int yu; /**< fast scale factor */
|
||||
int yl; /**< slow scale factor */
|
||||
int dms; /**< short average magnitude of F[i] */
|
||||
int dml; /**< long average magnitude of F[i] */
|
||||
int td; /**< tone detect */
|
||||
int ap; /**< scale factor control */
|
||||
int yu; /**< fast scale factor */
|
||||
int yl; /**< slow scale factor */
|
||||
int dms; /**< short average magnitude of F[i] */
|
||||
int dml; /**< long average magnitude of F[i] */
|
||||
int td; /**< tone detect */
|
||||
|
||||
int se; /**< estimated signal for the next iteration */
|
||||
int sez; /**< estimated second order prediction */
|
||||
int y; /**< quantizer scaling factor for the next iteration */
|
||||
int se; /**< estimated signal for the next iteration */
|
||||
int sez; /**< estimated second order prediction */
|
||||
int y; /**< quantizer scaling factor for the next iteration */
|
||||
} G726Context;
|
||||
|
||||
static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */
|
||||
@ -113,34 +113,34 @@ static int quant_tbl32[] = /**< 32kbit/s 4bits per sample
|
||||
{ -125, 79, 177, 245, 299, 348, 399, INT_MAX };
|
||||
static int iquant_tbl32[] =
|
||||
{ INT_MIN, 4, 135, 213, 273, 323, 373, 425,
|
||||
425, 373, 323, 273, 213, 135, 4, INT_MIN };
|
||||
425, 373, 323, 273, 213, 135, 4, INT_MIN };
|
||||
static int W_tbl32[] =
|
||||
{ -12, 18, 41, 64, 112, 198, 355, 1122,
|
||||
1122, 355, 198, 112, 64, 41, 18, -12};
|
||||
1122, 355, 198, 112, 64, 41, 18, -12};
|
||||
static int F_tbl32[] =
|
||||
{ 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
|
||||
|
||||
static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */
|
||||
{ -122, -16, 67, 138, 197, 249, 297, 338,
|
||||
377, 412, 444, 474, 501, 527, 552, INT_MAX };
|
||||
377, 412, 444, 474, 501, 527, 552, INT_MAX };
|
||||
static int iquant_tbl40[] =
|
||||
{ INT_MIN, -66, 28, 104, 169, 224, 274, 318,
|
||||
358, 395, 429, 459, 488, 514, 539, 566,
|
||||
566, 539, 514, 488, 459, 429, 395, 358,
|
||||
318, 274, 224, 169, 104, 28, -66, INT_MIN };
|
||||
358, 395, 429, 459, 488, 514, 539, 566,
|
||||
566, 539, 514, 488, 459, 429, 395, 358,
|
||||
318, 274, 224, 169, 104, 28, -66, INT_MIN };
|
||||
static int W_tbl40[] =
|
||||
{ 14, 14, 24, 39, 40, 41, 58, 100,
|
||||
141, 179, 219, 280, 358, 440, 529, 696,
|
||||
696, 529, 440, 358, 280, 219, 179, 141,
|
||||
100, 58, 41, 40, 39, 24, 14, 14 };
|
||||
141, 179, 219, 280, 358, 440, 529, 696,
|
||||
696, 529, 440, 358, 280, 219, 179, 141,
|
||||
100, 58, 41, 40, 39, 24, 14, 14 };
|
||||
static int F_tbl40[] =
|
||||
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
|
||||
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
|
||||
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
|
||||
|
||||
static G726Tables G726Tables_pool[] =
|
||||
{{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
|
||||
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
|
||||
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
|
||||
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
|
||||
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
|
||||
{ 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
|
||||
|
||||
|
||||
@ -207,20 +207,20 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I)
|
||||
dq0 = dq ? sgn(dq) : 0;
|
||||
if (tr) {
|
||||
c->a[0] = 0;
|
||||
c->a[1] = 0;
|
||||
c->a[1] = 0;
|
||||
for (i=0; i<6; i++)
|
||||
c->b[i] = 0;
|
||||
c->b[i] = 0;
|
||||
} else {
|
||||
/* This is a bit crazy, but it really is +255 not +256 */
|
||||
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
|
||||
/* This is a bit crazy, but it really is +255 not +256 */
|
||||
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
|
||||
|
||||
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
|
||||
c->a[1] = clip(c->a[1], -12288, 12288);
|
||||
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
|
||||
c->a[1] = clip(c->a[1], -12288, 12288);
|
||||
c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
|
||||
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
|
||||
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
|
||||
|
||||
for (i=0; i<6; i++)
|
||||
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
|
||||
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
|
||||
}
|
||||
|
||||
/* Update Dq and Sr and Pk */
|
||||
@ -323,13 +323,13 @@ static int g726_init(AVCodecContext * avctx)
|
||||
|
||||
if (avctx->channels != 1 ||
|
||||
(avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
|
||||
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
|
||||
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
|
||||
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
|
||||
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
g726_reset(&c->c, avctx->bit_rate);
|
||||
c->code_size = c->c.tbls->bits;
|
||||
@ -384,12 +384,12 @@ static int g726_decode_frame(AVCodecContext *avctx,
|
||||
init_get_bits(&gb, buf, buf_size * 8);
|
||||
if (c->bits_left) {
|
||||
int s = c->code_size - c->bits_left;;
|
||||
code = (c->bit_buffer << s) | get_bits(&gb, s);
|
||||
*samples++ = g726_decode(&c->c, code & mask);
|
||||
code = (c->bit_buffer << s) | get_bits(&gb, s);
|
||||
*samples++ = g726_decode(&c->c, code & mask);
|
||||
}
|
||||
|
||||
while (get_bits_count(&gb) + c->code_size <= buf_size*8)
|
||||
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
|
||||
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
|
||||
|
||||
c->bits_left = buf_size*8 - get_bits_count(&gb);
|
||||
c->bit_buffer = get_bits(&gb, c->bits_left);
|
||||
|
@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es
* read unsigned golomb rice code (shorten).
*/
static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
}

/**
@ -395,7 +395,7 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){
*/
static inline void set_se_golomb(PutBitContext *pb, int i){
// if (i>32767 || i<-32767)
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
#if 0
if(i<=0) i= -2*i;
else i= 2*i-1;
@ -231,11 +231,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
(coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
|
||||
put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
|
||||
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, 1, 0); /* h263 id */
|
||||
put_bits(&s->pb, 1, 0); /* split screen off */
|
||||
put_bits(&s->pb, 1, 0); /* camera off */
|
||||
put_bits(&s->pb, 1, 0); /* freeze picture release off */
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, 1, 0); /* h263 id */
|
||||
put_bits(&s->pb, 1, 0); /* split screen off */
|
||||
put_bits(&s->pb, 1, 0); /* camera off */
|
||||
put_bits(&s->pb, 1, 0); /* freeze picture release off */
|
||||
|
||||
format = h263_get_picture_format(s->width, s->height);
|
||||
if (!s->h263_plus) {
|
||||
@ -245,12 +245,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
/* By now UMV IS DISABLED ON H.263v1, since the restrictions
|
||||
of H.263v1 UMV implies to check the predicted MV after
|
||||
calculation of the current MB to see if we're on the limits */
|
||||
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
|
||||
put_bits(&s->pb, 1, 0); /* SAC: off */
|
||||
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
|
||||
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
|
||||
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
|
||||
put_bits(&s->pb, 1, 0); /* SAC: off */
|
||||
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
|
||||
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
|
||||
put_bits(&s->pb, 5, s->qscale);
|
||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||
} else {
|
||||
int ufep=1;
|
||||
/* H.263v2 */
|
||||
@ -286,9 +286,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
|
||||
|
||||
/* This should be here if PLUSPTYPE */
|
||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
|
||||
|
||||
if (format == 7) {
|
||||
if (format == 7) {
|
||||
/* Custom Picture Format (CPFMT) */
|
||||
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
||||
|
||||
@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
|
||||
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
|
||||
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(s->custom_pcf){
|
||||
if(ufep){
|
||||
@ -320,7 +320,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
put_bits(&s->pb, 5, s->qscale);
|
||||
}
|
||||
|
||||
put_bits(&s->pb, 1, 0); /* no PEI */
|
||||
put_bits(&s->pb, 1, 0); /* no PEI */
|
||||
|
||||
if(s->h263_slice_structured){
|
||||
put_bits(&s->pb, 1, 1);
|
||||
@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64],
|
||||
}
|
||||
|
||||
void mpeg4_encode_mb(MpegEncContext * s,
|
||||
DCTELEM block[6][64],
|
||||
int motion_x, int motion_y)
|
||||
DCTELEM block[6][64],
|
||||
int motion_x, int motion_y)
|
||||
{
|
||||
int cbpc, cbpy, pred_x, pred_y;
|
||||
PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb;
|
||||
@ -884,7 +884,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
return;
|
||||
}
|
||||
|
||||
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
|
||||
put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
|
||||
put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
|
||||
if(cbp) put_bits(&s->pb, 6, cbp);
|
||||
@ -998,7 +998,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
if(pic==NULL || pic->pict_type!=B_TYPE) break;
|
||||
|
||||
b_pic= pic->data[0] + offset + 16; //FIXME +16
|
||||
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
|
||||
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
|
||||
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
|
||||
s->mb_skipped=0;
|
||||
break;
|
||||
@ -1021,7 +1021,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
}
|
||||
}
|
||||
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
cbpc = cbp & 3;
|
||||
cbpy = cbp >> 2;
|
||||
cbpy ^= 0xf;
|
||||
@ -1121,7 +1121,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
int dc_diff[6]; //dc values with the dc prediction subtracted
|
||||
int dir[6]; //prediction direction
|
||||
int zigzag_last_index[6];
|
||||
uint8_t *scan_table[6];
|
||||
uint8_t *scan_table[6];
|
||||
int i;
|
||||
|
||||
for(i=0; i<6; i++){
|
||||
@ -1152,7 +1152,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
intra_MCBPC_code[cbpc]);
|
||||
} else {
|
||||
if(s->dquant) cbpc+=8;
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb,
|
||||
inter_MCBPC_bits[cbpc + 4],
|
||||
inter_MCBPC_code[cbpc + 4]);
|
||||
@ -1185,8 +1185,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
}
|
||||
|
||||
void h263_encode_mb(MpegEncContext * s,
|
||||
DCTELEM block[6][64],
|
||||
int motion_x, int motion_y)
|
||||
DCTELEM block[6][64],
|
||||
int motion_x, int motion_y)
|
||||
{
|
||||
int cbpc, cbpy, i, cbp, pred_x, pred_y;
|
||||
int16_t pred_dc;
|
||||
@ -1211,7 +1211,7 @@ void h263_encode_mb(MpegEncContext * s,
|
||||
|
||||
return;
|
||||
}
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
|
||||
cbpc = cbp & 3;
|
||||
cbpy = cbp >> 2;
|
||||
@ -1346,14 +1346,14 @@ void h263_encode_mb(MpegEncContext * s,
|
||||
intra_MCBPC_code[cbpc]);
|
||||
} else {
|
||||
if(s->dquant) cbpc+=8;
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb,
|
||||
inter_MCBPC_bits[cbpc + 4],
|
||||
inter_MCBPC_code[cbpc + 4]);
|
||||
}
|
||||
if (s->h263_aic) {
|
||||
/* XXX: currently, we do not try to use ac prediction */
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction */
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction */
|
||||
}
|
||||
cbpy = cbp >> 2;
|
||||
put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
|
||||
@ -1796,7 +1796,7 @@ static void init_uni_dc_tab(void)
|
||||
v = abs(level);
|
||||
while (v) {
|
||||
v >>= 1;
|
||||
size++;
|
||||
size++;
|
||||
}
|
||||
|
||||
if (level < 0)
|
||||
@ -2318,14 +2318,14 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
||||
put_bits(&s->pb, 16, 0);
|
||||
put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */
|
||||
|
||||
put_bits(&s->pb, 1, 0); /* random access vol */
|
||||
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
|
||||
put_bits(&s->pb, 1, 0); /* random access vol */
|
||||
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
|
||||
if(s->workaround_bugs & FF_BUG_MS) {
|
||||
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
|
||||
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
|
||||
} else {
|
||||
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
|
||||
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
|
||||
put_bits(&s->pb, 3, 1); /* is obj layer priority */
|
||||
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
|
||||
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
|
||||
put_bits(&s->pb, 3, 1); /* is obj layer priority */
|
||||
}
|
||||
|
||||
aspect_to_info(s, s->avctx->sample_aspect_ratio);
|
||||
@ -2337,37 +2337,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
||||
}
|
||||
|
||||
if(s->workaround_bugs & FF_BUG_MS) { //
|
||||
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
|
||||
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
|
||||
} else {
|
||||
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
|
||||
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
|
||||
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
|
||||
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
|
||||
put_bits(&s->pb, 1, s->low_delay);
|
||||
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
|
||||
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
|
||||
}
|
||||
|
||||
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
|
||||
put_bits(&s->pb, 16, s->avctx->time_base.den);
|
||||
if (s->time_increment_bits < 1)
|
||||
s->time_increment_bits = 1;
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 13, s->width); /* vol width */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 13, s->height); /* vol height */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 13, s->width); /* vol width */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 13, s->height); /* vol height */
|
||||
put_bits(&s->pb, 1, 1); /* marker bit */
|
||||
put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
|
||||
put_bits(&s->pb, 1, 1); /* obmc disable */
|
||||
put_bits(&s->pb, 1, 1); /* obmc disable */
|
||||
if (vo_ver_id == 1) {
|
||||
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
|
||||
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
|
||||
}else{
|
||||
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
|
||||
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
|
||||
}
|
||||
|
||||
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
|
||||
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
|
||||
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
|
||||
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
|
||||
|
||||
if(s->mpeg_quant){
|
||||
ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
|
||||
@ -2376,27 +2376,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
|
||||
|
||||
if (vo_ver_id != 1)
|
||||
put_bits(&s->pb, 1, s->quarter_sample);
|
||||
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
|
||||
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
|
||||
s->resync_marker= s->rtp_mode;
|
||||
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
|
||||
put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
|
||||
if(s->data_partitioning){
|
||||
put_bits(&s->pb, 1, 0); /* no rvlc */
|
||||
put_bits(&s->pb, 1, 0); /* no rvlc */
|
||||
}
|
||||
|
||||
if (vo_ver_id != 1){
|
||||
put_bits(&s->pb, 1, 0); /* newpred */
|
||||
put_bits(&s->pb, 1, 0); /* reduced res vop */
|
||||
put_bits(&s->pb, 1, 0); /* newpred */
|
||||
put_bits(&s->pb, 1, 0); /* reduced res vop */
|
||||
}
|
||||
put_bits(&s->pb, 1, 0); /* scalability */
|
||||
put_bits(&s->pb, 1, 0); /* scalability */
|
||||
|
||||
ff_mpeg4_stuffing(&s->pb);
|
||||
|
||||
/* user data */
|
||||
if(!(s->flags & CODEC_FLAG_BITEXACT)){
|
||||
put_bits(&s->pb, 16, 0);
|
||||
put_bits(&s->pb, 16, 0x1B2); /* user_data */
|
||||
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
|
||||
put_bits(&s->pb, 16, 0x1B2); /* user_data */
|
||||
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2421,9 +2421,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
|
||||
//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
|
||||
|
||||
put_bits(&s->pb, 16, 0); /* vop header */
|
||||
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
|
||||
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
|
||||
put_bits(&s->pb, 16, 0); /* vop header */
|
||||
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
|
||||
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
|
||||
|
||||
assert(s->time>=0);
|
||||
time_div= s->time/s->avctx->time_base.den;
|
||||
@ -2435,15 +2435,15 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
|
||||
put_bits(&s->pb, 1, 0);
|
||||
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, 1, 1); /* vop coded */
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
|
||||
put_bits(&s->pb, 1, 1); /* marker */
|
||||
put_bits(&s->pb, 1, 1); /* vop coded */
|
||||
if ( s->pict_type == P_TYPE
|
||||
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
|
||||
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
|
||||
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
|
||||
}
|
||||
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
|
||||
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
|
||||
if(!s->progressive_sequence){
|
||||
put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
|
||||
put_bits(&s->pb, 1, s->alternate_scan);
|
||||
@ -2453,9 +2453,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
|
||||
put_bits(&s->pb, 5, s->qscale);
|
||||
|
||||
if (s->pict_type != I_TYPE)
|
||||
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
|
||||
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
|
||||
if (s->pict_type == B_TYPE)
|
||||
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
|
||||
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
|
||||
// printf("****frame %d\n", picture_number);
|
||||
}
|
||||
|
||||
@ -2492,9 +2492,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
|
||||
|
||||
/* find prediction */
|
||||
if (n < 4) {
|
||||
scale = s->y_dc_scale;
|
||||
scale = s->y_dc_scale;
|
||||
} else {
|
||||
scale = s->c_dc_scale;
|
||||
scale = s->c_dc_scale;
|
||||
}
|
||||
if(IS_3IV1)
|
||||
scale= 8;
|
||||
@ -2520,10 +2520,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
|
||||
}
|
||||
|
||||
if (abs(a - b) < abs(b - c)) {
|
||||
pred = c;
|
||||
pred = c;
|
||||
*dir_ptr = 1; /* top */
|
||||
} else {
|
||||
pred = a;
|
||||
pred = a;
|
||||
*dir_ptr = 0; /* left */
|
||||
}
|
||||
/* we assume pred is positive */
|
||||
@ -2629,11 +2629,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
|
||||
// if(level<-255 || level>255) printf("dc overflow\n");
|
||||
level+=256;
|
||||
if (n < 4) {
|
||||
/* luminance */
|
||||
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
|
||||
/* luminance */
|
||||
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
|
||||
} else {
|
||||
/* chrominance */
|
||||
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
|
||||
/* chrominance */
|
||||
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
|
||||
}
|
||||
#else
|
||||
int size, v;
|
||||
@ -2641,25 +2641,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
|
||||
size = 0;
|
||||
v = abs(level);
|
||||
while (v) {
|
||||
v >>= 1;
|
||||
size++;
|
||||
v >>= 1;
|
||||
size++;
|
||||
}
|
||||
|
||||
if (n < 4) {
|
||||
/* luminance */
|
||||
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
|
||||
/* luminance */
|
||||
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
|
||||
} else {
|
||||
/* chrominance */
|
||||
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
|
||||
/* chrominance */
|
||||
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
|
||||
}
|
||||
|
||||
/* encode remaining bits */
|
||||
if (size > 0) {
|
||||
if (level < 0)
|
||||
level = (-level) ^ ((1 << size) - 1);
|
||||
put_bits(&s->pb, size, level);
|
||||
if (size > 8)
|
||||
put_bits(&s->pb, 1, 1);
|
||||
if (level < 0)
|
||||
level = (-level) ^ ((1 << size) - 1);
|
||||
put_bits(&s->pb, size, level);
|
||||
if (size > 8)
|
||||
put_bits(&s->pb, 1, 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -2689,16 +2689,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
||||
const int last_index = s->block_last_index[n];
|
||||
|
||||
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
||||
/* mpeg4 based DC predictor */
|
||||
mpeg4_encode_dc(dc_pb, intra_dc, n);
|
||||
/* mpeg4 based DC predictor */
|
||||
mpeg4_encode_dc(dc_pb, intra_dc, n);
|
||||
if(last_index<1) return;
|
||||
i = 1;
|
||||
i = 1;
|
||||
rl = &rl_intra;
|
||||
bits_tab= uni_mpeg4_intra_rl_bits;
|
||||
len_tab = uni_mpeg4_intra_rl_len;
|
||||
} else {
|
||||
if(last_index<0) return;
|
||||
i = 0;
|
||||
i = 0;
|
||||
rl = &rl_inter;
|
||||
bits_tab= uni_mpeg4_inter_rl_bits;
|
||||
len_tab = uni_mpeg4_inter_rl_len;
|
||||
@ -2708,9 +2708,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
||||
last_non_zero = i - 1;
|
||||
#if 1
|
||||
for (; i < last_index; i++) {
|
||||
int level = block[ scan_table[i] ];
|
||||
if (level) {
|
||||
int run = i - last_non_zero - 1;
|
||||
int level = block[ scan_table[i] ];
|
||||
if (level) {
|
||||
int run = i - last_non_zero - 1;
|
||||
level+=64;
|
||||
if((level&(~127)) == 0){
|
||||
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
||||
@ -2718,11 +2718,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
||||
}else{ //ESC3
|
||||
put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
}
|
||||
/*if(i<=last_index)*/{
|
||||
int level = block[ scan_table[i] ];
|
||||
int level = block[ scan_table[i] ];
|
||||
int run = i - last_non_zero - 1;
|
||||
level+=64;
|
||||
if((level&(~127)) == 0){
|
||||
@ -2734,17 +2734,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
||||
}
|
||||
#else
|
||||
for (; i <= last_index; i++) {
|
||||
const int slevel = block[ scan_table[i] ];
|
||||
if (slevel) {
|
||||
const int slevel = block[ scan_table[i] ];
|
||||
if (slevel) {
|
||||
int level;
|
||||
int run = i - last_non_zero - 1;
|
||||
last = (i == last_index);
|
||||
sign = 0;
|
||||
level = slevel;
|
||||
if (level < 0) {
|
||||
sign = 1;
|
||||
level = -level;
|
||||
}
|
||||
int run = i - last_non_zero - 1;
|
||||
last = (i == last_index);
|
||||
sign = 0;
|
||||
level = slevel;
|
||||
if (level < 0) {
|
||||
sign = 1;
|
||||
level = -level;
|
||||
}
|
||||
code = get_rl_index(rl, last, run, level);
|
||||
put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
|
||||
if (code == rl->n) {
|
||||
@ -2786,8 +2786,8 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
|
||||
} else {
|
||||
put_bits(ac_pb, 1, sign);
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -2802,15 +2802,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
||||
int len=0;
|
||||
|
||||
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
|
||||
/* mpeg4 based DC predictor */
|
||||
len += mpeg4_get_dc_length(intra_dc, n);
|
||||
/* mpeg4 based DC predictor */
|
||||
len += mpeg4_get_dc_length(intra_dc, n);
|
||||
if(last_index<1) return len;
|
||||
i = 1;
|
||||
i = 1;
|
||||
rl = &rl_intra;
|
||||
len_tab = uni_mpeg4_intra_rl_len;
|
||||
} else {
|
||||
if(last_index<0) return 0;
|
||||
i = 0;
|
||||
i = 0;
|
||||
rl = &rl_inter;
|
||||
len_tab = uni_mpeg4_inter_rl_len;
|
||||
}
|
||||
@ -2818,9 +2818,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
||||
/* AC coefs */
|
||||
last_non_zero = i - 1;
|
||||
for (; i < last_index; i++) {
|
||||
int level = block[ scan_table[i] ];
|
||||
if (level) {
|
||||
int run = i - last_non_zero - 1;
|
||||
int level = block[ scan_table[i] ];
|
||||
if (level) {
|
||||
int run = i - last_non_zero - 1;
|
||||
level+=64;
|
||||
if((level&(~127)) == 0){
|
||||
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
|
||||
@ -2828,11 +2828,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
|
||||
}else{ //ESC3
|
||||
len += 7+2+1+6+1+12+1;
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
}
|
||||
/*if(i<=last_index)*/{
|
||||
int level = block[ scan_table[i] ];
|
||||
int level = block[ scan_table[i] ];
|
||||
int run = i - last_non_zero - 1;
|
||||
level+=64;
|
||||
if((level&(~127)) == 0){
|
||||
@ -3251,7 +3251,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
|
||||
//FIXME reduced res stuff here
|
||||
|
||||
if (s->pict_type != I_TYPE) {
|
||||
int f_code = get_bits(&s->gb, 3); /* fcode_for */
|
||||
int f_code = get_bits(&s->gb, 3); /* fcode_for */
|
||||
if(f_code==0){
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
|
||||
}
|
||||
@ -4741,7 +4741,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
|
||||
if(intra) {
|
||||
if(s->qscale < s->intra_dc_threshold){
|
||||
/* DC coef */
|
||||
/* DC coef */
|
||||
if(s->partitioned_frame){
|
||||
level = s->dc_val[0][ s->block_index[n] ];
|
||||
if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
|
||||
@ -4898,7 +4898,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (level>0) level= level * qmul + qadd;
|
||||
if (level>0) level= level * qmul + qadd;
|
||||
else level= level * qmul - qadd;
|
||||
|
||||
if((unsigned)(level + 2048) > 4095){
|
||||
@ -5014,18 +5014,18 @@ int h263_decode_picture_header(MpegEncContext *s)
|
||||
}
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
||||
return -1; /* h263 id */
|
||||
return -1; /* h263 id */
|
||||
}
|
||||
skip_bits1(&s->gb); /* split screen off */
|
||||
skip_bits1(&s->gb); /* camera off */
|
||||
skip_bits1(&s->gb); /* freeze picture release off */
|
||||
skip_bits1(&s->gb); /* split screen off */
|
||||
skip_bits1(&s->gb); /* camera off */
|
||||
skip_bits1(&s->gb); /* freeze picture release off */
|
||||
|
||||
format = get_bits(&s->gb, 3);
|
||||
/*
|
||||
0 forbidden
|
||||
1 sub-QCIF
|
||||
10 QCIF
|
||||
7 extended PTYPE (PLUSPTYPE)
|
||||
7 extended PTYPE (PLUSPTYPE)
|
||||
*/
|
||||
|
||||
if (format != 7 && format != 6) {
|
||||
@ -5042,17 +5042,17 @@ int h263_decode_picture_header(MpegEncContext *s)
|
||||
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
|
||||
return -1; /* SAC: off */
|
||||
return -1; /* SAC: off */
|
||||
}
|
||||
s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
|
||||
s->unrestricted_mv = s->h263_long_vectors || s->obmc;
|
||||
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
|
||||
return -1; /* not PB frame */
|
||||
return -1; /* not PB frame */
|
||||
}
|
||||
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||
|
||||
s->width = width;
|
||||
s->height = height;
|
||||
@ -5511,17 +5511,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
||||
}
|
||||
s->low_delay= get_bits1(gb);
|
||||
if(get_bits1(gb)){ /* vbv parameters */
|
||||
get_bits(gb, 15); /* first_half_bitrate */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* latter_half_bitrate */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* first_half_vbv_buffer_size */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
|
||||
get_bits(gb, 11); /* first_half_vbv_occupancy */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* latter_half_vbv_occupancy */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* first_half_bitrate */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* latter_half_bitrate */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* first_half_vbv_buffer_size */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
|
||||
get_bits(gb, 11); /* first_half_vbv_occupancy */
|
||||
skip_bits1(gb); /* marker */
|
||||
get_bits(gb, 15); /* latter_half_vbv_occupancy */
|
||||
skip_bits1(gb); /* marker */
|
||||
}
|
||||
}else{
|
||||
// set low delay flag only once the smartest? low delay detection won't be overriden
|
||||
@ -5628,7 +5628,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
||||
/* load custom intra matrix */
|
||||
if(get_bits1(gb)){
|
||||
int last=0;
|
||||
for(i=0; i<64; i++){
|
||||
for(i=0; i<64; i++){
|
||||
int j;
|
||||
v= get_bits(gb, 8);
|
||||
if(v==0) break;
|
||||
@ -5641,7 +5641,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
||||
|
||||
/* replicate last value */
|
||||
for(; i<64; i++){
|
||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||
s->intra_matrix[j]= last;
|
||||
s->chroma_intra_matrix[j]= last;
|
||||
}
|
||||
@ -5650,7 +5650,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
||||
/* load custom non intra matrix */
|
||||
if(get_bits1(gb)){
|
||||
int last=0;
|
||||
for(i=0; i<64; i++){
|
||||
for(i=0; i<64; i++){
|
||||
int j;
|
||||
v= get_bits(gb, 8);
|
||||
if(v==0) break;
|
||||
@ -5663,7 +5663,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
|
||||
|
||||
/* replicate last value */
|
||||
for(; i<64; i++){
|
||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
|
||||
s->inter_matrix[j]= last;
|
||||
s->chroma_inter_matrix[j]= last;
|
||||
}
|
||||
@ -5794,7 +5794,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
|
||||
static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
||||
int time_incr, time_increment;
|
||||
|
||||
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
|
||||
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
|
||||
if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
|
||||
av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
|
||||
s->low_delay=0;
|
||||
@ -5877,9 +5877,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
||||
if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
|
||||
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
|
||||
/* rounding type for motion estimation */
|
||||
s->no_rounding = get_bits1(gb);
|
||||
s->no_rounding = get_bits1(gb);
|
||||
} else {
|
||||
s->no_rounding = 0;
|
||||
s->no_rounding = 0;
|
||||
}
|
||||
//FIXME reduced res stuff
|
||||
|
||||
@ -5938,7 +5938,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
|
||||
}
|
||||
|
||||
if (s->pict_type != I_TYPE) {
|
||||
s->f_code = get_bits(gb, 3); /* fcode_for */
|
||||
s->f_code = get_bits(gb, 3); /* fcode_for */
|
||||
if(s->f_code==0){
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
|
||||
return -1; // makes no sense to continue, as the MV decoding will break very quickly
|
||||
@ -6094,15 +6094,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
|
||||
|
||||
if (get_bits1(&s->gb) != 1) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
|
||||
return -1; /* marker */
|
||||
return -1; /* marker */
|
||||
}
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
|
||||
return -1; /* h263 id */
|
||||
return -1; /* h263 id */
|
||||
}
|
||||
skip_bits1(&s->gb); /* split screen off */
|
||||
skip_bits1(&s->gb); /* camera off */
|
||||
skip_bits1(&s->gb); /* freeze picture release off */
|
||||
skip_bits1(&s->gb); /* split screen off */
|
||||
skip_bits1(&s->gb); /* camera off */
|
||||
skip_bits1(&s->gb); /* freeze picture release off */
|
||||
|
||||
format = get_bits(&s->gb, 3);
|
||||
if (format != 7) {
|
||||
@ -6118,23 +6118,23 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
|
||||
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
|
||||
return -1; /* SAC: off */
|
||||
return -1; /* SAC: off */
|
||||
}
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
s->obmc= 1;
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
|
||||
// return -1; /* advanced prediction mode: off */
|
||||
// return -1; /* advanced prediction mode: off */
|
||||
}
|
||||
if (get_bits1(&s->gb) != 0) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
|
||||
return -1; /* PB frame mode */
|
||||
return -1; /* PB frame mode */
|
||||
}
|
||||
|
||||
/* skip unknown header garbage */
|
||||
skip_bits(&s->gb, 41);
|
||||
|
||||
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
|
||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
|
||||
|
||||
/* PEI */
|
||||
while (get_bits1(&s->gb) != 0) {
|
||||
@@ -6208,7 +6208,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
    if (s->dropable)
        s->pict_type = P_TYPE;

    skip_bits1(&s->gb); /* deblocking flag */
    s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);

    s->h263_plus = 0;

@@ -147,15 +147,15 @@ typedef struct H264Context{
    MpegEncContext s;
    int nal_ref_idc;
    int nal_unit_type;
#define NAL_SLICE               1
#define NAL_DPA                 2
#define NAL_DPB                 3
#define NAL_DPC                 4
#define NAL_IDR_SLICE           5
#define NAL_SEI                 6
#define NAL_SPS                 7
#define NAL_PPS                 8
#define NAL_AUD                 9
#define NAL_END_SEQUENCE        10
#define NAL_END_STREAM          11
#define NAL_FILLER_DATA         12
@@ -1461,7 +1461,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
    int i, si, di;
    uint8_t *dst;

//    src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;
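The two hunks above only re-indent the NAL handling, but they do show the bit layout that decode_nal() reads with src[0]>>5 and src[0]&0x1F. The following is a small standalone sketch, not part of this commit, that decodes one example NAL header byte (0x67 is a value chosen here for illustration) against the NAL_* codes listed in H264Context:

/* Standalone illustration only -- not part of the commit. */
#include <stdio.h>

enum {
    NAL_SLICE = 1, NAL_DPA, NAL_DPB, NAL_DPC, NAL_IDR_SLICE,
    NAL_SEI, NAL_SPS, NAL_PPS, NAL_AUD,
    NAL_END_SEQUENCE, NAL_END_STREAM, NAL_FILLER_DATA
};

int main(void)
{
    unsigned char nal_header = 0x67;         /* hypothetical first byte of a NAL unit */
    int forbidden_bit = nal_header >> 7;     /* must be 0 in a valid stream */
    int nal_ref_idc   = nal_header >> 5;     /* includes the forbidden bit, which is 0 here */
    int nal_unit_type = nal_header & 0x1F;   /* 0x67 & 0x1F == 7 == NAL_SPS */

    printf("forbidden=%d ref_idc=%d type=%d (NAL_SPS is %d)\n",
           forbidden_bit, nal_ref_idc, nal_unit_type, NAL_SPS);
    return 0;
}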
@@ -7545,8 +7545,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
        case NAL_SPS_EXT:
        case NAL_AUXILIARY_SLICE:
            break;
        default:
            av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
        }
    }

@@ -15,7 +15,7 @@
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index,eax,ebx,ecx,edx)\
    __asm __volatile\
        ("mov %%"REG_b", %%"REG_S"\n\t"\
         "cpuid\n\t"\
         "xchg %%"REG_b", %%"REG_S\
         : "=a" (eax), "=S" (ebx),\
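For readers not fluent in GCC inline assembly, the idea behind the macro above is that on 32-bit x86 %ebx holds the GOT pointer when compiling position-independent code, so instead of declaring it clobbered the macro parks it in %esi around the CPUID instruction. The sketch below is illustrative only and is not part of the commit; it is written for plain 32-bit x86 rather than the REG_b/REG_S abstraction used in the tree:

/* Standalone illustration only -- not part of the commit (32-bit x86, gcc). */
#include <stdio.h>

static void cpuid_sketch(unsigned index, unsigned *eax, unsigned *ebx,
                         unsigned *ecx, unsigned *edx)
{
    __asm__ __volatile__(
        "mov  %%ebx, %%esi \n\t"   /* save the PIC register              */
        "cpuid             \n\t"
        "xchg %%ebx, %%esi \n\t"   /* restore %ebx, keep result in %esi  */
        : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
        : "0"(index));
}

int main(void)
{
    unsigned a, b, c, d;
    cpuid_sketch(0, &a, &b, &c, &d);
    /* leaf 0 returns the vendor string in ebx, edx, ecx */
    printf("max leaf %u, vendor %.4s%.4s%.4s\n",
           a, (char *)&b, (char *)&d, (char *)&c);
    return 0;
}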
@@ -89,8 +89,8 @@ int mm_support(void)
               edx == 0x48727561 &&
               ecx == 0x736c7561) {   /* "CentaurHauls" */
        /* VIA C3 */
        if(ext_caps & (1<<24))
            rval |= MM_MMXEXT;
    } else if (ebx == 0x69727943 &&
               edx == 0x736e4978 &&
               ecx == 0x64616574) {
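The magic constants compared against edx/ecx above are just the CPUID vendor string packed into little-endian 32-bit words ("CentaurHauls" for the VIA C3 branch, "CyrixInstead" for the one that follows). A tiny standalone check, not part of the commit, that decodes the two words shown in the hunk:

/* Standalone illustration only -- not part of the commit. */
#include <stdio.h>
#include <string.h>

static void print_word(unsigned w)
{
    char s[5];
    memcpy(s, &w, 4);   /* assumes a little-endian host, as on x86 */
    s[4] = '\0';
    printf("%s", s);
}

int main(void)
{
    print_word(0x48727561);   /* "aurH" -- the edx word of "CentaurHauls" */
    print_word(0x736c7561);   /* "auls" -- the ecx word of "CentaurHauls" */
    putchar('\n');            /* prints "aurHauls"                        */
    return 0;
}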
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
// avg_pixels
@@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi
@@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
@@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
@@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
@@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *
@@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
@@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t
@@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
// this routine is 'slightly' suboptimal but mostly unused
@@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
//FIXME optimize

@@ -30,21 +30,21 @@
//
//////////////////////////////////////////////////////////////////////

#define BITS_FRW_ACC   3 //; 2 or 3 for accuracy
#define SHIFT_FRW_COL  BITS_FRW_ACC
#define SHIFT_FRW_ROW  (BITS_FRW_ACC + 17 - 3)
#define RND_FRW_ROW    (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL  (1 << (SHIFT_FRW_COL-1))

//concatenated table, for forward DCT transformation
static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
    13036,  13036,  13036,  13036,  // tg * (2<<16) + 0.5
    27146,  27146,  27146,  27146,  // tg * (2<<16) + 0.5
   -21746, -21746, -21746, -21746,  // tg * (2<<16) + 0.5
};

static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
    23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5
};

static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
@@ -351,62 +351,62 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
@@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a)
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
{
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex *p, *q;
    FFTComplex *cptr, *cptr1;
    int k;
@@ -47,9 +47,9 @@
    SUMSUB_BADC( d13, s02, s13, d02 )

#define SBUTTERFLY(a,b,t,n)\
    "movq " #a ", " #t "              \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */\

#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
@@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
/* motion compensation */
#define QPEL_H264V(A,B,C,D,E,F,OP)\
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
#define QPEL_H264(OPNAME, OP, MMX)\
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
@@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
@@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
@@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
@@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
@@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
@@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
@@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
|
||||
}\


#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
#define AVG_MMX2_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"

QPEL_H264(put_, PUT_OP, 3dnow)
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)

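As an aside (my own expansion, not text from the commit): the OP hook is purely textual pasting, so with OP = AVG_MMX2_OP the store line OP(%%mm0, (%1),%%mm7, q) used in the qpel code above becomes

"movq (%1), %%mm7 \n\t"   /* temp = current destination pixels     */
"pavgb %%mm7, %%mm0 \n\t" /* a = rounded average with destination  */
"movq %%mm0, (%1) \n\t"   /* write the averaged result back        */

while PUT_OP collapses the same slot to a single "movq %%mm0, (%1) \n\t"; that is the only difference between the put_ and avg_ flavours instantiated by QPEL_H264 just above.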
@ -38,7 +38,7 @@
#if 0
/* C row IDCT - its just here to document the MMXEXT and MMX versions */
static inline void idct_row (int16_t * row, int offset,
int16_t * table, int32_t * rounder)
{
int C1, C2, C3, C4, C5, C6, C7;
int a0, a1, a2, a3, b0, b1, b2, b3;
@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset,
|
||||
/* MMXEXT row IDCT */
|
||||
|
||||
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
|
||||
c4, c6, c4, c6, \
|
||||
c1, c3, -c1, -c5, \
|
||||
c5, c7, c3, -c7, \
|
||||
c4, -c6, c4, -c6, \
|
||||
-c4, c2, c4, -c2, \
|
||||
c5, -c1, c3, -c1, \
|
||||
c7, c3, c7, -c5 }
|
||||
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
|
||||
c4, c6, c4, c6, \
|
||||
c1, c3, -c1, -c5, \
|
||||
c5, c7, c3, -c7, \
|
||||
c4, -c6, c4, -c6, \
|
||||
-c4, c2, c4, -c2, \
|
||||
c5, -c1, c3, -c1, \
|
||||
c7, c3, c7, -c5 }
|
||||
|
||||
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
|
||||
{
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
|
||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
|
||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||
|
||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||
}
|
||||
|
||||
static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
|
||||
{
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
|
||||
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
|
||||
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
|
||||
|
||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
|
||||
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
|
||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
|
||||
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
|
||||
|
||||
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
|
||||
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
|
||||
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
|
||||
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
|
||||
|
||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
|
||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
|
||||
|
||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
|
||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
|
||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||
|
||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
|
||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
|
||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||
|
||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
|
||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
|
||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||
|
||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||
|
||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||
|
||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||
|
||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
|
||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
|
||||
|
||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
|
||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
|
||||
}
|
||||
|
||||
static inline void mmxext_row_tail (int16_t * row, int store)
|
||||
{
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
|
||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
|
||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||
|
||||
/* slot */
|
||||
|
||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||
}
|
||||
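A brief reading aid, not part of the commit: taken together with mmxext_row above, the register comments trace the usual even/odd butterfly of the 8-point row transform.

/* illustrative summary of what the comments describe, with the rounder
 * already folded into the a terms:
 *     y[i]   = (a[i] + b[i]) >> ROW_SHIFT;      i = 0..3
 *     y[7-i] = (a[i] - b[i]) >> ROW_SHIFT;
 * a[0..3] are the even-input products (x0,x2,x4,x6 against the C4/C2/C6
 * table columns), b[0..3] the odd-input products (x1,x3,x5,x7 against
 * C1/C3/C5/C7). */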
|
||||
static inline void mmxext_row_mid (int16_t * row, int store,
|
||||
int offset, const int16_t * table)
|
||||
int offset, const int16_t * table)
|
||||
{
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
|
||||
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
|
||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
|
||||
|
||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
|
||||
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
|
||||
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
|
||||
|
||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
|
||||
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
||||
}
|
||||
|
||||
|
||||
/* MMX row IDCT */
|
||||
|
||||
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
|
||||
c4, c6, -c4, -c2, \
|
||||
c1, c3, c3, -c7, \
|
||||
c5, c7, -c1, -c5, \
|
||||
c4, -c6, c4, -c2, \
|
||||
-c4, c2, c4, -c6, \
|
||||
c5, -c1, c7, -c5, \
|
||||
c7, c3, c3, -c1 }
|
||||
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
|
||||
c4, c6, -c4, -c2, \
|
||||
c1, c3, c3, -c7, \
|
||||
c5, c7, -c1, -c5, \
|
||||
c4, -c6, c4, -c2, \
|
||||
-c4, c2, c4, -c6, \
|
||||
c5, -c1, c7, -c5, \
|
||||
c7, c3, c3, -c1 }
|
||||
|
||||
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
|
||||
{
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
|
||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
|
||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||
|
||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||
}
|
||||
|
||||
static inline void mmx_row (const int16_t * table, const int32_t * rounder)
|
||||
{
|
||||
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
|
||||
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
|
||||
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
|
||||
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
|
||||
|
||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
|
||||
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
|
||||
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
|
||||
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
|
||||
|
||||
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
|
||||
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
|
||||
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
|
||||
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
|
||||
|
||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
|
||||
paddd_m2r (*rounder, mm3); // mm3 += rounder
|
||||
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
|
||||
|
||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
|
||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
|
||||
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
|
||||
|
||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
|
||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
|
||||
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
|
||||
|
||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
|
||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
|
||||
paddd_r2r (mm7, mm1); // mm1 = b1 b0
|
||||
|
||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||
paddd_m2r (*rounder, mm0); // mm0 += rounder
|
||||
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
|
||||
|
||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
|
||||
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
|
||||
|
||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
|
||||
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
|
||||
|
||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
|
||||
paddd_r2r (mm6, mm5); // mm5 = b3 b2
|
||||
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
|
||||
|
||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
|
||||
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
|
||||
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
|
||||
}
|
||||
|
||||
static inline void mmx_row_tail (int16_t * row, int store)
|
||||
{
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
|
||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
|
||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
|
||||
|
||||
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
|
||||
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
|
||||
|
||||
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
|
||||
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
|
||||
|
||||
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
|
||||
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
|
||||
|
||||
/* slot */
|
||||
|
||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||
}
|
||||
|
||||
static inline void mmx_row_mid (int16_t * row, int store,
|
||||
int offset, const int16_t * table)
|
||||
int offset, const int16_t * table)
|
||||
{
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
||||
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
||||
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
|
||||
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
|
||||
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
|
||||
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
|
||||
|
||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
|
||||
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
|
||||
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
|
||||
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
|
||||
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
|
||||
|
||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
|
||||
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
|
||||
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
|
||||
|
||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
|
||||
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
|
||||
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
|
||||
|
||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
|
||||
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
|
||||
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
|
||||
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
|
||||
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
||||
|
||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
|
||||
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
|
||||
}
|
||||
|
||||
|
||||
@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset)
|
||||
/* column code adapted from peter gubanov */
|
||||
/* http://www.elecard.com/peter/idct.shtml */
|
||||
|
||||
movq_m2r (*_T1, mm0); // mm0 = T1
|
||||
movq_m2r (*_T1, mm0); // mm0 = T1
|
||||
|
||||
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
|
||||
movq_r2r (mm0, mm2); // mm2 = T1
|
||||
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
|
||||
movq_r2r (mm0, mm2); // mm2 = T1
|
||||
|
||||
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
|
||||
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
|
||||
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
|
||||
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
|
||||
|
||||
movq_m2r (*_T3, mm5); // mm5 = T3
|
||||
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
|
||||
movq_m2r (*_T3, mm5); // mm5 = T3
|
||||
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
|
||||
|
||||
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
|
||||
movq_r2r (mm5, mm7); // mm7 = T3-1
|
||||
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
|
||||
movq_r2r (mm5, mm7); // mm7 = T3-1
|
||||
|
||||
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
|
||||
psubsw_r2r (mm4, mm0); // mm0 = v17
|
||||
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
|
||||
psubsw_r2r (mm4, mm0); // mm0 = v17
|
||||
|
||||
movq_m2r (*_T2, mm4); // mm4 = T2
|
||||
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
|
||||
movq_m2r (*_T2, mm4); // mm4 = T2
|
||||
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
|
||||
|
||||
paddsw_r2r (mm2, mm1); // mm1 = u17
|
||||
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
|
||||
paddsw_r2r (mm2, mm1); // mm1 = u17
|
||||
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
|
||||
|
||||
/* slot */
|
||||
|
||||
movq_r2r (mm4, mm2); // mm2 = T2
|
||||
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
|
||||
movq_r2r (mm4, mm2); // mm2 = T2
|
||||
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
|
||||
|
||||
pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
|
||||
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
|
||||
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
|
||||
|
||||
psubsw_r2r (mm6, mm5); // mm5 = v35
|
||||
paddsw_r2r (mm3, mm7); // mm7 = u35
|
||||
psubsw_r2r (mm6, mm5); // mm5 = v35
|
||||
paddsw_r2r (mm3, mm7); // mm7 = u35
|
||||
|
||||
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
|
||||
movq_r2r (mm0, mm6); // mm6 = v17
|
||||
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
|
||||
movq_r2r (mm0, mm6); // mm6 = v17
|
||||
|
||||
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
|
||||
psubsw_r2r (mm5, mm0); // mm0 = b3
|
||||
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
|
||||
psubsw_r2r (mm5, mm0); // mm0 = b3
|
||||
|
||||
psubsw_r2r (mm3, mm4); // mm4 = v26
|
||||
paddsw_r2r (mm6, mm5); // mm5 = v12
|
||||
psubsw_r2r (mm3, mm4); // mm4 = v26
|
||||
paddsw_r2r (mm6, mm5); // mm5 = v12
|
||||
|
||||
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
|
||||
movq_r2r (mm1, mm6); // mm6 = u17
|
||||
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
|
||||
movq_r2r (mm1, mm6); // mm6 = u17
|
||||
|
||||
paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
|
||||
paddsw_r2r (mm7, mm6); // mm6 = b0
|
||||
paddsw_r2r (mm7, mm6); // mm6 = b0
|
||||
|
||||
psubsw_r2r (mm7, mm1); // mm1 = u12
|
||||
movq_r2r (mm1, mm7); // mm7 = u12
|
||||
psubsw_r2r (mm7, mm1); // mm1 = u12
|
||||
movq_r2r (mm1, mm7); // mm7 = u12
|
||||
|
||||
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
|
||||
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
|
||||
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
|
||||
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
|
||||
|
||||
movq_m2r (*_C4, mm0); // mm0 = C4/2
|
||||
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
|
||||
movq_m2r (*_C4, mm0); // mm0 = C4/2
|
||||
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
|
||||
|
||||
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
|
||||
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
|
||||
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
|
||||
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
|
||||
|
||||
movq_r2r (mm4, mm6); // mm6 = v26
|
||||
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
|
||||
movq_r2r (mm4, mm6); // mm6 = v26
|
||||
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
|
||||
|
||||
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
|
||||
movq_r2r (mm3, mm0); // mm0 = x0
|
||||
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
|
||||
movq_r2r (mm3, mm0); // mm0 = x0
|
||||
|
||||
psubsw_r2r (mm5, mm3); // mm3 = v04
|
||||
paddsw_r2r (mm5, mm0); // mm0 = u04
|
||||
psubsw_r2r (mm5, mm3); // mm3 = v04
|
||||
paddsw_r2r (mm5, mm0); // mm0 = u04
|
||||
|
||||
paddsw_r2r (mm3, mm4); // mm4 = a1
|
||||
movq_r2r (mm0, mm5); // mm5 = u04
|
||||
paddsw_r2r (mm3, mm4); // mm4 = a1
|
||||
movq_r2r (mm0, mm5); // mm5 = u04
|
||||
|
||||
psubsw_r2r (mm6, mm3); // mm3 = a2
|
||||
paddsw_r2r (mm2, mm5); // mm5 = a0
|
||||
psubsw_r2r (mm6, mm3); // mm3 = a2
|
||||
paddsw_r2r (mm2, mm5); // mm5 = a0
|
||||
|
||||
paddsw_r2r (mm1, mm1); // mm1 = b1
|
||||
psubsw_r2r (mm2, mm0); // mm0 = a3
|
||||
paddsw_r2r (mm1, mm1); // mm1 = b1
|
||||
psubsw_r2r (mm2, mm0); // mm0 = a3
|
||||
|
||||
paddsw_r2r (mm7, mm7); // mm7 = b2
|
||||
movq_r2r (mm3, mm2); // mm2 = a2
|
||||
paddsw_r2r (mm7, mm7); // mm7 = b2
|
||||
movq_r2r (mm3, mm2); // mm2 = a2
|
||||
|
||||
movq_r2r (mm4, mm6); // mm6 = a1
|
||||
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
|
||||
movq_r2r (mm4, mm6); // mm6 = a1
|
||||
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
|
||||
|
||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
|
||||
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
|
||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
|
||||
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
|
||||
|
||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
|
||||
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
|
||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
|
||||
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
|
||||
|
||||
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
|
||||
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
|
||||
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
|
||||
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
|
||||
|
||||
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
|
||||
movq_r2r (mm5, mm7); // mm7 = a0
|
||||
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
|
||||
movq_r2r (mm5, mm7); // mm7 = a0
|
||||
|
||||
movq_r2m (mm4, *(col+offset+1*8)); // save y1
|
||||
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
|
||||
movq_r2m (mm4, *(col+offset+1*8)); // save y1
|
||||
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
|
||||
|
||||
movq_r2m (mm3, *(col+offset+2*8)); // save y2
|
||||
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
|
||||
movq_r2m (mm3, *(col+offset+2*8)); // save y2
|
||||
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
|
||||
|
||||
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
|
||||
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
|
||||
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
|
||||
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
|
||||
|
||||
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
|
||||
movq_r2r (mm0, mm3); // mm3 = a3
|
||||
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
|
||||
movq_r2r (mm0, mm3); // mm3 = a3
|
||||
|
||||
movq_r2m (mm2, *(col+offset+5*8)); // save y5
|
||||
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
|
||||
movq_r2m (mm2, *(col+offset+5*8)); // save y5
|
||||
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
|
||||
|
||||
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
|
||||
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
|
||||
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
|
||||
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
|
||||
|
||||
movq_r2m (mm5, *(col+offset+0*8)); // save y0
|
||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
|
||||
movq_r2m (mm5, *(col+offset+0*8)); // save y0
|
||||
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
|
||||
|
||||
movq_r2m (mm6, *(col+offset+6*8)); // save y6
|
||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
|
||||
movq_r2m (mm6, *(col+offset+6*8)); // save y6
|
||||
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
|
||||
|
||||
movq_r2m (mm7, *(col+offset+7*8)); // save y7
|
||||
movq_r2m (mm7, *(col+offset+7*8)); // save y7
|
||||
|
||||
movq_r2m (mm3, *(col+offset+4*8)); // save y4
|
||||
movq_r2m (mm3, *(col+offset+4*8)); // save y4
|
||||
|
||||
movq_r2m (mm4, *(col+offset+3*8)); // save y3
|
||||
movq_r2m (mm4, *(col+offset+3*8)); // save y3
|
||||
|
||||
#undef T1
|
||||
#undef T2
|
||||
@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) =
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
static const int32_t rounder1[] ATTR_ALIGN(8) =
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
static const int32_t rounder7[] ATTR_ALIGN(8) =
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
static const int32_t rounder2[] ATTR_ALIGN(8) =
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
static const int32_t rounder6[] ATTR_ALIGN(8) =
rounder (-0.25); /* C2 * (C6-C2)/2 */
static const int32_t rounder3[] ATTR_ALIGN(8) =
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
static const int32_t rounder5[] ATTR_ALIGN(8) =
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
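A quick sanity check on the commented formulas (my arithmetic, assuming the conventional Ck = cos(k*pi/16) for this IDCT): for rounder1,

C1*(C1/C4 + C1 + C7)/2 = 0.980785*(1.387040 + 0.980785 + 0.195090)/2 ~= 1.256835

which matches the literal 1.25683487303 above; the remaining rounder constants check out against their comments the same way.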
|
||||
#undef COL_SHIFT
|
||||
#undef ROW_SHIFT
|
||||
|
||||
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
|
||||
void idct (int16_t * block) \
|
||||
{ \
|
||||
static const int16_t table04[] ATTR_ALIGN(16) = \
|
||||
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
|
||||
static const int16_t table17[] ATTR_ALIGN(16) = \
|
||||
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
|
||||
static const int16_t table26[] ATTR_ALIGN(16) = \
|
||||
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
|
||||
static const int16_t table35[] ATTR_ALIGN(16) = \
|
||||
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
|
||||
\
|
||||
idct_row_head (block, 0*8, table04); \
|
||||
idct_row (table04, rounder0); \
|
||||
idct_row_mid (block, 0*8, 4*8, table04); \
|
||||
idct_row (table04, rounder4); \
|
||||
idct_row_mid (block, 4*8, 1*8, table17); \
|
||||
idct_row (table17, rounder1); \
|
||||
idct_row_mid (block, 1*8, 7*8, table17); \
|
||||
idct_row (table17, rounder7); \
|
||||
idct_row_mid (block, 7*8, 2*8, table26); \
|
||||
idct_row (table26, rounder2); \
|
||||
idct_row_mid (block, 2*8, 6*8, table26); \
|
||||
idct_row (table26, rounder6); \
|
||||
idct_row_mid (block, 6*8, 3*8, table35); \
|
||||
idct_row (table35, rounder3); \
|
||||
idct_row_mid (block, 3*8, 5*8, table35); \
|
||||
idct_row (table35, rounder5); \
|
||||
idct_row_tail (block, 5*8); \
|
||||
\
|
||||
idct_col (block, 0); \
|
||||
idct_col (block, 4); \
|
||||
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
|
||||
void idct (int16_t * block) \
|
||||
{ \
|
||||
static const int16_t table04[] ATTR_ALIGN(16) = \
|
||||
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
|
||||
static const int16_t table17[] ATTR_ALIGN(16) = \
|
||||
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
|
||||
static const int16_t table26[] ATTR_ALIGN(16) = \
|
||||
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
|
||||
static const int16_t table35[] ATTR_ALIGN(16) = \
|
||||
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
|
||||
\
|
||||
idct_row_head (block, 0*8, table04); \
|
||||
idct_row (table04, rounder0); \
|
||||
idct_row_mid (block, 0*8, 4*8, table04); \
|
||||
idct_row (table04, rounder4); \
|
||||
idct_row_mid (block, 4*8, 1*8, table17); \
|
||||
idct_row (table17, rounder1); \
|
||||
idct_row_mid (block, 1*8, 7*8, table17); \
|
||||
idct_row (table17, rounder7); \
|
||||
idct_row_mid (block, 7*8, 2*8, table26); \
|
||||
idct_row (table26, rounder2); \
|
||||
idct_row_mid (block, 2*8, 6*8, table26); \
|
||||
idct_row (table26, rounder6); \
|
||||
idct_row_mid (block, 6*8, 3*8, table35); \
|
||||
idct_row (table35, rounder3); \
|
||||
idct_row_mid (block, 3*8, 5*8, table35); \
|
||||
idct_row (table35, rounder5); \
|
||||
idct_row_tail (block, 5*8); \
|
||||
\
|
||||
idct_col (block, 0); \
|
||||
idct_col (block, 4); \
|
||||
}

void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);

declare_idct (ff_mmxext_idct, mmxext_table,
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)

declare_idct (ff_mmx_idct, mmx_table,
mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)

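For orientation only (no caller appears in this hunk): each declare_idct line above expands into a complete function that transforms a raster-ordered 8x8 coefficient block in place, eight rows via the four cosine tables and then the two 4-column halves.

/* hypothetical usage sketch; DCTELEM is assumed to be int16_t here */
static void idct_example(DCTELEM block[64])
{
    ff_mmxext_idct(block);  /* or ff_mmx_idct(block) on plain MMX            */
    emms();                 /* leave MMX state before any floating-point code */
}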
@ -27,257 +27,257 @@
* values by ULL, lest they be truncated by the compiler)
*/

typedef union {
long long q; /* Quadword (64-bit) value */
unsigned long long uq; /* Unsigned Quadword */
int d[2]; /* 2 Doubleword (32-bit) values */
unsigned int ud[2]; /* 2 Unsigned Doubleword */
short w[4]; /* 4 Word (16-bit) values */
unsigned short uw[4]; /* 4 Unsigned Word */
char b[8]; /* 8 Byte (8-bit) values */
unsigned char ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} mmx_t; /* On an 8-byte (64-bit) boundary */

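A minimal illustration of the ULL note above (my example, not from the header): a 64-bit constant stored through the q/uq members needs the ULL suffix so the compiler does not truncate it to 32 bits.

/* hypothetical constant: eight bytes of 0x01, usable as an "m" operand */
static const mmx_t ones_per_byte = { .uq = 0x0101010101010101ULL };
/* the other members view the same storage, e.g. ones_per_byte.ub[3] == 0x01 */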
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "i" (imm) )

#define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))

#define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )

#define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
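For orientation (my own expansion, not part of the commit): movq_m2r(src, mm1), defined further down in terms of mmx_m2r, pastes together to

__asm__ __volatile__ ("movq %0, %%mm1"
                      : /* nothing */
                      : "m" (src));

so the opcode and register names are glued in by # stringization, while the memory operand remains a real "m" constraint that GCC can see.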
|
||||
|
||||
#define emms() __asm__ __volatile__ ("emms")
|
||||
#define emms() __asm__ __volatile__ ("emms")
|
||||
|
||||
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
|
||||
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
|
||||
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
|
||||
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
|
||||
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
|
||||
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
|
||||
|
||||
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
|
||||
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
|
||||
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
|
||||
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
|
||||
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
|
||||
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
|
||||
|
||||
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
|
||||
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
|
||||
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
|
||||
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
|
||||
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
|
||||
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
|
||||
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
|
||||
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
|
||||
|
||||
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
|
||||
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
|
||||
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
|
||||
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
|
||||
|
||||
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
|
||||
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
|
||||
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
|
||||
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
|
||||
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
|
||||
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
|
||||
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
|
||||
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
|
||||
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
|
||||
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
|
||||
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
|
||||
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
|
||||
|
||||
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
|
||||
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
|
||||
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
|
||||
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
|
||||
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
|
||||
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
|
||||
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
|
||||
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
|
||||
|
||||
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
|
||||
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
|
||||
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
|
||||
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
|
||||
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
|
||||
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
|
||||
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
|
||||
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
|
||||
|
||||
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
|
||||
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
|
||||
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
|
||||
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
|
||||
|
||||
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
|
||||
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
|
||||
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
|
||||
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
|
||||
|
||||
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
|
||||
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
|
||||
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
|
||||
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
|
||||
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
|
||||
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
|
||||
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
|
||||
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
|
||||
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
|
||||
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
|
||||
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
|
||||
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
|
||||
|
||||
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
|
||||
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
|
||||
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
|
||||
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
|
||||
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
|
||||
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
|
||||
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
|
||||
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
|
||||
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
|
||||
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
|
||||
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
|
||||
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
|
||||
|
||||
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
|
||||
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
|
||||
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
|
||||
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
|
||||
|
||||
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
|
||||
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
|
||||
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
|
||||
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
|
||||
|
||||
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
|
||||
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
|
||||
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
|
||||
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
|
||||
|
||||
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
|
||||
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
|
||||
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
|
||||
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
|
||||
|
||||
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
|
||||
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
|
||||
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
|
||||
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
|
||||
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
|
||||
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
|
||||
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
|
||||
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
|
||||
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
|
||||
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
|
||||
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
|
||||
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
|
||||
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
|
||||
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
|
||||
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
|
||||
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
|
||||
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
|
||||
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
|
||||
|
||||
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
|
||||
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
|
||||
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
|
||||
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
|
||||
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
|
||||
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
|
||||
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
|
||||
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
|
||||
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
|
||||
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
|
||||
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
|
||||
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
|
||||
|
||||
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
|
||||
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
|
||||
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
|
||||
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
|
||||
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
|
||||
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
|
||||
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
|
||||
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
|
||||
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
|
||||
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
|
||||
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
|
||||
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
|
||||
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
|
||||
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
|
||||
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
|
||||
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
|
||||
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
|
||||
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
|
||||
|
||||
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
|
||||
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
|
||||
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
|
||||
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
|
||||
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
|
||||
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
|
||||
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
|
||||
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
|
||||
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
|
||||
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
|
||||
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
|
||||
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
|
||||
|
||||
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
|
||||
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
|
||||
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
|
||||
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
|
||||
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
|
||||
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
|
||||
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
|
||||
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
|
||||
|
||||
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
|
||||
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
|
||||
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
|
||||
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
|
||||
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
|
||||
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
|
||||
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
|
||||
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
|
||||
|
||||
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
|
||||
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
|
||||
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
|
||||
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
|
||||
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
|
||||
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
|
||||
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
|
||||
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
|
||||
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
|
||||
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
|
||||
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
|
||||
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
|
||||
|
||||
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
|
||||
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
|
||||
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
|
||||
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
|
||||
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
|
||||
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
|
||||
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
|
||||
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
|
||||
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
|
||||
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
|
||||
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
|
||||
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
|
||||
|
||||
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
|
||||
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
|
||||
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
|
||||
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
|
||||
|
||||
|
||||
/* 3DNOW extensions */
|
||||
|
||||
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
|
||||
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
|
||||
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
|
||||
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
|
||||
|
||||
|
||||
/* AMD MMX extensions - also available in intel SSE */
|
||||
|
||||
|
||||
#define mmx_m2ri(op,mem,reg,imm) \
|
||||
#define mmx_m2ri(op,mem,reg,imm) \
|
||||
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (mem), "X" (imm))
|
||||
#define mmx_r2ri(op,regs,regd,imm) \
|
||||
#define mmx_r2ri(op,regs,regd,imm) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
|
||||
: /* nothing */ \
|
||||
: "X" (imm) )
|
||||
|
||||
#define mmx_fetch(mem,hint) \
|
||||
__asm__ __volatile__ ("prefetch" #hint " %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
#define mmx_fetch(mem,hint) \
|
||||
__asm__ __volatile__ ("prefetch" #hint " %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
|
||||
|
||||
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
|
||||
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
|
||||
|
||||
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
|
||||
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
|
||||
|
||||
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
|
||||
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
|
||||
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
|
||||
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
|
||||
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
|
||||
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
|
||||
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
|
||||
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
|
||||
|
||||
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
|
||||
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
|
||||
|
||||
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
|
||||
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
|
||||
|
||||
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
|
||||
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
|
||||
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
|
||||
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
|
||||
|
||||
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
|
||||
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
|
||||
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
|
||||
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
|
||||
|
||||
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
|
||||
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
|
||||
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
|
||||
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
|
||||
|
||||
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
|
||||
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
|
||||
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
|
||||
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
|
||||
|
||||
#define pmovmskb(mmreg,reg) \
|
||||
__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
|
||||
#define pmovmskb(mmreg,reg) \
|
||||
__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
|
||||
|
||||
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
|
||||
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
|
||||
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
|
||||
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
|
||||
|
||||
#define prefetcht0(mem) mmx_fetch (mem, t0)
|
||||
#define prefetcht1(mem) mmx_fetch (mem, t1)
|
||||
#define prefetcht2(mem) mmx_fetch (mem, t2)
|
||||
#define prefetchnta(mem) mmx_fetch (mem, nta)
|
||||
#define prefetcht0(mem) mmx_fetch (mem, t0)
|
||||
#define prefetcht1(mem) mmx_fetch (mem, t1)
|
||||
#define prefetcht2(mem) mmx_fetch (mem, t2)
|
||||
#define prefetchnta(mem) mmx_fetch (mem, nta)
|
||||
|
||||
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
|
||||
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
|
||||
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
|
||||
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
|
||||
|
||||
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
|
||||
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
|
||||
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
|
||||
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
|
||||
|
||||
#define sfence() __asm__ __volatile__ ("sfence\n\t")
|
||||
#define sfence() __asm__ __volatile__ ("sfence\n\t")
|
||||
|
||||
/* SSE2 */
|
||||
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
|
||||
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
|
||||
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
|
||||
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
|
||||
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
|
||||
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
|
||||
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
|
||||
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
|
||||
|
||||
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
|
||||
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
|
||||
|
||||
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
|
||||
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
|
||||
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
|
||||
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
|
||||
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
|
||||
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
|
||||
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
|
||||
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
|
||||
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
|
||||
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
|
||||
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
|
||||
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
|
||||
|
||||
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
|
||||
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
|
||||
|
||||
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
|
||||
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
|
||||
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
|
||||
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
|
||||
|
||||
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
|
||||
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
|
||||
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
|
||||
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
|
||||
|
||||
|
||||
#endif /* AVCODEC_I386MMX_H */
|
||||
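At the call site the wrappers above read like ordinary statements. A small self-contained sketch (my example; GCC on x86 assumed, and the include name is a guess for this tree):

#include <stdint.h>
#include "mmx.h"   /* the header that ends above; actual path is an assumption */

/* add two vectors of four 16-bit lanes with the wrapper macros */
static void add4_words(int16_t dst[4], const int16_t a[4], const int16_t b[4])
{
    movq_m2r(*(const mmx_t *)a, mm0);   /* mm0 = a[3..0]                 */
    paddw_m2r(*(const mmx_t *)b, mm0);  /* mm0 += b[3..0], wrapping adds */
    movq_r2m(mm0, *(mmx_t *)dst);       /* store the four sums           */
    emms();                             /* leave MMX state afterwards    */
}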
|
@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
||||
{
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm4 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
"psubusb %%mm0, %%mm2 \n\t"
|
||||
"psubusb %%mm4, %%mm0 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm5 \n\t"
|
||||
"psubusb %%mm1, %%mm3 \n\t"
|
||||
"psubusb %%mm5, %%mm1 \n\t"
|
||||
"por %%mm2, %%mm0 \n\t"
|
||||
"por %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm3, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm4 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
"psubusb %%mm0, %%mm2 \n\t"
|
||||
"psubusb %%mm4, %%mm0 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm5 \n\t"
|
||||
"psubusb %%mm1, %%mm3 \n\t"
|
||||
"psubusb %%mm5, %%mm1 \n\t"
|
||||
"por %%mm2, %%mm0 \n\t"
|
||||
"por %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm3, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
||||
{
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"psadbw %%mm1, %%mm3 \n\t"
|
||||
"paddw %%mm3, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"psadbw %%mm1, %%mm3 \n\t"
|
||||
"paddw %%mm3, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %3, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
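In the same spirit as the '#if 0' C code this commit keeps around to document the assembler, here is a plain-C sketch of what the 8-wide SAD kernels accumulate (illustrative only: the real routines add their result into %%mm6 across the unrolled loop instead of returning it, and the _2/_4 variants first average two or four predictions with pavgb):

/* C reference: sum of absolute differences over an 8 x h block */
static int sad8_c_ref(const uint8_t *blk1, const uint8_t *blk2, int stride, int h)
{
    int sum = 0, x, y;
    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++) {
            int d = blk1[x] - blk2[x];
            sum += d < 0 ? -d : d;
        }
        blk1 += stride;
        blk2 += stride;
    }
    return sum;
}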
@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
|
||||
{
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"pavgb %%mm2, %%mm0 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"pavgb %%mm1, %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||
"psadbw %%mm1, %%mm3 \n\t"
|
||||
"paddw %%mm3, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"pavgb %%mm2, %%mm0 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"pavgb %%mm1, %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||
"psadbw %%mm1, %%mm3 \n\t"
|
||||
"paddw %%mm3, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
||||
{ //FIXME reuse src
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"movq "MANGLE(bone)", %%mm5 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"pavgb %%mm2, %%mm0 \n\t"
|
||||
"pavgb %%mm1, %%mm3 \n\t"
|
||||
"psubusb %%mm5, %%mm3 \n\t"
|
||||
"pavgb %%mm3, %%mm0 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
|
||||
"pavgb %%mm3, %%mm1 \n\t"
|
||||
"pavgb %%mm4, %%mm2 \n\t"
|
||||
"psubusb %%mm5, %%mm2 \n\t"
|
||||
"pavgb %%mm1, %%mm2 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||
"psadbw %%mm1, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"movq "MANGLE(bone)", %%mm5 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"pavgb %%mm2, %%mm0 \n\t"
|
||||
"pavgb %%mm1, %%mm3 \n\t"
|
||||
"psubusb %%mm5, %%mm3 \n\t"
|
||||
"pavgb %%mm3, %%mm0 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"psadbw %%mm2, %%mm0 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
|
||||
"pavgb %%mm3, %%mm1 \n\t"
|
||||
"pavgb %%mm4, %%mm2 \n\t"
|
||||
"psubusb %%mm5, %%mm2 \n\t"
|
||||
"pavgb %%mm1, %%mm2 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm1 \n\t"
|
||||
"psadbw %%mm1, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
|
||||
{
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||
"paddw %%mm0, %%mm1 \n\t"
|
||||
"paddw %%mm2, %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"paddw %%mm5, %%mm1 \n\t"
|
||||
"paddw %%mm5, %%mm3 \n\t"
|
||||
"psrlw $1, %%mm1 \n\t"
|
||||
"psrlw $1, %%mm3 \n\t"
|
||||
"packuswb %%mm3, %%mm1 \n\t"
|
||||
"psubusb %%mm1, %%mm4 \n\t"
|
||||
"psubusb %%mm2, %%mm1 \n\t"
|
||||
"por %%mm4, %%mm1 \n\t"
|
||||
"movq %%mm1, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"punpckhbw %%mm7, %%mm3 \n\t"
|
||||
"paddw %%mm0, %%mm1 \n\t"
|
||||
"paddw %%mm2, %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm2 \n\t"
|
||||
"paddw %%mm5, %%mm1 \n\t"
|
||||
"paddw %%mm5, %%mm3 \n\t"
|
||||
"psrlw $1, %%mm1 \n\t"
|
||||
"psrlw $1, %%mm3 \n\t"
|
||||
"packuswb %%mm3, %%mm1 \n\t"
|
||||
"psubusb %%mm1, %%mm4 \n\t"
|
||||
"psubusb %%mm2, %%mm1 \n\t"
|
||||
"por %%mm4, %%mm1 \n\t"
|
||||
"movq %%mm1, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
||||
{
|
||||
long len= -(stride*h);
|
||||
asm volatile(
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm4 \n\t"
|
||||
"movq %%mm1, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm2, %%mm4 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq %%mm2, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm0, %%mm2 \n\t"
|
||||
"paddw %%mm4, %%mm1 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm4, %%mm1 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm2 \n\t"
|
||||
"paddw %%mm5, %%mm1 \n\t"
|
||||
"psrlw $2, %%mm2 \n\t"
|
||||
"psrlw $2, %%mm1 \n\t"
|
||||
"packuswb %%mm1, %%mm2 \n\t"
|
||||
"psubusb %%mm2, %%mm3 \n\t"
|
||||
"psubusb %%mm4, %%mm2 \n\t"
|
||||
"por %%mm3, %%mm2 \n\t"
|
||||
"movq %%mm2, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq (%2, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm0, %%mm4 \n\t"
|
||||
"movq %%mm1, %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm1 \n\t"
|
||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm1, %%mm0 \n\t"
|
||||
"paddw %%mm2, %%mm4 \n\t"
|
||||
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
|
||||
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq %%mm2, %%mm1 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"punpckhbw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm0, %%mm2 \n\t"
|
||||
"paddw %%mm4, %%mm1 \n\t"
|
||||
"movq %%mm3, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm3 \n\t"
|
||||
"punpckhbw %%mm7, %%mm4 \n\t"
|
||||
"paddw %%mm3, %%mm2 \n\t"
|
||||
"paddw %%mm4, %%mm1 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%3, %%"REG_a"), %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm2 \n\t"
|
||||
"paddw %%mm5, %%mm1 \n\t"
|
||||
"psrlw $2, %%mm2 \n\t"
|
||||
"psrlw $2, %%mm1 \n\t"
|
||||
"packuswb %%mm1, %%mm2 \n\t"
|
||||
"psubusb %%mm2, %%mm3 \n\t"
|
||||
"psubusb %%mm4, %%mm2 \n\t"
|
||||
"por %%mm3, %%mm2 \n\t"
|
||||
"movq %%mm2, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpckhbw %%mm7, %%mm2 \n\t"
|
||||
"paddw %%mm2, %%mm0 \n\t"
|
||||
"paddw %%mm0, %%mm6 \n\t"
|
||||
"add %4, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
: "+a" (len)
|
||||
: "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
|
||||
);
|
||||
@ -243,13 +243,13 @@ static inline int sum_mmx(void)
{
int ret;
asm volatile(
"movq %%mm6, %%mm0 \n\t"
"psrlq $32, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movq %%mm6, %%mm0 \n\t"
"psrlq $16, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movd %%mm6, %0 \n\t"
: "=r" (ret)
);
return ret&0xFFFF;
@ -259,7 +259,7 @@ static inline int sum_mmx2(void)
{
int ret;
asm volatile(
"movd %%mm6, %0 \n\t"
: "=r" (ret)
);
return ret;
@ -270,8 +270,8 @@ static inline int sum_mmx2(void)
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1, blk2, stride, 8);\
\
@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
\
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1 , blk2 , stride, h);\
sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int
}\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
@ -384,15 +384,15 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
c->pix_abs[1][2] = sad8_y2_mmx;
c->pix_abs[1][3] = sad8_xy2_mmx;

c->sad[0]= sad16_mmx;
c->sad[1]= sad8_mmx;
}
if (mm_flags & MM_MMXEXT) {
c->pix_abs[0][0] = sad16_mmx2;
c->pix_abs[1][0] = sad8_mmx2;

c->sad[0]= sad16_mmx2;
c->sad[1]= sad8_mmx2;

if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->pix_abs[0][1] = sad16_x2_mmx2;
@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
|
||||
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
||||
//printf("%d %d ", qmul, qadd);
|
||||
asm volatile(
|
||||
"movd %1, %%mm6 \n\t" //qmul
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"movd %2, %%mm5 \n\t" //qadd
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"psubw %%mm5, %%mm7 \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %3), %%mm0 \n\t"
|
||||
"movq 8(%0, %3), %%mm1 \n\t"
|
||||
"movd %1, %%mm6 \n\t" //qmul
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"movd %2, %%mm5 \n\t" //qadd
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"psubw %%mm5, %%mm7 \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %3), %%mm0 \n\t"
|
||||
"movq 8(%0, %3), %%mm1 \n\t"
|
||||
|
||||
"pmullw %%mm6, %%mm0 \n\t"
|
||||
"pmullw %%mm6, %%mm1 \n\t"
|
||||
"pmullw %%mm6, %%mm0 \n\t"
|
||||
"pmullw %%mm6, %%mm1 \n\t"
|
||||
|
||||
"movq (%0, %3), %%mm2 \n\t"
|
||||
"movq 8(%0, %3), %%mm3 \n\t"
|
||||
"movq (%0, %3), %%mm2 \n\t"
|
||||
"movq 8(%0, %3), %%mm3 \n\t"
|
||||
|
||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
|
||||
"paddw %%mm7, %%mm0 \n\t"
|
||||
"paddw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm7, %%mm0 \n\t"
|
||||
"paddw %%mm7, %%mm1 \n\t"
|
||||
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||
|
||||
"pandn %%mm2, %%mm0 \n\t"
|
||||
"pandn %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm2, %%mm0 \n\t"
|
||||
"pandn %%mm3, %%mm1 \n\t"
|
||||
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"movq %%mm1, 8(%0, %3) \n\t"
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"movq %%mm1, 8(%0, %3) \n\t"
|
||||
|
||||
"add $16, %3 \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||
: "memory"
|
||||
);
|
||||
"add $16, %3 \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||
: "memory"
|
||||
);
|
||||
block[0]= level;
|
||||
}
|
||||
|
||||
@ -120,52 +120,52 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
|
||||
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
||||
//printf("%d %d ", qmul, qadd);
|
||||
asm volatile(
|
||||
"movd %1, %%mm6 \n\t" //qmul
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"movd %2, %%mm5 \n\t" //qadd
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"psubw %%mm5, %%mm7 \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %3), %%mm0 \n\t"
|
||||
"movq 8(%0, %3), %%mm1 \n\t"
|
||||
"movd %1, %%mm6 \n\t" //qmul
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"movd %2, %%mm5 \n\t" //qadd
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"psubw %%mm5, %%mm7 \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %3), %%mm0 \n\t"
|
||||
"movq 8(%0, %3), %%mm1 \n\t"
|
||||
|
||||
"pmullw %%mm6, %%mm0 \n\t"
|
||||
"pmullw %%mm6, %%mm1 \n\t"
|
||||
"pmullw %%mm6, %%mm0 \n\t"
|
||||
"pmullw %%mm6, %%mm1 \n\t"
|
||||
|
||||
"movq (%0, %3), %%mm2 \n\t"
|
||||
"movq 8(%0, %3), %%mm3 \n\t"
|
||||
"movq (%0, %3), %%mm2 \n\t"
|
||||
"movq 8(%0, %3), %%mm3 \n\t"
|
||||
|
||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
|
||||
"paddw %%mm7, %%mm0 \n\t"
|
||||
"paddw %%mm7, %%mm1 \n\t"
|
||||
"paddw %%mm7, %%mm0 \n\t"
|
||||
"paddw %%mm7, %%mm1 \n\t"
|
||||
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
|
||||
|
||||
"pandn %%mm2, %%mm0 \n\t"
|
||||
"pandn %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm2, %%mm0 \n\t"
|
||||
"pandn %%mm3, %%mm1 \n\t"
|
||||
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"movq %%mm1, 8(%0, %3) \n\t"
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"movq %%mm1, 8(%0, %3) \n\t"
|
||||
|
||||
"add $16, %3 \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||
: "memory"
|
||||
);
|
||||
"add $16, %3 \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -216,54 +216,54 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
|
||||
/* XXX: only mpeg1 */
|
||||
quant_matrix = s->intra_matrix;
|
||||
asm volatile(
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $3, %%mm0 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psubw %%mm7, %%mm0 \n\t"
|
||||
"psubw %%mm7, %%mm1 \n\t"
|
||||
"por %%mm7, %%mm0 \n\t"
|
||||
"por %%mm7, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $3, %%mm0 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"psubw %%mm7, %%mm0 \n\t"
|
||||
"psubw %%mm7, %%mm1 \n\t"
|
||||
"por %%mm7, %%mm0 \n\t"
|
||||
"por %%mm7, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"js 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"js 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
block[0]= block0;
|
||||
}
|
||||
|
||||
@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
|
||||
|
||||
quant_matrix = s->inter_matrix;
|
||||
asm volatile(
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
|
||||
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
|
||||
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $4, %%mm0 \n\t"
|
||||
"psraw $4, %%mm1 \n\t"
|
||||
"psubw %%mm7, %%mm0 \n\t"
|
||||
"psubw %%mm7, %%mm1 \n\t"
|
||||
"por %%mm7, %%mm0 \n\t"
|
||||
"por %%mm7, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
|
||||
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
|
||||
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $4, %%mm0 \n\t"
|
||||
"psraw $4, %%mm1 \n\t"
|
||||
"psubw %%mm7, %%mm0 \n\t"
|
||||
"psubw %%mm7, %%mm1 \n\t"
|
||||
"por %%mm7, %%mm0 \n\t"
|
||||
"por %%mm7, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"js 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"js 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
}
|
||||
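The comments inside dct_unquantize_h263/mpeg1_inter loops spell out the arithmetic: each nonzero coefficient is scaled as (2*|level|+1)*qscale*quant_matrix[i], shifted down by 4 and forced odd before its sign is restored, while zero coefficients are left untouched. A rough scalar rendering of that per-coefficient step is given below; the function name and parameter types are illustrative only and not taken from FFmpeg's sources.

#include <stdlib.h>

/* Illustrative scalar version of the per-coefficient work done by the
   MPEG-1 inter dequantization loop above; names are local to this sketch. */
static void dequant_mpeg1_inter_sketch(short *block, const unsigned short *quant_matrix,
                                       int qscale, int nCoeffs)
{
    for (int i = 0; i <= nCoeffs; i++) {
        int level = block[i];
        if (level) {
            int v = (abs(level) * 2 + 1) * qscale * quant_matrix[i];
            v = ((v >> 4) - 1) | 1;          /* psraw $4, then psubw/por: force an odd value */
            block[i] = level < 0 ? -v : v;   /* restore the sign (pxor/psubw with the sign mask) */
        }                                    /* zero coefficients stay zero (pcmpeqw/pandn) */
    }
}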
|
||||
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
|
||||
@ -351,50 +351,50 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
|
||||
block0 = block[0] * s->c_dc_scale;
|
||||
quant_matrix = s->intra_matrix;
|
||||
asm volatile(
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $3, %%mm0 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlw $15, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psraw $3, %%mm0 \n\t"
|
||||
"psraw $3, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
block[0]= block0;
|
||||
//Note, we dont do mismatch control for intra as errors cannot accumulate
|
||||
}
|
||||
@ -412,68 +412,68 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
|
||||
|
||||
quant_matrix = s->inter_matrix;
|
||||
asm volatile(
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlq $48, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16\n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
|
||||
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psrlw $4, %%mm0 \n\t"
|
||||
"psrlw $4, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"pxor %%mm4, %%mm7 \n\t"
|
||||
"pxor %%mm5, %%mm7 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"
|
||||
"psrlq $48, %%mm7 \n\t"
|
||||
"movd %2, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"mov %3, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0, %%"REG_a"), %%mm0 \n\t"
|
||||
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq (%1, %%"REG_a"), %%mm4 \n\t"
|
||||
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
|
||||
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
|
||||
"pxor %%mm2, %%mm2 \n\t"
|
||||
"pxor %%mm3, %%mm3 \n\t"
|
||||
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
|
||||
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
|
||||
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
|
||||
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
|
||||
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
|
||||
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
|
||||
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
|
||||
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
|
||||
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
|
||||
"psrlw $4, %%mm0 \n\t"
|
||||
"psrlw $4, %%mm1 \n\t"
|
||||
"pxor %%mm2, %%mm0 \n\t"
|
||||
"pxor %%mm3, %%mm1 \n\t"
|
||||
"psubw %%mm2, %%mm0 \n\t"
|
||||
"psubw %%mm3, %%mm1 \n\t"
|
||||
"pandn %%mm0, %%mm4 \n\t"
|
||||
"pandn %%mm1, %%mm5 \n\t"
|
||||
"pxor %%mm4, %%mm7 \n\t"
|
||||
"pxor %%mm5, %%mm7 \n\t"
|
||||
"movq %%mm4, (%0, %%"REG_a") \n\t"
|
||||
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
|
||||
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"jng 1b \n\t"
|
||||
"movd 124(%0, %3), %%mm0 \n\t"
|
||||
"movq %%mm7, %%mm6 \n\t"
|
||||
"psrlq $32, %%mm7 \n\t"
|
||||
"pxor %%mm6, %%mm7 \n\t"
|
||||
"movq %%mm7, %%mm6 \n\t"
|
||||
"psrlq $16, %%mm7 \n\t"
|
||||
"pxor %%mm6, %%mm7 \n\t"
|
||||
"pslld $31, %%mm7 \n\t"
|
||||
"psrlq $15, %%mm7 \n\t"
|
||||
"pxor %%mm7, %%mm0 \n\t"
|
||||
"movd %%mm0, 124(%0, %3) \n\t"
|
||||
"add $16, %%"REG_a" \n\t"
|
||||
"jng 1b \n\t"
|
||||
"movd 124(%0, %3), %%mm0 \n\t"
|
||||
"movq %%mm7, %%mm6 \n\t"
|
||||
"psrlq $32, %%mm7 \n\t"
|
||||
"pxor %%mm6, %%mm7 \n\t"
|
||||
"movq %%mm7, %%mm6 \n\t"
|
||||
"psrlq $16, %%mm7 \n\t"
|
||||
"pxor %%mm6, %%mm7 \n\t"
|
||||
"pslld $31, %%mm7 \n\t"
|
||||
"psrlq $15, %%mm7 \n\t"
|
||||
"pxor %%mm7, %%mm0 \n\t"
|
||||
"movd %%mm0, 124(%0, %3) \n\t"
|
||||
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
|
||||
: "%"REG_a, "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* draw the edges of width 'w' of an image of size width, height
|
||||
@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
||||
ptr = buf;
|
||||
if(w==8)
|
||||
{
|
||||
asm volatile(
|
||||
"1: \n\t"
|
||||
"movd (%0), %%mm0 \n\t"
|
||||
"punpcklbw %%mm0, %%mm0 \n\t"
|
||||
"punpcklwd %%mm0, %%mm0 \n\t"
|
||||
"punpckldq %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, -8(%0) \n\t"
|
||||
"movq -8(%0, %2), %%mm1 \n\t"
|
||||
"punpckhbw %%mm1, %%mm1 \n\t"
|
||||
"punpckhwd %%mm1, %%mm1 \n\t"
|
||||
"punpckhdq %%mm1, %%mm1 \n\t"
|
||||
"movq %%mm1, (%0, %2) \n\t"
|
||||
"add %1, %0 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (ptr)
|
||||
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
asm volatile(
|
||||
"1: \n\t"
|
||||
"movd (%0), %%mm0 \n\t"
|
||||
"punpcklbw %%mm0, %%mm0 \n\t"
|
||||
"punpcklwd %%mm0, %%mm0 \n\t"
|
||||
"punpckldq %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, -8(%0) \n\t"
|
||||
"movq %%mm0, -16(%0) \n\t"
|
||||
"movq -8(%0, %2), %%mm1 \n\t"
|
||||
"punpckhbw %%mm1, %%mm1 \n\t"
|
||||
"punpckhwd %%mm1, %%mm1 \n\t"
|
||||
"punpckhdq %%mm1, %%mm1 \n\t"
|
||||
"movq %%mm1, (%0, %2) \n\t"
|
||||
"movq %%mm1, 8(%0, %2) \n\t"
|
||||
"add %1, %0 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (ptr)
|
||||
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
||||
);
|
||||
}
|
||||
|
||||
for(i=0;i<w;i+=4) {
|
||||
/* top and bottom (and hopefully also the corners) */
|
||||
ptr= buf - (i + 1) * wrap - w;
|
||||
asm volatile(
|
||||
"1: \n\t"
|
||||
"movq (%1, %0), %%mm0 \n\t"
|
||||
"movq %%mm0, (%0) \n\t"
|
||||
"movq %%mm0, (%0, %2) \n\t"
|
||||
"movq %%mm0, (%0, %2, 2) \n\t"
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"add $8, %0 \n\t"
|
||||
"cmp %4, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (ptr)
|
||||
: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
|
||||
);
|
||||
ptr= last_line + (i + 1) * wrap - w;
|
||||
asm volatile(
|
||||
"1: \n\t"
|
||||
"movq (%1, %0), %%mm0 \n\t"
|
||||
"movq %%mm0, (%0) \n\t"
|
||||
"movq %%mm0, (%0, %2) \n\t"
|
||||
"movq %%mm0, (%0, %2, 2) \n\t"
|
||||
"movq %%mm0, (%0, %3) \n\t"
|
||||
"add $8, %0 \n\t"
|
||||
"cmp %4, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (ptr)
|
||||
: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -572,47 +572,47 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
|
||||
s->dct_count[intra]++;
|
||||
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t"
|
||||
"movq (%0), %%mm2 \n\t"
|
||||
"movq 8(%0), %%mm3 \n\t"
|
||||
"pcmpgtw %%mm2, %%mm0 \n\t"
|
||||
"pcmpgtw %%mm3, %%mm1 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"psubw %%mm0, %%mm2 \n\t"
|
||||
"psubw %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm2, %%mm4 \n\t"
|
||||
"movq %%mm3, %%mm5 \n\t"
|
||||
"psubusw (%2), %%mm2 \n\t"
|
||||
"psubusw 8(%2), %%mm3 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"psubw %%mm0, %%mm2 \n\t"
|
||||
"psubw %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm2, (%0) \n\t"
|
||||
"movq %%mm3, 8(%0) \n\t"
|
||||
"movq %%mm4, %%mm2 \n\t"
|
||||
"movq %%mm5, %%mm3 \n\t"
|
||||
"punpcklwd %%mm7, %%mm4 \n\t"
|
||||
"punpckhwd %%mm7, %%mm2 \n\t"
|
||||
"punpcklwd %%mm7, %%mm5 \n\t"
|
||||
"punpckhwd %%mm7, %%mm3 \n\t"
|
||||
"paddd (%1), %%mm4 \n\t"
|
||||
"paddd 8(%1), %%mm2 \n\t"
|
||||
"paddd 16(%1), %%mm5 \n\t"
|
||||
"paddd 24(%1), %%mm3 \n\t"
|
||||
"movq %%mm4, (%1) \n\t"
|
||||
"movq %%mm2, 8(%1) \n\t"
|
||||
"movq %%mm5, 16(%1) \n\t"
|
||||
"movq %%mm3, 24(%1) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"add $32, %1 \n\t"
|
||||
"add $16, %2 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t"
|
||||
"movq (%0), %%mm2 \n\t"
|
||||
"movq 8(%0), %%mm3 \n\t"
|
||||
"pcmpgtw %%mm2, %%mm0 \n\t"
|
||||
"pcmpgtw %%mm3, %%mm1 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"psubw %%mm0, %%mm2 \n\t"
|
||||
"psubw %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm2, %%mm4 \n\t"
|
||||
"movq %%mm3, %%mm5 \n\t"
|
||||
"psubusw (%2), %%mm2 \n\t"
|
||||
"psubusw 8(%2), %%mm3 \n\t"
|
||||
"pxor %%mm0, %%mm2 \n\t"
|
||||
"pxor %%mm1, %%mm3 \n\t"
|
||||
"psubw %%mm0, %%mm2 \n\t"
|
||||
"psubw %%mm1, %%mm3 \n\t"
|
||||
"movq %%mm2, (%0) \n\t"
|
||||
"movq %%mm3, 8(%0) \n\t"
|
||||
"movq %%mm4, %%mm2 \n\t"
|
||||
"movq %%mm5, %%mm3 \n\t"
|
||||
"punpcklwd %%mm7, %%mm4 \n\t"
|
||||
"punpckhwd %%mm7, %%mm2 \n\t"
|
||||
"punpcklwd %%mm7, %%mm5 \n\t"
|
||||
"punpckhwd %%mm7, %%mm3 \n\t"
|
||||
"paddd (%1), %%mm4 \n\t"
|
||||
"paddd 8(%1), %%mm2 \n\t"
|
||||
"paddd 16(%1), %%mm5 \n\t"
|
||||
"paddd 24(%1), %%mm3 \n\t"
|
||||
"movq %%mm4, (%1) \n\t"
|
||||
"movq %%mm2, 8(%1) \n\t"
|
||||
"movq %%mm5, 16(%1) \n\t"
|
||||
"movq %%mm3, 24(%1) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"add $32, %1 \n\t"
|
||||
"add $16, %2 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (block), "+r" (sum), "+r" (offset)
|
||||
: "r"(block+64)
|
||||
);
|
||||
@ -626,47 +626,47 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
|
||||
s->dct_count[intra]++;
|
||||
|
||||
asm volatile(
|
||||
"pxor %%xmm7, %%xmm7 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%xmm0, %%xmm0 \n\t"
|
||||
"pxor %%xmm1, %%xmm1 \n\t"
|
||||
"movdqa (%0), %%xmm2 \n\t"
|
||||
"movdqa 16(%0), %%xmm3 \n\t"
|
||||
"pcmpgtw %%xmm2, %%xmm0 \n\t"
|
||||
"pcmpgtw %%xmm3, %%xmm1 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pxor %%xmm1, %%xmm3 \n\t"
|
||||
"psubw %%xmm0, %%xmm2 \n\t"
|
||||
"psubw %%xmm1, %%xmm3 \n\t"
|
||||
"movdqa %%xmm2, %%xmm4 \n\t"
|
||||
"movdqa %%xmm3, %%xmm5 \n\t"
|
||||
"psubusw (%2), %%xmm2 \n\t"
|
||||
"psubusw 16(%2), %%xmm3 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pxor %%xmm1, %%xmm3 \n\t"
|
||||
"psubw %%xmm0, %%xmm2 \n\t"
|
||||
"psubw %%xmm1, %%xmm3 \n\t"
|
||||
"movdqa %%xmm2, (%0) \n\t"
|
||||
"movdqa %%xmm3, 16(%0) \n\t"
|
||||
"movdqa %%xmm4, %%xmm6 \n\t"
|
||||
"movdqa %%xmm5, %%xmm0 \n\t"
|
||||
"punpcklwd %%xmm7, %%xmm4 \n\t"
|
||||
"punpckhwd %%xmm7, %%xmm6 \n\t"
|
||||
"punpcklwd %%xmm7, %%xmm5 \n\t"
|
||||
"punpckhwd %%xmm7, %%xmm0 \n\t"
|
||||
"paddd (%1), %%xmm4 \n\t"
|
||||
"paddd 16(%1), %%xmm6 \n\t"
|
||||
"paddd 32(%1), %%xmm5 \n\t"
|
||||
"paddd 48(%1), %%xmm0 \n\t"
|
||||
"movdqa %%xmm4, (%1) \n\t"
|
||||
"movdqa %%xmm6, 16(%1) \n\t"
|
||||
"movdqa %%xmm5, 32(%1) \n\t"
|
||||
"movdqa %%xmm0, 48(%1) \n\t"
|
||||
"add $32, %0 \n\t"
|
||||
"add $64, %1 \n\t"
|
||||
"add $32, %2 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
"pxor %%xmm7, %%xmm7 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%xmm0, %%xmm0 \n\t"
|
||||
"pxor %%xmm1, %%xmm1 \n\t"
|
||||
"movdqa (%0), %%xmm2 \n\t"
|
||||
"movdqa 16(%0), %%xmm3 \n\t"
|
||||
"pcmpgtw %%xmm2, %%xmm0 \n\t"
|
||||
"pcmpgtw %%xmm3, %%xmm1 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pxor %%xmm1, %%xmm3 \n\t"
|
||||
"psubw %%xmm0, %%xmm2 \n\t"
|
||||
"psubw %%xmm1, %%xmm3 \n\t"
|
||||
"movdqa %%xmm2, %%xmm4 \n\t"
|
||||
"movdqa %%xmm3, %%xmm5 \n\t"
|
||||
"psubusw (%2), %%xmm2 \n\t"
|
||||
"psubusw 16(%2), %%xmm3 \n\t"
|
||||
"pxor %%xmm0, %%xmm2 \n\t"
|
||||
"pxor %%xmm1, %%xmm3 \n\t"
|
||||
"psubw %%xmm0, %%xmm2 \n\t"
|
||||
"psubw %%xmm1, %%xmm3 \n\t"
|
||||
"movdqa %%xmm2, (%0) \n\t"
|
||||
"movdqa %%xmm3, 16(%0) \n\t"
|
||||
"movdqa %%xmm4, %%xmm6 \n\t"
|
||||
"movdqa %%xmm5, %%xmm0 \n\t"
|
||||
"punpcklwd %%xmm7, %%xmm4 \n\t"
|
||||
"punpckhwd %%xmm7, %%xmm6 \n\t"
|
||||
"punpcklwd %%xmm7, %%xmm5 \n\t"
|
||||
"punpckhwd %%xmm7, %%xmm0 \n\t"
|
||||
"paddd (%1), %%xmm4 \n\t"
|
||||
"paddd 16(%1), %%xmm6 \n\t"
|
||||
"paddd 32(%1), %%xmm5 \n\t"
|
||||
"paddd 48(%1), %%xmm0 \n\t"
|
||||
"movdqa %%xmm4, (%1) \n\t"
|
||||
"movdqa %%xmm6, 16(%1) \n\t"
|
||||
"movdqa %%xmm5, 32(%1) \n\t"
|
||||
"movdqa %%xmm0, 48(%1) \n\t"
|
||||
"add $32, %0 \n\t"
|
||||
"add $64, %1 \n\t"
|
||||
"add $32, %2 \n\t"
|
||||
"cmp %3, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (block), "+r" (sum), "+r" (offset)
|
||||
: "r"(block+64)
|
||||
);
|
||||
@ -705,10 +705,10 @@ void MPV_common_init_mmx(MpegEncContext *s)
draw_edges = draw_edges_mmx;

if (mm_flags & MM_SSE2) {
s->denoise_dct= denoise_dct_sse2;
} else {
s->denoise_dct= denoise_dct_mmx;
}

if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
if(mm_flags & MM_SSE2){
@ -21,26 +21,26 @@
|
||||
#undef PMAXW
|
||||
#ifdef HAVE_MMX2
|
||||
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
|
||||
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
|
||||
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
|
||||
#define PMAX(a,b) \
|
||||
"pshufw $0x0E," #a ", " #b " \n\t"\
|
||||
PMAXW(b, a)\
|
||||
"pshufw $0x01," #a ", " #b " \n\t"\
|
||||
PMAXW(b, a)
|
||||
"pshufw $0x0E," #a ", " #b " \n\t"\
|
||||
PMAXW(b, a)\
|
||||
"pshufw $0x01," #a ", " #b " \n\t"\
|
||||
PMAXW(b, a)
|
||||
#else
|
||||
#define SPREADW(a) \
|
||||
"punpcklwd " #a ", " #a " \n\t"\
|
||||
"punpcklwd " #a ", " #a " \n\t"
|
||||
"punpcklwd " #a ", " #a " \n\t"\
|
||||
"punpcklwd " #a ", " #a " \n\t"
|
||||
#define PMAXW(a,b) \
|
||||
"psubusw " #a ", " #b " \n\t"\
|
||||
"paddw " #a ", " #b " \n\t"
|
||||
"psubusw " #a ", " #b " \n\t"\
|
||||
"paddw " #a ", " #b " \n\t"
|
||||
#define PMAX(a,b) \
|
||||
"movq " #a ", " #b " \n\t"\
|
||||
"psrlq $32, " #a " \n\t"\
|
||||
PMAXW(b, a)\
|
||||
"movq " #a ", " #b " \n\t"\
|
||||
"psrlq $16, " #a " \n\t"\
|
||||
PMAXW(b, a)
|
||||
"movq " #a ", " #b " \n\t"\
|
||||
"psrlq $32, " #a " \n\t"\
|
||||
PMAXW(b, a)\
|
||||
"movq " #a ", " #b " \n\t"\
|
||||
"psrlq $16, " #a " \n\t"\
|
||||
PMAXW(b, a)
|
||||
|
||||
#endif
|
||||
|
||||
@ -71,18 +71,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
||||
if (!s->h263_aic) {
|
||||
#if 1
|
||||
asm volatile (
|
||||
"mul %%ecx \n\t"
|
||||
: "=d" (level), "=a"(dummy)
|
||||
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
|
||||
"mul %%ecx \n\t"
|
||||
: "=d" (level), "=a"(dummy)
|
||||
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
|
||||
);
|
||||
#else
|
||||
asm volatile (
|
||||
"xorl %%edx, %%edx \n\t"
|
||||
"divw %%cx \n\t"
|
||||
"movzwl %%ax, %%eax \n\t"
|
||||
: "=a" (level)
|
||||
: "a" ((block[0]>>2) + q), "c" (q<<1)
|
||||
: "%edx"
|
||||
"xorl %%edx, %%edx \n\t"
|
||||
"divw %%cx \n\t"
|
||||
"movzwl %%ax, %%eax \n\t"
|
||||
: "=a" (level)
|
||||
: "a" ((block[0]>>2) + q), "c" (q<<1)
|
||||
: "%edx"
|
||||
);
|
||||
#endif
|
||||
} else
|
||||
@ -103,94 +103,94 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
||||
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
|
||||
|
||||
asm volatile(
|
||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||
SPREADW(%%mm3)
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||
"movq (%2), %%mm5 \n\t" // qmat[0]
|
||||
"pxor %%mm6, %%mm6 \n\t"
|
||||
"psubw (%3), %%mm6 \n\t" // -bias[0]
|
||||
"mov $-128, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
||||
"por %%mm0, %%mm4 \n\t"
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||
"pandn %%mm1, %%mm0 \n\t"
|
||||
PMAXW(%%mm0, %%mm3)
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
PMAX(%%mm3, %%mm0)
|
||||
"movd %%mm3, %%"REG_a" \n\t"
|
||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||
: "+a" (last_non_zero_p1)
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||
"movq (%2), %%mm5 \n\t" // qmat[0]
|
||||
"pxor %%mm6, %%mm6 \n\t"
|
||||
"psubw (%3), %%mm6 \n\t" // -bias[0]
|
||||
"mov $-128, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
|
||||
"por %%mm0, %%mm4 \n\t"
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||
"pandn %%mm1, %%mm0 \n\t"
|
||||
PMAXW(%%mm0, %%mm3)
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
PMAX(%%mm3, %%mm0)
|
||||
"movd %%mm3, %%"REG_a" \n\t"
|
||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||
: "+a" (last_non_zero_p1)
|
||||
: "r" (block+64), "r" (qmat), "r" (bias),
|
||||
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
||||
);
|
||||
// note the asm is split cuz gcc doesnt like that many operands ...
|
||||
asm volatile(
|
||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||
SPREADW(%%mm1)
|
||||
"psubusw %%mm1, %%mm4 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"movd %%mm4, %0 \n\t" // *overflow
|
||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||
SPREADW(%%mm1)
|
||||
"psubusw %%mm1, %%mm4 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"movd %%mm4, %0 \n\t" // *overflow
|
||||
: "=g" (*overflow)
|
||||
: "g" (s->max_qcoeff)
|
||||
);
|
||||
}else{ // FMT_H263
|
||||
asm volatile(
|
||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
|
||||
SPREADW(%%mm3)
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||
"mov $-128, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
|
||||
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
|
||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
||||
"por %%mm0, %%mm4 \n\t"
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||
"pandn %%mm1, %%mm0 \n\t"
|
||||
PMAXW(%%mm0, %%mm3)
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
PMAX(%%mm3, %%mm0)
|
||||
"movd %%mm3, %%"REG_a" \n\t"
|
||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||
: "+a" (last_non_zero_p1)
|
||||
"pxor %%mm7, %%mm7 \n\t" // 0
|
||||
"pxor %%mm4, %%mm4 \n\t" // 0
|
||||
"mov $-128, %%"REG_a" \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"pxor %%mm1, %%mm1 \n\t" // 0
|
||||
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
|
||||
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
|
||||
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
|
||||
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
|
||||
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
|
||||
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
|
||||
"por %%mm0, %%mm4 \n\t"
|
||||
"pxor %%mm1, %%mm0 \n\t"
|
||||
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
|
||||
"movq %%mm0, (%5, %%"REG_a") \n\t"
|
||||
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
|
||||
"movq (%4, %%"REG_a"), %%mm1 \n\t"
|
||||
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
|
||||
"pandn %%mm1, %%mm0 \n\t"
|
||||
PMAXW(%%mm0, %%mm3)
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
" js 1b \n\t"
|
||||
PMAX(%%mm3, %%mm0)
|
||||
"movd %%mm3, %%"REG_a" \n\t"
|
||||
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
|
||||
: "+a" (last_non_zero_p1)
|
||||
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
|
||||
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
|
||||
);
|
||||
// note the asm is split cuz gcc doesnt like that many operands ...
|
||||
asm volatile(
|
||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||
SPREADW(%%mm1)
|
||||
"psubusw %%mm1, %%mm4 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"movd %%mm4, %0 \n\t" // *overflow
|
||||
"movd %1, %%mm1 \n\t" // max_qcoeff
|
||||
SPREADW(%%mm1)
|
||||
"psubusw %%mm1, %%mm4 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"movd %%mm4, %0 \n\t" // *overflow
|
||||
: "=g" (*overflow)
|
||||
: "g" (s->max_qcoeff)
|
||||
);
|
||||
|
File diff suppressed because it is too large
@ -257,13 +257,13 @@ enum PixelFormat avcodec_get_pix_fmt(const char* name)

for (i=0; i < PIX_FMT_NB; i++)
if (!strcmp(pix_fmt_info[i].name, name))
break;
return i;
}

/* Picture field are filled with 'ptr' addresses. Also return size */
int avpicture_fill(AVPicture *picture, uint8_t *ptr,
int pix_fmt, int width, int height)
{
int size, w2, h2, size2;
PixFmtInfo *pinfo;
@ -373,36 +373,36 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
pix_fmt == PIX_FMT_RGB565 ||
pix_fmt == PIX_FMT_RGB555)
w = width * 2;
else if (pix_fmt == PIX_FMT_UYVY411)
w = width + width/2;
else if (pix_fmt == PIX_FMT_PAL8)
w = width;
else
w = width * (pf->depth * pf->nb_channels / 8);

data_planes = 1;
h = height;
} else {
data_planes = pf->nb_channels;
w = (width*pf->depth + 7)/8;
h = height;
}

for (i=0; i<data_planes; i++) {
if (i == 1) {
w = width >> pf->x_chroma_shift;
h = height >> pf->y_chroma_shift;
}
s = src->data[i];
for(j=0; j<h; j++) {
memcpy(dest, s, w);
dest += w;
s += src->linesize[i];
}
}

if (pf->pixel_type == FF_PIXEL_PALETTE)
memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);

return size;
}
@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt)
|
||||
case PIX_FMT_RGB555:
|
||||
bits = 16;
|
||||
break;
|
||||
case PIX_FMT_UYVY411:
|
||||
bits = 12;
|
||||
break;
|
||||
case PIX_FMT_UYVY411:
|
||||
bits = 12;
|
||||
break;
|
||||
default:
|
||||
bits = pf->depth * pf->nb_channels;
|
||||
break;
|
||||
@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src,
|
||||
case PIX_FMT_RGB555:
|
||||
bits = 16;
|
||||
break;
|
||||
case PIX_FMT_UYVY411:
|
||||
bits = 12;
|
||||
break;
|
||||
case PIX_FMT_UYVY411:
|
||||
bits = 12;
|
||||
break;
|
||||
default:
|
||||
bits = pf->depth * pf->nb_channels;
|
||||
break;
|
||||
@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src,
|
||||
cr = cr1;
|
||||
for(w = width; w >= 4; w -= 4) {
|
||||
cb[0] = p[0];
|
||||
lum[0] = p[1];
|
||||
lum[0] = p[1];
|
||||
lum[1] = p[2];
|
||||
cr[0] = p[3];
|
||||
lum[2] = p[4];
|
||||
lum[3] = p[5];
|
||||
lum[2] = p[4];
|
||||
lum[3] = p[5];
|
||||
p += 6;
|
||||
lum += 4;
|
||||
cb++;
|
||||
@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
|
||||
|
||||
#define SCALEBITS 10
|
||||
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
||||
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
|
||||
|
||||
#define YUV_TO_RGB1_CCIR(cb1, cr1)\
|
||||
{\
|
||||
@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
|
||||
static inline int C_JPEG_TO_CCIR(int y) {
|
||||
y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
|
||||
if (y < 16)
|
||||
y = 16;
|
||||
y = 16;
|
||||
return y;
|
||||
}
|
||||
|
||||
@ -1681,7 +1681,7 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src,
|
||||
|
||||
typedef struct ConvertEntry {
|
||||
void (*convert)(AVPicture *dst,
|
||||
const AVPicture *src, int width, int height);
|
||||
const AVPicture *src, int width, int height);
|
||||
} ConvertEntry;
|
||||
|
||||
/* Add each new convertion function in this table. In order to be able
|
||||
@ -1721,7 +1721,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
|
||||
[PIX_FMT_RGBA32] = {
|
||||
.convert = yuv420p_to_rgba32
|
||||
},
|
||||
[PIX_FMT_UYVY422] = {
|
||||
[PIX_FMT_UYVY422] = {
|
||||
.convert = yuv420p_to_uyvy422,
|
||||
},
|
||||
},
|
||||
@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
|
||||
* @return ored mask of FF_ALPHA_xxx constants
|
||||
*/
|
||||
int img_get_alpha_info(const AVPicture *src,
|
||||
int pix_fmt, int width, int height)
|
||||
int pix_fmt, int width, int height)
|
||||
{
|
||||
PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
|
||||
int ret;
|
||||
@ -2300,10 +2300,10 @@ int img_get_alpha_info(const AVPicture *src,
|
||||
|
||||
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
||||
static void deinterlace_line(uint8_t *dst,
|
||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||
const uint8_t *lum,
|
||||
int size)
|
||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||
const uint8_t *lum,
|
||||
int size)
|
||||
{
|
||||
#ifndef HAVE_MMX
|
||||
uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
||||
@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
|
||||
}
|
||||
|
||||
static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
|
||||
int width, int height)
|
||||
int width, int height)
|
||||
{
|
||||
uint8_t *src_m1, *src_0, *src_p1, *src_p2;
|
||||
int y;
|
||||
@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
|
||||
if (pix_fmt != PIX_FMT_YUV420P &&
|
||||
pix_fmt != PIX_FMT_YUV422P &&
|
||||
pix_fmt != PIX_FMT_YUV444P &&
|
||||
pix_fmt != PIX_FMT_YUV411P)
|
||||
pix_fmt != PIX_FMT_YUV411P)
|
||||
return -1;
|
||||
if ((width & 3) != 0 || (height & 3) != 0)
|
||||
return -1;
|
||||
|
@ -821,7 +821,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
|
||||
#ifdef RGBA_IN
|
||||
|
||||
static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
|
||||
int width, int height)
|
||||
int width, int height)
|
||||
{
|
||||
const unsigned char *p;
|
||||
int src_wrap, ret, x, y;
|
||||
|
@ -64,8 +64,8 @@ static inline int get_phase(int pos)
|
||||
|
||||
/* This function must be optimized */
|
||||
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
int src_width, int src_start, int src_incr,
|
||||
int16_t *filters)
|
||||
int src_width, int src_start, int src_incr,
|
||||
int16_t *filters)
|
||||
{
|
||||
int src_pos, phase, sum, i;
|
||||
const uint8_t *s;
|
||||
@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
|
||||
/* This function must be optimized */
|
||||
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
int wrap, int16_t *filter)
|
||||
int wrap, int16_t *filter)
|
||||
{
|
||||
int sum, i;
|
||||
const uint8_t *s;
|
||||
@ -167,7 +167,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
|
||||
/* XXX: do four pixels at a time */
|
||||
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
|
||||
const uint8_t *src, int src_width,
|
||||
const uint8_t *src, int src_width,
|
||||
int src_start, int src_incr, int16_t *filters)
|
||||
{
|
||||
int src_pos, phase;
|
||||
@ -212,7 +212,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
|
||||
}
|
||||
|
||||
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
int wrap, int16_t *filter)
|
||||
int wrap, int16_t *filter)
|
||||
{
|
||||
int sum, i, v;
|
||||
const uint8_t *s;
|
||||
@ -277,18 +277,18 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ALTIVEC
|
||||
typedef union {
|
||||
typedef union {
|
||||
vector unsigned char v;
|
||||
unsigned char c[16];
|
||||
} vec_uc_t;
|
||||
|
||||
typedef union {
|
||||
typedef union {
|
||||
vector signed short v;
|
||||
signed short s[8];
|
||||
} vec_ss_t;
|
||||
|
||||
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
int wrap, int16_t *filter)
|
||||
int wrap, int16_t *filter)
|
||||
{
|
||||
int sum, i;
|
||||
const uint8_t *s;
|
||||
@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
|
||||
/* slow version to handle limit cases. Does not need optimisation */
|
||||
static void h_resample_slow(uint8_t *dst, int dst_width,
|
||||
const uint8_t *src, int src_width,
|
||||
const uint8_t *src, int src_width,
|
||||
int src_start, int src_incr, int16_t *filters)
|
||||
{
|
||||
int src_pos, phase, sum, j, v, i;
|
||||
@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width,
|
||||
}
|
||||
|
||||
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
|
||||
int src_width, int src_start, int src_incr,
|
||||
int16_t *filters)
|
||||
int src_width, int src_start, int src_incr,
|
||||
int16_t *filters)
|
||||
{
|
||||
int n, src_end;
|
||||
|
||||
@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
|
||||
ImgReSampleContext *s;
|
||||
|
||||
if (!owidth || !oheight || !iwidth || !iheight)
|
||||
return NULL;
|
||||
return NULL;
|
||||
|
||||
s = av_mallocz(sizeof(ImgReSampleContext));
|
||||
if (!s)
|
||||
|
@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s)
|
||||
for (i=0; i < 128; ++i) {
|
||||
s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
|
||||
s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120)
|
||||
? 236 : 2*((i + 2) - ((i + 1) % 3)));
|
||||
? 236 : 2*((i + 2) - ((i + 1) % 3)));
|
||||
s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
|
||||
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
|
||||
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
|
||||
s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6));
|
||||
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
|
||||
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
|
||||
s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
|
||||
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
|
||||
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
|
||||
}
|
||||
|
||||
s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
|
||||
@ -84,8 +84,8 @@ static void build_modpred(Indeo3DecodeContext *s)
|
||||
for (i=0; i < 24; ++i) {
|
||||
for (j=0; j < 256; ++j) {
|
||||
s->corrector_type[i*256+j] = (j < corrector_type_0[i])
|
||||
? 1 : ((j < 248 || (i == 16 && j == 248))
|
||||
? 0 : corrector_type_2[j - 248]);
|
||||
? 1 : ((j < 248 || (i == 16 && j == 248))
|
||||
? 0 : corrector_type_2[j - 248]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -83,10 +83,10 @@
@@ -135,7 +135,7 @@ static always_inline void row_fdct(DCTELEM * data){
@@ -144,30 +144,30 @@ static always_inline void row_fdct(DCTELEM * data){
@@ -202,7 +202,7 @@ fdct_ifast (DCTELEM * data)
@@ -216,7 +216,7 @@ fdct_ifast (DCTELEM * data)
@@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data)
@@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data)
@@ -293,7 +293,7 @@ fdct_ifast248 (DCTELEM * data)

@@ -92,10 +92,10 @@
@@ -106,18 +106,18 @@
@@ -185,9 +185,9 @@ static always_inline void row_fdct(DCTELEM * data){
@@ -217,7 +217,7 @@ static always_inline void row_fdct(DCTELEM * data){
@@ -267,9 +267,9 @@ ff_jpeg_fdct_islow (DCTELEM * data)
@@ -295,15 +295,15 @@ ff_jpeg_fdct_islow (DCTELEM * data)
@@ -350,9 +350,9 @@ ff_fdct248_islow (DCTELEM * data)
@@ -364,10 +364,10 @@ ff_fdct248_islow (DCTELEM * data)
1270 libavcodec/jrevdct.c: file diff suppressed because it is too large
@@ -81,8 +81,8 @@
@@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha
@@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
@@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
@@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
@@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx)
@@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx)
@@ -883,28 +883,28 @@ static int encode_end(AVCodecContext *avctx)
@@ -42,7 +42,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3],

@@ -26,35 +26,35 @@
@@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
@@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
@@ -167,13 +167,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
@@ -185,9 +185,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
@@ -218,21 +218,21 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
@@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
@@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
@@ -398,17 +398,17 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
@@ -426,7 +426,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
@@ -483,22 +483,22 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
@@ -522,11 +522,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -584,29 +584,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -643,11 +643,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -659,15 +659,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -777,16 +777,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -808,7 +808,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
@@ -820,16 +820,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
@@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
@@ -916,12 +916,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
@@ -937,12 +937,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
@@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
@@ -1001,14 +1001,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
||||
vector unsigned char srcA##i = vec_ld(i * stride, src); \
|
||||
#define LOAD_DOUBLE_LINE(i, j) \
|
||||
vector unsigned char perm1##i = vec_lvsl(i * stride, src); \
|
||||
vector unsigned char perm2##i = vec_lvsl(j * stride, src); \
|
||||
vector unsigned char srcA##i = vec_ld(i * stride, src); \
|
||||
vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \
|
||||
vector unsigned char srcC##i = vec_ld(j * stride, src); \
|
||||
vector unsigned char srcC##i = vec_ld(j * stride, src); \
|
||||
vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \
|
||||
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
|
||||
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
|
||||
vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)
|
||||
|
||||
LOAD_DOUBLE_LINE(0, 1);
|
||||
@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
|
||||
const vector unsigned char zero = vec_splat_u8(0);
|
||||
const vector unsigned char magic_perm = (const vector unsigned char)
|
||||
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
|
||||
|
||||
#define LOAD_DOUBLE_LINE(i, j) \
|
||||
vector unsigned char src##i = vec_ld(i * 16, src); \
|
||||
#define LOAD_DOUBLE_LINE(i, j) \
|
||||
vector unsigned char src##i = vec_ld(i * 16, src); \
|
||||
vector unsigned char src##j = vec_ld(j * 16, src)
|
||||
|
||||
LOAD_DOUBLE_LINE(0, 1);
|
||||
@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
|
||||
|
||||
|
||||
const vector signed char neg1 = vec_splat_s8(-1);
|
||||
#define STORE_DOUBLE_LINE(i, j) \
|
||||
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
|
||||
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
|
||||
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
|
||||
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
|
||||
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
|
||||
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
|
||||
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
|
||||
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
|
||||
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
|
||||
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
|
||||
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
|
||||
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
|
||||
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
|
||||
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
|
||||
vec_st(dstAF##i, i * stride, dst); \
|
||||
vec_st(dstBF##i, i * stride + 16, dst); \
|
||||
vec_st(dstAF##j, j * stride, dst); \
|
||||
#define STORE_DOUBLE_LINE(i, j) \
|
||||
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
|
||||
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
|
||||
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
|
||||
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
|
||||
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
|
||||
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
|
||||
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
|
||||
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
|
||||
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
|
||||
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
|
||||
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
|
||||
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
|
||||
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
|
||||
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
|
||||
vec_st(dstAF##i, i * stride, dst); \
|
||||
vec_st(dstBF##i, i * stride + 16, dst); \
|
||||
vec_st(dstAF##j, j * stride, dst); \
|
||||
vec_st(dstBF##j, j * stride + 16, dst)
|
||||
|
||||
STORE_DOUBLE_LINE(0,1);
|
||||
|
@ -21,42 +21,42 @@
* internal api header.
*/

#define V_DEBLOCK 0x01
#define H_DEBLOCK 0x02
#define DERING 0x04
#define LEVEL_FIX 0x08 ///< Brightness & Contrast

#define LUM_V_DEBLOCK V_DEBLOCK // 1
#define LUM_H_DEBLOCK H_DEBLOCK // 2
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32
#define LUM_DERING DERING // 4
#define CHROM_DERING (DERING<<4) // 64
#define LUM_LEVEL_FIX LEVEL_FIX // 8
#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet)

// Experimental vertical filters
#define V_X1_FILTER 0x0200 // 512
#define V_A_DEBLOCK 0x0400

// Experimental horizontal filters
#define H_X1_FILTER 0x2000 // 8192
#define H_A_DEBLOCK 0x4000

/// select between full y range (255-0) or standart one (234-16)
#define FULL_Y_RANGE 0x8000 // 32768

//Deinterlacing Filters
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
#define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet)
#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144
#define MEDIAN_DEINT_FILTER 0x80000 // 524288
#define FFMPEG_DEINT_FILTER 0x400000
#define LOWPASS5_DEINT_FILTER 0x800000

#define TEMP_NOISE_FILTER 0x100000
#define FORCE_QUANT 0x200000

//use if u want a faster postprocessing code
//cant differentiate between chroma & luma filters (both on or both off)
@ -66,8 +66,8 @@

#if 1
static inline int CLIP(int a){
if(a&256) return ((a)>>31)^(-1);
else return a;
}
//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
#elif 0
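The CLIP() helper above only works for values one overflow step around the 0..255 range and relies on the result being stored back into a byte; a minimal standalone check of that behaviour (hypothetical test harness, not part of the patch, assuming arithmetic right shift like the original code does):

#include <assert.h>
#include <stdint.h>

/* Same logic as the CLIP() shown above: valid only for inputs in
 * roughly [-256, 511], i.e. one step of overflow around 0..255. */
static inline int clip_demo(int a)
{
    if (a & 256) return (a >> 31) ^ (-1);
    else         return a;
}

int main(void)
{
    assert(clip_demo(100) == 100);          /* in range: unchanged          */
    assert((uint8_t)clip_demo(300) == 255); /* overflow:  0 ^ -1 = -1 -> 0xFF */
    assert((uint8_t)clip_demo(-5)  == 0);   /* underflow: -1 ^ -1 = 0       */
    return 0;
}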
@ -79,92 +79,92 @@ static inline int CLIP(int a){
* Postprocessng filter.
*/
struct PPFilter{
char *shortName;
char *longName;
int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated
int minLumQuality; ///< minimum quality to turn luminance filtering on
int minChromQuality; ///< minimum quality to turn chrominance filtering on
int mask; ///< Bitmask to turn this filter on
};

/**
* Postprocessng mode.
*/
typedef struct PPMode{
int lumMode; ///< acivates filters for luminance
int chromMode; ///< acivates filters for chrominance
int error; ///< non zero on error

int minAllowedY; ///< for brigtness correction
int maxAllowedY; ///< for brihtness correction
float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture

int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)

int baseDcDiff;
int flatnessThreshold;

int forcedQuant; ///< quantizer if FORCE_QUANT is used
} PPMode;

/**
* postprocess context.
*/
typedef struct PPContext{
uint8_t *tempBlocks; ///<used for the horizontal code

/**
* luma histogram.
* we need 64bit here otherwise we'll going to have a problem
* after watching a black picture for 5 hours
*/
uint64_t *yHistogram;

uint64_t __attribute__((aligned(8))) packedYOffset;
uint64_t __attribute__((aligned(8))) packedYScale;

/** Temporal noise reducing buffers */
uint8_t *tempBlured[3];
int32_t *tempBluredPast[3];

/** Temporary buffers for handling the last row(s) */
uint8_t *tempDst;
uint8_t *tempSrc;

uint8_t *deintTemp;

uint64_t __attribute__((aligned(8))) pQPb;
uint64_t __attribute__((aligned(8))) pQPb2;

uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];

QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
QP_STORE_T *nonBQPTable;
QP_STORE_T *forcedQPTable;

int QP;
int nonBQP;

int frameNum;

int cpuCaps;

int qpStride; ///<size of qp buffers (needed to realloc them if needed)
int stride; ///<size of some buffers (needed to realloc them if needed)

int hChromaSubSample;
int vChromaSubSample;

PPMode ppMode;
} PPContext;

static inline void linecpy(void *dest, void *src, int lines, int stride)
{
if (stride > 0) {
memcpy(dest, src, lines*stride);
} else {
memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
}
}
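A usage sketch of the linecpy() idea above (hypothetical standalone version with explicit byte pointers, not the patch's code): with a negative stride the rows are stored bottom-up, so the copy has to start at the lowest address, which is exactly what the dest+(lines-1)*stride adjustment selects.

#include <stdint.h>
#include <string.h>

/* Copy 'lines' rows of |stride| bytes each; row i starts at base + i*stride,
 * so with stride < 0 the block's lowest address is the start of the last row. */
static void copy_rows(uint8_t *dst_row0, const uint8_t *src_row0, int lines, int stride)
{
    if (stride > 0) {
        memcpy(dst_row0, src_row0, (size_t)lines * (size_t)stride);
    } else {
        memcpy(dst_row0 + (lines - 1) * stride,
               src_row0 + (lines - 1) * stride,
               (size_t)(-lines * stride));
    }
}

/* e.g. for an 8-row, 16-byte-wide bottom-up image:
 *   copy_rows(dst + 7*16, src + 7*16, 8, -16);  copies all 128 bytes */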
@ -64,8 +64,8 @@ void *av_malloc(unsigned int size)
Indeed, we should align it:
on 4 for 386
on 16 for 486
on 32 for 586, PPro - k6-III
on 64 for K7 (maybe for P3 too).
Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here!
*/
@ -76,13 +76,13 @@ void *av_malloc(unsigned int size)
Why not larger? because i didnt see a difference in benchmarks ...
*/
/* benchmarks with p3
memalign(64)+1 3071,3051,3032
memalign(64)+2 3051,3032,3041
memalign(64)+4 2911,2896,2915
memalign(64)+8 2545,2554,2550
memalign(64)+16 2543,2572,2563
memalign(64)+32 2546,2545,2571
memalign(64)+64 2570,2533,2558

btw, malloc seems to do 8 byte alignment by default here
*/
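The benchmark numbers above argue for aligning buffers to the cache-line size (the misaligned variants are ~20% slower on that P3). A small sketch of requesting 64-byte alignment explicitly (assumes a POSIX system with posix_memalign; not the allocator from the patch):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    void *buf = NULL;
    /* 64 bytes matches the cache-line sizes discussed above */
    if (posix_memalign(&buf, 64, 4096) != 0) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }
    printf("buffer at %p, 64-byte aligned: %s\n",
           buf, ((uintptr_t)buf % 64 == 0) ? "yes" : "no");
    free(buf);
    return 0;
}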
@ -54,26 +54,26 @@ typedef struct MJpegContext {
/* JPEG marker codes */
typedef enum {
/* start of frame */
SOF0 = 0xc0, /* baseline */
SOF1 = 0xc1, /* extended sequential, huffman */
SOF2 = 0xc2, /* progressive, huffman */
SOF3 = 0xc3, /* lossless, huffman */

SOF5 = 0xc5, /* differential sequential, huffman */
SOF6 = 0xc6, /* differential progressive, huffman */
SOF7 = 0xc7, /* differential lossless, huffman */
JPG = 0xc8, /* reserved for JPEG extension */
SOF9 = 0xc9, /* extended sequential, arithmetic */
SOF10 = 0xca, /* progressive, arithmetic */
SOF11 = 0xcb, /* lossless, arithmetic */

SOF13 = 0xcd, /* differential sequential, arithmetic */
SOF14 = 0xce, /* differential progressive, arithmetic */
SOF15 = 0xcf, /* differential lossless, arithmetic */

DHT = 0xc4, /* define huffman tables */

DAC = 0xcc, /* define arithmetic-coding conditioning */

/* restart with modulo 8 count "m" */
RST0 = 0xd0,
@ -85,14 +85,14 @@ typedef enum {
RST6 = 0xd6,
RST7 = 0xd7,

SOI = 0xd8, /* start of image */
EOI = 0xd9, /* end of image */
SOS = 0xda, /* start of scan */
DQT = 0xdb, /* define quantization tables */
DNL = 0xdc, /* define number of lines */
DRI = 0xdd, /* define restart interval */
DHP = 0xde, /* define hierarchical progression */
EXP = 0xdf, /* expand reference components */

APP0 = 0xe0,
APP1 = 0xe1,
@ -118,17 +118,17 @@ typedef enum {
JPG4 = 0xf4,
JPG5 = 0xf5,
JPG6 = 0xf6,
SOF48 = 0xf7, ///< JPEG-LS
LSE = 0xf8, ///< JPEG-LS extension parameters
JPG9 = 0xf9,
JPG10 = 0xfa,
JPG11 = 0xfb,
JPG12 = 0xfc,
JPG13 = 0xfd,

COM = 0xfe, /* comment */

TEM = 0x01, /* temporary private use for arithmetic coding */

/* 0x02 -> 0xbf reserved */
} JPEG_MARKER;
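Each code in the enum above appears in the byte stream as a 0xFF prefix followed by the marker value, and most markers carry a 16-bit big-endian segment length that includes the length field itself; standalone markers such as SOI, EOI and RST0..RST7 have no length. A small sketch of that layout (hypothetical helper, not from the patch):

#include <stddef.h>
#include <stdint.h>

/* Write one marker segment header: 0xFF, code, then the 2-byte length
 * (payload + 2) in big-endian order. Returns the bytes written. */
static size_t put_marker_header(uint8_t *p, uint8_t code, uint16_t payload_len)
{
    p[0] = 0xFF;
    p[1] = code;
    if (payload_len == 0)          /* e.g. SOI (0xD8), EOI (0xD9), RSTn */
        return 2;
    p[2] = (uint8_t)((payload_len + 2) >> 8);
    p[3] = (uint8_t)((payload_len + 2) & 0xFF);
    return 4;
}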
@ -583,7 +583,7 @@ void mjpeg_picture_trailer(MpegEncContext *s)
}

static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
uint8_t *huff_size, uint16_t *huff_code)
{
int mant, nbits;

@ -935,10 +935,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx)

if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
{
av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
mjpeg_decode_dht(s);
/* should check for error - but dunno */
}

return 0;
@ -1017,10 +1017,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
while (len >= 65) {
/* only 8 bit precision handled */
if (get_bits(&s->gb, 4) != 0)
{
dprintf("dqt: 16bit precision\n");
return -1;
}
}
index = get_bits(&s->gb, 4);
if (index >= 4)
return -1;
@ -1028,14 +1028,14 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
/* read quant table */
for(i=0;i<64;i++) {
j = s->scantable.permutated[i];
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
}

//XXX FIXME finetune, and perhaps add dc too
s->qscale[index]= FFMAX(
s->quant_matrixes[index][s->scantable.permutated[1]],
s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
len -= 65;
}

@ -1132,7 +1132,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
if (s->quant_index[i] >= 4)
return -1;
dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
s->v_count[i], s->component_id[i], s->quant_index[i]);
}

if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
@ -1151,7 +1151,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
s->org_height != 0 &&
s->height < ((s->org_height * 3) / 4)) {
s->interlaced = 1;
// s->bottom_field = (s->interlace_polarity) ? 1 : 0;
s->bottom_field = 0;
s->avctx->height *= 2;
}
@ -1202,7 +1202,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)

if (len != (8+(3*nb_components)))
{
dprintf("decode_sof0: error, len(%d) mismatch\n", len);
}

return 0;
@ -1214,7 +1214,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
if (code < 0)
{
dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
&s->vlcs[0][dc_index]);
return 0xffff;
}
@ -1247,7 +1247,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
ac_vlc = &s->vlcs[1][ac_index];
i = 1;
for(;;) {
code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);

if (code < 0) {
dprintf("error ac\n");
@ -1452,7 +1452,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){
dprintf("error y=%d x=%d\n", mb_y, mb_x);
return -1;
}
// dprintf("mb: %d %d processed\n", mb_y, mb_x);
ptr = s->picture.data[c] +
(((s->linesize[c] * (v * mb_y + y) * 8) +
(h * mb_x + x) * 8) >> s->avctx->lowres);
@ -1491,29 +1491,29 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
nb_components = get_bits(&s->gb, 8);
if (len != 6+2*nb_components)
{
dprintf("decode_sos: invalid len (%d)\n", len);
return -1;
}
/* XXX: only interleaved scan accepted */
if (nb_components != s->nb_components)
{
dprintf("decode_sos: components(%d) mismatch\n", nb_components);
return -1;
}
vmax = 0;
hmax = 0;
for(i=0;i<nb_components;i++) {
id = get_bits(&s->gb, 8) - 1;
dprintf("component: %d\n", id);
/* find component index */
for(index=0;index<s->nb_components;index++)
if (id == s->component_id[index])
break;
if (index == s->nb_components)
{
dprintf("decode_sos: index(%d) out of components\n", index);
return -1;
}
}

s->comp_index[i] = index;

@ -1524,26 +1524,26 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
s->dc_index[i] = get_bits(&s->gb, 4);
s->ac_index[i] = get_bits(&s->gb, 4);

if (s->dc_index[i] < 0 || s->ac_index[i] < 0 ||
s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
goto out_of_range;
#if 0 //buggy
switch(s->start_code)
{
case SOF0:
if (dc_index[i] > 1 || ac_index[i] > 1)
goto out_of_range;
break;
case SOF1:
case SOF2:
if (dc_index[i] > 3 || ac_index[i] > 3)
goto out_of_range;
break;
case SOF3:
if (dc_index[i] > 3 || ac_index[i] != 0)
goto out_of_range;
break;
}
#endif
}

@ -1605,7 +1605,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
static int mjpeg_decode_dri(MJpegDecodeContext *s)
{
if (get_bits(&s->gb, 16) != 4)
return -1;
s->restart_interval = get_bits(&s->gb, 16);
s->restart_count = 0;
dprintf("restart interval: %d\n", s->restart_interval);
@ -1619,7 +1619,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)

len = get_bits(&s->gb, 16);
if (len < 5)
return -1;
if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
return -1;

@ -1636,35 +1636,35 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
informations, but it's always present in AVID creates files */
if (id == ff_get_fourcc("AVI1"))
{
/* structure:
4bytes AVI1
1bytes polarity
1bytes always zero
4bytes field_size
4bytes field_size_less_padding
*/
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
s->interlace_polarity = get_bits(&s->gb, 8);
#if 0
skip_bits(&s->gb, 8);
skip_bits(&s->gb, 32);
skip_bits(&s->gb, 32);
len -= 10;
#endif
// if (s->interlace_polarity)
// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
goto out;
}

// len -= 2;

if (id == ff_get_fourcc("JFIF"))
{
int t_w, t_h, v1, v2;
skip_bits(&s->gb, 8); /* the trailing zero-byte */
v1= get_bits(&s->gb, 8);
v2= get_bits(&s->gb, 8);
skip_bits(&s->gb, 8);

@ -1678,37 +1678,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
s->avctx->sample_aspect_ratio.den
);

t_w = get_bits(&s->gb, 8);
t_h = get_bits(&s->gb, 8);
if (t_w && t_h)
{
/* skip thumbnail */
if (len-10-(t_w*t_h*3) > 0)
len -= t_w*t_h*3;
}
len -= 10;
goto out;
}

if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
{
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
skip_bits(&s->gb, 16); /* version */
skip_bits(&s->gb, 16); /* flags0 */
skip_bits(&s->gb, 16); /* flags1 */
skip_bits(&s->gb, 8); /* transform */
len -= 7;
goto out;
}

if (id == ff_get_fourcc("LJIF")){
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
skip_bits(&s->gb, 16); /* version ? */
skip_bits(&s->gb, 16); /* unknwon always 0? */
skip_bits(&s->gb, 16); /* unknwon always 0? */
skip_bits(&s->gb, 16); /* unknwon always 0? */
switch( get_bits(&s->gb, 8)){
case 1:
s->rgb= 1;
@ -1728,32 +1728,32 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
/* Apple MJPEG-A */
if ((s->start_code == APP1) && (len > (0x28 - 8)))
{
id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
id = be2me_32(id);
len -= 4;
if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
{
#if 0
skip_bits(&s->gb, 32); /* field size */
skip_bits(&s->gb, 32); /* pad field size */
skip_bits(&s->gb, 32); /* next off */
skip_bits(&s->gb, 32); /* quant off */
skip_bits(&s->gb, 32); /* huff off */
skip_bits(&s->gb, 32); /* image off */
skip_bits(&s->gb, 32); /* scan off */
skip_bits(&s->gb, 32); /* data off */
#endif
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
}
}
}

out:
/* slow but needed for extreme adobe jpegs */
if (len < 0)
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
while(--len > 0)
skip_bits(&s->gb, 8);

return 0;
}
@ -1762,32 +1762,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
{
int len = get_bits(&s->gb, 16);
if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
uint8_t *cbuf = av_malloc(len - 1);
if (cbuf) {
int i;
for (i = 0; i < len - 2; i++)
cbuf[i] = get_bits(&s->gb, 8);
if (i > 0 && cbuf[i-1] == '\n')
cbuf[i-1] = 0;
else
cbuf[i] = 0;

if(s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);

/* buggy avid, it puts EOI only at every 10th frame */
if (!strcmp(cbuf, "AVID"))
{
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
}
else if(!strcmp(cbuf, "CS=ITU601")){
s->cs_itu601= 1;
}

av_free(cbuf);
}
}

return 0;
@ -1830,13 +1830,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
buf_ptr = *pbuf_ptr;
while (buf_ptr < buf_end) {
v = *buf_ptr++;
v2 = *buf_ptr;
if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
val = *buf_ptr++;
goto found;
}
#ifdef DEBUG
skipped++;
#endif
}
val = -1;
@ -1862,74 +1862,74 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
while (buf_ptr < buf_end) {
/* find start next marker */
start_code = find_marker(&buf_ptr, buf_end);
{
/* EOF */
if (start_code < 0) {
goto the_end;
} else {
dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);

if ((buf_end - buf_ptr) > s->buffer_size)
{
av_free(s->buffer);
s->buffer_size = buf_end-buf_ptr;
s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
dprintf("buffer too small, expanding to %d bytes\n",
s->buffer_size);
}

/* unescape buffer of SOS */
if (start_code == SOS)
{
uint8_t *src = buf_ptr;
uint8_t *dst = s->buffer;

while (src<buf_end)
{
uint8_t x = *(src++);

*(dst++) = x;
if (x == 0xff)
{
while(src<buf_end && x == 0xff)
x = *(src++);

if (x >= 0xd0 && x <= 0xd7)
*(dst++) = x;
else if (x)
break;
}
}
init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);

dprintf("escaping removed %d bytes\n",
(buf_end - buf_ptr) - (dst - s->buffer));
}
else
init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);

s->start_code = start_code;
if(s->avctx->debug & FF_DEBUG_STARTCODE){
av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
}

/* process markers */
if (start_code >= 0xd0 && start_code <= 0xd7) {
dprintf("restart marker: %d\n", start_code&0x0f);
/* APP fields */
} else if (start_code >= APP0 && start_code <= APP15) {
mjpeg_decode_app(s);
/* Comment */
} else if (start_code == COM){
mjpeg_decode_com(s);
}

switch(start_code) {
case SOI:
s->restart_interval = 0;
reset_ls_coding_parameters(s, 1);

s->restart_count = 0;
/* nothing to do on SOI */
break;
case DQT:
@ -1944,12 +1944,12 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
case SOF0:
s->lossless=0;
if (mjpeg_decode_sof(s) < 0)
return -1;
break;
case SOF3:
s->lossless=1;
if (mjpeg_decode_sof(s) < 0)
return -1;
break;
case SOF48:
s->lossless=1;
@ -1961,11 +1961,11 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
if (decode_lse(s) < 0)
return -1;
break;
case EOI:
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
break;
eoi_parser:
{
if (s->interlaced) {
s->bottom_field ^= 1;
/* if not bottom field, do not output image yet */
@ -1987,41 +1987,41 @@ eoi_parser:

goto the_end;
}
break;
case SOS:
mjpeg_decode_sos(s);
/* buggy avid puts EOI every 10-20th frame */
/* if restart period is over process EOI */
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
goto eoi_parser;
break;
case DRI:
mjpeg_decode_dri(s);
break;
case SOF1:
case SOF2:
case SOF5:
case SOF6:
case SOF7:
case SOF9:
case SOF10:
case SOF11:
case SOF13:
case SOF14:
case SOF15:
case JPG:
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
break;
// default:
// printf("mjpeg: unsupported marker (%x)\n", start_code);
// break;
}

not_the_end:
/* eof process start code */
buf_ptr += (get_bits_count(&s->gb)+7)/8;
dprintf("marker parser used %d bytes (%d bits)\n",
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
}
}
}
@ -2057,8 +2057,8 @@ read_header:

if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
{
dprintf("not mjpeg-b (bad fourcc)\n");
return 0;
}

field_size = get_bits_long(&hgb, 32); /* field size */
@ -2067,34 +2067,34 @@ read_header:
second_field_offs = get_bits_long(&hgb, 32);
dprintf("second field offs: 0x%x\n", second_field_offs);
if (second_field_offs)
s->interlaced = 1;

dqt_offs = get_bits_long(&hgb, 32);
dprintf("dqt offs: 0x%x\n", dqt_offs);
if (dqt_offs)
{
init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
s->start_code = DQT;
mjpeg_decode_dqt(s);
}

dht_offs = get_bits_long(&hgb, 32);
dprintf("dht offs: 0x%x\n", dht_offs);
if (dht_offs)
{
init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
s->start_code = DHT;
mjpeg_decode_dht(s);
}

sof_offs = get_bits_long(&hgb, 32);
dprintf("sof offs: 0x%x\n", sof_offs);
if (sof_offs)
{
init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
s->start_code = SOF0;
if (mjpeg_decode_sof(s) < 0)
return -1;
}

sos_offs = get_bits_long(&hgb, 32);
@ -2103,22 +2103,22 @@ read_header:
dprintf("sod offs: 0x%x\n", sod_offs);
if (sos_offs)
{
// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
init_get_bits(&s->gb, buf+sos_offs, field_size*8);
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
s->start_code = SOS;
mjpeg_decode_sos(s);
}

if (s->interlaced) {
s->bottom_field ^= 1;
/* if not bottom field, do not output image yet */
if (s->bottom_field && second_field_offs)
{
buf_ptr = buf + second_field_offs;
second_field_offs = 0;
goto read_header;
}
}

//XXX FIXME factorize, this looks very similar to the EOI code
@ -2153,7 +2153,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
int i = 0, j = 0;

if (!avctx->width || !avctx->height)
return -1;

buf_ptr = buf;
buf_end = buf + buf_size;
@ -2161,7 +2161,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
#if 1
recoded = av_mallocz(buf_size + 1024);
if (!recoded)
return -1;

/* SOI */
recoded[j++] = 0xFF;
@ -2187,9 +2187,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx,

for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
{
recoded[j++] = buf[i];
if (buf[i] == 0xff)
recoded[j++] = 0;
}

/* EOI */
@ -2229,33 +2229,33 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
if (avctx->get_buffer(avctx, &s->picture) < 0)
{
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
}

s->picture.pict_type = I_TYPE;
s->picture.key_frame = 1;

for (i = 0; i < 3; i++)
s->linesize[i] = s->picture.linesize[i] << s->interlaced;

/* DQT */
for (i = 0; i < 64; i++)
{
j = s->scantable.permutated[i];
s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
}
s->qscale[0] = FFMAX(
s->quant_matrixes[0][s->scantable.permutated[1]],
s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;

for (i = 0; i < 64; i++)
{
j = s->scantable.permutated[i];
s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
}
s->qscale[1] = FFMAX(
s->quant_matrixes[1][s->scantable.permutated[1]],
s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;

/* DHT */

@ -2282,7 +2282,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
s->ac_index[2] = 1;

for (i = 0; i < 3; i++)
s->last_dc[i] = 1024;

s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
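The SOS unescaping loop in mjpeg_decode_frame() above strips the 0x00 stuffing byte that encoders insert after a literal 0xFF in entropy-coded data, keeps restart markers (0xD0..0xD7), and stops at any other marker. The same idea as a standalone sketch (hypothetical buffer API, not the decoder's own code):

#include <stddef.h>
#include <stdint.h>

/* Copy JPEG scan data into dst, dropping 0xFF-stuffing, keeping RST markers,
 * and stopping at any other marker. Returns the number of bytes written. */
static size_t unstuff_scan(const uint8_t *src, size_t len, uint8_t *dst)
{
    size_t i = 0, o = 0;
    while (i < len) {
        uint8_t x = src[i++];
        dst[o++] = x;
        if (x == 0xFF) {
            uint8_t next = 0xFF;
            while (i < len && next == 0xFF)   /* skip 0xFF fill bytes */
                next = src[i++];
            if (next >= 0xD0 && next <= 0xD7)
                dst[o++] = next;              /* restart marker: keep it */
            else if (next != 0x00)
                break;                        /* real marker: end of scan */
            /* next == 0x00: stuffing byte, drop it */
        }
    }
    return o;
}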
@ -61,7 +61,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l
/* put block, width 16 pixel, height 8/16 */

static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -78,7 +78,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -95,7 +95,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -112,7 +112,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -131,7 +131,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* put block, width 8 pixel, height 4/8/16 */

static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -152,7 +152,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -173,7 +173,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -194,7 +194,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -217,7 +217,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* average block, width 16 pixel, height 8/16 */

static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -234,7 +234,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -251,7 +251,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -268,7 +268,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 8:
@ -287,7 +287,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* average block, width 8 pixel, height 4/8/16 */

static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -308,7 +308,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -329,7 +329,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -350,7 +350,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
}

static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
switch (height) {
case 4:
@ -450,7 +450,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
void MPV_common_init_mlib(MpegEncContext *s)
{
if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
s->dsp.fdct = ff_fdct_mlib;
}

if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
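The _x2/_y2/_xy2 wrappers above hand half-pel interpolation off to mediaLib; in plain C the x2 ("halfway between two horizontal neighbours") case of a put routine reduces to a rounded average, roughly as follows (hypothetical reference version for illustration, not the mlib call and not accounting for the no-rounding variants):

#include <stdint.h>

/* Reference behaviour of a put_pixels8_x2-style routine: each output pixel
 * is the rounded average of two horizontally adjacent source pixels. */
static void put_pixels8_x2_c_ref(uint8_t *dst, const uint8_t *src,
                                 int stride, int height)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < 8; x++)
            dst[x] = (uint8_t)((src[x] + src[x + 1] + 1) >> 1);
        dst += stride;
        src += stride;
    }
}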
@ -45,7 +45,7 @@
#define P_MV1 P[9]

static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h);

@ -293,25 +293,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean)

s = 0;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
s += ABS(pix[0]-mean);
s += ABS(pix[1]-mean);
s += ABS(pix[2]-mean);
s += ABS(pix[3]-mean);
s += ABS(pix[4]-mean);
s += ABS(pix[5]-mean);
s += ABS(pix[6]-mean);
s += ABS(pix[7]-mean);
pix += 8;
}
pix += line_size - 16;
}
return s;
}
#endif

static inline void no_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr)
{
*mx_ptr = 16 * s->mb_x;
*my_ptr = 16 * s->mb_y;
@ -328,35 +328,35 @@ static int full_motion_search(MpegEncContext * s,

xx = 16 * s->mb_x;
yy = 16 * s->mb_y;
x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
if (x1 < xmin)
x1 = xmin;
x2 = xx + range - 1;
if (x2 > xmax)
x2 = xmax;
y1 = yy - range + 1;
if (y1 < ymin)
y1 = ymin;
y2 = yy + range - 1;
if (y2 > ymax)
y2 = ymax;
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
dmin = 0x7fffffff;
mx = 0;
my = 0;
for (y = y1; y <= y2; y++) {
for (x = x1; x <= x2; x++) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
s->linesize, 16);
if (d < dmin ||
(d == dmin &&
(abs(x - xx) + abs(y - yy)) <
(abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
}

*mx_ptr = mx;
@ -364,8 +364,8 @@ static int full_motion_search(MpegEncContext * s,

#if 0
if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
}
#endif
return dmin;
@ -386,22 +386,22 @@ static int log_motion_search(MpegEncContext * s,
/* Left limit */
x1 = xx - range;
if (x1 < xmin)
x1 = xmin;

/* Right limit */
x2 = xx + range;
if (x2 > xmax)
x2 = xmax;

/* Upper limit */
y1 = yy - range;
if (y1 < ymin)
y1 = ymin;

/* Lower limit */
y2 = yy + range;
if (y2 > ymax)
y2 = ymax;

pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
dmin = 0x7fffffff;
@ -409,34 +409,34 @@
my = 0;

do {
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
}

range = range >> 1;

x1 = mx - range;
if (x1 < xmin)
x1 = xmin;

x2 = mx + range;
if (x2 > xmax)
x2 = xmax;

y1 = my - range;
if (y1 < ymin)
y1 = ymin;

y2 = my + range;
if (y2 > ymax)
y2 = ymax;

} while (range >= 1);

@ -462,22 +462,22 @@ static int phods_motion_search(MpegEncContext * s,
/* Left limit */
x1 = xx - range;
if (x1 < xmin)
x1 = xmin;

/* Right limit */
x2 = xx + range;
if (x2 > xmax)
x2 = xmax;

/* Upper limit */
y1 = yy - range;
if (y1 < ymin)
y1 = ymin;

/* Lower limit */
y2 = yy + range;
if (y2 > ymax)
y2 = ymax;

pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
mx = 0;
@ -489,43 +489,43 @@ static int phods_motion_search(MpegEncContext * s,
dminx = 0x7fffffff;
dminy = 0x7fffffff;

lastx = x;
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
}
}

x = lastx;
for (y = y1; y <= y2; y += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
||||
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||
dminy = d;
|
||||
my = y;
|
||||
}
|
||||
}
|
||||
x = lastx;
|
||||
for (y = y1; y <= y2; y += range) {
|
||||
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
|
||||
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||
dminy = d;
|
||||
my = y;
|
||||
}
|
||||
}
|
||||
|
||||
range = range >> 1;
|
||||
range = range >> 1;
|
||||
|
||||
x = mx;
|
||||
y = my;
|
||||
x1 = mx - range;
|
||||
if (x1 < xmin)
|
||||
x1 = xmin;
|
||||
x = mx;
|
||||
y = my;
|
||||
x1 = mx - range;
|
||||
if (x1 < xmin)
|
||||
x1 = xmin;
|
||||
|
||||
x2 = mx + range;
|
||||
if (x2 > xmax)
|
||||
x2 = xmax;
|
||||
x2 = mx + range;
|
||||
if (x2 > xmax)
|
||||
x2 = xmax;
|
||||
|
||||
y1 = my - range;
|
||||
if (y1 < ymin)
|
||||
y1 = ymin;
|
||||
y1 = my - range;
|
||||
if (y1 < ymin)
|
||||
y1 = ymin;
|
||||
|
||||
y2 = my + range;
|
||||
if (y2 > ymax)
|
||||
y2 = ymax;
|
||||
y2 = my + range;
|
||||
if (y2 > ymax)
|
||||
y2 = ymax;
|
||||
|
||||
} while (range >= 1);
|
||||
|
||||
@ -550,7 +550,7 @@ static int phods_motion_search(MpegEncContext * s,
|
||||
}
|
||||
|
||||
static inline int sad_hpel_motion_search(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int src_index, int ref_index,
|
||||
int size, int h)
|
||||
{
|
||||
@ -1190,24 +1190,24 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
||||
switch(s->me_method) {
|
||||
case ME_ZERO:
|
||||
default:
|
||||
no_motion_search(s, &mx, &my);
|
||||
no_motion_search(s, &mx, &my);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
dmin = 0;
|
||||
break;
|
||||
#if 0
|
||||
case ME_FULL:
|
||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
case ME_LOG:
|
||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
case ME_PHODS:
|
||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
@ -1264,7 +1264,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
||||
|
||||
#if 0
|
||||
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
|
||||
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
|
||||
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
|
||||
#endif
|
||||
if(mb_type){
|
||||
if (vard <= 64 || vard < varc)
|
||||
@ -1479,24 +1479,24 @@ static int ff_estimate_motion_b(MpegEncContext * s,
|
||||
switch(s->me_method) {
|
||||
case ME_ZERO:
|
||||
default:
|
||||
no_motion_search(s, &mx, &my);
|
||||
no_motion_search(s, &mx, &my);
|
||||
dmin = 0;
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
#if 0
|
||||
case ME_FULL:
|
||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
case ME_LOG:
|
||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
case ME_PHODS:
|
||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
|
||||
mx-= mb_x*16;
|
||||
my-= mb_y*16;
|
||||
break;
|
||||
|
@ -45,7 +45,7 @@
|
||||
|
||||
#if 0
|
||||
static int hpel_motion_search)(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
uint8_t *ref_data[3],
|
||||
int size)
|
||||
{
|
||||
@ -113,7 +113,7 @@ static int hpel_motion_search)(MpegEncContext * s,
|
||||
|
||||
#else
|
||||
static int hpel_motion_search(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int src_index, int ref_index,
|
||||
int size, int h)
|
||||
{
|
||||
@ -271,7 +271,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
|
||||
}
|
||||
|
||||
static int qpel_motion_search(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int src_index, int ref_index,
|
||||
int size, int h)
|
||||
{
|
||||
@ -1005,7 +1005,7 @@ static int epzs_motion_search4(MpegEncContext * s,
|
||||
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
||||
/* first line */
|
||||
if (s->first_slice_line) {
|
||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
||||
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
||||
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
||||
@ -1067,7 +1067,7 @@ static int epzs_motion_search2(MpegEncContext * s,
|
||||
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
||||
/* first line */
|
||||
if (s->first_slice_line) {
|
||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
|
||||
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
|
||||
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
||||
|
@ -28,51 +28,51 @@
|
||||
|
||||
#define BUFFER_SIZE (2*MPA_FRAME_SIZE)
|
||||
typedef struct Mp3AudioContext {
|
||||
lame_global_flags *gfp;
|
||||
int stereo;
|
||||
lame_global_flags *gfp;
|
||||
int stereo;
|
||||
uint8_t buffer[BUFFER_SIZE];
|
||||
int buffer_index;
|
||||
} Mp3AudioContext;
|
||||
|
||||
static int MP3lame_encode_init(AVCodecContext *avctx)
|
||||
{
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
|
||||
if (avctx->channels > 2)
|
||||
return -1;
|
||||
if (avctx->channels > 2)
|
||||
return -1;
|
||||
|
||||
s->stereo = avctx->channels > 1 ? 1 : 0;
|
||||
s->stereo = avctx->channels > 1 ? 1 : 0;
|
||||
|
||||
if ((s->gfp = lame_init()) == NULL)
|
||||
goto err;
|
||||
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
|
||||
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
|
||||
lame_set_num_channels(s->gfp, avctx->channels);
|
||||
/* lame 3.91 dies on quality != 5 */
|
||||
lame_set_quality(s->gfp, 5);
|
||||
/* lame 3.91 doesn't work in mono */
|
||||
lame_set_mode(s->gfp, JOINT_STEREO);
|
||||
lame_set_brate(s->gfp, avctx->bit_rate/1000);
|
||||
if ((s->gfp = lame_init()) == NULL)
|
||||
goto err;
|
||||
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
|
||||
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
|
||||
lame_set_num_channels(s->gfp, avctx->channels);
|
||||
/* lame 3.91 dies on quality != 5 */
|
||||
lame_set_quality(s->gfp, 5);
|
||||
/* lame 3.91 doesn't work in mono */
|
||||
lame_set_mode(s->gfp, JOINT_STEREO);
|
||||
lame_set_brate(s->gfp, avctx->bit_rate/1000);
|
||||
if(avctx->flags & CODEC_FLAG_QSCALE) {
|
||||
lame_set_brate(s->gfp, 0);
|
||||
lame_set_VBR(s->gfp, vbr_default);
|
||||
lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
|
||||
}
|
||||
lame_set_bWriteVbrTag(s->gfp,0);
|
||||
if (lame_init_params(s->gfp) < 0)
|
||||
goto err_close;
|
||||
if (lame_init_params(s->gfp) < 0)
|
||||
goto err_close;
|
||||
|
||||
avctx->frame_size = lame_get_framesize(s->gfp);
|
||||
avctx->frame_size = lame_get_framesize(s->gfp);
|
||||
|
||||
avctx->coded_frame= avcodec_alloc_frame();
|
||||
avctx->coded_frame->key_frame= 1;
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
err_close:
|
||||
lame_close(s->gfp);
|
||||
lame_close(s->gfp);
|
||||
err:
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static const int sSampleRates[3] = {
|
||||
@ -136,11 +136,11 @@ static int mp3len(void *data, int *samplesPerFrame, int *sampleRate)
|
||||
int MP3lame_encode_frame(AVCodecContext *avctx,
|
||||
unsigned char *frame, int buf_size, void *data)
|
||||
{
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
int len;
|
||||
int lame_result;
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
int len;
|
||||
int lame_result;
|
||||
|
||||
/* lame 3.91 dies on '1-channel interleaved' data */
|
||||
/* lame 3.91 dies on '1-channel interleaved' data */
|
||||
|
||||
if(data){
|
||||
if (s->stereo) {
|
||||
@ -198,12 +198,12 @@ int MP3lame_encode_frame(AVCodecContext *avctx,
|
||||
|
||||
int MP3lame_encode_close(AVCodecContext *avctx)
|
||||
{
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
Mp3AudioContext *s = avctx->priv_data;
|
||||
|
||||
av_freep(&avctx->coded_frame);
|
||||
|
||||
lame_close(s->gfp);
|
||||
return 0;
|
||||
lame_close(s->gfp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -35,14 +35,14 @@
|
||||
|
||||
|
||||
/* Start codes. */
|
||||
#define SEQ_END_CODE 0x000001b7
|
||||
#define SEQ_START_CODE 0x000001b3
|
||||
#define GOP_START_CODE 0x000001b8
|
||||
#define PICTURE_START_CODE 0x00000100
|
||||
#define SLICE_MIN_START_CODE 0x00000101
|
||||
#define SLICE_MAX_START_CODE 0x000001af
|
||||
#define EXT_START_CODE 0x000001b5
|
||||
#define USER_START_CODE 0x000001b2
|
||||
#define SEQ_END_CODE 0x000001b7
|
||||
#define SEQ_START_CODE 0x000001b3
|
||||
#define GOP_START_CODE 0x000001b8
|
||||
#define PICTURE_START_CODE 0x00000100
|
||||
#define SLICE_MIN_START_CODE 0x00000101
|
||||
#define SLICE_MAX_START_CODE 0x000001af
|
||||
#define EXT_START_CODE 0x000001b5
|
||||
#define USER_START_CODE 0x000001b2
|
||||
|
||||
#define DC_VLC_BITS 9
|
||||
#define MV_VLC_BITS 9
|
||||
@ -89,7 +89,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1};
|
||||
const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
|
||||
PIX_FMT_XVMC_MPEG2_IDCT,
|
||||
PIX_FMT_XVMC_MPEG2_MC,
|
||||
-1};
|
||||
-1};
|
||||
#ifdef CONFIG_ENCODERS
|
||||
static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
|
||||
static uint8_t fcode_tab[MAX_MV*2+1];
|
||||
@ -166,7 +166,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni
|
||||
code= rl->index_run[0][run] + alevel - 1;
|
||||
|
||||
if (code < 111 /* rl->n */) {
|
||||
/* store the vlc & sign at once */
|
||||
/* store the vlc & sign at once */
|
||||
len= mpeg1_vlc[code][1]+1;
|
||||
bits= (mpeg1_vlc[code][0]<<1) + sign;
|
||||
} else {
|
||||
@ -764,38 +764,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
|
||||
if(!done){
|
||||
int f_code;
|
||||
int mv;
|
||||
int i;
|
||||
int i;
|
||||
|
||||
done=1;
|
||||
init_rl(&rl_mpeg1, 1);
|
||||
|
||||
for(i=0; i<64; i++)
|
||||
{
|
||||
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
|
||||
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
|
||||
}
|
||||
for(i=0; i<64; i++)
|
||||
{
|
||||
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
|
||||
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
|
||||
}
|
||||
|
||||
init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len);
|
||||
|
||||
/* build unified dc encoding tables */
|
||||
for(i=-255; i<256; i++)
|
||||
{
|
||||
int adiff, index;
|
||||
int bits, code;
|
||||
int diff=i;
|
||||
/* build unified dc encoding tables */
|
||||
for(i=-255; i<256; i++)
|
||||
{
|
||||
int adiff, index;
|
||||
int bits, code;
|
||||
int diff=i;
|
||||
|
||||
adiff = ABS(diff);
|
||||
if(diff<0) diff--;
|
||||
index = av_log2(2*adiff);
|
||||
adiff = ABS(diff);
|
||||
if(diff<0) diff--;
|
||||
index = av_log2(2*adiff);
|
||||
|
||||
bits= vlc_dc_lum_bits[index] + index;
|
||||
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
|
||||
bits= vlc_dc_lum_bits[index] + index;
|
||||
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
|
||||
|
||||
bits= vlc_dc_chroma_bits[index] + index;
|
||||
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
|
||||
}
|
||||
bits= vlc_dc_chroma_bits[index] + index;
|
||||
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
|
||||
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
|
||||
}
|
||||
|
||||
mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
|
||||
|
||||
@ -873,14 +873,14 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component)
|
||||
}else{
|
||||
if (component == 0) {
|
||||
put_bits(
|
||||
&s->pb,
|
||||
mpeg1_lum_dc_uni[diff+255]&0xFF,
|
||||
mpeg1_lum_dc_uni[diff+255]>>8);
|
||||
&s->pb,
|
||||
mpeg1_lum_dc_uni[diff+255]&0xFF,
|
||||
mpeg1_lum_dc_uni[diff+255]>>8);
|
||||
} else {
|
||||
put_bits(
|
||||
&s->pb,
|
||||
mpeg1_chr_dc_uni[diff+255]&0xFF,
|
||||
mpeg1_chr_dc_uni[diff+255]>>8);
|
||||
mpeg1_chr_dc_uni[diff+255]&0xFF,
|
||||
mpeg1_chr_dc_uni[diff+255]>>8);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -946,10 +946,10 @@ static void mpeg1_encode_block(MpegEncContext *s,
|
||||
// code = get_rl_index(rl, 0, run, alevel);
|
||||
if (alevel <= mpeg1_max_level[0][run]){
|
||||
code= mpeg1_index_run[0][run] + alevel - 1;
|
||||
/* store the vlc & sign at once */
|
||||
/* store the vlc & sign at once */
|
||||
put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign);
|
||||
} else {
|
||||
/* escape seems to be pretty rare <5% so i dont optimize it */
|
||||
/* escape seems to be pretty rare <5% so i dont optimize it */
|
||||
put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]);
|
||||
/* escape: only clip in this case */
|
||||
put_bits(&s->pb, 6, run);
|
||||
@ -1376,8 +1376,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
|
||||
return -1;
|
||||
}
|
||||
if(mb_block_count > 6){
|
||||
cbp<<= mb_block_count-6;
|
||||
cbp |= get_bits(&s->gb, mb_block_count-6);
|
||||
cbp<<= mb_block_count-6;
|
||||
cbp |= get_bits(&s->gb, mb_block_count-6);
|
||||
}
|
||||
|
||||
#ifdef HAVE_XVMC
|
||||
@ -2074,7 +2074,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
||||
uint8_t old_permutation[64];
|
||||
|
||||
if (
|
||||
(s1->mpeg_enc_ctx_allocated == 0)||
|
||||
(s1->mpeg_enc_ctx_allocated == 0)||
|
||||
avctx->coded_width != s->width ||
|
||||
avctx->coded_height != s->height||
|
||||
s1->save_aspect_info != s->aspect_ratio_info||
|
||||
@ -2088,8 +2088,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
||||
s->parse_context= pc;
|
||||
}
|
||||
|
||||
if( (s->width == 0 )||(s->height == 0))
|
||||
return -2;
|
||||
if( (s->width == 0 )||(s->height == 0))
|
||||
return -2;
|
||||
|
||||
avcodec_set_dimensions(avctx, s->width, s->height);
|
||||
avctx->bit_rate = s->bit_rate;
|
||||
@ -2129,7 +2129,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
|
||||
mpeg2_aspect[s->aspect_ratio_info],
|
||||
(AVRational){s1->pan_scan.width, s1->pan_scan.height}
|
||||
);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
s->avctx->sample_aspect_ratio=
|
||||
mpeg2_aspect[s->aspect_ratio_info];
|
||||
@ -2312,16 +2312,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
|
||||
nofco = 1;
|
||||
if(s->progressive_sequence){
|
||||
if(s->repeat_first_field){
|
||||
nofco++;
|
||||
if(s->top_field_first)
|
||||
nofco++;
|
||||
}
|
||||
nofco++;
|
||||
if(s->top_field_first)
|
||||
nofco++;
|
||||
}
|
||||
}else{
|
||||
if(s->picture_structure == PICT_FRAME){
|
||||
nofco++;
|
||||
if(s->repeat_first_field)
|
||||
nofco++;
|
||||
}
|
||||
if(s->repeat_first_field)
|
||||
nofco++;
|
||||
}
|
||||
}
|
||||
for(i=0; i<nofco; i++){
|
||||
s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
|
||||
@ -2985,8 +2985,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
|
||||
|
||||
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
|
||||
av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
|
||||
time_code_hours, time_code_minutes, time_code_seconds,
|
||||
time_code_pictures, broken_link);
|
||||
time_code_hours, time_code_minutes, time_code_seconds,
|
||||
time_code_pictures, broken_link);
|
||||
}
|
||||
/**
|
||||
* finds the end of the current frame in the bitstream.
|
||||
@ -3044,13 +3044,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
|
||||
dprintf("fill_buffer\n");
|
||||
|
||||
if (buf_size == 0) {
|
||||
/* special case for last picture */
|
||||
if (s2->low_delay==0 && s2->next_picture_ptr) {
|
||||
*picture= *(AVFrame*)s2->next_picture_ptr;
|
||||
s2->next_picture_ptr= NULL;
|
||||
/* special case for last picture */
|
||||
if (s2->low_delay==0 && s2->next_picture_ptr) {
|
||||
*picture= *(AVFrame*)s2->next_picture_ptr;
|
||||
s2->next_picture_ptr= NULL;
|
||||
|
||||
*data_size = sizeof(AVFrame);
|
||||
}
|
||||
*data_size = sizeof(AVFrame);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3111,13 +3111,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
|
||||
switch(start_code) {
|
||||
case SEQ_START_CODE:
|
||||
mpeg1_decode_sequence(avctx, buf_ptr,
|
||||
input_size);
|
||||
input_size);
|
||||
break;
|
||||
|
||||
case PICTURE_START_CODE:
|
||||
/* we have a complete image : we try to decompress it */
|
||||
mpeg1_decode_picture(avctx,
|
||||
buf_ptr, input_size);
|
||||
buf_ptr, input_size);
|
||||
break;
|
||||
case EXT_START_CODE:
|
||||
mpeg_decode_extension(avctx,
|
||||
|
@ -4,14 +4,14 @@
|
||||
*/
|
||||
|
||||
const int16_t ff_mpeg1_default_intra_matrix[64] = {
|
||||
8, 16, 19, 22, 26, 27, 29, 34,
|
||||
16, 16, 22, 24, 27, 29, 34, 37,
|
||||
19, 22, 26, 27, 29, 34, 34, 38,
|
||||
22, 22, 26, 27, 29, 34, 37, 40,
|
||||
22, 26, 27, 29, 32, 35, 40, 48,
|
||||
26, 27, 29, 32, 35, 40, 48, 58,
|
||||
26, 27, 29, 34, 38, 46, 56, 69,
|
||||
27, 29, 35, 38, 46, 56, 69, 83
|
||||
8, 16, 19, 22, 26, 27, 29, 34,
|
||||
16, 16, 22, 24, 27, 29, 34, 37,
|
||||
19, 22, 26, 27, 29, 34, 34, 38,
|
||||
22, 22, 26, 27, 29, 34, 37, 40,
|
||||
22, 26, 27, 29, 32, 35, 40, 48,
|
||||
26, 27, 29, 32, 35, 40, 48, 58,
|
||||
26, 27, 29, 34, 38, 46, 56, 69,
|
||||
27, 29, 35, 38, 46, 56, 69, 83
|
||||
};
|
||||
|
||||
const int16_t ff_mpeg1_default_non_intra_matrix[64] = {
|
||||
|
@ -748,7 +748,7 @@ static void encode_frame(MpegAudioContext *s,
|
||||
}
|
||||
|
||||
static int MPA_encode_frame(AVCodecContext *avctx,
|
||||
unsigned char *frame, int buf_size, void *data)
|
||||
unsigned char *frame, int buf_size, void *data)
|
||||
{
|
||||
MpegAudioContext *s = avctx->priv_data;
|
||||
short *samples = data;
|
||||
|
@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
|
||||
int mpa_decode_header(AVCodecContext *avctx, uint32_t head);
|
||||
void ff_mpa_synth_init(MPA_INT *window);
|
||||
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
||||
MPA_INT *window, int *dither_state,
|
||||
MPA_INT *window, int *dither_state,
|
||||
OUT_INT *samples, int incr,
|
||||
int32_t sb_samples[SBLIMIT]);
|
||||
|
||||
|
@ -64,7 +64,7 @@ static always_inline int MULH(int a, int b){
|
||||
struct GranuleDef;
|
||||
|
||||
typedef struct MPADecodeContext {
|
||||
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
||||
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
||||
int inbuf_index;
|
||||
uint8_t *inbuf_ptr, *inbuf;
|
||||
int frame_size;
|
||||
@ -340,13 +340,13 @@ static int decode_init(AVCodecContext * avctx)
|
||||
scale_factor_mult[i][2]);
|
||||
}
|
||||
|
||||
ff_mpa_synth_init(window);
|
||||
ff_mpa_synth_init(window);
|
||||
|
||||
/* huffman decode tables */
|
||||
huff_code_table[0] = NULL;
|
||||
for(i=1;i<16;i++) {
|
||||
const HuffTable *h = &mpa_huff_tables[i];
|
||||
int xsize, x, y;
|
||||
int xsize, x, y;
|
||||
unsigned int n;
|
||||
uint8_t *code_table;
|
||||
|
||||
@ -378,11 +378,11 @@ static int decode_init(AVCodecContext * avctx)
|
||||
band_index_long[i][22] = k;
|
||||
}
|
||||
|
||||
/* compute n ^ (4/3) and store it in mantissa/exp format */
|
||||
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
|
||||
/* compute n ^ (4/3) and store it in mantissa/exp format */
|
||||
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
|
||||
if(!table_4_3_exp)
|
||||
return -1;
|
||||
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
|
||||
return -1;
|
||||
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
|
||||
if(!table_4_3_value)
|
||||
return -1;
|
||||
|
||||
@ -844,7 +844,7 @@ void ff_mpa_synth_init(MPA_INT *window)
|
||||
32 samples. */
|
||||
/* XXX: optimize by avoiding ring buffer usage */
|
||||
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
|
||||
MPA_INT *window, int *dither_state,
|
||||
MPA_INT *window, int *dither_state,
|
||||
OUT_INT *samples, int incr,
|
||||
int32_t sb_samples[SBLIMIT])
|
||||
{
|
||||
@ -2440,8 +2440,8 @@ static int mp_decode_frame(MPADecodeContext *s,
|
||||
samples_ptr = samples + ch;
|
||||
for(i=0;i<nb_frames;i++) {
|
||||
ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
|
||||
window, &s->dither_state,
|
||||
samples_ptr, s->nb_channels,
|
||||
window, &s->dither_state,
|
||||
samples_ptr, s->nb_channels,
|
||||
s->sb_samples[ch][i]);
|
||||
samples_ptr += 32 * s->nb_channels;
|
||||
}
|
||||
@ -2453,8 +2453,8 @@ static int mp_decode_frame(MPADecodeContext *s,
|
||||
}
|
||||
|
||||
static int decode_frame(AVCodecContext * avctx,
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
{
|
||||
MPADecodeContext *s = avctx->priv_data;
|
||||
uint32_t header;
|
||||
@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx,
|
||||
|
||||
buf_ptr = buf;
|
||||
while (buf_size > 0) {
|
||||
len = s->inbuf_ptr - s->inbuf;
|
||||
if (s->frame_size == 0) {
|
||||
len = s->inbuf_ptr - s->inbuf;
|
||||
if (s->frame_size == 0) {
|
||||
/* special case for next header for first frame in free
|
||||
format case (XXX: find a simpler method) */
|
||||
if (s->free_format_next_header != 0) {
|
||||
@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx,
|
||||
s->free_format_next_header = 0;
|
||||
goto got_header;
|
||||
}
|
||||
/* no header seen : find one. We need at least HEADER_SIZE
|
||||
/* no header seen : find one. We need at least HEADER_SIZE
|
||||
bytes to parse it */
|
||||
len = HEADER_SIZE - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
if (len > 0) {
|
||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||
buf_ptr += len;
|
||||
buf_size -= len;
|
||||
s->inbuf_ptr += len;
|
||||
}
|
||||
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
|
||||
len = HEADER_SIZE - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
if (len > 0) {
|
||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||
buf_ptr += len;
|
||||
buf_size -= len;
|
||||
s->inbuf_ptr += len;
|
||||
}
|
||||
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
|
||||
got_header:
|
||||
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
||||
(s->inbuf[2] << 8) | s->inbuf[3];
|
||||
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
|
||||
(s->inbuf[2] << 8) | s->inbuf[3];
|
||||
|
||||
if (ff_mpa_check_header(header) < 0) {
|
||||
/* no sync found : move by one byte (inefficient, but simple!) */
|
||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||
s->inbuf_ptr--;
|
||||
if (ff_mpa_check_header(header) < 0) {
|
||||
/* no sync found : move by one byte (inefficient, but simple!) */
|
||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||
s->inbuf_ptr--;
|
||||
dprintf("skip %x\n", header);
|
||||
/* reset free format frame size to give a chance
|
||||
to get a new bitrate */
|
||||
s->free_format_frame_size = 0;
|
||||
} else {
|
||||
if (decode_header(s, header) == 1) {
|
||||
} else {
|
||||
if (decode_header(s, header) == 1) {
|
||||
/* free format: prepare to compute frame size */
|
||||
s->frame_size = -1;
|
||||
s->frame_size = -1;
|
||||
}
|
||||
/* update codec info */
|
||||
avctx->sample_rate = s->sample_rate;
|
||||
@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx,
|
||||
avctx->frame_size = 1152;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s->frame_size == -1) {
|
||||
/* free format : find next sync to compute frame size */
|
||||
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
len = MPA_MAX_CODED_FRAME_SIZE - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
if (len == 0) {
|
||||
/* frame too long: resync */
|
||||
/* frame too long: resync */
|
||||
s->frame_size = 0;
|
||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||
s->inbuf_ptr--;
|
||||
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
|
||||
s->inbuf_ptr--;
|
||||
} else {
|
||||
uint8_t *p, *pend;
|
||||
uint32_t header1;
|
||||
@ -2580,17 +2580,17 @@ static int decode_frame(AVCodecContext * avctx,
|
||||
s->inbuf_ptr += len;
|
||||
buf_size -= len;
|
||||
}
|
||||
} else if (len < s->frame_size) {
|
||||
} else if (len < s->frame_size) {
|
||||
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
|
||||
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
|
||||
len = s->frame_size - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||
buf_ptr += len;
|
||||
s->inbuf_ptr += len;
|
||||
buf_size -= len;
|
||||
}
|
||||
len = s->frame_size - len;
|
||||
if (len > buf_size)
|
||||
len = buf_size;
|
||||
memcpy(s->inbuf_ptr, buf_ptr, len);
|
||||
buf_ptr += len;
|
||||
s->inbuf_ptr += len;
|
||||
buf_size -= len;
|
||||
}
|
||||
next_data:
|
||||
if (s->frame_size > 0 &&
|
||||
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
|
||||
@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx,
|
||||
} else {
|
||||
out_size = mp_decode_frame(s, out_samples);
|
||||
}
|
||||
s->inbuf_ptr = s->inbuf;
|
||||
s->frame_size = 0;
|
||||
s->inbuf_ptr = s->inbuf;
|
||||
s->frame_size = 0;
|
||||
if(out_size>=0)
|
||||
*data_size = out_size;
|
||||
*data_size = out_size;
|
||||
else
|
||||
av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return buf_ptr - buf;
|
||||
}
|
||||
|
||||
|
||||
static int decode_frame_adu(AVCodecContext * avctx,
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
{
|
||||
MPADecodeContext *s = avctx->priv_data;
|
||||
uint32_t header;
|
||||
@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx)
|
||||
|
||||
|
||||
static int decode_frame_mp3on4(AVCodecContext * avctx,
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
void *data, int *data_size,
|
||||
uint8_t * buf, int buf_size)
|
||||
{
|
||||
MP3On4DecodeContext *s = avctx->priv_data;
|
||||
MPADecodeContext *m;
|
||||
|
@ -354,7 +354,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
|
||||
r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
|
||||
|
||||
if(r<0 || !pic->age || !pic->type || !pic->data[0]){
|
||||
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
|
||||
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -913,7 +913,7 @@ int MPV_encode_init(AVCodecContext *avctx)
|
||||
s->width = avctx->width;
|
||||
s->height = avctx->height;
|
||||
if(avctx->gop_size > 600){
|
||||
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
|
||||
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
|
||||
avctx->gop_size=600;
|
||||
}
|
||||
s->gop_size = avctx->gop_size;
|
||||
@ -1120,7 +1120,7 @@ int MPV_encode_init(AVCodecContext *avctx)
|
||||
s->out_format = FMT_MJPEG;
|
||||
s->intra_only = 1; /* force intra only for jpeg */
|
||||
s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
|
||||
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
|
||||
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
|
||||
s->mjpeg_vsample[0] = 1<<chroma_v_shift;
|
||||
s->mjpeg_vsample[1] = 1;
|
||||
s->mjpeg_vsample[2] = 1;
|
||||
@ -1143,24 +1143,24 @@ int MPV_encode_init(AVCodecContext *avctx)
|
||||
return -1;
|
||||
}
|
||||
s->out_format = FMT_H263;
|
||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||
avctx->delay=0;
|
||||
s->low_delay=1;
|
||||
break;
|
||||
case CODEC_ID_H263P:
|
||||
s->out_format = FMT_H263;
|
||||
s->h263_plus = 1;
|
||||
/* Fx */
|
||||
/* Fx */
|
||||
s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
|
||||
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
|
||||
s->modified_quant= s->h263_aic;
|
||||
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
|
||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
|
||||
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
|
||||
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
|
||||
s->modified_quant= s->h263_aic;
|
||||
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
|
||||
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
|
||||
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
|
||||
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
|
||||
s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
|
||||
|
||||
/* /Fx */
|
||||
/* /Fx */
|
||||
/* These are just to be sure */
|
||||
avctx->delay=0;
|
||||
s->low_delay=1;
|
||||
@ -2473,7 +2473,7 @@ static inline void gmc1_motion(MpegEncContext *s,
|
||||
|
||||
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
|
||||
if (s->no_rounding){
|
||||
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
||||
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
||||
}else{
|
||||
s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
|
||||
}
|
||||
@ -4148,7 +4148,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
}
|
||||
}
|
||||
|
||||
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
|
||||
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
|
||||
s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
|
||||
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
|
||||
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
|
||||
@ -4157,7 +4157,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
skip_dct[4]= 1;
|
||||
skip_dct[5]= 1;
|
||||
}else{
|
||||
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
||||
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
||||
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
|
||||
}
|
||||
}else{
|
||||
@ -4170,7 +4170,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
dest_cr = s->dest[2];
|
||||
|
||||
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
||||
op_pix = s->dsp.put_pixels_tab;
|
||||
op_pix = s->dsp.put_pixels_tab;
|
||||
op_qpix= s->dsp.put_qpel_pixels_tab;
|
||||
}else{
|
||||
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
||||
@ -4208,7 +4208,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
}
|
||||
}
|
||||
|
||||
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
||||
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
||||
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
||||
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
|
||||
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
|
||||
@ -4223,7 +4223,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
/* pre quantization */
|
||||
if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
|
||||
//FIXME optimize
|
||||
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
|
||||
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
|
||||
if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
|
||||
if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
|
||||
if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
|
||||
@ -6265,7 +6265,7 @@ static int dct_quantize_c(MpegEncContext *s,
|
||||
|
||||
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
|
||||
if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
|
||||
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
|
||||
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
|
||||
|
||||
return last_non_zero;
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ typedef struct ScanTable{
|
||||
uint8_t permutated[64];
|
||||
uint8_t raster_end[64];
|
||||
#ifdef ARCH_POWERPC
|
||||
/** Used by dct_quantise_alitvec to find last-non-zero */
|
||||
/** Used by dct_quantise_alitvec to find last-non-zero */
|
||||
uint8_t __align8 inverse[64];
|
||||
#endif
|
||||
} ScanTable;
|
||||
@ -181,7 +181,7 @@ typedef struct Picture{
|
||||
uint16_t *mb_var; ///< Table for MB variances
|
||||
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
|
||||
uint8_t *mb_mean; ///< Table for MB luminance
|
||||
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
|
||||
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
|
||||
int b_frame_score; /* */
|
||||
} Picture;
|
||||
|
||||
@ -245,7 +245,7 @@ typedef struct MotionEstContext{
|
||||
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
|
||||
uint8_t *current_mv_penalty;
|
||||
int (*sub_motion_search)(struct MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int *mx_ptr, int *my_ptr, int dmin,
|
||||
int src_index, int ref_index,
|
||||
int size, int h);
|
||||
}MotionEstContext;
|
||||
|
@ -544,24 +544,24 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
||||
handle_slices(s);
|
||||
|
||||
if (!s->mb_intra) {
|
||||
/* compute cbp */
|
||||
/* compute cbp */
|
||||
set_stat(ST_INTER_MB);
|
||||
cbp = 0;
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (s->block_last_index[i] >= 0)
|
||||
cbp |= 1 << (5 - i);
|
||||
}
|
||||
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
|
||||
/* skip macroblock */
|
||||
put_bits(&s->pb, 1, 1);
|
||||
cbp = 0;
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (s->block_last_index[i] >= 0)
|
||||
cbp |= 1 << (5 - i);
|
||||
}
|
||||
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
|
||||
/* skip macroblock */
|
||||
put_bits(&s->pb, 1, 1);
|
||||
s->last_bits++;
|
||||
s->misc_bits++;
|
||||
s->misc_bits++;
|
||||
s->skip_count++;
|
||||
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (s->use_skip_mb_code)
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
|
||||
if(s->msmpeg4_version<=2){
|
||||
put_bits(&s->pb,
|
||||
@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
||||
}
|
||||
s->p_tex_bits += get_bits_diff(s);
|
||||
} else {
|
||||
/* compute cbp */
|
||||
cbp = 0;
|
||||
/* compute cbp */
|
||||
cbp = 0;
|
||||
coded_cbp = 0;
|
||||
for (i = 0; i < 6; i++) {
|
||||
for (i = 0; i < 6; i++) {
|
||||
int val, pred;
|
||||
val = (s->block_last_index[i] >= 1);
|
||||
cbp |= val << (5 - i);
|
||||
@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
||||
val = val ^ pred;
|
||||
}
|
||||
coded_cbp |= val << (5 - i);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
if (coded_cbp)
|
||||
printf("cbp=%x %x\n", cbp, coded_cbp);
|
||||
@ -625,12 +625,12 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
||||
v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
|
||||
} else {
|
||||
if (s->use_skip_mb_code)
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb,
|
||||
v2_mb_type[(cbp&3) + 4][1],
|
||||
v2_mb_type[(cbp&3) + 4][0]);
|
||||
}
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||
put_bits(&s->pb,
|
||||
cbpy_tab[cbp>>2][1],
|
||||
cbpy_tab[cbp>>2][0]);
|
||||
@ -641,13 +641,13 @@ void msmpeg4_encode_mb(MpegEncContext * s,
|
||||
ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
|
||||
} else {
|
||||
if (s->use_skip_mb_code)
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb, 1, 0); /* mb coded */
|
||||
put_bits(&s->pb,
|
||||
table_mb_non_intra[cbp][1],
|
||||
table_mb_non_intra[cbp][0]);
|
||||
}
|
||||
set_stat(ST_INTRA_MB);
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
|
||||
if(s->inter_intra_pred){
|
||||
s->h263_aic_dir=0;
|
||||
put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
|
||||
@ -702,9 +702,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
||||
|
||||
/* find prediction */
|
||||
if (n < 4) {
|
||||
scale = s->y_dc_scale;
|
||||
scale = s->y_dc_scale;
|
||||
} else {
|
||||
scale = s->c_dc_scale;
|
||||
scale = s->c_dc_scale;
|
||||
}
|
||||
|
||||
wrap = s->block_wrap[n];
|
||||
@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
||||
to problems if Q could vary !) */
|
||||
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC
|
||||
asm volatile(
|
||||
"movl %3, %%eax \n\t"
|
||||
"shrl $1, %%eax \n\t"
|
||||
"addl %%eax, %2 \n\t"
|
||||
"addl %%eax, %1 \n\t"
|
||||
"addl %0, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %0 \n\t"
|
||||
"movl %1, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %1 \n\t"
|
||||
"movl %2, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %2 \n\t"
|
||||
: "+b" (a), "+c" (b), "+D" (c)
|
||||
: "g" (scale), "S" (inverse[scale])
|
||||
: "%eax", "%edx"
|
||||
"movl %3, %%eax \n\t"
|
||||
"shrl $1, %%eax \n\t"
|
||||
"addl %%eax, %2 \n\t"
|
||||
"addl %%eax, %1 \n\t"
|
||||
"addl %0, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %0 \n\t"
|
||||
"movl %1, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %1 \n\t"
|
||||
"movl %2, %%eax \n\t"
|
||||
"mull %4 \n\t"
|
||||
"movl %%edx, %2 \n\t"
|
||||
: "+b" (a), "+c" (b), "+D" (c)
|
||||
: "g" (scale), "S" (inverse[scale])
|
||||
: "%eax", "%edx"
|
||||
);
|
||||
#else
|
||||
/* #elif defined (ARCH_ALPHA) */
|
||||
@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
|
||||
common case. But they are costly everywhere...
|
||||
*/
|
||||
if (scale == 8) {
|
||||
a = (a + (8 >> 1)) / 8;
|
||||
b = (b + (8 >> 1)) / 8;
|
||||
c = (c + (8 >> 1)) / 8;
|
||||
a = (a + (8 >> 1)) / 8;
|
||||
b = (b + (8 >> 1)) / 8;
|
||||
c = (c + (8 >> 1)) / 8;
|
||||
} else {
|
||||
a = FASTDIV((a + (scale >> 1)), scale);
|
||||
b = FASTDIV((b + (scale >> 1)), scale);
|
||||
c = FASTDIV((c + (scale >> 1)), scale);
|
||||
a = FASTDIV((a + (scale >> 1)), scale);
|
||||
b = FASTDIV((b + (scale >> 1)), scale);
|
||||
c = FASTDIV((c + (scale >> 1)), scale);
|
||||
}
|
||||
#endif
|
||||
/* XXX: WARNING: they did not choose the same test as MPEG4. This
|
||||
@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
|
||||
/* AC coefs */
|
||||
last_non_zero = i - 1;
|
||||
for (; i <= last_index; i++) {
|
||||
j = scantable[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
run = i - last_non_zero - 1;
|
||||
last = (i == last_index);
|
||||
sign = 0;
|
||||
slevel = level;
|
||||
if (level < 0) {
|
||||
sign = 1;
|
||||
level = -level;
|
||||
}
|
||||
j = scantable[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
run = i - last_non_zero - 1;
|
||||
last = (i == last_index);
|
||||
sign = 0;
|
||||
slevel = level;
|
||||
if (level < 0) {
|
||||
sign = 1;
|
||||
level = -level;
|
||||
}
|
||||
|
||||
if(level<=MAX_LEVEL && run<=MAX_RUN){
|
||||
s->ac_stats[s->mb_intra][n>3][level][run][last]++;
|
||||
@ -1030,8 +1030,8 @@ else
|
||||
} else {
|
||||
put_bits(&s->pb, 1, sign);
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
last_non_zero = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void)
|
||||
v = abs(level);
|
||||
while (v) {
|
||||
v >>= 1;
|
||||
size++;
|
||||
size++;
|
||||
}
|
||||
|
||||
if (level < 0)
|
||||
@ -1301,11 +1301,11 @@ return -1;
|
||||
}
|
||||
s->no_rounding = 1;
|
||||
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
||||
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
|
||||
s->qscale,
|
||||
s->rl_chroma_table_index,
|
||||
s->rl_table_index,
|
||||
s->dc_table_index,
|
||||
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
|
||||
s->qscale,
|
||||
s->rl_chroma_table_index,
|
||||
s->rl_table_index,
|
||||
s->dc_table_index,
|
||||
s->per_mb_rl_table,
|
||||
s->slice_height);
|
||||
} else {
|
||||
@ -1349,20 +1349,20 @@ return -1;
|
||||
}
|
||||
|
||||
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
|
||||
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
|
||||
s->use_skip_mb_code,
|
||||
s->rl_table_index,
|
||||
s->rl_chroma_table_index,
|
||||
s->dc_table_index,
|
||||
s->mv_table_index,
|
||||
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
|
||||
s->use_skip_mb_code,
|
||||
s->rl_table_index,
|
||||
s->rl_chroma_table_index,
|
||||
s->dc_table_index,
|
||||
s->mv_table_index,
|
||||
s->per_mb_rl_table,
|
||||
s->qscale);
|
||||
|
||||
if(s->flipflop_rounding){
|
||||
s->no_rounding ^= 1;
|
||||
}else{
|
||||
s->no_rounding = 0;
|
||||
}
|
||||
if(s->flipflop_rounding){
|
||||
s->no_rounding ^= 1;
|
||||
}else{
|
||||
s->no_rounding = 0;
|
||||
}
|
||||
}
|
||||
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
|
||||
|
||||
@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
||||
{
|
||||
{
|
||||
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1593,8 +1593,8 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
||||
code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
|
||||
if (code < 0)
|
||||
return -1;
|
||||
//s->mb_intra = (code & 0x40) ? 0 : 1;
|
||||
s->mb_intra = (~code & 0x40) >> 6;
|
||||
//s->mb_intra = (code & 0x40) ? 0 : 1;
|
||||
s->mb_intra = (~code & 0x40) >> 6;
|
||||
|
||||
cbp = code & 0x3f;
|
||||
} else {
|
||||
@ -1650,10 +1650,10 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
|
||||
{
|
||||
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
||||
return -1;
|
||||
}
|
||||
{
|
||||
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -1672,7 +1672,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
qmul=1;
|
||||
qadd=0;
|
||||
|
||||
/* DC coef */
|
||||
/* DC coef */
|
||||
set_stat(ST_DC);
|
||||
level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
|
||||
|
||||
@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
|
||||
if (level>0) level= level * qmul + qadd;
|
||||
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
|
||||
if (level>0) level= level * qmul + qadd;
|
||||
else level= level * qmul - qadd;
|
||||
#if 0 // waste of time too :(
|
||||
if(level>2048 || level<-2048){
|
||||
|
@ -45,7 +45,7 @@ Theora_decode_frame(AVCodecContext *ctx, void *outdata, int *outdata_size,
|
||||
thc->op.bytes = buf_size;
|
||||
|
||||
if(theora_decode_packetin(&thc->state, &thc->op))
|
||||
return -1;
|
||||
return -1;
|
||||
|
||||
theora_decode_YUVout(&thc->state, &yuv);
|
||||
|
||||
@ -78,7 +78,7 @@ Theora_decode_init(AVCodecContext *ctx)
|
||||
uint8_t *cdp;
|
||||
|
||||
if(ctx->extradata_size < 6)
|
||||
return -1;
|
||||
return -1;
|
||||
|
||||
theora_info_init(&thc->info);
|
||||
|
||||
@ -87,25 +87,25 @@ Theora_decode_init(AVCodecContext *ctx)
|
||||
size = ctx->extradata_size;
|
||||
|
||||
for(i = 0; i < 3; i++){
|
||||
hs = *cdp++ << 8;
|
||||
hs += *cdp++;
|
||||
size -= 2;
|
||||
hs = *cdp++ << 8;
|
||||
hs += *cdp++;
|
||||
size -= 2;
|
||||
|
||||
if(hs > size){
|
||||
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
|
||||
if(hs > size){
|
||||
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
|
||||
hs, size);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
op.packet = cdp;
|
||||
op.bytes = hs;
|
||||
op.b_o_s = !i;
|
||||
if(theora_decode_header(&thc->info, &thc->comment, &op))
|
||||
return -1;
|
||||
op.packetno++;
|
||||
op.packet = cdp;
|
||||
op.bytes = hs;
|
||||
op.b_o_s = !i;
|
||||
if(theora_decode_header(&thc->info, &thc->comment, &op))
|
||||
return -1;
|
||||
op.packetno++;
|
||||
|
||||
cdp += hs;
|
||||
size -= hs;
|
||||
cdp += hs;
|
||||
size -= hs;
|
||||
}
|
||||
|
||||
theora_decode_init(&thc->state, &thc->info);
|
||||
|
@ -40,13 +40,13 @@ static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) {
|
||||
|
||||
return (vorbis_encode_setup_managed(vi, avccontext->channels,
|
||||
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ||
|
||||
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
|
||||
vorbis_encode_setup_init(vi)) ;
|
||||
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
|
||||
vorbis_encode_setup_init(vi)) ;
|
||||
#else
|
||||
/* constant bitrate */
|
||||
|
||||
return vorbis_encode_init(vi, avccontext->channels,
|
||||
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
|
||||
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -58,8 +58,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
|
||||
|
||||
vorbis_info_init(&context->vi) ;
|
||||
if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) {
|
||||
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
|
||||
return -1 ;
|
||||
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
|
||||
return -1 ;
|
||||
}
|
||||
vorbis_analysis_init(&context->vd, &context->vi) ;
|
||||
vorbis_block_init(&context->vd, &context->vb) ;
|
||||
@ -101,8 +101,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
|
||||
|
||||
|
||||
static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
||||
unsigned char *packets,
|
||||
int buf_size, void *data)
|
||||
unsigned char *packets,
|
||||
int buf_size, void *data)
|
||||
{
|
||||
OggVorbisContext *context = avccontext->priv_data ;
|
||||
float **buffer ;
|
||||
@ -113,22 +113,22 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
||||
buffer = vorbis_analysis_buffer(&context->vd, samples) ;
|
||||
|
||||
if(context->vi.channels == 1) {
|
||||
for(l = 0 ; l < samples ; l++)
|
||||
buffer[0][l]=audio[l]/32768.f;
|
||||
for(l = 0 ; l < samples ; l++)
|
||||
buffer[0][l]=audio[l]/32768.f;
|
||||
} else {
|
||||
for(l = 0 ; l < samples ; l++){
|
||||
buffer[0][l]=audio[l*2]/32768.f;
|
||||
buffer[1][l]=audio[l*2+1]/32768.f;
|
||||
}
|
||||
for(l = 0 ; l < samples ; l++){
|
||||
buffer[0][l]=audio[l*2]/32768.f;
|
||||
buffer[1][l]=audio[l*2+1]/32768.f;
|
||||
}
|
||||
}
|
||||
|
||||
vorbis_analysis_wrote(&context->vd, samples) ;
|
||||
|
||||
while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) {
|
||||
vorbis_analysis(&context->vb, NULL);
|
||||
vorbis_bitrate_addblock(&context->vb) ;
|
||||
vorbis_analysis(&context->vb, NULL);
|
||||
vorbis_bitrate_addblock(&context->vb) ;
|
||||
|
||||
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
|
||||
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
|
||||
if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg
|
||||
continue;
|
||||
memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet));
|
||||
@ -136,7 +136,7 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
|
||||
memcpy(context->buffer + context->buffer_index, op.packet, op.bytes);
|
||||
context->buffer_index += op.bytes;
|
||||
// av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
l=0;
|
||||
@ -268,19 +268,19 @@ static inline int conv(int samples, float **pcm, char *buf, int channels) {
|
||||
float *mono ;
|
||||
|
||||
for(i = 0 ; i < channels ; i++){
|
||||
ptr = &data[i];
|
||||
mono = pcm[i] ;
|
||||
ptr = &data[i];
|
||||
mono = pcm[i] ;
|
||||
|
||||
for(j = 0 ; j < samples ; j++) {
|
||||
for(j = 0 ; j < samples ; j++) {
|
||||
|
||||
val = mono[j] * 32767.f;
|
||||
val = mono[j] * 32767.f;
|
||||
|
||||
if(val > 32767) val = 32767 ;
|
||||
if(val < -32768) val = -32768 ;
|
||||
if(val > 32767) val = 32767 ;
|
||||
if(val < -32768) val = -32768 ;
|
||||
|
||||
*ptr = val ;
|
||||
ptr += channels;
|
||||
}
|
||||
*ptr = val ;
|
||||
ptr += channels;
|
||||
}
|
||||
}
|
||||
|
||||
return 0 ;
|
||||
@ -311,15 +311,15 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext,
|
||||
av_log(avccontext, AV_LOG_DEBUG, "\n");*/
|
||||
|
||||
if(vorbis_synthesis(&context->vb, op) == 0)
|
||||
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
|
||||
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
|
||||
|
||||
total_samples = 0 ;
|
||||
total_bytes = 0 ;
|
||||
|
||||
while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
|
||||
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
|
||||
total_bytes += samples * 2 * context->vi.channels ;
|
||||
total_samples += samples ;
|
||||
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
|
||||
total_bytes += samples * 2 * context->vi.channels ;
|
||||
total_samples += samples ;
|
||||
vorbis_synthesis_read(&context->vd, samples) ;
|
||||
}
|
||||
|
||||
|
@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s)
|
||||
|
||||
//#define END_NOT_FOUND (-100)
|
||||
|
||||
#define PICTURE_START_CODE 0x00000100
|
||||
#define SEQ_START_CODE 0x000001b3
|
||||
#define EXT_START_CODE 0x000001b5
|
||||
#define SLICE_MIN_START_CODE 0x00000101
|
||||
#define SLICE_MAX_START_CODE 0x000001af
|
||||
#define PICTURE_START_CODE 0x00000100
|
||||
#define SEQ_START_CODE 0x000001b3
|
||||
#define EXT_START_CODE 0x000001b5
|
||||
#define SLICE_MIN_START_CODE 0x00000101
|
||||
#define SLICE_MAX_START_CODE 0x000001af
|
||||
|
||||
typedef struct ParseContext1{
|
||||
ParseContext pc;
|
||||
@ -571,7 +571,7 @@ static int mpeg4video_split(AVCodecContext *avctx,
|
||||
/*************************/
|
||||
|
||||
typedef struct MpegAudioParseContext {
|
||||
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
|
||||
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
|
||||
uint8_t *inbuf_ptr;
|
||||
int frame_size;
|
||||
int free_format_frame_size;
|
||||
@ -608,8 +608,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
|
||||
*poutbuf_size = 0;
|
||||
buf_ptr = buf;
|
||||
while (buf_size > 0) {
|
||||
len = s->inbuf_ptr - s->inbuf;
|
||||
if (s->frame_size == 0) {
|
||||
len = s->inbuf_ptr - s->inbuf;
|
||||
if (s->frame_size == 0) {
|
||||
/* special case for next header for first frame in free
|
||||
format case (XXX: find a simpler method) */
|
||||
if (s->free_format_next_header != 0) {
|
||||
@ -621,34 +621,34 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                s->free_format_next_header = 0;
                goto got_header;
            }
            /* no header seen : find one. We need at least MPA_HEADER_SIZE
            /* no header seen : find one. We need at least MPA_HEADER_SIZE
               bytes to parse it */
            len = MPA_HEADER_SIZE - len;
            if (len > buf_size)
                len = buf_size;
            if (len > 0) {
                memcpy(s->inbuf_ptr, buf_ptr, len);
                buf_ptr += len;
                buf_size -= len;
                s->inbuf_ptr += len;
            }
            if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
            len = MPA_HEADER_SIZE - len;
            if (len > buf_size)
                len = buf_size;
            if (len > 0) {
                memcpy(s->inbuf_ptr, buf_ptr, len);
                buf_ptr += len;
                buf_size -= len;
                s->inbuf_ptr += len;
            }
            if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
            got_header:
                sr= avctx->sample_rate;
                header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
                    (s->inbuf[2] << 8) | s->inbuf[3];
                header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
                    (s->inbuf[2] << 8) | s->inbuf[3];

                ret = mpa_decode_header(avctx, header);
                if (ret < 0) {
                    s->header_count= -2;
                    /* no sync found : move by one byte (inefficient, but simple!) */
                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
                    s->inbuf_ptr--;
                    /* no sync found : move by one byte (inefficient, but simple!) */
                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
                    s->inbuf_ptr--;
                    dprintf("skip %x\n", header);
                    /* reset free format frame size to give a chance
                       to get a new bitrate */
                    s->free_format_frame_size = 0;
                } else {
                } else {
                    if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
                        s->header_count= -3;
                    s->header= header;
@ -657,26 +657,26 @@ static int mpegaudio_parse(AVCodecParserContext *s1,

#if 0
                    /* free format: prepare to compute frame size */
                    if (decode_header(s, header) == 1) {
                        s->frame_size = -1;
                    if (decode_header(s, header) == 1) {
                        s->frame_size = -1;
                    }
#endif
                }
            }
            if(s->header_count <= 0)
                avctx->sample_rate= sr; //FIXME ugly
        }
        }
    } else
#if 0
    if (s->frame_size == -1) {
        /* free format : find next sync to compute frame size */
        len = MPA_MAX_CODED_FRAME_SIZE - len;
        if (len > buf_size)
            len = buf_size;
        len = MPA_MAX_CODED_FRAME_SIZE - len;
        if (len > buf_size)
            len = buf_size;
        if (len == 0) {
            /* frame too long: resync */
            /* frame too long: resync */
            s->frame_size = 0;
            memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
            s->inbuf_ptr--;
            memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
            s->inbuf_ptr--;
        } else {
            uint8_t *p, *pend;
            uint32_t header1;
@ -720,19 +720,19 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                s->inbuf_ptr += len;
                buf_size -= len;
            }
        } else
        } else
#endif
        if (len < s->frame_size) {
            if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
                s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
            len = s->frame_size - len;
            if (len > buf_size)
                len = buf_size;
            memcpy(s->inbuf_ptr, buf_ptr, len);
            buf_ptr += len;
            s->inbuf_ptr += len;
            buf_size -= len;
        }
            len = s->frame_size - len;
            if (len > buf_size)
                len = buf_size;
            memcpy(s->inbuf_ptr, buf_ptr, len);
            buf_ptr += len;
            s->inbuf_ptr += len;
            buf_size -= len;
        }
// next_data:
        if (s->frame_size > 0 &&
            (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
@ -740,10 +740,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                *poutbuf = s->inbuf;
                *poutbuf_size = s->inbuf_ptr - s->inbuf;
            }
            s->inbuf_ptr = s->inbuf;
            s->frame_size = 0;
            break;
        }
            s->inbuf_ptr = s->inbuf;
            s->frame_size = 0;
            break;
        }
    }
    return buf_ptr - buf;
}
@ -783,7 +783,7 @@ static int ac3_parse(AVCodecParserContext *s1,
    const uint8_t *buf_ptr;
    int len, sample_rate, bit_rate;
    static const int ac3_channels[8] = {
        2, 1, 2, 3, 3, 4, 4, 5
        2, 1, 2, 3, 3, 4, 4, 5
    };

    *poutbuf = NULL;
@ -812,7 +812,7 @@ static int ac3_parse(AVCodecParserContext *s1,
            memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
            s->inbuf_ptr--;
        } else {
            s->frame_size = len;
            s->frame_size = len;
            /* update codec info */
            avctx->sample_rate = sample_rate;
            /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
@ -821,7 +821,7 @@ static int ac3_parse(AVCodecParserContext *s1,
            if (s->flags & A52_LFE)
                avctx->channels++;
        }
        avctx->bit_rate = bit_rate;
        avctx->bit_rate = bit_rate;
        avctx->frame_size = 6 * 256;
    }
}

@ -27,48 +27,48 @@

/* from g711.c by SUN microsystems (unrestricted use) */

#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
#define QUANT_MASK (0xf) /* Quantization field mask. */
#define NSEGS (8) /* Number of A-law segments. */
#define SEG_SHIFT (4) /* Left shift for segment number. */
#define SEG_MASK (0x70) /* Segment field mask. */
#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
#define QUANT_MASK (0xf) /* Quantization field mask. */
#define NSEGS (8) /* Number of A-law segments. */
#define SEG_SHIFT (4) /* Left shift for segment number. */
#define SEG_MASK (0x70) /* Segment field mask. */

#define BIAS (0x84) /* Bias for linear code. */
#define BIAS (0x84) /* Bias for linear code. */

/*
 * alaw2linear() - Convert an A-law value to 16-bit linear PCM
 *
 */
static int alaw2linear(unsigned char a_val)
static int alaw2linear(unsigned char a_val)
{
    int t;
    int seg;
    int t;
    int seg;

    a_val ^= 0x55;
    a_val ^= 0x55;

    t = a_val & QUANT_MASK;
    seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
    if(seg) t= (t + t + 1 + 32) << (seg + 2);
    else    t= (t + t + 1     ) << 3;
    t = a_val & QUANT_MASK;
    seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
    if(seg) t= (t + t + 1 + 32) << (seg + 2);
    else    t= (t + t + 1     ) << 3;

    return ((a_val & SIGN_BIT) ? t : -t);
    return ((a_val & SIGN_BIT) ? t : -t);
}

static int ulaw2linear(unsigned char u_val)
static int ulaw2linear(unsigned char u_val)
{
    int t;
    int t;

    /* Complement to obtain normal u-law value. */
    u_val = ~u_val;
    /* Complement to obtain normal u-law value. */
    u_val = ~u_val;

    /*
     * Extract and bias the quantization bits. Then
     * shift up by the segment number and subtract out the bias.
     */
    t = ((u_val & QUANT_MASK) << 3) + BIAS;
    t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
    /*
     * Extract and bias the quantization bits. Then
     * shift up by the segment number and subtract out the bias.
     */
    t = ((u_val & QUANT_MASK) << 3) + BIAS;
    t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;

    return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
    return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
}

/* 16384 entries per table */
@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us,
}

static int pcm_encode_frame(AVCodecContext *avctx,
                            unsigned char *frame, int buf_size, void *data)
                            unsigned char *frame, int buf_size, void *data)
{
    int n, sample_size, v;
    short *samples;
@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us,
}

static int pcm_decode_frame(AVCodecContext *avctx,
                            void *data, int *data_size,
                            uint8_t *buf, int buf_size)
                            void *data, int *data_size,
                            uint8_t *buf, int buf_size)
{
    PCMDecode *s = avctx->priv_data;
    int n;
@ -509,9 +509,9 @@ AVCodec name ## _encoder = { \
    CODEC_TYPE_AUDIO, \
    id, \
    0, \
    pcm_encode_init, \
    pcm_encode_frame, \
    pcm_encode_close, \
    pcm_encode_init, \
    pcm_encode_frame, \
    pcm_encode_close, \
    NULL, \
}; \
AVCodec name ## _decoder = { \
@ -519,7 +519,7 @@ AVCodec name ## _decoder = { \
    CODEC_TYPE_AUDIO, \
    id, \
    sizeof(PCMDecode), \
    pcm_decode_init, \
    pcm_decode_init, \
    NULL, \
    NULL, \
    pcm_decode_frame, \

@ -67,7 +67,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
           pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
       fact to avoid a potentially expensive unaligned read, as well
       as some splitting, and vector addition each time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
       pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
       Split the pixel vectors into shorts
    */
    tv = (vector unsigned char *) &pix2[0];
@ -204,7 +204,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
           pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
@ -273,7 +273,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)


    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2 */
        /* Read potentially unaligned pixels into t1 and t2 */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
@ -281,12 +281,12 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
        t2 = vec_perm(pix2v[0], pix2v[1], perm2);

        /* Calculate a sum of abs differences vector */
        /* Calculate a sum of abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Add each 4 pixel group together and put 4 results into sad */
        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);

    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);

        /* Calculate a sum of abs differences vector */
        /* Calculate a sum of abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Add each 4 pixel group together and put 4 results into sad */
        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
@ -398,9 +398,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)


    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        /* Read potentially unaligned pixels into t1 and t2
           Since we're reading 16 pixels, and actually only want 8,
           mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
@ -413,7 +413,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
           of the fact that abs(a-b)^2 = (a-b)^2.
        */

        /* Calculate abs differences vector */
        /* Calculate abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);
@ -451,7 +451,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    sum = (vector unsigned int)vec_splat_u32(0);

    for(i=0;i<h;i++) {
        /* Read potentially unaligned pixels into t1 and t2 */
        /* Read potentially unaligned pixels into t1 and t2 */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
@ -464,7 +464,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
           of the fact that abs(a-b)^2 = (a-b)^2.
        */

        /* Calculate abs differences vector */
        /* Calculate abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);
@ -498,12 +498,12 @@ int pix_sum_altivec(uint8_t * pix, int line_size)
    sad = (vector unsigned int)vec_splat_u32(0);

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1 */
        /* Read the potentially unaligned 16 pixels into t1 */
        perm = vec_lvsl(0, pix);
        pixv = (vector unsigned char *) pix;
        t1 = vec_perm(pixv[0], pixv[1], perm);

        /* Add each 4 pixel group together and put 4 results into sad */
        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
||||
0x00, 0x01, 0x02, 0x03,
|
||||
0x04, 0x05, 0x06, 0x07);
|
||||
|
||||
#define ONEITERBUTTERFLY(i, res) \
|
||||
{ \
|
||||
register vector unsigned char src1, src2, srcO; \
|
||||
register vector unsigned char dst1, dst2, dstO; \
|
||||
src1 = vec_ld(stride * i, src); \
|
||||
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
|
||||
src2 = vec_ld((stride * i) + 16, src); \
|
||||
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
||||
dst1 = vec_ld(stride * i, dst); \
|
||||
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
|
||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
||||
/* promote the unsigned chars to signed shorts */ \
|
||||
/* we're in the 8x8 function, we only care for the first 8 */ \
|
||||
register vector signed short srcV = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||
register vector signed short dstV = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||
/* substractions inside the first butterfly */ \
|
||||
register vector signed short but0 = vec_sub(srcV, dstV); \
|
||||
register vector signed short op1 = vec_perm(but0, but0, perm1); \
|
||||
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
|
||||
register vector signed short op2 = vec_perm(but1, but1, perm2); \
|
||||
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
|
||||
register vector signed short op3 = vec_perm(but2, but2, perm3); \
|
||||
res = vec_mladd(but2, vprod3, op3); \
|
||||
#define ONEITERBUTTERFLY(i, res) \
|
||||
{ \
|
||||
register vector unsigned char src1, src2, srcO; \
|
||||
register vector unsigned char dst1, dst2, dstO; \
|
||||
src1 = vec_ld(stride * i, src); \
|
||||
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
|
||||
src2 = vec_ld((stride * i) + 16, src); \
|
||||
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
||||
dst1 = vec_ld(stride * i, dst); \
|
||||
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
|
||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
||||
/* promote the unsigned chars to signed shorts */ \
|
||||
/* we're in the 8x8 function, we only care for the first 8 */ \
|
||||
register vector signed short srcV = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||
register vector signed short dstV = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||
/* substractions inside the first butterfly */ \
|
||||
register vector signed short but0 = vec_sub(srcV, dstV); \
|
||||
register vector signed short op1 = vec_perm(but0, but0, perm1); \
|
||||
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
|
||||
register vector signed short op2 = vec_perm(but1, but1, perm2); \
|
||||
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
|
||||
register vector signed short op3 = vec_perm(but2, but2, perm3); \
|
||||
res = vec_mladd(but2, vprod3, op3); \
|
||||
}
|
||||
ONEITERBUTTERFLY(0, temp0);
|
||||
ONEITERBUTTERFLY(1, temp1);
|
||||
@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
||||
0x00, 0x01, 0x02, 0x03,
|
||||
0x04, 0x05, 0x06, 0x07);
|
||||
|
||||
#define ONEITERBUTTERFLY(i, res1, res2) \
|
||||
{ \
|
||||
#define ONEITERBUTTERFLY(i, res1, res2) \
|
||||
{ \
|
||||
register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
|
||||
register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
|
||||
src1 = vec_ld(stride * i, src); \
|
||||
src2 = vec_ld((stride * i) + 16, src); \
|
||||
src1 = vec_ld(stride * i, src); \
|
||||
src2 = vec_ld((stride * i) + 16, src); \
|
||||
register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
|
||||
dst1 = vec_ld(stride * i, dst); \
|
||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||
dst1 = vec_ld(stride * i, dst); \
|
||||
dst2 = vec_ld((stride * i) + 16, dst); \
|
||||
register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
|
||||
/* promote the unsigned chars to signed shorts */ \
|
||||
/* promote the unsigned chars to signed shorts */ \
|
||||
register vector signed short srcV asm ("v24") = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
|
||||
register vector signed short dstV asm ("v25") = \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
|
||||
register vector signed short srcW asm ("v26") = \
|
||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
|
||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
|
||||
register vector signed short dstW asm ("v27") = \
|
||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
|
||||
/* substractions inside the first butterfly */ \
|
||||
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
|
||||
/* substractions inside the first butterfly */ \
|
||||
register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
|
||||
register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
|
||||
register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
|
||||
@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
||||
register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
|
||||
register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
|
||||
register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
|
||||
res1 = vec_mladd(but2, vprod3, op3); \
|
||||
res1 = vec_mladd(but2, vprod3, op3); \
|
||||
register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
|
||||
res2 = vec_mladd(but2S, vprod3, op3S); \
|
||||
res2 = vec_mladd(but2S, vprod3, op3S); \
|
||||
}
|
||||
ONEITERBUTTERFLY(0, temp0, temp0S);
|
||||
ONEITERBUTTERFLY(1, temp1, temp1S);
|
||||
@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
|
||||
int has_altivec(void)
|
||||
{
|
||||
#ifdef __AMIGAOS4__
|
||||
ULONG result = 0;
|
||||
extern struct ExecIFace *IExec;
|
||||
ULONG result = 0;
|
||||
extern struct ExecIFace *IExec;
|
||||
|
||||
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
|
||||
if (result == VECTORTYPE_ALTIVEC) return 1;
|
||||
return 0;
|
||||
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
|
||||
if (result == VECTORTYPE_ALTIVEC) return 1;
|
||||
return 0;
|
||||
#else /* __AMIGAOS4__ */
|
||||
|
||||
#ifdef CONFIG_DARWIN
|
||||
|
@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
|
||||
|
||||
/* from dsputil.c */
|
||||
static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||
int i;
|
||||
for (i = 0; i < h; i++) {
|
||||
uint32_t a, b;
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
|
||||
}
|
||||
int i;
|
||||
for (i = 0; i < h; i++) {
|
||||
uint32_t a, b;
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
|
||||
}
|
||||
} static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||
int i;
|
||||
for (i = 0; i < h; i++) {
|
||||
uint32_t a, b;
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
|
||||
}
|
||||
int i;
|
||||
for (i = 0; i < h; i++) {
|
||||
uint32_t a, b;
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
|
||||
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
|
||||
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
|
||||
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
|
||||
}
|
||||
} static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||
} static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
|
||||
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
|
||||
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
|
||||
}
|
||||
|
||||
/* UNIMPLEMENTED YET !! */
|
||||
|
@ -87,16 +87,16 @@ void powerpc_display_perf_report(void)
|
||||
{
|
||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||
{
|
||||
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
j+1,
|
||||
perfdata[j][i][powerpc_data_min],
|
||||
perfdata[j][i][powerpc_data_max],
|
||||
(double)perfdata[j][i][powerpc_data_sum] /
|
||||
(double)perfdata[j][i][powerpc_data_num],
|
||||
perfdata[j][i][powerpc_data_num]);
|
||||
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
|
||||
av_log(NULL, AV_LOG_INFO,
|
||||
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
|
||||
perfname[i],
|
||||
j+1,
|
||||
perfdata[j][i][powerpc_data_min],
|
||||
perfdata[j][i][powerpc_data_max],
|
||||
(double)perfdata[j][i][powerpc_data_sum] /
|
||||
(double)perfdata[j][i][powerpc_data_num],
|
||||
perfdata[j][i][powerpc_data_num]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
|
||||
}
|
||||
else
|
||||
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
|
||||
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
||||
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
|
||||
}
|
||||
#else
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
||||
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
||||
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
|
||||
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
|
||||
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
|
||||
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
|
||||
|
||||
c->gmc1 = gmc1_altivec;
|
||||
c->gmc1 = gmc1_altivec;
|
||||
|
||||
#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
|
||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ENCODERS
|
||||
if (avctx->dct_algo == FF_DCT_AUTO ||
|
||||
avctx->dct_algo == FF_DCT_ALTIVEC)
|
||||
{
|
||||
c->fdct = fdct_altivec;
|
||||
}
|
||||
if (avctx->dct_algo == FF_DCT_AUTO ||
|
||||
avctx->dct_algo == FF_DCT_ALTIVEC)
|
||||
{
|
||||
c->fdct = fdct_altivec;
|
||||
}
|
||||
#endif //CONFIG_ENCODERS
|
||||
|
||||
if (avctx->lowres==0)
|
||||
@ -325,14 +325,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
||||
int i, j;
|
||||
for (i = 0 ; i < powerpc_perf_total ; i++)
|
||||
{
|
||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||
{
|
||||
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
|
||||
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
|
||||
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
|
||||
}
|
||||
}
|
||||
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
|
||||
{
|
||||
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
|
||||
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
|
||||
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
|
||||
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* POWERPC_PERFORMANCE_REPORT */
|
||||
} else
|
||||
|
@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
|
||||
#define POWERPC_GET_PMC6(a) do {} while (0)
|
||||
#endif
|
||||
#endif /* POWERPC_MODE_64BITS */
|
||||
#define POWERPC_PERF_DECLARE(a, cond) \
|
||||
POWERP_PMC_DATATYPE \
|
||||
pmc_start[POWERPC_NUM_PMC_ENABLED], \
|
||||
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
|
||||
#define POWERPC_PERF_DECLARE(a, cond) \
|
||||
POWERP_PMC_DATATYPE \
|
||||
pmc_start[POWERPC_NUM_PMC_ENABLED], \
|
||||
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
|
||||
pmc_loop_index;
|
||||
#define POWERPC_PERF_START_COUNT(a, cond) do { \
|
||||
POWERPC_GET_PMC6(pmc_start[5]); \
|
||||
@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
|
||||
pmc_loop_index++) \
|
||||
{ \
|
||||
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
|
||||
{ \
|
||||
POWERP_PMC_DATATYPE diff = \
|
||||
{ \
|
||||
POWERP_PMC_DATATYPE diff = \
|
||||
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
|
||||
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
|
||||
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
|
||||
|
@ -65,8 +65,8 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
|
||||
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
|
||||
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||
int ln = s->nbits;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
register FFTComplex *p, *q;
|
||||
FFTComplex *exptab = s->exptab;
|
||||
int l;
|
||||
@ -147,8 +147,8 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
|
||||
#endif
|
||||
|
||||
int ln = s->nbits;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
int j, np, np2;
|
||||
int nblocks, nloops;
|
||||
register FFTComplex *p, *q;
|
||||
FFTComplex *cptr, *cptr1;
|
||||
int k;
|
||||
|
@ -30,31 +30,31 @@
|
||||
*/
|
||||
|
||||
static inline vector signed char ff_vmrglb (vector signed char const A,
|
||||
vector signed char const B)
|
||||
vector signed char const B)
|
||||
{
|
||||
static const vector unsigned char lowbyte = {
|
||||
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
|
||||
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
|
||||
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
|
||||
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
|
||||
};
|
||||
return vec_perm (A, B, lowbyte);
|
||||
}
|
||||
|
||||
static inline vector signed short ff_vmrglh (vector signed short const A,
|
||||
vector signed short const B)
|
||||
vector signed short const B)
|
||||
{
|
||||
static const vector unsigned char lowhalf = {
|
||||
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
|
||||
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
|
||||
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
|
||||
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
|
||||
};
|
||||
return vec_perm (A, B, lowhalf);
|
||||
}
|
||||
|
||||
static inline vector signed int ff_vmrglw (vector signed int const A,
|
||||
vector signed int const B)
|
||||
vector signed int const B)
|
||||
{
|
||||
static const vector unsigned char lowword = {
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
|
||||
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
|
||||
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
};
|
||||
return vec_perm (A, B, lowword);
|
||||
}
|
||||
|
@ -51,108 +51,108 @@
|
||||
#define vector_s32_t vector signed int
|
||||
#define vector_u32_t vector unsigned int
|
||||
|
||||
#define IDCT_HALF \
|
||||
/* 1st stage */ \
|
||||
t1 = vec_mradds (a1, vx7, vx1 ); \
|
||||
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
|
||||
t7 = vec_mradds (a2, vx5, vx3); \
|
||||
t3 = vec_mradds (ma2, vx3, vx5); \
|
||||
\
|
||||
/* 2nd stage */ \
|
||||
t5 = vec_adds (vx0, vx4); \
|
||||
t0 = vec_subs (vx0, vx4); \
|
||||
t2 = vec_mradds (a0, vx6, vx2); \
|
||||
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
|
||||
t6 = vec_adds (t8, t3); \
|
||||
t3 = vec_subs (t8, t3); \
|
||||
t8 = vec_subs (t1, t7); \
|
||||
t1 = vec_adds (t1, t7); \
|
||||
\
|
||||
/* 3rd stage */ \
|
||||
t7 = vec_adds (t5, t2); \
|
||||
t2 = vec_subs (t5, t2); \
|
||||
t5 = vec_adds (t0, t4); \
|
||||
t0 = vec_subs (t0, t4); \
|
||||
t4 = vec_subs (t8, t3); \
|
||||
t3 = vec_adds (t8, t3); \
|
||||
\
|
||||
/* 4th stage */ \
|
||||
vy0 = vec_adds (t7, t1); \
|
||||
vy7 = vec_subs (t7, t1); \
|
||||
vy1 = vec_mradds (c4, t3, t5); \
|
||||
vy6 = vec_mradds (mc4, t3, t5); \
|
||||
vy2 = vec_mradds (c4, t4, t0); \
|
||||
vy5 = vec_mradds (mc4, t4, t0); \
|
||||
vy3 = vec_adds (t2, t6); \
|
||||
#define IDCT_HALF \
|
||||
/* 1st stage */ \
|
||||
t1 = vec_mradds (a1, vx7, vx1 ); \
|
||||
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
|
||||
t7 = vec_mradds (a2, vx5, vx3); \
|
||||
t3 = vec_mradds (ma2, vx3, vx5); \
|
||||
\
|
||||
/* 2nd stage */ \
|
||||
t5 = vec_adds (vx0, vx4); \
|
||||
t0 = vec_subs (vx0, vx4); \
|
||||
t2 = vec_mradds (a0, vx6, vx2); \
|
||||
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
|
||||
t6 = vec_adds (t8, t3); \
|
||||
t3 = vec_subs (t8, t3); \
|
||||
t8 = vec_subs (t1, t7); \
|
||||
t1 = vec_adds (t1, t7); \
|
||||
\
|
||||
/* 3rd stage */ \
|
||||
t7 = vec_adds (t5, t2); \
|
||||
t2 = vec_subs (t5, t2); \
|
||||
t5 = vec_adds (t0, t4); \
|
||||
t0 = vec_subs (t0, t4); \
|
||||
t4 = vec_subs (t8, t3); \
|
||||
t3 = vec_adds (t8, t3); \
|
||||
\
|
||||
/* 4th stage */ \
|
||||
vy0 = vec_adds (t7, t1); \
|
||||
vy7 = vec_subs (t7, t1); \
|
||||
vy1 = vec_mradds (c4, t3, t5); \
|
||||
vy6 = vec_mradds (mc4, t3, t5); \
|
||||
vy2 = vec_mradds (c4, t4, t0); \
|
||||
vy5 = vec_mradds (mc4, t4, t0); \
|
||||
vy3 = vec_adds (t2, t6); \
|
||||
vy4 = vec_subs (t2, t6);
|
||||
|
||||
|
||||
#define IDCT \
|
||||
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
|
||||
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
|
||||
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
|
||||
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
|
||||
vector_u16_t shift; \
|
||||
\
|
||||
c4 = vec_splat (constants[0], 0); \
|
||||
a0 = vec_splat (constants[0], 1); \
|
||||
a1 = vec_splat (constants[0], 2); \
|
||||
a2 = vec_splat (constants[0], 3); \
|
||||
mc4 = vec_splat (constants[0], 4); \
|
||||
ma2 = vec_splat (constants[0], 5); \
|
||||
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
|
||||
\
|
||||
zero = vec_splat_s16 (0); \
|
||||
shift = vec_splat_u16 (4); \
|
||||
\
|
||||
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
|
||||
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
|
||||
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
|
||||
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
|
||||
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
|
||||
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
|
||||
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
|
||||
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
|
||||
\
|
||||
IDCT_HALF \
|
||||
\
|
||||
vx0 = vec_mergeh (vy0, vy4); \
|
||||
vx1 = vec_mergel (vy0, vy4); \
|
||||
vx2 = vec_mergeh (vy1, vy5); \
|
||||
vx3 = vec_mergel (vy1, vy5); \
|
||||
vx4 = vec_mergeh (vy2, vy6); \
|
||||
vx5 = vec_mergel (vy2, vy6); \
|
||||
vx6 = vec_mergeh (vy3, vy7); \
|
||||
vx7 = vec_mergel (vy3, vy7); \
|
||||
\
|
||||
vy0 = vec_mergeh (vx0, vx4); \
|
||||
vy1 = vec_mergel (vx0, vx4); \
|
||||
vy2 = vec_mergeh (vx1, vx5); \
|
||||
vy3 = vec_mergel (vx1, vx5); \
|
||||
vy4 = vec_mergeh (vx2, vx6); \
|
||||
vy5 = vec_mergel (vx2, vx6); \
|
||||
vy6 = vec_mergeh (vx3, vx7); \
|
||||
vy7 = vec_mergel (vx3, vx7); \
|
||||
\
|
||||
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
|
||||
vx1 = vec_mergel (vy0, vy4); \
|
||||
vx2 = vec_mergeh (vy1, vy5); \
|
||||
vx3 = vec_mergel (vy1, vy5); \
|
||||
vx4 = vec_mergeh (vy2, vy6); \
|
||||
vx5 = vec_mergel (vy2, vy6); \
|
||||
vx6 = vec_mergeh (vy3, vy7); \
|
||||
vx7 = vec_mergel (vy3, vy7); \
|
||||
\
|
||||
IDCT_HALF \
|
||||
\
|
||||
shift = vec_splat_u16 (6); \
|
||||
vx0 = vec_sra (vy0, shift); \
|
||||
vx1 = vec_sra (vy1, shift); \
|
||||
vx2 = vec_sra (vy2, shift); \
|
||||
vx3 = vec_sra (vy3, shift); \
|
||||
vx4 = vec_sra (vy4, shift); \
|
||||
vx5 = vec_sra (vy5, shift); \
|
||||
vx6 = vec_sra (vy6, shift); \
|
||||
#define IDCT \
|
||||
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
|
||||
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
|
||||
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
|
||||
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
|
||||
vector_u16_t shift; \
|
||||
\
|
||||
c4 = vec_splat (constants[0], 0); \
|
||||
a0 = vec_splat (constants[0], 1); \
|
||||
a1 = vec_splat (constants[0], 2); \
|
||||
a2 = vec_splat (constants[0], 3); \
|
||||
mc4 = vec_splat (constants[0], 4); \
|
||||
ma2 = vec_splat (constants[0], 5); \
|
||||
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
|
||||
\
|
||||
zero = vec_splat_s16 (0); \
|
||||
shift = vec_splat_u16 (4); \
|
||||
\
|
||||
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
|
||||
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
|
||||
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
|
||||
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
|
||||
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
|
||||
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
|
||||
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
|
||||
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
|
||||
\
|
||||
IDCT_HALF \
|
||||
\
|
||||
vx0 = vec_mergeh (vy0, vy4); \
|
||||
vx1 = vec_mergel (vy0, vy4); \
|
||||
vx2 = vec_mergeh (vy1, vy5); \
|
||||
vx3 = vec_mergel (vy1, vy5); \
|
||||
vx4 = vec_mergeh (vy2, vy6); \
|
||||
vx5 = vec_mergel (vy2, vy6); \
|
||||
vx6 = vec_mergeh (vy3, vy7); \
|
||||
vx7 = vec_mergel (vy3, vy7); \
|
||||
\
|
||||
vy0 = vec_mergeh (vx0, vx4); \
|
||||
vy1 = vec_mergel (vx0, vx4); \
|
||||
vy2 = vec_mergeh (vx1, vx5); \
|
||||
vy3 = vec_mergel (vx1, vx5); \
|
||||
vy4 = vec_mergeh (vx2, vx6); \
|
||||
vy5 = vec_mergel (vx2, vx6); \
|
||||
vy6 = vec_mergeh (vx3, vx7); \
|
||||
vy7 = vec_mergel (vx3, vx7); \
|
||||
\
|
||||
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
|
||||
vx1 = vec_mergel (vy0, vy4); \
|
||||
vx2 = vec_mergeh (vy1, vy5); \
|
||||
vx3 = vec_mergel (vy1, vy5); \
|
||||
vx4 = vec_mergeh (vy2, vy6); \
|
||||
vx5 = vec_mergel (vy2, vy6); \
|
||||
vx6 = vec_mergeh (vy3, vy7); \
|
||||
vx7 = vec_mergel (vy3, vy7); \
|
||||
\
|
||||
IDCT_HALF \
|
||||
\
|
||||
shift = vec_splat_u16 (6); \
|
||||
vx0 = vec_sra (vy0, shift); \
|
||||
vx1 = vec_sra (vy1, shift); \
|
||||
vx2 = vec_sra (vy2, shift); \
|
||||
vx3 = vec_sra (vy3, shift); \
|
||||
vx4 = vec_sra (vy4, shift); \
|
||||
vx5 = vec_sra (vy5, shift); \
|
||||
vx6 = vec_sra (vy6, shift); \
|
||||
vx7 = vec_sra (vy7, shift);
|
||||
|
||||
|
||||
@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
|
||||
#endif
|
||||
IDCT
|
||||
|
||||
#define COPY(dest,src) \
|
||||
tmp = vec_packsu (src, src); \
|
||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||
#define COPY(dest,src) \
|
||||
tmp = vec_packsu (src, src); \
|
||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
||||
|
||||
COPY (dest, vx0) dest += stride;
|
||||
COPY (dest, vx1) dest += stride;
|
||||
COPY (dest, vx2) dest += stride;
|
||||
COPY (dest, vx3) dest += stride;
|
||||
COPY (dest, vx4) dest += stride;
|
||||
COPY (dest, vx5) dest += stride;
|
||||
COPY (dest, vx6) dest += stride;
|
||||
COPY (dest, vx0) dest += stride;
|
||||
COPY (dest, vx1) dest += stride;
|
||||
COPY (dest, vx2) dest += stride;
|
||||
COPY (dest, vx3) dest += stride;
|
||||
COPY (dest, vx4) dest += stride;
|
||||
COPY (dest, vx5) dest += stride;
|
||||
COPY (dest, vx6) dest += stride;
|
||||
COPY (dest, vx7)
|
||||
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
|
||||
@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
|
||||
perm0 = vec_mergeh (p, p0);
|
||||
perm1 = vec_mergeh (p, p1);
|
||||
|
||||
#define ADD(dest,src,perm) \
|
||||
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
|
||||
tmp = vec_ld (0, dest); \
|
||||
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
|
||||
tmp3 = vec_adds (tmp2, src); \
|
||||
tmp = vec_packsu (tmp3, tmp3); \
|
||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||
#define ADD(dest,src,perm) \
|
||||
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
|
||||
tmp = vec_ld (0, dest); \
|
||||
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
|
||||
tmp3 = vec_adds (tmp2, src); \
|
||||
tmp = vec_packsu (tmp3, tmp3); \
|
||||
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
|
||||
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
|
||||
|
||||
ADD (dest, vx0, perm0) dest += stride;
|
||||
ADD (dest, vx1, perm1) dest += stride;
|
||||
ADD (dest, vx2, perm0) dest += stride;
|
||||
ADD (dest, vx3, perm1) dest += stride;
|
||||
ADD (dest, vx4, perm0) dest += stride;
|
||||
ADD (dest, vx5, perm1) dest += stride;
|
||||
ADD (dest, vx6, perm0) dest += stride;
|
||||
ADD (dest, vx0, perm0) dest += stride;
|
||||
ADD (dest, vx1, perm1) dest += stride;
|
||||
ADD (dest, vx2, perm0) dest += stride;
|
||||
ADD (dest, vx3, perm1) dest += stride;
|
||||
ADD (dest, vx4, perm0) dest += stride;
|
||||
ADD (dest, vx5, perm1) dest += stride;
|
||||
ADD (dest, vx6, perm0) dest += stride;
|
||||
ADD (dest, vx7, perm1)
|
||||
|
||||
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
|
||||
|
@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s,
|
||||
}
|
||||
|
||||
// The following block could exist as a separate an altivec dct
|
||||
// function. However, if we put it inline, the DCT data can remain
|
||||
// in the vector local variables, as floats, which we'll use during the
|
||||
// quantize step...
|
||||
// function. However, if we put it inline, the DCT data can remain
|
||||
// in the vector local variables, as floats, which we'll use during the
|
||||
// quantize step...
|
||||
{
|
||||
const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
|
||||
const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
|
||||
@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s,
|
||||
z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
|
||||
|
||||
// dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
|
||||
// CONST_BITS-PASS1_BITS);
|
||||
// CONST_BITS-PASS1_BITS);
|
||||
row2 = vec_madd(tmp13, vec_0_765366865, z1);
|
||||
|
||||
// dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
|
||||
// CONST_BITS-PASS1_BITS);
|
||||
// CONST_BITS-PASS1_BITS);
|
||||
row6 = vec_madd(tmp12, vec_1_847759065, z1);
|
||||
|
||||
z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
|
||||
@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s,
|
||||
}
|
||||
|
||||
// Load the bias vector (We add 0.5 to the bias so that we're
|
||||
// rounding when we convert to int, instead of flooring.)
|
||||
// rounding when we convert to int, instead of flooring.)
|
||||
{
|
||||
vector signed int biasInt;
|
||||
const vector float negOneFloat = (vector float)FOUROF(-1.0f);
|
||||
|
@ -80,7 +80,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
|
||||
"pextlb $10, $0, $10 \n\t"
|
||||
"sq $10, 80(%1) \n\t"
|
||||
"pextlb $8, $0, $8 \n\t"
|
||||
"sq $8, 96(%1) \n\t"
|
||||
"sq $8, 96(%1) \n\t"
|
||||
"pextlb $9, $0, $9 \n\t"
|
||||
"sq $9, 112(%1) \n\t"
|
||||
".set pop \n\t"
|
||||
@ -112,7 +112,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
|
||||
asm volatile (
|
||||
".set push \n\t"
|
||||
".set mips3 \n\t"
|
||||
"1: \n\t"
|
||||
"1: \n\t"
|
||||
"ldr $8, 0(%1) \n\t"
|
||||
"add $11, %1, %3 \n\t"
|
||||
"ldl $8, 7(%1) \n\t"
|
||||
@ -133,7 +133,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
|
||||
"bgtz %2, 1b \n\t"
|
||||
".set pop \n\t"
|
||||
: "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
|
||||
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
|
||||
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,32 +15,32 @@
|
||||
#include "../dsputil.h"
|
||||
#include "mmi.h"
|
||||
|
||||
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
|
||||
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
|
||||
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
|
||||
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
|
||||
#define SHIFT_INV_COL (1 + BITS_INV_ACC)
|
||||
|
||||
#define TG1 6518
|
||||
#define TG2 13573
|
||||
#define TG3 21895
|
||||
#define CS4 23170
|
||||
#define TG1 6518
|
||||
#define TG2 13573
|
||||
#define TG3 21895
|
||||
#define CS4 23170
|
||||
|
||||
#define ROUNDER_0 0
|
||||
#define ROUNDER_1 16
|
||||
#define ROUNDER_0 0
|
||||
#define ROUNDER_1 16
|
||||
|
||||
#define TAB_i_04 (32+0)
|
||||
#define TAB_i_17 (32+64)
|
||||
#define TAB_i_26 (32+128)
|
||||
#define TAB_i_35 (32+192)
|
||||
#define TAB_i_04 (32+0)
|
||||
#define TAB_i_17 (32+64)
|
||||
#define TAB_i_26 (32+128)
|
||||
#define TAB_i_35 (32+192)
|
||||
|
||||
#define TG_1_16 (32+256+0)
|
||||
#define TG_2_16 (32+256+16)
|
||||
#define TG_3_16 (32+256+32)
|
||||
#define COS_4_16 (32+256+48)
|
||||
#define TG_1_16 (32+256+0)
|
||||
#define TG_2_16 (32+256+16)
|
||||
#define TG_3_16 (32+256+32)
|
||||
#define COS_4_16 (32+256+48)
|
||||
|
||||
#define CLIPMAX (32+256+64+0)
|
||||
#define CLIPMAX (32+256+64+0)
|
||||
|
||||
static short consttable[] align16 = {
|
||||
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
|
||||
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
|
||||
0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
|
||||
/* rounder 1*/
|
||||
0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
|
||||
@ -75,274 +75,274 @@ static short consttable[] align16 = {
|
||||
|
||||
|
||||
#define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
|
||||
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
|
||||
/*slot*/ \
|
||||
lq($24, 0+taboff, $17); /* r17 = w */ \
|
||||
/*delay slot $16*/ \
|
||||
lq($24, 16+taboff, $18);/* r18 = w */ \
|
||||
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
|
||||
lq($24, 32+taboff, $19);/* r19 = w */ \
|
||||
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
|
||||
lq($24, 48+taboff, $20);/* r20 = w */ \
|
||||
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
|
||||
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
|
||||
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
|
||||
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
|
||||
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
|
||||
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
|
||||
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
|
||||
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
|
||||
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
|
||||
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
|
||||
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
|
||||
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
|
||||
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
|
||||
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
|
||||
/*slot*/ \
|
||||
lq($24, 0+taboff, $17); /* r17 = w */ \
|
||||
/*delay slot $16*/ \
|
||||
lq($24, 16+taboff, $18);/* r18 = w */ \
|
||||
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
|
||||
lq($24, 32+taboff, $19);/* r19 = w */ \
|
||||
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
|
||||
lq($24, 48+taboff, $20);/* r20 = w */ \
|
||||
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
|
||||
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
|
||||
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
|
||||
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
|
||||
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
|
||||
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
|
||||
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
|
||||
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
|
||||
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
|
||||
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
|
||||
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
|
||||
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
|
||||
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
|
||||
\
|
||||
prevh(outreg, $2); \
|
||||
pcpyud($2, $2, $2); \
|
||||
pcpyld($2, outreg, outreg); \
|
||||
prevh(outreg, $2); \
|
||||
pcpyud($2, $2, $2); \
|
||||
pcpyld($2, outreg, outreg); \
|
||||
}
|
||||
|
||||
|
||||
#define DCT_8_INV_COL8() \
|
||||
\
|
||||
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
|
||||
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
|
||||
\
|
||||
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
|
||||
psraw($17, 15, $17); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||
psubh($17, $13, $17); /* r17 = tm35 */ \
|
||||
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
|
||||
psraw($17, 15, $17); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||
psubh($17, $13, $17); /* r17 = tm35 */ \
|
||||
\
|
||||
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
|
||||
psraw($18, 15, $18); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
|
||||
paddh($18, $11, $18); /* r18 = tp35 */ \
|
||||
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
|
||||
psraw($18, 15, $18); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
|
||||
paddh($18, $11, $18); /* r18 = tp35 */ \
|
||||
\
|
||||
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
|
||||
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
|
||||
\
|
||||
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
|
||||
psraw($19, 15, $19); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
|
||||
paddh($19, $9, $19); /* r19 = tp17 */ \
|
||||
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
|
||||
psraw($19, 15, $19); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
|
||||
paddh($19, $9, $19); /* r19 = tp17 */ \
|
||||
\
|
||||
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
|
||||
psraw($20, 15, $20); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
|
||||
psubh($20, $15, $20); /* r20 = tm17 */ \
|
||||
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
|
||||
psraw($20, 15, $20); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
|
||||
psubh($20, $15, $20); /* r20 = tm17 */ \
|
||||
\
|
||||
psubh($19, $18, $3); /* r3 = t1 */ \
|
||||
paddh($20, $17, $16); /* r16 = t2 */ \
|
||||
psubh($20, $17, $23); /* r23 = b3 */ \
|
||||
paddh($19, $18, $20); /* r20 = b0 */ \
|
||||
psubh($19, $18, $3); /* r3 = t1 */ \
|
||||
paddh($20, $17, $16); /* r16 = t2 */ \
|
||||
psubh($20, $17, $23); /* r23 = b3 */ \
|
||||
paddh($19, $18, $20); /* r20 = b0 */ \
|
||||
\
|
||||
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
|
||||
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
|
||||
\
|
||||
paddh($3, $16, $21); /* r21 = t1+t2 */ \
|
||||
psubh($3, $16, $22); /* r22 = t1-t2 */ \
|
||||
paddh($3, $16, $21); /* r21 = t1+t2 */ \
|
||||
psubh($3, $16, $22); /* r22 = t1-t2 */ \
|
||||
\
|
||||
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
|
||||
psraw($21, 15, $21); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $21, $21); /* r21 = b1 */ \
|
||||
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
|
||||
psraw($21, 15, $21); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $21, $21); /* r21 = b1 */ \
|
||||
\
|
||||
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
|
||||
psraw($22, 15, $22); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $22, $22); /* r22 = b2 */ \
|
||||
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
|
||||
psraw($22, 15, $22); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $22, $22); /* r22 = b2 */ \
|
||||
\
|
||||
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
|
||||
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
|
||||
\
|
||||
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
|
||||
psraw($17, 15, $17); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||
psubh($17, $14, $17); /* r17 = tm26 */ \
|
||||
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
|
||||
psraw($17, 15, $17); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
|
||||
psubh($17, $14, $17); /* r17 = tm26 */ \
|
||||
\
|
||||
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
|
||||
psraw($18, 15, $18); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
|
||||
paddh($18, $10, $18); /* r18 = tp26 */ \
|
||||
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
|
||||
psraw($18, 15, $18); \
|
||||
pmfhl_uw($3); /* r3 = 7531 */ \
|
||||
psraw($3, 15, $3); \
|
||||
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
|
||||
paddh($18, $10, $18); /* r18 = tp26 */ \
|
||||
\
|
||||
paddh($8, $12, $2); /* r2 = tp04 */ \
|
||||
psubh($8, $12, $3); /* r3 = tm04 */ \
|
||||
paddh($8, $12, $2); /* r2 = tp04 */ \
|
||||
psubh($8, $12, $3); /* r3 = tm04 */ \
|
||||
\
|
||||
paddh($2, $18, $16); /* r16 = a0 */ \
|
||||
psubh($2, $18, $19); /* r19 = a3 */ \
|
||||
psubh($3, $17, $18); /* r18 = a2 */ \
|
||||
paddh($3, $17, $17); /* r17 = a1 */
|
||||
paddh($2, $18, $16); /* r16 = a0 */ \
|
||||
psubh($2, $18, $19); /* r19 = a3 */ \
|
||||
psubh($3, $17, $18); /* r18 = a2 */ \
|
||||
paddh($3, $17, $17); /* r17 = a1 */
|
||||
|
||||
|
||||
#define DCT_8_INV_COL8_STORE(blk) \
        \
        paddh($16, $20, $2);            /* y0 a0+b0 */ \
        psubh($16, $20, $16);           /* y7 a0-b0 */ \
        psrah($2, SHIFT_INV_COL, $2); \
        psrah($16, SHIFT_INV_COL, $16); \
        sq($2, 0, blk); \
        sq($16, 112, blk); \
        \
        paddh($17, $21, $3);            /* y1 a1+b1 */ \
        psubh($17, $21, $17);           /* y6 a1-b1 */ \
        psrah($3, SHIFT_INV_COL, $3); \
        psrah($17, SHIFT_INV_COL, $17); \
        sq($3, 16, blk); \
        sq($17, 96, blk); \
        \
        paddh($18, $22, $2);            /* y2 a2+b2 */ \
        psubh($18, $22, $18);           /* y5 a2-b2 */ \
        psrah($2, SHIFT_INV_COL, $2); \
        psrah($18, SHIFT_INV_COL, $18); \
        sq($2, 32, blk); \
        sq($18, 80, blk); \
        \
        paddh($19, $23, $3);            /* y3 a3+b3 */ \
        psubh($19, $23, $19);           /* y4 a3-b3 */ \
        psrah($3, SHIFT_INV_COL, $3); \
        psrah($19, SHIFT_INV_COL, $19); \
        sq($3, 48, blk); \
        sq($19, 64, blk);

#define DCT_8_INV_COL8_PMS() \
        paddh($16, $20, $2);            /* y0 a0+b0 */ \
        psubh($16, $20, $20);           /* y7 a0-b0 */ \
        psrah($2, SHIFT_INV_COL, $16); \
        psrah($20, SHIFT_INV_COL, $20); \
        \
        paddh($17, $21, $3);            /* y1 a1+b1 */ \
        psubh($17, $21, $21);           /* y6 a1-b1 */ \
        psrah($3, SHIFT_INV_COL, $17); \
        psrah($21, SHIFT_INV_COL, $21); \
        \
        paddh($18, $22, $2);            /* y2 a2+b2 */ \
        psubh($18, $22, $22);           /* y5 a2-b2 */ \
        psrah($2, SHIFT_INV_COL, $18); \
        psrah($22, SHIFT_INV_COL, $22); \
        \
        paddh($19, $23, $3);            /* y3 a3+b3 */ \
        psubh($19, $23, $23);           /* y4 a3-b3 */ \
        psrah($3, SHIFT_INV_COL, $19); \
        psrah($23, SHIFT_INV_COL, $23);

#define PUT(rs) \
        pminh(rs, $11, $2); \
        pmaxh($2, $0, $2); \
        ppacb($0, $2, $2); \
        sd3(2, 0, 4); \
        __asm__ __volatile__ ("add $4, $5, $4");

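Editor's note: one PUT() step, in rough scalar C. The assumed semantics are that $11 holds the CLIPMAX constant (presumably 255 in each halfword), $4 the destination pointer and $5 the stride; the helper name below is illustrative and not part of the file:

#include <stdint.h>

/* Scalar sketch of PUT(): pminh/pmaxh clamp eight results to 0..255,
 * ppacb packs them to bytes, sd3 stores one 8-byte row, and the final
 * add advances the destination by one line. */
static uint8_t *put_row(const int16_t row[8], uint8_t *dest, int line_size)
{
    for (int i = 0; i < 8; i++) {
        int v = row[i];
        if (v > 255) v = 255;   /* pminh against CLIPMAX */
        if (v < 0)   v = 0;     /* pmaxh against $0      */
        dest[i] = (uint8_t)v;   /* ppacb + sd3           */
    }
    return dest + line_size;    /* add $4, $5, $4        */
}
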
#define DCT_8_INV_COL8_PUT() \
        PUT($16); \
        PUT($17); \
        PUT($18); \
        PUT($19); \
        PUT($23); \
        PUT($22); \
        PUT($21); \
        PUT($20);

#define ADD(rs) \
        ld3(4, 0, 2); \
        pextlb($0, $2, $2); \
        paddh($2, rs, $2); \
        pminh($2, $11, $2); \
        pmaxh($2, $0, $2); \
        ppacb($0, $2, $2); \
        sd3(2, 0, 4); \
        __asm__ __volatile__ ("add $4, $5, $4");

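Editor's note: ADD() is the same store path as PUT(), except it first loads the existing destination row (ld3), widens the bytes to halfwords (pextlb) and adds the IDCT output before clamping. A scalar sketch under the same assumptions as put_row() above:

#include <stdint.h>

/* Scalar sketch of ADD(): load 8 destination bytes, add the residual,
 * clamp to 0..255, pack and store, then advance by the stride. */
static uint8_t *add_row(const int16_t row[8], uint8_t *dest, int line_size)
{
    for (int i = 0; i < 8; i++) {
        int v = dest[i] + row[i];   /* ld3 + pextlb + paddh */
        if (v > 255) v = 255;       /* pminh */
        if (v < 0)   v = 0;         /* pmaxh */
        dest[i] = (uint8_t)v;       /* ppacb + sd3 */
    }
    return dest + line_size;        /* add $4, $5, $4 */
}
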
/*fixme: schedule*/
#define DCT_8_INV_COL8_ADD() \
        ADD($16); \
        ADD($17); \
        ADD($18); \
        ADD($19); \
        ADD($23); \
        ADD($22); \
        ADD($21); \
        ADD($20);

void ff_mmi_idct(int16_t * block)
{
        /* $4 = block */
        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
        lq($24, ROUNDER_0, $8);
        lq($24, ROUNDER_1, $7);
        DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
        DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
        DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
        DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
        DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
        DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
        DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
        DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
        DCT_8_INV_COL8();
        DCT_8_INV_COL8_STORE($4);

        //let savedtemp regs be saved
        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}

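Editor's note: the function above follows the usual separable 8x8 inverse DCT structure: eight 1-D row passes (DCT_8_INV_ROW1, each with the table matching that row's symmetry) followed by one pass over all eight columns (DCT_8_INV_COL8). The sketch below is a minimal floating-point reference of that row/column decomposition only; it assumes nothing about the MMI tables or rounders, and the names idct8_1d/idct8x8_ref are illustrative:

#include <math.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/* Naive 1-D 8-point IDCT: x[n] = 1/2 * sum_k C(k) X[k] cos((2n+1)k*pi/16),
 * C(0) = 1/sqrt(2), C(k) = 1 otherwise. */
static void idct8_1d(const double in[8], double out[8])
{
    for (int n = 0; n < 8; n++) {
        double s = 0.0;
        for (int k = 0; k < 8; k++) {
            double c = (k == 0) ? sqrt(0.5) : 1.0;
            s += c * in[k] * cos((2 * n + 1) * k * M_PI / 16.0);
        }
        out[n] = s / 2.0;
    }
}

/* Row/column driver mirroring the structure of ff_mmi_idct(). */
static void idct8x8_ref(double block[64])
{
    double tmp[8];
    for (int r = 0; r < 8; r++) {            /* the eight DCT_8_INV_ROW1 calls */
        idct8_1d(block + 8 * r, tmp);
        for (int i = 0; i < 8; i++) block[8 * r + i] = tmp[i];
    }
    for (int c = 0; c < 8; c++) {            /* DCT_8_INV_COL8 + STORE */
        double col[8];
        for (int i = 0; i < 8; i++) col[i] = block[8 * i + c];
        idct8_1d(col, tmp);
        for (int i = 0; i < 8; i++) block[8 * i + c] = tmp[i];
    }
}
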
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
        /* $4 = dest, $5 = line_size, $6 = block */
        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
        lq($24, ROUNDER_0, $8);
        lq($24, ROUNDER_1, $7);
        DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
        DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
        DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
        DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
        DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
        DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
        DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
        DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
        DCT_8_INV_COL8();
        lq($24, CLIPMAX, $11);
        DCT_8_INV_COL8_PMS();
        DCT_8_INV_COL8_PUT();

        //let savedtemp regs be saved
        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}

void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
        /* $4 = dest, $5 = line_size, $6 = block */
        __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
        lq($24, ROUNDER_0, $8);
        lq($24, ROUNDER_1, $7);
        DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
        DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
        DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
        DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
        DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
        DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
        DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
        DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
        DCT_8_INV_COL8();
        lq($24, CLIPMAX, $11);
        DCT_8_INV_COL8_PMS();
        DCT_8_INV_COL8_ADD();

        //let savedtemp regs be saved
        __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}

@ -5,148 +5,148 @@

/*
#define r0 $zero
#define r1 $at          //assembler!
#define r2 $v0          //return
#define r3 $v1          //return
#define r4 $a0          //arg
#define r5 $a1          //arg
#define r6 $a2          //arg
#define r7 $a3          //arg
#define r8 $t0          //temp
#define r9 $t1          //temp
#define r10 $t2         //temp
#define r11 $t3         //temp
#define r12 $t4         //temp
#define r13 $t5         //temp
#define r14 $t6         //temp
#define r15 $t7         //temp
#define r16 $s0         //saved temp
#define r17 $s1         //saved temp
#define r18 $s2         //saved temp
#define r19 $s3         //saved temp
#define r20 $s4         //saved temp
#define r21 $s5         //saved temp
#define r22 $s6         //saved temp
#define r23 $s7         //saved temp
#define r24 $t8         //temp
#define r25 $t9         //temp
#define r26 $k0         //kernel
#define r27 $k1         //kernel
#define r28 $gp         //global ptr
#define r29 $sp         //stack ptr
#define r30 $fp         //frame ptr
#define r31 $ra         //return addr
*/

#define lq(base, off, reg) \
        __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) )

#define lq2(mem, reg) \
        __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem))

#define sq(reg, off, base) \
        __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) )

/*
#define ld(base, off, reg) \
        __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")")
*/

#define ld3(base, off, reg) \
        __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))

#define ldr3(base, off, reg) \
        __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))

#define ldl3(base, off, reg) \
        __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))

/*
#define sd(reg, off, base) \
        __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")")
*/
//seems assembler has bug encoding mnemonic 'sd', so DIY
#define sd3(reg, off, base) \
        __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))

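Editor's note: the .word trick above hand-assembles the MIPS I-type load/store word that the toolchain of the day reportedly would not emit. Bits 31..26 hold the opcode (0x3f for sd, which is where 0xfc000000 comes from; 0x37 for ld gives 0xdc000000), bits 25..21 the base register, bits 20..16 the target register and bits 15..0 the offset. A small worked example of that packing, using an illustrative helper that is not part of the header:

#include <stdint.h>
#include <stdio.h>

/* Assemble a MIPS I-type load/store word the same way ld3()/sd3() do. */
static uint32_t mips_itype(uint32_t opcode_bits, unsigned base, unsigned rt,
                           uint16_t offset)
{
    return opcode_bits | (base << 21) | (rt << 16) | offset;
}

int main(void)
{
    /* sd $2, 0($4) -> 0xfc000000 | (4<<21) | (2<<16) | 0 = 0xfc820000 */
    printf("0x%08x\n", mips_itype(0xfc000000u, 4, 2, 0));
    return 0;
}
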
#define sw(reg, off, base) \
        __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")")

#define sq2(reg, mem) \
        __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem)))

#define pinth(rs, rt, rd) \
        __asm__ __volatile__ ("pinth " #rd ", " #rs ", " #rt )

#define phmadh(rs, rt, rd) \
        __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt )

#define pcpyud(rs, rt, rd) \
        __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt )

#define pcpyld(rs, rt, rd) \
        __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt )

#define pcpyh(rt, rd) \
        __asm__ __volatile__ ("pcpyh " #rd ", " #rt )

#define paddw(rs, rt, rd) \
        __asm__ __volatile__ ("paddw " #rd ", " #rs ", " #rt )

#define pextlw(rs, rt, rd) \
        __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt )

#define pextuw(rs, rt, rd) \
        __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt )

#define pextlh(rs, rt, rd) \
        __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt )

#define pextuh(rs, rt, rd) \
        __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt )

#define psubw(rs, rt, rd) \
        __asm__ __volatile__ ("psubw " #rd ", " #rs ", " #rt )

#define psraw(rt, sa, rd) \
        __asm__ __volatile__ ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )

#define ppach(rs, rt, rd) \
        __asm__ __volatile__ ("ppach " #rd ", " #rs ", " #rt )

#define ppacb(rs, rt, rd) \
        __asm__ __volatile__ ("ppacb " #rd ", " #rs ", " #rt )

#define prevh(rt, rd) \
        __asm__ __volatile__ ("prevh " #rd ", " #rt )

#define pmulth(rs, rt, rd) \
        __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt )

#define pmaxh(rs, rt, rd) \
        __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt )

#define pminh(rs, rt, rd) \
        __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt )

#define pinteh(rs, rt, rd) \
        __asm__ __volatile__ ("pinteh " #rd ", " #rs ", " #rt )

#define paddh(rs, rt, rd) \
        __asm__ __volatile__ ("paddh " #rd ", " #rs ", " #rt )

#define psubh(rs, rt, rd) \
        __asm__ __volatile__ ("psubh " #rd ", " #rs ", " #rt )

#define psrah(rt, sa, rd) \
        __asm__ __volatile__ ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )

#define pmfhl_uw(rd) \
        __asm__ __volatile__ ("pmfhl.uw " #rd)

#define pextlb(rs, rt, rd) \
        __asm__ __volatile__ ("pextlb " #rd ", " #rs ", " #rt )

#endif

@ -41,7 +41,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
                level = block[0] * s->c_dc_scale;
        }else {
            qadd = 0;
            level = block[0];
        }
        nCoeffs= 63; //does not allways use zigzag table
    } else {
@ -49,29 +49,29 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
    }

    asm volatile(
        "add    $14, $0, %3     \n\t"
        "pcpyld $8, %0, %0      \n\t"
        "pcpyh  $8, $8          \n\t"   //r8 = qmul
        "pcpyld $9, %1, %1      \n\t"
        "pcpyh  $9, $9          \n\t"   //r9 = qadd
        ".p2align 2             \n\t"
        "1:                     \n\t"
        "lq     $10, 0($14)     \n\t"   //r10 = level
        "addi   $14, $14, 16    \n\t"   //block+=8
        "addi   %2, %2, -8      \n\t"
        "pcgth  $11, $0, $10    \n\t"   //r11 = level < 0 ? -1 : 0
        "pcgth  $12, $10, $0    \n\t"   //r12 = level > 0 ? -1 : 0
        "por    $12, $11, $12   \n\t"
        "pmulth $10, $10, $8    \n\t"
        "paddh  $13, $9, $11    \n\t"
        "pxor   $13, $13, $11   \n\t"   //r13 = level < 0 ? -qadd : qadd
        "pmfhl.uw $11           \n\t"
        "pinteh $10, $11, $10   \n\t"   //r10 = level * qmul
        "paddh  $10, $10, $13   \n\t"
        "pand   $10, $10, $12   \n\t"
        "sq     $10, -16($14)   \n\t"
        "bgez   %2, 1b          \n\t"
        :: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );

    if(s->mb_intra)
        block[0]= level;
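Editor's note: the MMI loop above performs the usual H.263 dequantization on eight coefficients per iteration; the two pcgth sign masks implement the per-coefficient branch and the final pand keeps zero coefficients at zero, after which the if(s->mb_intra) line restores the separately scaled DC value. A scalar sketch of the same arithmetic, with an illustrative helper name that is not part of the file:

#include <stdint.h>

/* Scalar sketch of the dequantization done by the inline MMI loop:
 * level' = level*qmul + sign(level)*qadd, zero stays zero. */
static void dequant_h263_scalar(int16_t *block, int qmul, int qadd, int last)
{
    for (int i = 0; i <= last; i++) {
        int level = block[i];
        if (level < 0)
            level = level * qmul - qadd;
        else if (level > 0)
            level = level * qmul + qadd;
        block[i] = (int16_t)level;   /* zero coefficients are left untouched */
    }
}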