COSMETICS: tabs --> spaces, some prettyprinting

Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Diego Biurrun 2005-12-22 01:10:11 +00:00
parent 50827fcf44
commit bb270c0896
178 changed files with 21340 additions and 21340 deletions

View File

@ -96,7 +96,7 @@ unknown_opt:
if(po->u.func2_arg(opt+1, arg)<0)
goto unknown_opt;
} else {
po->u.func_arg(arg);
po->u.func_arg(arg);
}
} else {
parse_arg_file(opt);
@ -122,8 +122,8 @@ void print_error(const char *filename, int err)
break;
case AVERROR_IO:
fprintf(stderr, "%s: I/O error occured\n"
"Usually that means that input file is truncated and/or corrupted.\n",
filename);
"Usually that means that input file is truncated and/or corrupted.\n",
filename);
break;
case AVERROR_NOMEM:
fprintf(stderr, "%s: memory allocation error occured\n", filename);

174
configure vendored
View File

@ -688,26 +688,26 @@ fi
needmdynamicnopic="no"
if test $targetos = Darwin; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto"
else
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
case "$gcc_version" in
*2.95*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
;;
*[34].*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
esac
gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`"
case "$gcc_version" in
*2.95*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
;;
*[34].*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
*)
CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer"
if test "$lshared" = no; then
needmdynamicnopic="yes"
fi
;;
esac
fi
fi
@ -725,62 +725,62 @@ TUNECPU="generic"
POWERPCMODE="32bits"
if test $tune != "generic"; then
case $tune in
601|ppc601|PowerPC601)
CFLAGS="$CFLAGS -mcpu=601"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
fi
TUNECPU=ppc601
;;
603*|ppc603*|PowerPC603*)
CFLAGS="$CFLAGS -mcpu=603"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
fi
TUNECPU=ppc603
;;
604*|ppc604*|PowerPC604*)
CFLAGS="$CFLAGS -mcpu=604"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
fi
TUNECPU=ppc604
;;
G3|g3|75*|ppc75*|PowerPC75*)
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
fi
TUNECPU=ppc750
;;
G4|g4|745*|ppc745*|PowerPC745*)
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
fi
TUNECPU=ppc7450
;;
74*|ppc74*|PowerPC74*)
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
fi
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
fi
TUNECPU=ppc970
601|ppc601|PowerPC601)
CFLAGS="$CFLAGS -mcpu=601"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC601 but AltiVec enabled!";
fi
TUNECPU=ppc601
;;
603*|ppc603*|PowerPC603*)
CFLAGS="$CFLAGS -mcpu=603"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC603 but AltiVec enabled!";
fi
TUNECPU=ppc603
;;
604*|ppc604*|PowerPC604*)
CFLAGS="$CFLAGS -mcpu=604"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC604 but AltiVec enabled!";
fi
TUNECPU=ppc604
;;
G3|g3|75*|ppc75*|PowerPC75*)
CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt"
if test $altivec = "yes"; then
echo "WARNING: Tuning for PPC75x but AltiVec enabled!";
fi
TUNECPU=ppc750
;;
G4|g4|745*|ppc745*|PowerPC745*)
CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC745x but AltiVec disabled!";
fi
TUNECPU=ppc7450
;;
74*|ppc74*|PowerPC74*)
CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC74xx but AltiVec disabled!";
fi
TUNECPU=ppc7400
;;
G5|g5|970|ppc970|PowerPC970|power4*|Power4*)
CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64"
if test $altivec = "no"; then
echo "WARNING: Tuning for PPC970 but AltiVec disabled!";
fi
TUNECPU=ppc970
POWERPCMODE="64bits"
;;
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
CFLAGS="$CFLAGS -march=$tune"
;;
*)
echo "WARNING: Unknown CPU \"$tune\", ignored."
;;
;;
i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx)
CFLAGS="$CFLAGS -march=$tune"
;;
*)
echo "WARNING: Unknown CPU \"$tune\", ignored."
;;
esac
fi
@ -876,8 +876,8 @@ if test -z "$cross_prefix" ; then
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint32_t i=0x01234567;
return (*((uint8_t*)(&i))) == 0x67;
volatile uint32_t i=0x01234567;
return (*((uint8_t*)(&i))) == 0x67;
}
EOF
@ -912,8 +912,8 @@ $cc -o $TMPE $TMPC 2>/dev/null || inttypes="no"
cat > $TMPC << EOF
#include <inttypes.h>
int main(int argc, char ** argv){
volatile uint_fast64_t i=0x01234567;
return 0;
volatile uint_fast64_t i=0x01234567;
return 0;
}
EOF
@ -1152,10 +1152,10 @@ fi
case "`$cc -v 2>&1 | grep version`" in
*gcc*)
CFLAGS="-Wall -Wno-switch $CFLAGS"
;;
CFLAGS="-Wall -Wno-switch $CFLAGS"
;;
*)
;;
;;
esac
if test "$sdl" = "no" ; then
@ -1163,7 +1163,7 @@ if test "$sdl" = "no" ; then
fi
if test "$debug" = "yes"; then
CFLAGS="-g $CFLAGS"
CFLAGS="-g $CFLAGS"
fi
if test "$optimize" = "small"; then
@ -1173,10 +1173,10 @@ fi
if test "$optimize" = "yes"; then
if test -n "`$cc -v 2>&1 | grep xlc`"; then
CFLAGS="$CFLAGS -O5"
LDFLAGS="$LDFLAGS -O5"
CFLAGS="$CFLAGS -O5"
LDFLAGS="$LDFLAGS -O5"
else
CFLAGS="-O3 $CFLAGS"
CFLAGS="-O3 $CFLAGS"
fi
fi
@ -1793,9 +1793,9 @@ done
diff $TMPH config.h >/dev/null 2>&1
if test $? -ne 0 ; then
mv -f $TMPH config.h
mv -f $TMPH config.h
else
echo "config.h is unchanged"
echo "config.h is unchanged"
fi
rm -f $TMPO $TMPC $TMPE $TMPS $TMPH

View File

@ -25,37 +25,37 @@ main(int argc, char *argv[])
if (argc < 3)
{
printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
exit(1);
printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
exit(1);
}
fd_in = open(argv[1], O_RDONLY);
if (fd_in < 0)
{
perror("Error while opening: ");
exit(1);
perror("Error while opening: ");
exit(1);
}
fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644);
if (fd_out < 0)
{
perror("Error while opening: ");
close(fd_in);
exit(1);
perror("Error while opening: ");
close(fd_in);
exit(1);
}
if (read(fd_in, &buf_in, 8) != 8)
{
printf("Header error\n");
close(fd_in);
close(fd_out);
exit(1);
printf("Header error\n");
close(fd_in);
close(fd_out);
exit(1);
}
if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S')
{
printf("Not a compressed flash file\n");
exit(1);
printf("Not a compressed flash file\n");
exit(1);
}
fstat(fd_in, &statbuf);
@ -75,48 +75,48 @@ main(int argc, char *argv[])
for (i = 0; i < comp_len-4;)
{
int ret, len = read(fd_in, &buf_in, 1024);
int ret, len = read(fd_in, &buf_in, 1024);
dbgprintf("read %d bytes\n", len);
dbgprintf("read %d bytes\n", len);
last_out = zstream.total_out;
last_out = zstream.total_out;
zstream.next_in = &buf_in[0];
zstream.avail_in = len;
zstream.next_out = &buf_out[0];
zstream.avail_out = 1024;
zstream.next_in = &buf_in[0];
zstream.avail_in = len;
zstream.next_out = &buf_out[0];
zstream.avail_out = 1024;
ret = inflate(&zstream, Z_SYNC_FLUSH);
if (ret == Z_STREAM_END || ret == Z_BUF_ERROR)
break;
if (ret != Z_OK)
{
printf("Error while decompressing: %d\n", ret);
inflateEnd(&zstream);
exit(1);
}
ret = inflate(&zstream, Z_SYNC_FLUSH);
if (ret == Z_STREAM_END || ret == Z_BUF_ERROR)
break;
if (ret != Z_OK)
{
printf("Error while decompressing: %d\n", ret);
inflateEnd(&zstream);
exit(1);
}
dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n",
zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
zstream.total_out-last_out);
dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n",
zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out,
zstream.total_out-last_out);
write(fd_out, &buf_out, zstream.total_out-last_out);
write(fd_out, &buf_out, zstream.total_out-last_out);
i += len;
i += len;
}
if (zstream.total_out != uncomp_len-8)
{
printf("Size mismatch (%d != %d), updating header...\n",
zstream.total_out, uncomp_len-8);
printf("Size mismatch (%d != %d), updating header...\n",
zstream.total_out, uncomp_len-8);
buf_in[0] = (zstream.total_out+8) & 0xff;
buf_in[1] = (zstream.total_out+8 >> 8) & 0xff;
buf_in[2] = (zstream.total_out+8 >> 16) & 0xff;
buf_in[3] = (zstream.total_out+8 >> 24) & 0xff;
buf_in[0] = (zstream.total_out+8) & 0xff;
buf_in[1] = (zstream.total_out+8 >> 8) & 0xff;
buf_in[2] = (zstream.total_out+8 >> 16) & 0xff;
buf_in[3] = (zstream.total_out+8 >> 24) & 0xff;
lseek(fd_out, 4, SEEK_SET);
write(fd_out, &buf_in, 4);
lseek(fd_out, 4, SEEK_SET);
write(fd_out, &buf_in, 4);
}
inflateEnd(&zstream);

View File

@ -39,24 +39,24 @@ $ibase = "";
while ($_ = shift) {
if (/^-D(.*)$/) {
if ($1 ne "") {
$flag = $1;
} else {
$flag = shift;
}
$value = "";
($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/);
die "no flag specified for -D\n"
unless $flag ne "";
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
$defs{$flag} = $value;
if ($1 ne "") {
$flag = $1;
} else {
$flag = shift;
}
$value = "";
($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/);
die "no flag specified for -D\n"
unless $flag ne "";
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
$defs{$flag} = $value;
} elsif (/^-/) {
usage();
usage();
} else {
$in = $_, next unless defined $in;
$out = $_, next unless defined $out;
usage();
$in = $_, next unless defined $in;
$out = $_, next unless defined $out;
usage();
}
}
@ -76,13 +76,13 @@ while(defined $inf) {
while(<$inf>) {
# Certain commands are discarded without further processing.
/^\@(?:
[a-z]+index # @*index: useful only in complete manual
|need # @need: useful only in printed manual
|(?:end\s+)?group # @group .. @end group: ditto
|page # @page: ditto
|node # @node: useful only in .info file
|(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents
)\b/x and next;
[a-z]+index # @*index: useful only in complete manual
|need # @need: useful only in printed manual
|(?:end\s+)?group # @group .. @end group: ditto
|page # @page: ditto
|node # @node: useful only in .info file
|(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents
)\b/x and next;
chomp;
@ -92,38 +92,38 @@ while(<$inf>) {
# Identify a man title but keep only the one we are interested in.
/^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do {
if (exists $defs{$1}) {
$fn = $1;
$tl = postprocess($2);
}
next;
if (exists $defs{$1}) {
$fn = $1;
$tl = postprocess($2);
}
next;
};
# Look for blocks surrounded by @c man begin SECTION ... @c man end.
# This really oughta be @ifman ... @end ifman and the like, but such
# would require rev'ing all other Texinfo translators.
/^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do {
$output = 1 if exists $defs{$2};
$output = 1 if exists $defs{$2};
$sect = $1;
next;
next;
};
/^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next;
/^\@c\s+man\s+end/ and do {
$sects{$sect} = "" unless exists $sects{$sect};
$sects{$sect} .= postprocess($section);
$section = "";
$output = 0;
next;
$sects{$sect} = "" unless exists $sects{$sect};
$sects{$sect} .= postprocess($section);
$section = "";
$output = 0;
next;
};
# handle variables
/^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do {
$defs{$1} = $2;
next;
$defs{$1} = $2;
next;
};
/^\@clear\s+([a-zA-Z0-9_-]+)/ and do {
delete $defs{$1};
next;
delete $defs{$1};
next;
};
next unless $output;
@ -135,55 +135,55 @@ while(<$inf>) {
# End-block handler goes up here because it needs to operate even
# if we are skipping.
/^\@end\s+([a-z]+)/ and do {
# Ignore @end foo, where foo is not an operation which may
# cause us to skip, if we are presently skipping.
my $ended = $1;
next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/;
# Ignore @end foo, where foo is not an operation which may
# cause us to skip, if we are presently skipping.
my $ended = $1;
next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/;
die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;
die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;
$endw = pop @endwstack;
$endw = pop @endwstack;
if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) {
$skipping = pop @skstack;
next;
} elsif ($ended =~ /^(?:example|smallexample|display)$/) {
$shift = "";
$_ = ""; # need a paragraph break
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
$_ = "\n=back\n";
$ic = pop @icstack;
} else {
die "unknown command \@end $ended at line $.\n";
}
if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) {
$skipping = pop @skstack;
next;
} elsif ($ended =~ /^(?:example|smallexample|display)$/) {
$shift = "";
$_ = ""; # need a paragraph break
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
$_ = "\n=back\n";
$ic = pop @icstack;
} else {
die "unknown command \@end $ended at line $.\n";
}
};
# We must handle commands which can cause skipping even while we
# are skipping, otherwise we will not process nested conditionals
# correctly.
/^\@ifset\s+([a-zA-Z0-9_-]+)/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifset";
$skipping = 1 unless exists $defs{$1};
next;
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifset";
$skipping = 1 unless exists $defs{$1};
next;
};
/^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifclear";
$skipping = 1 if exists $defs{$1};
next;
push @endwstack, $endw;
push @skstack, $skipping;
$endw = "ifclear";
$skipping = 1 if exists $defs{$1};
next;
};
/^\@(ignore|menu|iftex)\b/ and do {
push @endwstack, $endw;
push @skstack, $skipping;
$endw = $1;
$skipping = 1;
next;
push @endwstack, $endw;
push @skstack, $skipping;
$endw = $1;
$skipping = 1;
next;
};
next if $skipping;
@ -210,85 +210,85 @@ while(<$inf>) {
# Inside a verbatim block, handle @var specially.
if ($shift ne "") {
s/\@var\{([^\}]*)\}/<$1>/g;
s/\@var\{([^\}]*)\}/<$1>/g;
}
# POD doesn't interpret E<> inside a verbatim block.
if ($shift eq "") {
s/</&lt;/g;
s/>/&gt;/g;
s/</&lt;/g;
s/>/&gt;/g;
} else {
s/</&LT;/g;
s/>/&GT;/g;
s/</&LT;/g;
s/>/&GT;/g;
}
# Single line command handlers.
/^\@include\s+(.+)$/ and do {
push @instack, $inf;
$inf = gensym();
push @instack, $inf;
$inf = gensym();
# Try cwd and $ibase.
open($inf, "<" . $1)
or open($inf, "<" . $ibase . "/" . $1)
or die "cannot open $1 or $ibase/$1: $!\n";
next;
# Try cwd and $ibase.
open($inf, "<" . $1)
or open($inf, "<" . $ibase . "/" . $1)
or die "cannot open $1 or $ibase/$1: $!\n";
next;
};
/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
and $_ = "\n=head2 $1\n";
and $_ = "\n=head2 $1\n";
/^\@subsection\s+(.+)$/
and $_ = "\n=head3 $1\n";
and $_ = "\n=head3 $1\n";
# Block command handlers:
/^\@itemize\s+(\@[a-z]+|\*|-)/ and do {
push @endwstack, $endw;
push @icstack, $ic;
$ic = $1;
$_ = "\n=over 4\n";
$endw = "itemize";
push @endwstack, $endw;
push @icstack, $ic;
$ic = $1;
$_ = "\n=over 4\n";
$endw = "itemize";
};
/^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do {
push @endwstack, $endw;
push @icstack, $ic;
if (defined $1) {
$ic = $1 . ".";
} else {
$ic = "1.";
}
$_ = "\n=over 4\n";
$endw = "enumerate";
push @endwstack, $endw;
push @icstack, $ic;
if (defined $1) {
$ic = $1 . ".";
} else {
$ic = "1.";
}
$_ = "\n=over 4\n";
$endw = "enumerate";
};
/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
push @endwstack, $endw;
push @icstack, $ic;
$endw = $1;
$ic = $2;
$ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/;
$ic =~ s/\@(?:code|kbd)/C/;
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
$ic =~ s/\@(?:file)/F/;
$_ = "\n=over 4\n";
push @endwstack, $endw;
push @icstack, $ic;
$endw = $1;
$ic = $2;
$ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/;
$ic =~ s/\@(?:code|kbd)/C/;
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
$ic =~ s/\@(?:file)/F/;
$_ = "\n=over 4\n";
};
/^\@((?:small)?example|display)/ and do {
push @endwstack, $endw;
$endw = $1;
$shift = "\t";
$_ = ""; # need a paragraph break
push @endwstack, $endw;
$endw = $1;
$shift = "\t";
$_ = ""; # need a paragraph break
};
/^\@itemx?\s*(.+)?$/ and do {
if (defined $1) {
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\&LT;$1\&GT;\n";
} else {
$_ = "\n=item $ic\n";
$ic =~ y/A-Ya-y/B-Zb-z/;
$ic =~ s/(\d+)/$1 + 1/eg;
}
if (defined $1) {
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\&LT;$1\&GT;\n";
} else {
$_ = "\n=item $ic\n";
$ic =~ y/A-Ya-y/B-Zb-z/;
$ic =~ s/(\d+)/$1 + 1/eg;
}
};
$section .= $shift.$_."\n";
@ -304,13 +304,13 @@ $sects{NAME} = "$fn \- $tl\n";
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
if(exists $sects{$sect}) {
$head = $sect;
$head =~ s/SEEALSO/SEE ALSO/;
print "=head1 $head\n\n";
print scalar unmunge ($sects{$sect});
print "\n";
$head = $sect;
$head =~ s/SEEALSO/SEE ALSO/;
print "=head1 $head\n\n";
print scalar unmunge ($sects{$sect});
print "\n";
}
}
@ -325,13 +325,13 @@ sub postprocess
# @value{foo} is replaced by whatever 'foo' is defined as.
while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) {
if (! exists $defs{$2}) {
print STDERR "Option $2 not defined\n";
s/\Q$1\E//;
} else {
$value = $defs{$2};
s/\Q$1\E/$value/;
}
if (! exists $defs{$2}) {
print STDERR "Option $2 not defined\n";
s/\Q$1\E//;
} else {
$value = $defs{$2};
s/\Q$1\E/$value/;
}
}
# Formatting commands.
@ -381,9 +381,9 @@ sub postprocess
# processing because otherwise the regexp will choke on formatting
# inside @footnote.
while (/\@footnote/g) {
s/\@footnote\{([^\}]+)\}/[$fnno]/;
add_footnote($1, $fnno);
$fnno++;
s/\@footnote\{([^\}]+)\}/[$fnno]/;
add_footnote($1, $fnno);
$fnno++;
}
return $_;
@ -406,7 +406,7 @@ sub unmunge
sub add_footnote
{
unless (exists $sects{FOOTNOTES}) {
$sects{FOOTNOTES} = "\n=over 4\n\n";
$sects{FOOTNOTES} = "\n=over 4\n\n";
}
$sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++;
@ -419,9 +419,9 @@ sub add_footnote
my $genseq = 0;
sub gensym
{
my $name = "GEN" . $genseq++;
my $ref = \*{$name};
delete $::{$name};
return $ref;
my $name = "GEN" . $genseq++;
my $ref = \*{$name};
delete $::{$name};
return $ref;
}
}

View File

@ -579,7 +579,7 @@ static void do_audio_out(AVFormatContext *s,
break;
}
ret = avcodec_encode_audio(enc, audio_out, size_out,
(short *)buftmp);
(short *)buftmp);
audio_size += ret;
pkt.stream_index= ost->index;
pkt.data= audio_out;
@ -821,10 +821,10 @@ static void do_video_out(AVFormatContext *s,
padcolor);
}
if (enc->pix_fmt != PIX_FMT_YUV420P) {
if (enc->pix_fmt != PIX_FMT_YUV420P) {
int size;
av_free(buf);
av_free(buf);
/* create temporary picture */
size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height);
buf = av_malloc(size);
@ -842,7 +842,7 @@ static void do_video_out(AVFormatContext *s,
goto the_end;
}
}
}
} else if (ost->video_crop) {
picture_crop_temp.data[0] = formatted_picture->data[0] +
(ost->topBand * formatted_picture->linesize[0]) + ost->leftBand;
@ -921,7 +921,7 @@ static void do_video_out(AVFormatContext *s,
avoid any copies. We support temorarily the older
method. */
AVFrame* old_frame = enc->coded_frame;
enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack
enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack
pkt.data= (uint8_t *)final_picture;
pkt.size= sizeof(AVPicture);
if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
@ -930,7 +930,7 @@ static void do_video_out(AVFormatContext *s,
pkt.flags |= PKT_FLAG_KEY;
av_interleaved_write_frame(s, &pkt);
enc->coded_frame = old_frame;
enc->coded_frame = old_frame;
} else {
AVFrame big_picture;
@ -1044,8 +1044,8 @@ static void do_video_stats(AVFormatContext *os, AVOutputStream *ost,
}
static void print_report(AVFormatContext **output_files,
AVOutputStream **ost_table, int nb_ostreams,
int is_last_report)
AVOutputStream **ost_table, int nb_ostreams,
int is_last_report)
{
char buf[1024];
AVOutputStream *ost;
@ -1138,9 +1138,9 @@ static void print_report(AVFormatContext **output_files,
"size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s",
(double)total_size / 1024, ti1, bitrate);
if (verbose > 1)
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
nb_frames_dup, nb_frames_drop);
if (verbose > 1)
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
nb_frames_dup, nb_frames_drop);
if (verbose >= 0)
fprintf(stderr, "%s \r", buf);
@ -1323,7 +1323,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
}
#endif
/* if output time reached then transcode raw format,
encode packets and output them */
encode packets and output them */
if (start_time == 0 || ist->pts >= start_time)
for(i=0;i<nb_ostreams;i++) {
int frame_size;
@ -1898,7 +1898,7 @@ static int av_encode(AVFormatContext **output_files,
/* init pts */
for(i=0;i<nb_istreams;i++) {
ist = ist_table[i];
is = input_files[ist->file_index];
is = input_files[ist->file_index];
ist->pts = 0;
ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q);
if(ist->st->start_time == AV_NOPTS_VALUE)
@ -2273,7 +2273,7 @@ static void opt_frame_rate(const char *arg)
{
if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) {
fprintf(stderr, "Incorrect frame rate\n");
exit(1);
exit(1);
}
}
@ -2289,7 +2289,7 @@ static void opt_frame_crop_top(const char *arg)
exit(1);
}
if ((frame_topBand) >= frame_height){
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_height -= frame_topBand;
@ -2307,7 +2307,7 @@ static void opt_frame_crop_bottom(const char *arg)
exit(1);
}
if ((frame_bottomBand) >= frame_height){
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_height -= frame_bottomBand;
@ -2325,7 +2325,7 @@ static void opt_frame_crop_left(const char *arg)
exit(1);
}
if ((frame_leftBand) >= frame_width){
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_width -= frame_leftBand;
@ -2343,7 +2343,7 @@ static void opt_frame_crop_right(const char *arg)
exit(1);
}
if ((frame_rightBand) >= frame_width){
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n");
exit(1);
}
frame_width -= frame_rightBand;
@ -2364,7 +2364,7 @@ static void opt_frame_size(const char *arg)
#define SCALEBITS 10
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define RGB_TO_Y(r, g, b) \
((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
@ -2462,16 +2462,16 @@ static void opt_frame_aspect_ratio(const char *arg)
p = strchr(arg, ':');
if (p) {
x = strtol(arg, (char **)&arg, 10);
if (arg == p)
y = strtol(arg+1, (char **)&arg, 10);
if (x > 0 && y > 0)
ar = (double)x / (double)y;
if (arg == p)
y = strtol(arg+1, (char **)&arg, 10);
if (x > 0 && y > 0)
ar = (double)x / (double)y;
} else
ar = strtod(arg, (char **)&arg);
if (!ar) {
fprintf(stderr, "Incorrect aspect ratio specification.\n");
exit(1);
exit(1);
}
frame_aspect_ratio = ar;
}
@ -2957,8 +2957,8 @@ static void opt_input_file(const char *filename)
}
frame_height = enc->height;
frame_width = enc->width;
frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height;
frame_pix_fmt = enc->pix_fmt;
frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height;
frame_pix_fmt = enc->pix_fmt;
rfps = ic->streams[i]->r_frame_rate.num;
rfps_base = ic->streams[i]->r_frame_rate.den;
enc->workaround_bugs = workaround_bugs;
@ -3454,7 +3454,7 @@ static void opt_output_file(const char *filename)
oc->timestamp = rec_timestamp;
if (str_title)
if (str_title)
pstrcpy(oc->title, sizeof(oc->title), str_title);
if (str_author)
pstrcpy(oc->author, sizeof(oc->author), str_author);
@ -3490,11 +3490,11 @@ static void opt_output_file(const char *filename)
fprintf(stderr, "Not overwriting - exiting\n");
exit(1);
}
}
else {
}
else {
fprintf(stderr,"File '%s' already exists. Exiting.\n", filename);
exit(1);
}
}
}
}
@ -3579,14 +3579,14 @@ static void prepare_grab(void)
fmt1 = av_find_input_format(video_grab_format);
vp->device = video_device;
vp->channel = video_channel;
vp->standard = video_standard;
vp->standard = video_standard;
if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) {
fprintf(stderr, "Could not find video grab device\n");
exit(1);
}
/* If not enough info to get the stream parameters, we decode the
first frames to get it. */
if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) {
if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) {
fprintf(stderr, "Could not find video grab parameters\n");
exit(1);
}
@ -4276,11 +4276,11 @@ int main(int argc, char **argv)
for(i=0;i<nb_output_files;i++) {
/* maybe av_close_output_file ??? */
AVFormatContext *s = output_files[i];
int j;
int j;
if (!(s->oformat->flags & AVFMT_NOFILE))
url_fclose(&s->pb);
for(j=0;j<s->nb_streams;j++)
av_free(s->streams[j]);
url_fclose(&s->pb);
for(j=0;j<s->nb_streams;j++)
av_free(s->streams[j]);
av_free(s);
}
for(i=0;i<nb_input_files;i++)

View File

@ -1649,7 +1649,7 @@ static int stream_component_open(VideoState *is, int stream_index)
memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
packet_queue_init(&is->audioq);
SDL_PauseAudio(0);
SDL_PauseAudio(0);
break;
case CODEC_TYPE_VIDEO:
is->video_stream = stream_index;
@ -1937,11 +1937,11 @@ static int decode_thread(void *arg)
}
ret = av_read_frame(ic, pkt);
if (ret < 0) {
if (url_ferror(&ic->pb) == 0) {
if (url_ferror(&ic->pb) == 0) {
SDL_Delay(100); /* wait for user event */
continue;
} else
break;
continue;
} else
break;
}
if (pkt->stream_index == is->audio_stream) {
packet_queue_put(&is->audioq, pkt);
@ -2224,23 +2224,23 @@ void event_loop(void)
}
break;
case SDL_MOUSEBUTTONDOWN:
if (cur_stream) {
int ns, hh, mm, ss;
int tns, thh, tmm, tss;
tns = cur_stream->ic->duration/1000000LL;
thh = tns/3600;
tmm = (tns%3600)/60;
tss = (tns%60);
frac = (double)event.button.x/(double)cur_stream->width;
ns = frac*tns;
hh = ns/3600;
mm = (ns%3600)/60;
ss = (ns%60);
fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100,
hh, mm, ss, thh, tmm, tss);
stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0);
}
break;
if (cur_stream) {
int ns, hh, mm, ss;
int tns, thh, tmm, tss;
tns = cur_stream->ic->duration/1000000LL;
thh = tns/3600;
tmm = (tns%3600)/60;
tss = (tns%60);
frac = (double)event.button.x/(double)cur_stream->width;
ns = frac*tns;
hh = ns/3600;
mm = (ns%3600)/60;
ss = (ns%60);
fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100,
hh, mm, ss, thh, tmm, tss);
stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0);
}
break;
case SDL_VIDEORESIZE:
if (cur_stream) {
screen = SDL_SetVideoMode(event.resize.w, event.resize.h, 0,
@ -2452,7 +2452,7 @@ int main(int argc, char **argv)
if (dpy) {
fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy));
fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy));
XCloseDisplay(dpy);
XCloseDisplay(dpy);
}
}
#endif

View File

@ -1204,7 +1204,7 @@ static int http_parse_request(HTTPContext *c)
pstrcpy(c->protocol, sizeof(c->protocol), protocol);
if (ffserver_debug)
http_log("New connection: %s %s\n", cmd, url);
http_log("New connection: %s %s\n", cmd, url);
/* find the filename and the optional info string in the request */
p = url;
@ -2001,7 +2001,7 @@ static int http_prepare_data(HTTPContext *c)
c->fmt_ctx.nb_streams = c->stream->nb_streams;
for(i=0;i<c->fmt_ctx.nb_streams;i++) {
AVStream *st;
AVStream *src;
AVStream *src;
st = av_mallocz(sizeof(AVStream));
st->codec= avcodec_alloc_context();
c->fmt_ctx.streams[i] = st;
@ -2012,8 +2012,8 @@ static int http_prepare_data(HTTPContext *c)
else
src = c->stream->feed->streams[c->stream->feed_streams[i]];
*st = *src;
st->priv_data = 0;
*st = *src;
st->priv_data = 0;
st->codec->frame_number = 0; /* XXX: should be done in
AVStream, not in codec */
/* I'm pretty sure that this is not correct...
@ -2452,8 +2452,8 @@ static int http_receive_data(HTTPContext *c)
s.priv_data = av_mallocz(fmt_in->priv_data_size);
if (!s.priv_data)
goto fail;
} else
s.priv_data = NULL;
} else
s.priv_data = NULL;
if (fmt_in->read_header(&s, 0) < 0) {
av_freep(&s.priv_data);
@ -3868,20 +3868,20 @@ static int parse_ffconfig(const char *filename)
feed->child_argv[i] = av_malloc(30 + strlen(feed->filename));
snprintf(feed->child_argv[i], 30+strlen(feed->filename),
"http://%s:%d/%s",
(my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" :
inet_ntoa(my_http_addr.sin_addr),
ntohs(my_http_addr.sin_port), feed->filename);
snprintf(feed->child_argv[i], 30+strlen(feed->filename),
"http://%s:%d/%s",
(my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" :
inet_ntoa(my_http_addr.sin_addr),
ntohs(my_http_addr.sin_port), feed->filename);
if (ffserver_debug)
{
int j;
fprintf(stdout, "Launch commandline: ");
for (j = 0; j <= i; j++)
fprintf(stdout, "%s ", feed->child_argv[j]);
fprintf(stdout, "\n");
}
if (ffserver_debug)
{
int j;
fprintf(stdout, "Launch commandline: ");
for (j = 0; j <= i; j++)
fprintf(stdout, "%s ", feed->child_argv[j]);
fprintf(stdout, "\n");
}
}
} else if (!strcasecmp(cmd, "ReadOnlyFile")) {
if (feed) {
@ -4074,8 +4074,8 @@ static int parse_ffconfig(const char *filename)
if (stream) {
audio_enc.sample_rate = atoi(arg);
}
} else if (!strcasecmp(cmd, "AudioQuality")) {
get_arg(arg, sizeof(arg), &p);
} else if (!strcasecmp(cmd, "AudioQuality")) {
get_arg(arg, sizeof(arg), &p);
if (stream) {
// audio_enc.quality = atof(arg) * 1000;
}

View File

@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1};
*/
typedef struct EightBpsContext {
AVCodecContext *avctx;
AVFrame pic;
AVCodecContext *avctx;
AVFrame pic;
unsigned char planes;
unsigned char planemap[4];
unsigned char planes;
unsigned char planemap[4];
} EightBpsContext;
@ -59,87 +59,87 @@ typedef struct EightBpsContext {
*/
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
{
    /*
     * Packet layout as consumed below:
     *   - for every coded plane, `height` big-endian 16-bit values giving the
     *     number of compressed bytes of each row,
     *   - followed by the run-length coded rows themselves, plane after plane.
     * Returns buf_size on success and -1 on a failed buffer request or a
     * truncated packet.
     */
    EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
    unsigned char *encoded = (unsigned char *)buf;
    unsigned char *pixptr, *pixptr_end;
    unsigned int height = avctx->height; // Real image height
    unsigned int dlen, p, row;
    unsigned char *lp, *dp;
    unsigned char count;
    unsigned int px_inc;
    unsigned int planes = c->planes;
    unsigned char *planemap = c->planemap;

    if(c->pic.data[0])
        avctx->release_buffer(avctx, &c->pic);

    c->pic.reference = 0;
    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
    if(avctx->get_buffer(avctx, &c->pic) < 0){
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return -1;
    }

    /* The row-length table of all planes must fit in the packet, otherwise
     * the length reads below would run past the end of buf */
    if (buf_size < 0 || (unsigned int)buf_size < planes * (height << 1))
        return -1;

    /* Set data pointer after line lengths */
    dp = encoded + planes * (height << 1);

    /* Ignore alpha plane, don't know what to do with it */
    if (planes == 4)
        planes--;

    /* Distance in bytes between two samples of the same plane in the output;
     * RGBA32 output has one extra byte per pixel */
    px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);

    for (p = 0; p < planes; p++) {
        /* Lines length pointer for this plane */
        lp = encoded + p * (height << 1);

        /* Decode a plane */
        for(row = 0; row < height; row++) {
            pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
            pixptr_end = pixptr + c->pic.linesize[0];
            /* Big-endian 16-bit row length, read bytewise so that no
             * alignment of buf is required */
            dlen = (lp[row * 2] << 8) | lp[row * 2 + 1];
            /* Decode a row of this plane */
            while(dlen > 0) {
                if(dp + 1 >= buf+buf_size) return -1;
                if ((count = *dp++) <= 127) {
                    /* Literal run: count+1 bytes are copied verbatim */
                    count++;
                    dlen -= count + 1;
                    if (pixptr + count * px_inc > pixptr_end)
                        break;
                    if(dp + count > buf+buf_size) return -1;
                    while(count--) {
                        *pixptr = *dp++;
                        pixptr += px_inc;
                    }
                } else {
                    /* Repeat run: the next byte is replicated 257-count times */
                    count = 257 - count;
                    if (pixptr + count * px_inc > pixptr_end)
                        break;
                    while(count--) {
                        *pixptr = *dp;
                        pixptr += px_inc;
                    }
                    dp++;
                    dlen -= 2;
                }
            }
        }
    }

    if (avctx->palctrl) {
        memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
        if (avctx->palctrl->palette_changed) {
            c->pic.palette_has_changed = 1;
            avctx->palctrl->palette_changed = 0;
        } else
            c->pic.palette_has_changed = 0;
    }

    *data_size = sizeof(AVFrame);
    *(AVFrame*)data = c->pic;

    /* always report that the buffer was completely consumed */
    return buf_size;
}
@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
*/
static int decode_init(AVCodecContext *avctx)
{
    /*
     * Selects the output pixel format and the plane layout from
     * avctx->bits_per_sample (8, 24 or 32).
     * Returns 0 on success and -1 on any failure.
     */
    EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;

    c->avctx = avctx;
    avctx->has_b_frames = 0;

    c->pic.data[0] = NULL;

    /* Report bad dimensions with the same negative code as the other
     * failures below, so callers testing for "< 0" see the error */
    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
        return -1;
    }

    switch (avctx->bits_per_sample) {
        case 8:
            avctx->pix_fmt = PIX_FMT_PAL8;
            c->planes = 1;
            c->planemap[0] = 0; // 1st plane is palette indexes
            if (avctx->palctrl == NULL) {
                av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
                return -1;
            }
            break;
        case 24:
            avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
            c->planes = 3;
            c->planemap[0] = 2; // 1st plane is red
            c->planemap[1] = 1; // 2nd plane is green
            c->planemap[2] = 0; // 3rd plane is blue
            break;
        case 32:
            avctx->pix_fmt = PIX_FMT_RGBA32;
            c->planes = 4;
#ifdef WORDS_BIGENDIAN
            c->planemap[0] = 1; // 1st plane is red
            c->planemap[1] = 2; // 2nd plane is green
            c->planemap[2] = 3; // 3rd plane is blue
            c->planemap[3] = 0; // 4th plane is alpha???
#else
            c->planemap[0] = 2; // 1st plane is red
            c->planemap[1] = 1; // 2nd plane is green
            c->planemap[2] = 0; // 3rd plane is blue
            c->planemap[3] = 3; // 4th plane is alpha???
#endif
            break;
        default:
            av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
            return -1;
    }

    return 0;
}
@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx)
*/
static int decode_end(AVCodecContext *avctx)
{
    EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;

    /* Give back the frame still held from the last decode_frame() call, if any */
    if (c->pic.data[0])
        avctx->release_buffer(avctx, &c->pic);

    return 0;
}
/* Codec registration entry. The initializer is positional: the meaning of
 * each slot is fixed by the AVCodec declaration, which lives outside this
 * file, so every value must appear exactly once and in this order. */
AVCodec eightbps_decoder = {
    "8bps",
    CODEC_TYPE_VIDEO,
    CODEC_ID_8BPS,
    sizeof(EightBpsContext),
    decode_init,
    NULL,
    decode_end,
    decode_frame,
    CODEC_CAP_DR1,
};

View File

@ -58,11 +58,11 @@ typedef struct AC3DecodeState {
a52_state_t* (*a52_init)(uint32_t mm_accel);
sample_t* (*a52_samples)(a52_state_t * state);
int (*a52_syncinfo)(uint8_t * buf, int * flags,
int * sample_rate, int * bit_rate);
int * sample_rate, int * bit_rate);
int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags,
sample_t * level, sample_t bias);
sample_t * level, sample_t bias);
void (*a52_dynrng)(a52_state_t * state,
sample_t (* call) (sample_t, void *), void * data);
sample_t (* call) (sample_t, void *), void * data);
int (*a52_block)(a52_state_t * state);
void (*a52_free)(a52_state_t * state);
@ -105,7 +105,7 @@ static int a52_decode_init(AVCodecContext *avctx)
if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo
|| !s->a52_frame || !s->a52_block || !s->a52_free)
{
dlclose(s->handle);
dlclose(s->handle);
return -1;
}
#else
@ -130,22 +130,22 @@ static int a52_decode_init(AVCodecContext *avctx)
/**
 * Map the raw bit pattern of a biased float sample to a signed 16-bit value.
 *
 * 0x43c00000 is the IEEE-754 single-precision encoding of 384.0f; in that
 * binade one mantissa step equals 1/32768, so subtracting the base yields the
 * sample scaled to 16 bits. Patterns outside [0x43bf8000, 0x43c07fff] are
 * clamped to -32768 / 32767.
 * NOTE(review): presumably the decoder is asked to add a 384.0 bias to its
 * output -- confirm at the call site.
 */
static inline int blah (int32_t i)
{
    if (i > 0x43c07fff)
        return 32767;
    else if (i < 0x43bf8000)
        return -32768;
    return i - 0x43c00000;
}

/**
 * Convert planar float samples to interleaved signed 16-bit samples.
 *
 * @param _f        nchannels consecutive planes of 256 floats each
 * @param s16       output, 256 * nchannels interleaved samples
 * @param nchannels number of channel planes to convert
 */
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
{
    int i, j, c;
    /* Reinterpret the float bits through a union instead of casting the
     * pointer to int32_t*, which would violate the aliasing rules */
    union { float f; int32_t i; } bits; // XXX assumes IEEE float format

    j = 0;
    nchannels *= 256;
    for (i = 0; i < 256; i++) {
        for (c = 0; c < nchannels; c += 256) {
            bits.f = _f[i + c];
            s16[j++] = blah (bits.i);
        }
    }
}
@ -164,7 +164,7 @@ static int a52_decode_frame(AVCodecContext *avctx,
short *out_samples = data;
float level;
static const int ac3_channels[8] = {
2, 1, 2, 3, 3, 4, 4, 5
2, 1, 2, 3, 3, 4, 4, 5
};
buf_ptr = buf;
@ -186,20 +186,20 @@ static int a52_decode_frame(AVCodecContext *avctx,
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
s->inbuf_ptr--;
} else {
s->frame_size = len;
s->frame_size = len;
/* update codec info */
avctx->sample_rate = sample_rate;
s->channels = ac3_channels[s->flags & 7];
if (s->flags & A52_LFE)
s->channels++;
if (avctx->channels == 0)
/* No specific number of channel requested */
avctx->channels = s->channels;
else if (s->channels < avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
avctx->channels = s->channels;
}
avctx->bit_rate = bit_rate;
s->channels++;
if (avctx->channels == 0)
/* No specific number of channel requested */
avctx->channels = s->channels;
else if (s->channels < avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
avctx->channels = s->channels;
}
avctx->bit_rate = bit_rate;
}
}
} else if (len < s->frame_size) {

View File

@ -54,23 +54,23 @@ stream_samples_t samples;
/**
 * Map the raw bit pattern of a biased float sample to a signed 16-bit value.
 *
 * 0x43c00000 is the IEEE-754 single-precision encoding of 384.0f; in that
 * binade one mantissa step equals 1/32768, so subtracting the base yields the
 * sample scaled to 16 bits. Patterns outside [0x43bf8000, 0x43c07fff] are
 * clamped to -32768 / 32767.
 * NOTE(review): presumably the decoder is asked to add a 384.0 bias to its
 * output -- confirm at the call site.
 */
static inline int blah (int32_t i)
{
    if (i > 0x43c07fff)
        return 32767;
    else if (i < 0x43bf8000)
        return -32768;
    else
        return i - 0x43c00000;
}

/**
 * Convert planar float samples to interleaved signed 16-bit samples.
 *
 * @param _f        nchannels consecutive planes of 256 floats each
 * @param s16       output, 256 * nchannels interleaved samples
 * @param nchannels number of channel planes to convert
 */
static inline void float_to_int (float * _f, int16_t * s16, int nchannels)
{
    int i, j, c;
    /* Reinterpret the float bits through a union instead of casting the
     * pointer to int32_t*, which would violate the aliasing rules */
    union { float f; int32_t i; } bits; // XXX assumes IEEE float format

    j = 0;
    nchannels *= 256;
    for (i = 0; i < 256; i++) {
        for (c = 0; c < nchannels; c += 256) {
            bits.f = _f[i + c];
            s16[j++] = blah (bits.i);
        }
    }
}
@ -89,7 +89,7 @@ static int ac3_decode_frame(AVCodecContext *avctx,
short *out_samples = data;
float level;
static const int ac3_channels[8] = {
2, 1, 2, 3, 3, 4, 4, 5
2, 1, 2, 3, 3, 4, 4, 5
};
buf_ptr = buf;
@ -111,20 +111,20 @@ static int ac3_decode_frame(AVCodecContext *avctx,
memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1);
s->inbuf_ptr--;
} else {
s->frame_size = len;
s->frame_size = len;
/* update codec info */
avctx->sample_rate = sample_rate;
s->channels = ac3_channels[s->flags & 7];
if (s->flags & AC3_LFE)
s->channels++;
if (avctx->channels == 0)
/* No specific number of channel requested */
avctx->channels = s->channels;
else if (s->channels < avctx->channels) {
s->channels++;
if (avctx->channels == 0)
/* No specific number of channel requested */
avctx->channels = s->channels;
else if (s->channels < avctx->channels) {
av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len);
avctx->channels = s->channels;
}
avctx->bit_rate = bit_rate;
avctx->channels = s->channels;
}
avctx->bit_rate = bit_rate;
}
}
} else if (len < s->frame_size) {

View File

@ -337,8 +337,8 @@ static void fft_init(int ln)
/* do a 2^n point complex fft on 2^ln points. */
static void fft(IComplex *z, int ln)
{
int j, l, np, np2;
int nblocks, nloops;
int j, l, np, np2;
int nblocks, nloops;
register IComplex *p,*q;
int tmp_re, tmp_im;
@ -472,7 +472,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
exp_strategy[i][ch] = EXP_REUSE;
}
if (is_lfe)
return;
return;
/* now select the encoding strategy type : if exponents are often
recoded, we use a coarse encoding */
@ -493,7 +493,7 @@ static void compute_exp_strategy(uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNEL
exp_strategy[i][ch] = EXP_D15;
break;
}
i = j;
i = j;
}
}
@ -553,9 +553,9 @@ static int encode_exp(uint8_t encoded_exp[N/2],
/* Decrease the delta between each groups to within 2
* so that they can be differentially encoded */
for (i=1;i<=nb_groups;i++)
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2);
for (i=nb_groups-1;i>=0;i--)
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2);
/* now we have the exponent values the decoder will see */
encoded_exp[0] = exp1[0];
@ -708,8 +708,8 @@ static int compute_bit_allocation(AC3EncodeContext *s,
if(i==0) frame_bits += 4;
}
frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */
if (s->lfe)
frame_bits++; /* lfeexpstr */
if (s->lfe)
frame_bits++; /* lfeexpstr */
for(ch=0;ch<s->nb_channels;ch++) {
if (exp_strategy[i][ch] != EXP_REUSE)
frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */
@ -736,11 +736,11 @@ static int compute_bit_allocation(AC3EncodeContext *s,
csnroffst = s->csnroffst;
while (csnroffst >= 0 &&
bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
csnroffst -= SNR_INC1;
bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0)
csnroffst -= SNR_INC1;
if (csnroffst < 0) {
av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n");
return -1;
av_log(NULL, AV_LOG_ERROR, "Yack, Error !!!\n");
return -1;
}
while ((csnroffst + SNR_INC1) <= 63 &&
bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits,
@ -815,19 +815,19 @@ static int AC3_encode_init(AVCodecContext *avctx)
int i, j, ch;
float alpha;
static const uint8_t acmod_defs[6] = {
0x01, /* C */
0x02, /* L R */
0x03, /* L C R */
0x06, /* L R SL SR */
0x07, /* L C R SL SR */
0x07, /* L C R SL SR (+LFE) */
0x01, /* C */
0x02, /* L R */
0x03, /* L C R */
0x06, /* L R SL SR */
0x07, /* L C R SL SR */
0x07, /* L C R SL SR (+LFE) */
};
avctx->frame_size = AC3_FRAME_SIZE;
/* number of channels */
if (channels < 1 || channels > 6)
return -1;
return -1;
s->acmod = acmod_defs[channels - 1];
s->lfe = (channels == 6) ? 1 : 0;
s->nb_all_channels = channels;
@ -871,7 +871,7 @@ static int AC3_encode_init(AVCodecContext *avctx)
s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37;
}
if (s->lfe) {
s->nb_coefs[s->lfe_channel] = 7; /* fixed */
s->nb_coefs[s->lfe_channel] = 7; /* fixed */
}
/* initial snr offset */
s->csnroffst = 40;
@ -907,9 +907,9 @@ static void output_frame_header(AC3EncodeContext *s, unsigned char *frame)
put_bits(&s->pb, 3, s->bsmod);
put_bits(&s->pb, 3, s->acmod);
if ((s->acmod & 0x01) && s->acmod != 0x01)
put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */
if (s->acmod & 0x04)
put_bits(&s->pb, 2, 1); /* XXX -6 dB */
put_bits(&s->pb, 2, 1); /* XXX -6 dB */
if (s->acmod == 0x02)
put_bits(&s->pb, 2, 0); /* surround not indicated */
put_bits(&s->pb, 1, s->lfe); /* LFE */
@ -995,20 +995,20 @@ static void output_audio_block(AC3EncodeContext *s,
if (s->acmod == 2)
{
if(block_num==0)
{
/* first block must define rematrixing (rematstr) */
put_bits(&s->pb, 1, 1);
if(block_num==0)
{
/* first block must define rematrixing (rematstr) */
put_bits(&s->pb, 1, 1);
/* dummy rematrixing rematflg(1:4)=0 */
for (rbnd=0;rbnd<4;rbnd++)
put_bits(&s->pb, 1, 0);
}
else
{
/* no matrixing (but should be used in the future) */
put_bits(&s->pb, 1, 0);
}
/* dummy rematrixing rematflg(1:4)=0 */
for (rbnd=0;rbnd<4;rbnd++)
put_bits(&s->pb, 1, 0);
}
else
{
/* no matrixing (but should be used in the future) */
put_bits(&s->pb, 1, 0);
}
}
#if defined(DEBUG)
@ -1023,7 +1023,7 @@ static void output_audio_block(AC3EncodeContext *s,
}
if (s->lfe) {
put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]);
}
for(ch=0;ch<s->nb_channels;ch++) {
@ -1047,7 +1047,7 @@ static void output_audio_block(AC3EncodeContext *s,
group_size = 4;
break;
}
nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size);
p = encoded_exp[ch];
/* first exponent */
@ -1075,8 +1075,8 @@ static void output_audio_block(AC3EncodeContext *s,
put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2);
}
if (ch != s->lfe_channel)
put_bits(&s->pb, 2, 0); /* no gain range info */
if (ch != s->lfe_channel)
put_bits(&s->pb, 2, 0); /* no gain range info */
}
/* bit allocation info */

View File

@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c,
}
static int adpcm_encode_frame(AVCodecContext *avctx,
unsigned char *frame, int buf_size, void *data)
unsigned char *frame, int buf_size, void *data)
{
int n, i, st;
short *samples;
@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx)
switch(avctx->codec->id) {
case CODEC_ID_ADPCM_CT:
c->status[0].step = c->status[1].step = 511;
break;
c->status[0].step = c->status[1].step = 511;
break;
default:
break;
}
@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
predictor = c->predictor;
/* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
if(sign)
predictor = ((predictor * 254) >> 8) - diff;
predictor = ((predictor * 254) >> 8) - diff;
else
predictor = ((predictor * 254) >> 8) + diff;
predictor = ((predictor * 254) >> 8) + diff;
/* calculate new step and clamp it to range 511..32767 */
new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
c->step = new_step;
if(c->step < 511)
c->step = 511;
c->step = 511;
if(c->step > 32767)
c->step = 32767;
c->step = 32767;
CLAMP_TO_SHORT(predictor);
c->predictor = predictor;
@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in,
}
static int adpcm_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
uint8_t *buf, int buf_size)
void *data, int *data_size,
uint8_t *buf, int buf_size)
{
ADPCMContext *c = avctx->priv_data;
ADPCMChannelStatus *cs;
@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
cs->predictor -= 0x10000;
CLAMP_TO_SHORT(cs->predictor);
// XXX: is this correct ??: *samples++ = cs->predictor;
// XXX: is this correct ??: *samples++ = cs->predictor;
cs->step_index = *src++;
if (cs->step_index < 0) cs->step_index = 0;
@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
}
for(m=4; src < (buf + buf_size);) {
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
if (st)
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3);
*samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3);
if (st) {
if (st) {
*samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3);
if (!--m) {
m=4;
src+=4;
}
}
src++;
}
if (!--m) {
m=4;
src+=4;
}
}
src++;
}
break;
case CODEC_ID_ADPCM_4XM:
cs = &(c->status[0]);
@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
m= (buf_size - (src - buf))>>st;
for(i=0; i<m; i++) {
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
if (st)
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
*samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
if (st)
if (st)
*samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
}
}
src += m<<st;
@ -958,7 +958,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
}
break;
case CODEC_ID_ADPCM_CT:
while (src < buf + buf_size) {
while (src < buf + buf_size) {
if (st) {
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
(src[0] >> 4) & 0x0F);
@ -970,78 +970,78 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
*samples++ = adpcm_ct_expand_nibble(&c->status[0],
src[0] & 0x0F);
}
src++;
src++;
}
break;
case CODEC_ID_ADPCM_SWF:
{
GetBitContext gb;
const int *table;
int k0, signmask;
int size = buf_size*8;
GetBitContext gb;
const int *table;
int k0, signmask;
int size = buf_size*8;
init_get_bits(&gb, buf, size);
init_get_bits(&gb, buf, size);
// first frame, read bits & initial values
if (!c->nb_bits)
{
c->nb_bits = get_bits(&gb, 2)+2;
// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
}
// first frame, read bits & initial values
if (!c->nb_bits)
{
c->nb_bits = get_bits(&gb, 2)+2;
// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
}
table = swf_index_tables[c->nb_bits-2];
k0 = 1 << (c->nb_bits-2);
signmask = 1 << (c->nb_bits-1);
table = swf_index_tables[c->nb_bits-2];
k0 = 1 << (c->nb_bits-2);
signmask = 1 << (c->nb_bits-1);
while (get_bits_count(&gb) <= size)
{
int i;
while (get_bits_count(&gb) <= size)
{
int i;
c->nb_samples++;
// wrap around at every 4096 samples...
if ((c->nb_samples & 0xfff) == 1)
{
for (i = 0; i <= st; i++)
{
*samples++ = c->status[i].predictor = get_sbits(&gb, 16);
c->status[i].step_index = get_bits(&gb, 6);
}
}
c->nb_samples++;
// wrap around at every 4096 samples...
if ((c->nb_samples & 0xfff) == 1)
{
for (i = 0; i <= st; i++)
{
*samples++ = c->status[i].predictor = get_sbits(&gb, 16);
c->status[i].step_index = get_bits(&gb, 6);
}
}
// similar to IMA adpcm
for (i = 0; i <= st; i++)
{
int delta = get_bits(&gb, c->nb_bits);
int step = step_table[c->status[i].step_index];
long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
int k = k0;
// similar to IMA adpcm
for (i = 0; i <= st; i++)
{
int delta = get_bits(&gb, c->nb_bits);
int step = step_table[c->status[i].step_index];
long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
int k = k0;
do {
if (delta & k)
vpdiff += step;
step >>= 1;
k >>= 1;
} while(k);
vpdiff += step;
do {
if (delta & k)
vpdiff += step;
step >>= 1;
k >>= 1;
} while(k);
vpdiff += step;
if (delta & signmask)
c->status[i].predictor -= vpdiff;
else
c->status[i].predictor += vpdiff;
if (delta & signmask)
c->status[i].predictor -= vpdiff;
else
c->status[i].predictor += vpdiff;
c->status[i].step_index += table[delta & (~signmask)];
c->status[i].step_index += table[delta & (~signmask)];
c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
*samples++ = c->status[i].predictor;
}
}
*samples++ = c->status[i].predictor;
}
}
// src += get_bits_count(&gb)*8;
src += size;
// src += get_bits_count(&gb)*8;
src += size;
break;
break;
}
case CODEC_ID_ADPCM_YAMAHA:
while (src < buf + buf_size) {

View File

@ -35,7 +35,7 @@ void avcodec_register_all(void)
static int inited = 0;
if (inited != 0)
return;
return;
inited = 1;
/* encoders */

View File

@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x)
} *) (p))->__l) = l; \
} while (0)
struct unaligned_long { uint64_t l; } __attribute__((packed));
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
#define uldq(a) (((const struct unaligned_long *) (a))->l)
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
#define uldq(a) (((const struct unaligned_long *) (a))->l)
#if GNUC_PREREQ(3,3)
#define prefetch(p) __builtin_prefetch((p), 0, 1)
#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
#define cmpbge __builtin_alpha_cmpbge
#define cmpbge __builtin_alpha_cmpbge
/* Avoid warnings. */
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
#define zap __builtin_alpha_zap
#define zapnot __builtin_alpha_zapnot
#define amask __builtin_alpha_amask
#define implver __builtin_alpha_implver
#define rpcc __builtin_alpha_rpcc
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
#define zap __builtin_alpha_zap
#define zapnot __builtin_alpha_zapnot
#define amask __builtin_alpha_amask
#define implver __builtin_alpha_implver
#define rpcc __builtin_alpha_rpcc
#else
#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; })
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
#endif
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
#define minub8 __builtin_alpha_minub8
#define minsb8 __builtin_alpha_minsb8
#define minuw4 __builtin_alpha_minuw4
#define minsw4 __builtin_alpha_minsw4
#define maxub8 __builtin_alpha_maxub8
#define maxsb8 __builtin_alpha_maxsb8
#define maxuw4 __builtin_alpha_maxuw4
#define maxsw4 __builtin_alpha_maxsw4
#define perr __builtin_alpha_perr
#define pklb __builtin_alpha_pklb
#define pkwb __builtin_alpha_pkwb
#define unpkbl __builtin_alpha_unpkbl
#define unpkbw __builtin_alpha_unpkbw
#define minub8 __builtin_alpha_minub8
#define minsb8 __builtin_alpha_minsb8
#define minuw4 __builtin_alpha_minuw4
#define minsw4 __builtin_alpha_minsw4
#define maxub8 __builtin_alpha_maxub8
#define maxsb8 __builtin_alpha_maxsb8
#define maxuw4 __builtin_alpha_maxuw4
#define maxsw4 __builtin_alpha_maxsw4
#define perr __builtin_alpha_perr
#define pklb __builtin_alpha_pklb
#define pkwb __builtin_alpha_pkwb
#define unpkbl __builtin_alpha_unpkbl
#define unpkbw __builtin_alpha_unpkbw
#else
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#endif
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
#include <c_asm.h>
#define ldq(p) (*(const uint64_t *) (p))
@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
#define uldq(a) (*(const __unaligned uint64_t *) (a))
#define uldq(a) (*(const __unaligned uint64_t *) (a))
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b)
#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b)
@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b)
#define amask(a) asm ("amask %a0,%v0", a)
#define implver() asm ("implver %v0")
#define rpcc() asm ("rpcc %v0")
#define rpcc() asm ("rpcc %v0")
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b)
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b)
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b)

View File

@ -71,7 +71,7 @@ $unaligned:
addq a1, a2, a1
nop
ldq_u t4, 0(a1)
ldq_u t4, 0(a1)
ldq_u t5, 8(a1)
addq a1, a2, a1
nop
@ -120,20 +120,20 @@ $aligned:
addq a1, a2, a1
ldq t3, 0(a1)
addq a0, a2, t4
addq a1, a2, a1
addq t4, a2, t5
subq a3, 4, a3
addq a0, a2, t4
addq a1, a2, a1
addq t4, a2, t5
subq a3, 4, a3
stq t0, 0(a0)
addq t5, a2, t6
stq t1, 0(t4)
addq t6, a2, a0
stq t0, 0(a0)
addq t5, a2, t6
stq t1, 0(t4)
addq t6, a2, a0
stq t2, 0(t5)
stq t3, 0(t6)
stq t2, 0(t5)
stq t3, 0(t6)
bne a3, $aligned
bne a3, $aligned
ret
.end put_pixels_axp_asm

View File

@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
return result;
}
#if 0 /* now done in assembly */
#if 0 /* now done in assembly */
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
{
int result = 0;

View File

@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block)
stq(v, block + 1 * 4);
stq(w, block + 2 * 4);
stq(w, block + 3 * 4);
block += 4 * 4;
block += 4 * 4;
}
} else {
for (i = 0; i < 8; i++)

View File

@ -301,7 +301,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
static int amr_nb_encode_frame(AVCodecContext *avctx,
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
{
short serial_data[250] = {0};
@ -440,7 +440,7 @@ static int amr_nb_decode_frame(AVCodecContext * avctx,
}
static int amr_nb_encode_frame(AVCodecContext *avctx,
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
{
AMRContext *s = (AMRContext*)avctx->priv_data;
int written;
@ -584,7 +584,7 @@ static int amr_wb_encode_close(AVCodecContext * avctx)
}
static int amr_wb_encode_frame(AVCodecContext *avctx,
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
{
AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);

View File

@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
#endif
c->idct_put= j_rev_dct_ARM_put;
c->idct_add= j_rev_dct_ARM_add;
c->idct = j_rev_dct_ARM;
c->idct = j_rev_dct_ARM;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
} else if (idct_algo==FF_IDCT_SIMPLEARM){
c->idct_put= simple_idct_ARM_put;
c->idct_add= simple_idct_ARM_add;
c->idct = simple_idct_ARM;
c->idct_permutation_type= FF_NO_IDCT_PERM;
c->idct_put= simple_idct_ARM_put;
c->idct_add= simple_idct_ARM_add;
c->idct = simple_idct_ARM;
c->idct_permutation_type= FF_NO_IDCT_PERM;
#ifdef HAVE_IPP
} else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
#else

View File

@ -138,10 +138,10 @@ void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
mm_flags = mm_support();
if (avctx->dsp_mask) {
if (avctx->dsp_mask & FF_MM_FORCE)
mm_flags |= (avctx->dsp_mask & 0xffff);
else
mm_flags &= ~(avctx->dsp_mask & 0xffff);
if (avctx->dsp_mask & FF_MM_FORCE)
mm_flags |= (avctx->dsp_mask & 0xffff);
else
mm_flags &= ~(avctx->dsp_mask & 0xffff);
}
if (!(mm_flags & MM_IWMMXT)) return;

View File

@ -1,6 +1,6 @@
/*
C-like prototype :
void j_rev_dct_ARM(DCTBLOCK data)
void j_rev_dct_ARM(DCTBLOCK data)
With DCTBLOCK being a pointer to an array of 64 'signed shorts'
@ -51,336 +51,336 @@
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID 48
.text
.align
.text
.align
.global j_rev_dct_ARM
.global j_rev_dct_ARM
j_rev_dct_ARM:
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
stmdb sp!, { r4 - r12, lr } @ all callee saved regs
sub sp, sp, #4 @ reserve some space on the stack
str r0, [ sp ] @ save the DCT pointer to the stack
sub sp, sp, #4 @ reserve some space on the stack
str r0, [ sp ] @ save the DCT pointer to the stack
mov lr, r0 @ lr = pointer to the current row
mov r12, #8 @ r12 = row-counter
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
mov lr, r0 @ lr = pointer to the current row
mov r12, #8 @ r12 = row-counter
add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
row_loop:
ldrsh r0, [lr, # 0] @ r0 = 'd0'
ldrsh r1, [lr, # 8] @ r1 = 'd1'
ldrsh r0, [lr, # 0] @ r0 = 'd0'
ldrsh r1, [lr, # 8] @ r1 = 'd1'
@ Optimization for row that have all items except the first set to 0
@ (this works as the DCTELEMS are always 4-byte aligned)
ldr r5, [lr, # 0]
ldr r2, [lr, # 4]
ldr r3, [lr, # 8]
ldr r4, [lr, #12]
orr r3, r3, r4
orr r3, r3, r2
orrs r5, r3, r5
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
orrs r2, r3, r1
beq empty_row
@ Optimization for row that have all items except the first set to 0
@ (this works as the DCTELEMS are always 4-byte aligned)
ldr r5, [lr, # 0]
ldr r2, [lr, # 4]
ldr r3, [lr, # 8]
ldr r4, [lr, #12]
orr r3, r3, r4
orr r3, r3, r2
orrs r5, r3, r5
beq end_of_row_loop @ nothing to be done as ALL of them are '0'
orrs r2, r3, r1
beq empty_row
ldrsh r2, [lr, # 2] @ r2 = 'd2'
ldrsh r4, [lr, # 4] @ r4 = 'd4'
ldrsh r6, [lr, # 6] @ r6 = 'd6'
ldrsh r2, [lr, # 2] @ r2 = 'd2'
ldrsh r4, [lr, # 4] @ r4 = 'd4'
ldrsh r6, [lr, # 6] @ r6 = 'd6'
ldr r3, [r11, #FIX_0_541196100_ID]
add r7, r2, r6
ldr r5, [r11, #FIX_M_1_847759065_ID]
mul r7, r3, r7 @ r7 = z1
ldr r3, [r11, #FIX_0_765366865_ID]
mla r6, r5, r6, r7 @ r6 = tmp2
add r5, r0, r4 @ r5 = tmp0
mla r2, r3, r2, r7 @ r2 = tmp3
sub r3, r0, r4 @ r3 = tmp1
ldr r3, [r11, #FIX_0_541196100_ID]
add r7, r2, r6
ldr r5, [r11, #FIX_M_1_847759065_ID]
mul r7, r3, r7 @ r7 = z1
ldr r3, [r11, #FIX_0_765366865_ID]
mla r6, r5, r6, r7 @ r6 = tmp2
add r5, r0, r4 @ r5 = tmp0
mla r2, r3, r2, r7 @ r2 = tmp3
sub r3, r0, r4 @ r3 = tmp1
add r0, r2, r5, lsl #13 @ r0 = tmp10
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
add r4, r6, r3, lsl #13 @ r4 = tmp11
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
add r0, r2, r5, lsl #13 @ r0 = tmp10
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
add r4, r6, r3, lsl #13 @ r4 = tmp11
rsb r3, r6, r3, lsl #13 @ r3 = tmp12
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
ldrsh r3, [lr, #10] @ r3 = 'd3'
ldrsh r5, [lr, #12] @ r5 = 'd5'
ldrsh r7, [lr, #14] @ r7 = 'd7'
ldrsh r3, [lr, #10] @ r3 = 'd3'
ldrsh r5, [lr, #12] @ r5 = 'd5'
ldrsh r7, [lr, #14] @ r7 = 'd7'
add r0, r3, r5 @ r0 = 'z2'
add r2, r1, r7 @ r2 = 'z1'
add r4, r3, r7 @ r4 = 'z3'
add r6, r1, r5 @ r6 = 'z4'
ldr r9, [r11, #FIX_1_175875602_ID]
add r8, r4, r6 @ r8 = z3 + z4
ldr r10, [r11, #FIX_M_0_899976223_ID]
mul r8, r9, r8 @ r8 = 'z5'
ldr r9, [r11, #FIX_M_2_562915447_ID]
mul r2, r10, r2 @ r2 = 'z1'
ldr r10, [r11, #FIX_M_1_961570560_ID]
mul r0, r9, r0 @ r0 = 'z2'
ldr r9, [r11, #FIX_M_0_390180644_ID]
mla r4, r10, r4, r8 @ r4 = 'z3'
ldr r10, [r11, #FIX_0_298631336_ID]
mla r6, r9, r6, r8 @ r6 = 'z4'
ldr r9, [r11, #FIX_2_053119869_ID]
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
ldr r10, [r11, #FIX_3_072711026_ID]
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
ldr r9, [r11, #FIX_1_501321110_ID]
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
add r7, r7, r4 @ r7 = tmp0
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
add r5, r5, r6 @ r5 = tmp1
add r3, r3, r4 @ r3 = tmp2
add r1, r1, r6 @ r1 = tmp3
add r0, r3, r5 @ r0 = 'z2'
add r2, r1, r7 @ r2 = 'z1'
add r4, r3, r7 @ r4 = 'z3'
add r6, r1, r5 @ r6 = 'z4'
ldr r9, [r11, #FIX_1_175875602_ID]
add r8, r4, r6 @ r8 = z3 + z4
ldr r10, [r11, #FIX_M_0_899976223_ID]
mul r8, r9, r8 @ r8 = 'z5'
ldr r9, [r11, #FIX_M_2_562915447_ID]
mul r2, r10, r2 @ r2 = 'z1'
ldr r10, [r11, #FIX_M_1_961570560_ID]
mul r0, r9, r0 @ r0 = 'z2'
ldr r9, [r11, #FIX_M_0_390180644_ID]
mla r4, r10, r4, r8 @ r4 = 'z3'
ldr r10, [r11, #FIX_0_298631336_ID]
mla r6, r9, r6, r8 @ r6 = 'z4'
ldr r9, [r11, #FIX_2_053119869_ID]
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
ldr r10, [r11, #FIX_3_072711026_ID]
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
ldr r9, [r11, #FIX_1_501321110_ID]
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
add r7, r7, r4 @ r7 = tmp0
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
add r5, r5, r6 @ r5 = tmp1
add r3, r3, r4 @ r3 = tmp2
add r1, r1, r6 @ r1 = tmp3
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
add r8, r0, r1
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 0]
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
add r8, r0, r1
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 0]
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
sub r8, r0, r1
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #14]
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
sub r8, r0, r1
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #14]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
add r8, r6, r3
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 2]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
add r8, r6, r3
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 2]
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
sub r8, r6, r3
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #12]
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
sub r8, r6, r3
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #12]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
add r8, r4, r5
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 4]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
add r8, r4, r5
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 4]
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
sub r8, r4, r5
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #10]
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
sub r8, r4, r5
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, #10]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
add r8, r2, r7
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 6]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
add r8, r2, r7
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 6]
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
sub r8, r2, r7
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 8]
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
sub r8, r2, r7
add r8, r8, #(1<<10)
mov r8, r8, asr #11
strh r8, [lr, # 8]
@ End of row loop
add lr, lr, #16
subs r12, r12, #1
bne row_loop
beq start_column_loop
@ End of row loop
add lr, lr, #16
subs r12, r12, #1
bne row_loop
beq start_column_loop
empty_row:
ldr r1, [r11, #FIX_0xFFFF_ID]
mov r0, r0, lsl #2
and r0, r0, r1
add r0, r0, r0, lsl #16
str r0, [lr, # 0]
str r0, [lr, # 4]
str r0, [lr, # 8]
str r0, [lr, #12]
ldr r1, [r11, #FIX_0xFFFF_ID]
mov r0, r0, lsl #2
and r0, r0, r1
add r0, r0, r0, lsl #16
str r0, [lr, # 0]
str r0, [lr, # 4]
str r0, [lr, # 8]
str r0, [lr, #12]
end_of_row_loop:
@ End of loop
add lr, lr, #16
subs r12, r12, #1
bne row_loop
@ End of loop
add lr, lr, #16
subs r12, r12, #1
bne row_loop
start_column_loop:
@ Start of column loop
ldr lr, [ sp ]
mov r12, #8
@ Start of column loop
ldr lr, [ sp ]
mov r12, #8
column_loop:
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
ldr r3, [r11, #FIX_0_541196100_ID]
add r1, r2, r6
ldr r5, [r11, #FIX_M_1_847759065_ID]
mul r1, r3, r1 @ r1 = z1
ldr r3, [r11, #FIX_0_765366865_ID]
mla r6, r5, r6, r1 @ r6 = tmp2
add r5, r0, r4 @ r5 = tmp0
mla r2, r3, r2, r1 @ r2 = tmp3
sub r3, r0, r4 @ r3 = tmp1
ldr r3, [r11, #FIX_0_541196100_ID]
add r1, r2, r6
ldr r5, [r11, #FIX_M_1_847759065_ID]
mul r1, r3, r1 @ r1 = z1
ldr r3, [r11, #FIX_0_765366865_ID]
mla r6, r5, r6, r1 @ r6 = tmp2
add r5, r0, r4 @ r5 = tmp0
mla r2, r3, r2, r1 @ r2 = tmp3
sub r3, r0, r4 @ r3 = tmp1
add r0, r2, r5, lsl #13 @ r0 = tmp10
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
add r4, r6, r3, lsl #13 @ r4 = tmp11
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
add r0, r2, r5, lsl #13 @ r0 = tmp10
rsb r2, r2, r5, lsl #13 @ r2 = tmp13
add r4, r6, r3, lsl #13 @ r4 = tmp11
rsb r6, r6, r3, lsl #13 @ r6 = tmp12
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
orr r9, r1, r3
orr r10, r5, r7
orrs r10, r9, r10
beq empty_odd_column
@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
orr r9, r1, r3
orr r10, r5, r7
orrs r10, r9, r10
beq empty_odd_column
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
add r0, r3, r5 @ r0 = 'z2'
add r2, r1, r7 @ r2 = 'z1'
add r4, r3, r7 @ r4 = 'z3'
add r6, r1, r5 @ r6 = 'z4'
ldr r9, [r11, #FIX_1_175875602_ID]
add r8, r4, r6
ldr r10, [r11, #FIX_M_0_899976223_ID]
mul r8, r9, r8 @ r8 = 'z5'
ldr r9, [r11, #FIX_M_2_562915447_ID]
mul r2, r10, r2 @ r2 = 'z1'
ldr r10, [r11, #FIX_M_1_961570560_ID]
mul r0, r9, r0 @ r0 = 'z2'
ldr r9, [r11, #FIX_M_0_390180644_ID]
mla r4, r10, r4, r8 @ r4 = 'z3'
ldr r10, [r11, #FIX_0_298631336_ID]
mla r6, r9, r6, r8 @ r6 = 'z4'
ldr r9, [r11, #FIX_2_053119869_ID]
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
ldr r10, [r11, #FIX_3_072711026_ID]
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
ldr r9, [r11, #FIX_1_501321110_ID]
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
add r7, r7, r4 @ r7 = tmp0
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
add r5, r5, r6 @ r5 = tmp1
add r3, r3, r4 @ r3 = tmp2
add r1, r1, r6 @ r1 = tmp3
add r0, r3, r5 @ r0 = 'z2'
add r2, r1, r7 @ r2 = 'z1'
add r4, r3, r7 @ r4 = 'z3'
add r6, r1, r5 @ r6 = 'z4'
ldr r9, [r11, #FIX_1_175875602_ID]
add r8, r4, r6
ldr r10, [r11, #FIX_M_0_899976223_ID]
mul r8, r9, r8 @ r8 = 'z5'
ldr r9, [r11, #FIX_M_2_562915447_ID]
mul r2, r10, r2 @ r2 = 'z1'
ldr r10, [r11, #FIX_M_1_961570560_ID]
mul r0, r9, r0 @ r0 = 'z2'
ldr r9, [r11, #FIX_M_0_390180644_ID]
mla r4, r10, r4, r8 @ r4 = 'z3'
ldr r10, [r11, #FIX_0_298631336_ID]
mla r6, r9, r6, r8 @ r6 = 'z4'
ldr r9, [r11, #FIX_2_053119869_ID]
mla r7, r10, r7, r2 @ r7 = tmp0 + z1
ldr r10, [r11, #FIX_3_072711026_ID]
mla r5, r9, r5, r0 @ r5 = tmp1 + z2
ldr r9, [r11, #FIX_1_501321110_ID]
mla r3, r10, r3, r0 @ r3 = tmp2 + z2
add r7, r7, r4 @ r7 = tmp0
mla r1, r9, r1, r2 @ r1 = tmp3 + z1
add r5, r5, r6 @ r5 = tmp1
add r3, r3, r4 @ r3 = tmp2
add r1, r1, r6 @ r1 = tmp3
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
@ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
add r8, r0, r1
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 0*8)]
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
add r8, r0, r1
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 0*8)]
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
sub r8, r0, r1
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(14*8)]
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
sub r8, r0, r1
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(14*8)]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
add r8, r4, r3
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 2*8)]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
add r8, r4, r3
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 2*8)]
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
sub r8, r4, r3
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(12*8)]
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
sub r8, r4, r3
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(12*8)]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
add r8, r6, r5
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 4*8)]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
add r8, r6, r5
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 4*8)]
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
sub r8, r6, r5
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(10*8)]
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
sub r8, r6, r5
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #(10*8)]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
add r8, r2, r7
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 6*8)]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
add r8, r2, r7
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 6*8)]
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
sub r8, r2, r7
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 8*8)]
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
sub r8, r2, r7
add r8, r8, #(1<<17)
mov r8, r8, asr #18
strh r8, [lr, #( 8*8)]
@ End of column loop
add lr, lr, #2
subs r12, r12, #1
bne column_loop
beq the_end
@ End of column loop
add lr, lr, #2
subs r12, r12, #1
bne column_loop
beq the_end
empty_odd_column:
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
add r0, r0, #(1<<17)
mov r0, r0, asr #18
strh r0, [lr, #( 0*8)]
strh r0, [lr, #(14*8)]
@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
add r0, r0, #(1<<17)
mov r0, r0, asr #18
strh r0, [lr, #( 0*8)]
strh r0, [lr, #(14*8)]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
add r4, r4, #(1<<17)
mov r4, r4, asr #18
strh r4, [lr, #( 2*8)]
strh r4, [lr, #(12*8)]
@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
add r4, r4, #(1<<17)
mov r4, r4, asr #18
strh r4, [lr, #( 2*8)]
strh r4, [lr, #(12*8)]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
add r6, r6, #(1<<17)
mov r6, r6, asr #18
strh r6, [lr, #( 4*8)]
strh r6, [lr, #(10*8)]
@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
add r6, r6, #(1<<17)
mov r6, r6, asr #18
strh r6, [lr, #( 4*8)]
strh r6, [lr, #(10*8)]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
add r2, r2, #(1<<17)
mov r2, r2, asr #18
strh r2, [lr, #( 6*8)]
strh r2, [lr, #( 8*8)]
@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
add r2, r2, #(1<<17)
mov r2, r2, asr #18
strh r2, [lr, #( 6*8)]
strh r2, [lr, #( 8*8)]
@ End of column loop
add lr, lr, #2
subs r12, r12, #1
bne column_loop
@ End of column loop
add lr, lr, #2
subs r12, r12, #1
bne column_loop
the_end:
@ The end....
add sp, sp, #4
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
@ The end....
add sp, sp, #4
ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
const_array:
.align
.word FIX_0_298631336
.word FIX_0_541196100
.word FIX_0_765366865
.word FIX_1_175875602
.word FIX_1_501321110
.word FIX_2_053119869
.word FIX_3_072711026
.word FIX_M_0_390180644
.word FIX_M_0_899976223
.word FIX_M_1_847759065
.word FIX_M_1_961570560
.word FIX_M_2_562915447
.word FIX_0xFFFF
.align
.word FIX_0_298631336
.word FIX_0_541196100
.word FIX_0_765366865
.word FIX_1_175875602
.word FIX_1_501321110
.word FIX_2_053119869
.word FIX_3_072711026
.word FIX_M_0_390180644
.word FIX_M_0_899976223
.word FIX_M_1_847759065
.word FIX_M_1_961570560
.word FIX_M_2_562915447
.word FIX_0xFFFF

View File

@ -51,9 +51,9 @@
#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
.text
.align
.global simple_idct_ARM
.text
.align
.global simple_idct_ARM
simple_idct_ARM:
@@ void simple_idct_ARM(int16_t *block)
@ -120,8 +120,8 @@ __b_evaluation:
ldr r11, [r12, #offW7] @ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if null avoid muls
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if null avoid muls
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@ -147,7 +147,7 @@ __b_evaluation:
@@ MAC16(b3, -W1, row[7]);
@@ MAC16(b1, -W5, row[7]);
mov r3, r3, asr #16 @ R3=ROWr16[5]
teq r3, #0 @ if null avoid muls
teq r3, #0 @ if null avoid muls
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
mov r4, r4, asr #16 @ R4=ROWr16[7]
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
@ -155,7 +155,7 @@ __b_evaluation:
rsbne r3, r3, #0 @ R3=-ROWr16[5]
mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5]=b1
@@ R3 is free now
teq r4, #0 @ if null avoid muls
teq r4, #0 @ if null avoid muls
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7]
@ -187,7 +187,7 @@ __a_evaluation:
teq r2, #0
beq __end_bef_a_evaluation
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
@ -203,7 +203,7 @@ __a_evaluation:
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #8] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
teq r11, #0 @ if null avoid muls
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
ldrsh r9, [r14, #12] @ R9=ROWr16[6]
@ -212,7 +212,7 @@ __a_evaluation:
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
teq r9, #0 @ if null avoid muls
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@ -294,165 +294,165 @@ __end_row_loop:
@@ at this point, R0=block, R1-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
@@ at this point, R0=block, R1-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
__col_loop:
__b_evaluation2:
@@ at this point, R0=block (temp), R1-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
@@ proceed with b0-b3 first, followed by a0-a3
@@ MUL16(b0, W1, col[8x1]);
@@ MUL16(b1, W3, col[8x1]);
@@ MUL16(b2, W5, col[8x1]);
@@ MUL16(b3, W7, col[8x1]);
@@ MAC16(b0, W3, col[8x3]);
@@ MAC16(b1, -W7, col[8x3]);
@@ MAC16(b2, -W1, col[8x3]);
@@ MAC16(b3, -W5, col[8x3]);
ldr r8, [r12, #offW1] @ R8=W1
ldrsh r7, [r14, #16]
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r9, [r12, #offW3] @ R9=W3
ldr r10, [r12, #offW5] @ R10=W5
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r11, [r12, #offW7] @ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldrsh r2, [r14, #48]
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if 0, then avoid muls
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=block (temp), R1-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
@@ proceed with b0-b3 first, followed by a0-a3
@@ MUL16(b0, W1, col[8x1]);
@@ MUL16(b1, W3, col[8x1]);
@@ MUL16(b2, W5, col[8x1]);
@@ MUL16(b3, W7, col[8x1]);
@@ MAC16(b0, W3, col[8x3]);
@@ MAC16(b1, -W7, col[8x3]);
@@ MAC16(b2, -W1, col[8x3]);
@@ MAC16(b3, -W5, col[8x3]);
ldr r8, [r12, #offW1] @ R8=W1
ldrsh r7, [r14, #16]
mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r9, [r12, #offW3] @ R9=W3
ldr r10, [r12, #offW5] @ R10=W5
mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldr r11, [r12, #offW7] @ R11=W7
mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
ldrsh r2, [r14, #48]
mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
teq r2, #0 @ if 0, then avoid muls
mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
rsbne r2, r2, #0 @ R2=-ROWr16[3]
mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
@@ R12=__const_ptr_, R14=&block[n]
@@ MAC16(b0, W5, col[5x8]);
@@ MAC16(b2, W7, col[5x8]);
@@ MAC16(b3, W3, col[5x8]);
@@ MAC16(b1, -W1, col[5x8]);
@@ MAC16(b0, W7, col[7x8]);
@@ MAC16(b2, W3, col[7x8]);
@@ MAC16(b3, -W1, col[7x8]);
@@ MAC16(b1, -W5, col[7x8]);
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
teq r3, #0 @ if 0 then avoid muls
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5x8]=b1
@@ R3 is free now
teq r4, #0 @ if 0 then avoid muls
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
@@ R12=__const_ptr_, R14=&block[n]
@@ MAC16(b0, W5, col[5x8]);
@@ MAC16(b2, W7, col[5x8]);
@@ MAC16(b3, W3, col[5x8]);
@@ MAC16(b1, -W1, col[5x8]);
@@ MAC16(b0, W7, col[7x8]);
@@ MAC16(b2, W3, col[7x8]);
@@ MAC16(b3, -W1, col[7x8]);
@@ MAC16(b1, -W5, col[7x8]);
ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
teq r3, #0 @ if 0 then avoid muls
mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5x8]=b0
mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5x8]=b2
mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5x8]=b3
rsbne r3, r3, #0 @ R3=-ROWr16[5x8]
ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5x8]=b1
@@ R3 is free now
teq r4, #0 @ if 0 then avoid muls
mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7x8]=b0
mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7x8]=b2
rsbne r4, r4, #0 @ R4=-ROWr16[7x8]
mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7x8]=b3
mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7x8]=b1
@@ R4 is free now
__end_b_evaluation2:
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
@@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
__a_evaluation2:
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
@@ a1 = a0 + W6 * row[2];
@@ a2 = a0 - W6 * row[2];
@@ a3 = a0 - W2 * row[2];
@@ a0 = a0 + W2 * row[2];
ldrsh r6, [r14, #0]
ldr r9, [r12, #offW4] @ R9=W4
mul r6, r9, r6 @ R6=W4*ROWr16[0]
ldr r10, [r12, #offW6] @ R10=W6
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
mul r11, r10, r4 @ R11=W6*ROWr16[2]
ldr r8, [r12, #offW2] @ R8=W2
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
@@ a1 = a0 + W6 * row[2];
@@ a2 = a0 - W6 * row[2];
@@ a3 = a0 - W2 * row[2];
@@ a0 = a0 + W2 * row[2];
ldrsh r6, [r14, #0]
ldr r9, [r12, #offW4] @ R9=W4
mul r6, r9, r6 @ R6=W4*ROWr16[0]
ldr r10, [r12, #offW6] @ R10=W6
ldrsh r4, [r14, #32] @ R4=ROWr16[2] (a3 not defined yet)
add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
mul r11, r10, r4 @ R11=W6*ROWr16[2]
ldr r8, [r12, #offW2] @ R8=W2
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ a0 += W4*row[4]
@@ a1 -= W4*row[4]
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@@ a0 += W6*row[6];
@@ a3 -= W6*row[6];
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ a0 += W4*row[4]
@@ a1 -= W4*row[4]
@@ a2 -= W4*row[4]
@@ a3 += W4*row[4]
ldrsh r11, [r14, #64] @ R11=ROWr16[4]
teq r11, #0 @ if null avoid muls
mulne r11, r9, r11 @ R11=W4*ROWr16[4]
@@ R9 is free now
addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
ldrsh r9, [r14, #96] @ R9=ROWr16[6]
addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
teq r9, #0 @ if null avoid muls
mulne r11, r10, r9 @ R11=W6*ROWr16[6]
addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
mulne r10, r8, r9 @ R10=W2*ROWr16[6]
@@ a0 += W6*row[6];
@@ a3 -= W6*row[6];
@@ a1 -= W2*row[6];
@@ a2 += W2*row[6];
subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
__end_a_evaluation2:
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
@@ col[16] = ((a2 + b2) >> COL_SHIFT);
@@ col[24] = ((a3 + b3) >> COL_SHIFT);
@@ col[32] = ((a3 - b3) >> COL_SHIFT);
@@ col[40] = ((a2 - b2) >> COL_SHIFT);
@@ col[48] = ((a1 - b1) >> COL_SHIFT);
@@ col[56] = ((a0 - b0) >> COL_SHIFT);
@@@@@ no optimisation here @@@@@
add r8, r6, r0 @ R8=a0+b0
add r9, r2, r1 @ R9=a1+b1
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #0]
strh r9, [r14, #16]
add r8, r3, r5 @ R8=a2+b2
add r9, r4, r7 @ R9=a3+b3
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #32]
strh r9, [r14, #48]
sub r8, r4, r7 @ R8=a3-b3
sub r9, r3, r5 @ R9=a2-b2
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #64]
strh r9, [r14, #80]
sub r8, r2, r1 @ R8=a1-b1
sub r9, r6, r0 @ R9=a0-b0
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #96]
strh r9, [r14, #112]
@@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
@@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
@@ R12=__const_ptr_, R14=&block[n]
@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
@@ col[16] = ((a2 + b2) >> COL_SHIFT);
@@ col[24] = ((a3 + b3) >> COL_SHIFT);
@@ col[32] = ((a3 - b3) >> COL_SHIFT);
@@ col[40] = ((a2 - b2) >> COL_SHIFT);
@@ col[48] = ((a1 - b1) >> COL_SHIFT);
@@ col[56] = ((a0 - b0) >> COL_SHIFT);
@@@@@ no optimisation here @@@@@
add r8, r6, r0 @ R8=a0+b0
add r9, r2, r1 @ R9=a1+b1
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #0]
strh r9, [r14, #16]
add r8, r3, r5 @ R8=a2+b2
add r9, r4, r7 @ R9=a3+b3
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #32]
strh r9, [r14, #48]
sub r8, r4, r7 @ R8=a3-b3
sub r9, r3, r5 @ R9=a2-b2
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #64]
strh r9, [r14, #80]
sub r8, r2, r1 @ R8=a1-b1
sub r9, r6, r0 @ R9=a0-b0
mov r8, r8, asr #COL_SHIFT
mov r9, r9, asr #COL_SHIFT
strh r8, [r14, #96]
strh r9, [r14, #112]
__end_col_loop:
@@ at this point, R0-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
ldr r0, [sp, #0] @ R0=block
teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
sub r14, r14, #2
bne __col_loop
@@ at this point, R0-R11 (free)
@@ R12=__const_ptr_, R14=&block[n]
ldr r0, [sp, #0] @ R0=block
teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
sub r14, r14, #2
bne __col_loop
@ -466,15 +466,15 @@ __end_simple_idct_ARM:
@@ kind of sub-function, here not to overload the common case.
__end_bef_a_evaluation:
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
mul r11, r8, r4 @ R11=W2*ROWr16[2]
sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
bal __end_a_evaluation
bal __end_a_evaluation
__constant_ptr__: @@ see #defines at the beginning of the source code for values.
.align
.align
.word W1
.word W2
.word W3

View File

@ -15,21 +15,21 @@ extern "C" {
#include <sys/types.h> /* size_t */
//FIXME the following 2 really don't belong in here
#define FFMPEG_VERSION_INT 0x000409
#define FFMPEG_VERSION "CVS"
#define FFMPEG_VERSION_INT 0x000409
#define FFMPEG_VERSION "CVS"
#define AV_STRINGIFY(s) AV_TOSTRING(s)
#define AV_STRINGIFY(s) AV_TOSTRING(s)
#define AV_TOSTRING(s) #s
#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)
#define LIBAVCODEC_VERSION 51.0.0
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
#define LIBAVCODEC_VERSION_INT ((51<<16)+(0<<8)+0)
#define LIBAVCODEC_VERSION 51.0.0
#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
enum CodecID {
CODEC_ID_NONE,
@ -362,9 +362,9 @@ extern int motion_estimation_method;
#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of in extradata
/* Unsupported options :
* Syntax Arithmetic coding (SAC)
* Reference Picture Selection
* Independent Segment Decoding */
* Syntax Arithmetic coding (SAC)
* Reference Picture Selection
* Independent Segment Decoding */
/* /Fx */
/* codec capabilities */
@ -646,9 +646,9 @@ typedef struct AVPanScan{
*/\
int8_t *ref_index[2];
#define FF_QSCALE_TYPE_MPEG1 0
#define FF_QSCALE_TYPE_MPEG2 1
#define FF_QSCALE_TYPE_H264 2
#define FF_QSCALE_TYPE_MPEG1 0
#define FF_QSCALE_TYPE_MPEG2 1
#define FF_QSCALE_TYPE_H264 2
#define FF_BUFFER_TYPE_INTERNAL 1
#define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user)
@ -684,9 +684,9 @@ typedef struct AVCLASS AVClass;
struct AVCLASS {
const char* class_name;
const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext
or AVFormatContext, which begin with an AVClass.
Needed because av_log is in libavcodec and has no visibility
of AVIn/OutputFormat */
or AVFormatContext, which begin with an AVClass.
Needed because av_log is in libavcodec and has no visibility
of AVIn/OutputFormat */
struct AVOption *option;
};
@ -1252,18 +1252,18 @@ typedef struct AVCodecContext {
* result into program crash)
*/
unsigned dsp_mask;
#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */
#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */
/* lower 16 bits - CPU features */
#ifdef HAVE_MMX
#define FF_MM_MMX 0x0001 /* standard MMX */
#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */
#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define FF_MM_SSE 0x0008 /* SSE functions */
#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */
#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#define FF_MM_MMX 0x0001 /* standard MMX */
#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */
#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define FF_MM_SSE 0x0008 /* SSE functions */
#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */
#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#endif /* HAVE_MMX */
#ifdef HAVE_IWMMXT
#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */
#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */
#endif /* HAVE_IWMMXT */
/**
@ -2223,7 +2223,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
#define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */
#define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */
int img_get_alpha_info(const AVPicture *src,
int pix_fmt, int width, int height);
int pix_fmt, int width, int height);
/* convert among pixel formats */
int img_convert(AVPicture *dst, int dst_pix_fmt,

View File

@ -35,20 +35,20 @@ typedef struct ThreadContext{
// it's odd Be never patented that :D
struct benaphore {
vint32 atom;
sem_id sem;
vint32 atom;
sem_id sem;
};
static inline int lock_ben(struct benaphore *ben)
{
if (atomic_add(&ben->atom, 1) > 0)
return acquire_sem(ben->sem);
return B_OK;
if (atomic_add(&ben->atom, 1) > 0)
return acquire_sem(ben->sem);
return B_OK;
}
static inline int unlock_ben(struct benaphore *ben)
{
if (atomic_add(&ben->atom, -1) > 1)
return release_sem(ben->sem);
return B_OK;
if (atomic_add(&ben->atom, -1) > 1)
return release_sem(ben->sem);
return B_OK;
}
static struct benaphore av_thread_lib_ben;
@ -155,25 +155,25 @@ fail:
int avcodec_thread_lock_lib(void)
{
return lock_ben(&av_thread_lib_ben);
return lock_ben(&av_thread_lib_ben);
}
int avcodec_thread_unlock_lib(void)
{
return unlock_ben(&av_thread_lib_ben);
return unlock_ben(&av_thread_lib_ben);
}
/* our versions of _init and _fini (which are called by those actually from crt.o) */
void initialize_after(void)
{
av_thread_lib_ben.atom = 0;
av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
av_thread_lib_ben.atom = 0;
av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore");
}
void uninitialize_before(void)
{
delete_sem(av_thread_lib_ben.sem);
delete_sem(av_thread_lib_ben.sem);
}

View File

@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg)
{
int bit= get_bits1(s);
if(!bit)
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
return bit;
}

View File

@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM {
# ifdef __GNUC__
static inline uint32_t unaligned32(const void *v) {
struct Unaligned {
uint32_t i;
uint32_t i;
} __attribute__((packed));
return ((const struct Unaligned *) v)->i;
@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
bit_buf = (bit_buf<<n) | value;
bit_left-=n;
} else {
bit_buf<<=bit_left;
bit_buf<<=bit_left;
bit_buf |= value >> (n - bit_left);
#ifdef UNALIGNED_STORES_ARE_BAD
if (3 & (intptr_t) s->buf_ptr) {
@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
//printf("bitbuf = %08x\n", bit_buf);
s->buf_ptr+=4;
bit_left+=32 - n;
bit_left+=32 - n;
bit_buf = value;
}
@ -212,21 +212,21 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
# ifdef ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) || defined(ARCH_X86_64)
asm volatile(
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
"shrl %%cl, %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"andl $0xFFFFFFFC, %%ecx \n\t"
"bswapl %1 \n\t"
"orl %1, (%2, %%ecx) \n\t"
"bswapl %%eax \n\t"
"addl %3, %0 \n\t"
"movl %%eax, 4(%2, %%ecx) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
: "%eax", "%ecx"
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
"shrl %%cl, %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"andl $0xFFFFFFFC, %%ecx \n\t"
"bswapl %1 \n\t"
"orl %1, (%2, %%ecx) \n\t"
"bswapl %%eax \n\t"
"addl %3, %0 \n\t"
"movl %%eax, 4(%2, %%ecx) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
: "%eax", "%ecx"
);
# else
int index= s->index;
@ -243,20 +243,20 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
# else //ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) || defined(ARCH_X86_64)
asm volatile(
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
"negl %%ecx \n\t"
"shll %%cl, %1 \n\t"
"bswapl %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"orl %1, (%%ecx, %2) \n\t"
"addl %3, %0 \n\t"
"movl $0, 4(%%ecx, %2) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
: "%ecx"
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
"negl %%ecx \n\t"
"shll %%cl, %1 \n\t"
"bswapl %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"orl %1, (%%ecx, %2) \n\t"
"addl %3, %0 \n\t"
"movl $0, 4(%%ecx, %2) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
: "%ecx"
);
# else
int index= s->index;
@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
static inline uint8_t* pbBufPtr(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
return s->buf + (s->index>>3);
return s->buf + (s->index>>3);
#else
return s->buf_ptr;
return s->buf_ptr;
#endif
}
@ -290,10 +290,10 @@ static inline void skip_put_bytes(PutBitContext *s, int n){
assert((put_bits_count(s)&7)==0);
#ifdef ALT_BITSTREAM_WRITER
FIXME may need some cleaning of the buffer
s->index += n<<3;
s->index += n<<3;
#else
assert(s->bit_left==32);
s->buf_ptr += n;
s->buf_ptr += n;
#endif
}
@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
static inline int unaligned32_be(const void *v)
{
#ifdef CONFIG_ALIGN
const uint8_t *p=v;
return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
const uint8_t *p=v;
return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
#else
return be2me_32( unaligned32(v)); //original
return be2me_32( unaligned32(v)); //original
#endif
}
@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){
#if defined(ARCH_X86) || defined(ARCH_X86_64)
# define SKIP_CACHE(name, gb, num)\
asm(\
"shldl %2, %1, %0 \n\t"\
"shll %2, %1 \n\t"\
"shldl %2, %1, %0 \n\t"\
"shll %2, %1 \n\t"\
: "+r" (name##_cache0), "+r" (name##_cache1)\
: "Ic" ((uint8_t)num)\
);

View File

@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx,
uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
for(x=0; x<avctx->width; x+=4){
luma[3] = get_bits(&a->gb, 5) << 3;
luma[2] = get_bits(&a->gb, 5) << 3;
luma[1] = get_bits(&a->gb, 5) << 3;
luma[0] = get_bits(&a->gb, 5) << 3;
luma+= 4;
*(cb++) = get_bits(&a->gb, 6) << 2;
*(cr++) = get_bits(&a->gb, 6) << 2;
luma[3] = get_bits(&a->gb, 5) << 3;
luma[2] = get_bits(&a->gb, 5) << 3;
luma[1] = get_bits(&a->gb, 5) << 3;
luma[0] = get_bits(&a->gb, 5) << 3;
luma+= 4;
*(cb++) = get_bits(&a->gb, 6) << 2;
*(cr++) = get_bits(&a->gb, 6) << 2;
}
}

View File

@ -65,14 +65,14 @@ int64_t gettime(void)
static short idct_mmx_perm[64];
static short idct_simple_mmx_perm[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
void idct_mmx_init(void)
@ -81,8 +81,8 @@ void idct_mmx_init(void)
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
for (i = 0; i < 64; i++) {
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
}
}
@ -151,7 +151,7 @@ void dct_error(const char *name, int is_idct,
for(i=0;i<64;i++)
block[idct_simple_mmx_perm[i]] = block1[i];
} else {
} else {
for(i=0; i<64; i++)
block[i]= block1[i];
}
@ -186,9 +186,9 @@ void dct_error(const char *name, int is_idct,
if (v > err_inf)
err_inf = v;
err2 += v * v;
sysErr[i] += block[i] - block1[i];
blockSumErr += v;
if( abs(block[i])>maxout) maxout=abs(block[i]);
sysErr[i] += block[i] - block1[i];
blockSumErr += v;
if( abs(block[i])>maxout) maxout=abs(block[i]);
}
if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
#if 0 // print different matrix pairs
@ -209,7 +209,7 @@ void dct_error(const char *name, int is_idct,
#if 1 // dump systematic errors
for(i=0; i<64; i++){
if(i%8==0) printf("\n");
if(i%8==0) printf("\n");
printf("%5d ", (int)sysErr[i]);
}
printf("\n");
@ -503,7 +503,7 @@ int main(int argc, char **argv)
dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
// dct_error("ODIVX-C", 1, odivx_idct_c, idct);
//printf(" test against odivx idct\n");
// dct_error("REF", 1, idct, odivx_idct_c);
// dct_error("REF", 1, idct, odivx_idct_c);
// dct_error("INT", 1, j_rev_dct, odivx_idct_c);
// dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
// dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);

View File

@ -124,14 +124,14 @@ const uint32_t inverse[256]={
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static int pix_sum_c(uint8_t * pix, int line_size)
@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size)
s = 0;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
s += pix[0];
s += pix[1];
s += pix[2];
s += pix[3];
s += pix[4];
s += pix[5];
s += pix[6];
s += pix[7];
pix += 8;
}
pix += line_size - 16;
for (j = 0; j < 16; j += 8) {
s += pix[0];
s += pix[1];
s += pix[2];
s += pix[3];
s += pix[4];
s += pix[5];
s += pix[6];
s += pix[7];
pix += 8;
}
pix += line_size - 16;
}
return s;
}
@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
s = 0;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
for (j = 0; j < 16; j += 8) {
#if 0
s += sq[pix[0]];
s += sq[pix[1]];
s += sq[pix[2]];
s += sq[pix[3]];
s += sq[pix[4]];
s += sq[pix[5]];
s += sq[pix[6]];
s += sq[pix[7]];
s += sq[pix[0]];
s += sq[pix[1]];
s += sq[pix[2]];
s += sq[pix[3]];
s += sq[pix[4]];
s += sq[pix[5]];
s += sq[pix[6]];
s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
register uint64_t x=*(uint64_t*)pix;
s += sq[x&0xff];
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
register uint64_t x=*(uint64_t*)pix;
s += sq[x&0xff];
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
s += sq[(x>>32)&0xff];
s += sq[(x>>40)&0xff];
s += sq[(x>>48)&0xff];
s += sq[(x>>56)&0xff];
#else
register uint32_t x=*(uint32_t*)pix;
s += sq[x&0xff];
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
register uint32_t x=*(uint32_t*)pix;
s += sq[x&0xff];
s += sq[(x>>8)&0xff];
s += sq[(x>>16)&0xff];
s += sq[(x>>24)&0xff];
x=*(uint32_t*)(pix+4);
s += sq[x&0xff];
s += sq[(x>>8)&0xff];
@ -197,9 +197,9 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
s += sq[(x>>24)&0xff];
#endif
#endif
pix += 8;
}
pix += line_size - 16;
pix += 8;
}
pix += line_size - 16;
}
return s;
}
@ -410,7 +410,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride){
const uint8_t *s2, int stride){
int i;
/* read the pixels */
@ -431,7 +431,7 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
int line_size)
{
int i;
uint8_t *cm = cropTbl + MAX_NEG_CROP;
@ -453,7 +453,7 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
}
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
int line_size)
{
int i;
uint8_t *cm = cropTbl + MAX_NEG_CROP;
@ -471,7 +471,7 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
}
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
int line_size)
{
int i;
uint8_t *cm = cropTbl + MAX_NEG_CROP;
@ -1214,7 +1214,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
}
src += stride;
dst += stride;
@ -1225,7 +1225,7 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
}
src += stride;
dst += stride;
@ -1236,7 +1236,7 @@ static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
}
src += stride;
dst += stride;
@ -1247,7 +1247,7 @@ static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
}
src += stride;
dst += stride;
@ -1258,7 +1258,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
}
src += stride;
dst += stride;
@ -1269,7 +1269,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
}
src += stride;
dst += stride;
@ -1280,7 +1280,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
}
src += stride;
dst += stride;
@ -1291,7 +1291,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
}
src += stride;
dst += stride;
@ -1311,7 +1311,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1322,7 +1322,7 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1333,7 +1333,7 @@ static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1344,7 +1344,7 @@ static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1355,7 +1355,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1366,7 +1366,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1377,7 +1377,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
}
src += stride;
dst += stride;
@ -1388,7 +1388,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
int i,j;
for (i=0; i < height; i++) {
for (j=0; j < width; j++) {
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
}
src += stride;
dst += stride;
@ -3666,15 +3666,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
#ifdef CONFIG_ENCODERS
if(avctx->dct_algo==FF_DCT_FASTINT) {
c->fdct = fdct_ifast;
c->fdct248 = fdct_ifast248;
c->fdct248 = fdct_ifast248;
}
else if(avctx->dct_algo==FF_DCT_FAAN) {
c->fdct = ff_faandct;
c->fdct248 = ff_faandct248;
c->fdct248 = ff_faandct248;
}
else {
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
c->fdct248 = ff_fdct248_islow;
c->fdct248 = ff_fdct248_islow;
}
#endif //CONFIG_ENCODERS

View File

@ -151,7 +151,7 @@ typedef struct DSPContext {
* global motion compensation.
*/
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
@ -342,7 +342,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
#define BYTE_VEC32(c) ((c)*0x01010101UL)
#define BYTE_VEC32(c) ((c)*0x01010101UL)
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{

View File

@ -194,7 +194,7 @@ channels_multi (int flags)
{
if (flags & DTS_LFE)
return 6;
else if (flags & 1) /* center channel */
else if (flags & 1) /* center channel */
return 5;
else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R)
return 4;

View File

@ -84,7 +84,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
j = perm[i];
s->dv_idct_shift[0][0][q][j] =
dv_quant_shifts[q][dv_88_areas[i]] + 1;
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
}
/* 248DCT */
@ -92,7 +92,7 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
/* 248 table */
s->dv_idct_shift[0][1][q][i] =
dv_quant_shifts[q][dv_248_areas[i]] + 1;
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
}
}
}
@ -114,35 +114,35 @@ static int dvvideo_init(AVCodecContext *avctx)
done = 1;
dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
if (!dv_vlc_map)
return -ENOMEM;
if (!dv_vlc_map)
return -ENOMEM;
/* dv_anchor lets each thread know its Id */
dv_anchor = av_malloc(12*27*sizeof(void*));
if (!dv_anchor) {
return -ENOMEM;
}
for (i=0; i<12*27; i++)
dv_anchor[i] = (void*)(size_t)i;
/* dv_anchor lets each thread know its Id */
dv_anchor = av_malloc(12*27*sizeof(void*));
if (!dv_anchor) {
return -ENOMEM;
}
for (i=0; i<12*27; i++)
dv_anchor[i] = (void*)(size_t)i;
/* it's faster to include sign bit in a generic VLC parsing scheme */
for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
new_dv_vlc_bits[j] = dv_vlc_bits[i];
new_dv_vlc_len[j] = dv_vlc_len[i];
new_dv_vlc_run[j] = dv_vlc_run[i];
new_dv_vlc_level[j] = dv_vlc_level[i];
/* it's faster to include sign bit in a generic VLC parsing scheme */
for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
new_dv_vlc_bits[j] = dv_vlc_bits[i];
new_dv_vlc_len[j] = dv_vlc_len[i];
new_dv_vlc_run[j] = dv_vlc_run[i];
new_dv_vlc_level[j] = dv_vlc_level[i];
if (dv_vlc_level[i]) {
new_dv_vlc_bits[j] <<= 1;
new_dv_vlc_len[j]++;
if (dv_vlc_level[i]) {
new_dv_vlc_bits[j] <<= 1;
new_dv_vlc_len[j]++;
j++;
new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
new_dv_vlc_run[j] = dv_vlc_run[i];
new_dv_vlc_level[j] = -dv_vlc_level[i];
}
}
j++;
new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
new_dv_vlc_run[j] = dv_vlc_run[i];
new_dv_vlc_level[j] = -dv_vlc_level[i];
}
}
/* NOTE: as a trick, we use the fact that no codes are unused
to accelerate the parsing of partial codes */
@ -150,10 +150,10 @@ static int dvvideo_init(AVCodecContext *avctx)
new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
if (!dv_rl_vlc) {
av_free(dv_anchor);
return -ENOMEM;
}
if (!dv_rl_vlc) {
av_free(dv_anchor);
return -ENOMEM;
}
for(i = 0; i < dv_vlc.table_size; i++){
int code= dv_vlc.table[i][0];
int len = dv_vlc.table[i][1];
@ -170,49 +170,49 @@ static int dvvideo_init(AVCodecContext *avctx)
dv_rl_vlc[i].level = level;
dv_rl_vlc[i].run = run;
}
free_vlc(&dv_vlc);
free_vlc(&dv_vlc);
for (i = 0; i < NB_DV_VLC - 1; i++) {
for (i = 0; i < NB_DV_VLC - 1; i++) {
if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
continue;
continue;
#ifdef DV_CODEC_TINY_TARGET
if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
continue;
continue;
#endif
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
continue;
if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
continue;
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
(!!dv_vlc_level[i]);
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
(!!dv_vlc_level[i]);
}
for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
(!!dv_vlc_level[i]);
dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
(!!dv_vlc_level[i]);
}
for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
#ifdef DV_CODEC_TINY_TARGET
for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
if (dv_vlc_map[i][j].size == 0) {
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
dv_vlc_map[0][j].size;
}
}
for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
if (dv_vlc_map[i][j].size == 0) {
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
dv_vlc_map[0][j].size;
}
}
#else
for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
if (dv_vlc_map[i][j].size == 0) {
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
dv_vlc_map[0][j].size;
}
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
dv_vlc_map[i][j].vlc | 1;
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
dv_vlc_map[i][j].size;
}
for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
if (dv_vlc_map[i][j].size == 0) {
dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
(dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
dv_vlc_map[0][j].size;
}
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
dv_vlc_map[i][j].vlc | 1;
dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
dv_vlc_map[i][j].size;
}
#endif
}
}
}
/* Generic DSP setup */
@ -241,7 +241,7 @@ static int dvvideo_init(AVCodecContext *avctx)
/* FIXME: I really don't think this should be here */
if (dv_codec_profile(avctx))
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
avctx->coded_frame = &s->picture;
s->avctx= avctx;
@ -306,9 +306,9 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
/* if we must parse a partial vlc, we do it here */
if (partial_bit_count > 0) {
re_cache = ((unsigned)re_cache >> partial_bit_count) |
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
re_index -= partial_bit_count;
mb->partial_bit_count = 0;
(mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
re_index -= partial_bit_count;
mb->partial_bit_count = 0;
}
/* get the AC coefficients until last_index is reached */
@ -318,30 +318,30 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
#endif
/* our own optimized GET_RL_VLC */
index = NEG_USR32(re_cache, TEX_VLC_BITS);
vlc_len = dv_rl_vlc[index].len;
vlc_len = dv_rl_vlc[index].len;
if (vlc_len < 0) {
index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
vlc_len = TEX_VLC_BITS - vlc_len;
}
level = dv_rl_vlc[index].level;
run = dv_rl_vlc[index].run;
run = dv_rl_vlc[index].run;
/* gotta check if we're still within gb boundaries */
if (re_index + vlc_len > last_index) {
/* should be < 16 bits otherwise a codeword could have been parsed */
mb->partial_bit_count = last_index - re_index;
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
re_index = last_index;
break;
}
re_index += vlc_len;
/* gotta check if we're still within gb boundaries */
if (re_index + vlc_len > last_index) {
/* should be < 16 bits otherwise a codeword could have been parsed */
mb->partial_bit_count = last_index - re_index;
mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
re_index = last_index;
break;
}
re_index += vlc_len;
#ifdef VLC_DEBUG
printf("run=%d level=%d\n", run, level);
printf("run=%d level=%d\n", run, level);
#endif
pos += run;
if (pos >= 64)
break;
pos += run;
if (pos >= 64)
break;
assert(level);
pos1 = scan_table[pos];
@ -404,7 +404,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
block = block1;
for(j = 0;j < 6; j++) {
last_index = block_sizes[j];
init_get_bits(&gb, buf_ptr, last_index);
init_get_bits(&gb, buf_ptr, last_index);
/* get the dc */
dc = get_sbits(&gb, 9);
@ -444,7 +444,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
block = block1;
mb = mb1;
init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
flush_put_bits(&pb);
flush_put_bits(&pb);
for(j = 0;j < 6; j++, block += 64, mb++) {
if (mb->pos < 64 && get_bits_left(&gb) > 0) {
dv_decode_ac(&gb, mb, block);
@ -456,7 +456,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
/* all blocks are finished, so the extra bytes can be used at
the video segment level */
if (j >= 6)
bit_copy(&vs_pb, &gb);
bit_copy(&vs_pb, &gb);
}
/* we need a pass over the whole video segment */
@ -475,8 +475,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
#endif
dv_decode_ac(&gb, mb, block);
}
if (mb->pos >= 64 && mb->pos < 127)
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
if (mb->pos >= 64 && mb->pos < 127)
av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
block += 64;
mb++;
}
@ -508,7 +508,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
uint64_t aligned_pixels[64/8];
uint8_t *pixels= (uint8_t*)aligned_pixels;
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
int x, y, linesize;
/* NOTE: at end of line, the macroblock is handled as 420 */
idct_put(pixels, 8, block);
@ -543,21 +543,21 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
int size;
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
*vlc = dv_vlc_map[run][level].vlc | sign;
size = dv_vlc_map[run][level].size;
size = dv_vlc_map[run][level].size;
}
else {
if (level < DV_VLC_MAP_LEV_SIZE) {
*vlc = dv_vlc_map[0][level].vlc | sign;
size = dv_vlc_map[0][level].size;
} else {
*vlc = dv_vlc_map[0][level].vlc | sign;
size = dv_vlc_map[0][level].size;
} else {
*vlc = 0xfe00 | (level << 1) | sign;
size = 16;
}
if (run) {
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
(0x1f80 | (run - 1))) << size;
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
}
size = 16;
}
if (run) {
*vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
(0x1f80 | (run - 1))) << size;
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
}
}
return size;
@ -568,13 +568,13 @@ static always_inline int dv_rl2vlc_size(int run, int level)
int size;
if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
size = dv_vlc_map[run][level].size;
size = dv_vlc_map[run][level].size;
}
else {
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
if (run) {
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
}
size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
if (run) {
size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
}
}
return size;
}
@ -620,14 +620,14 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
for (; size > (bits_left = put_bits_left(pb)); pb++) {
if (bits_left) {
size -= bits_left;
put_bits(pb, bits_left, vlc >> size);
vlc = vlc & ((1<<size)-1);
}
if (pb + 1 >= pb_end) {
bi->partial_bit_count = size;
bi->partial_bit_buffer = vlc;
return pb;
}
put_bits(pb, bits_left, vlc >> size);
vlc = vlc & ((1<<size)-1);
}
if (pb + 1 >= pb_end) {
bi->partial_bit_count = size;
bi->partial_bit_buffer = vlc;
return pb;
}
}
/* Store VLC */
@ -712,14 +712,14 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
s = blk;
for(i=0; i<7; i++) {
score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
s += 8;
}
/* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
s = blk;
for(i=0; i<6; i++) {
score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
s += 8;
}
@ -736,30 +736,30 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
b = blks;
for (i=0; i<5; i++) {
if (!qnos[i])
continue;
continue;
qnos[i]--;
size[i] = 0;
qnos[i]--;
size[i] = 0;
for (j=0; j<6; j++, b++) {
for (a=0; a<4; a++) {
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
b->area_q[a]++;
for (a=0; a<4; a++) {
if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
b->area_q[a]++;
prev= b->prev[a];
for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
b->mb[k] >>= 1;
if (b->mb[k]) {
b->mb[k] >>= 1;
if (b->mb[k]) {
b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
prev= k;
prev= k;
} else {
b->next[prev] = b->next[k];
}
}
}
b->prev[a+1]= prev;
}
size[i] += b->bit_size[a];
}
}
}
size[i] += b->bit_size[a];
}
}
}
} while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
(qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
@ -797,68 +797,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
mb_x = v & 0xff;
mb_y = v >> 8;
y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
do_edge_wrap = 0;
qnos[mb_index] = 15; /* No quantization */
c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
(((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
do_edge_wrap = 0;
qnos[mb_index] = 15; /* No quantization */
ptr = dif + mb_index*80 + 4;
for(j = 0;j < 6; j++) {
if (j < 4) { /* Four Y blocks */
/* NOTE: at end of line, the macroblock is handled as 420 */
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
/* NOTE: at end of line, the macroblock is handled as 420 */
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
data = y_ptr + (j * 8);
} else {
data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
}
linesize = s->picture.linesize[0];
linesize = s->picture.linesize[0];
} else { /* Cr and Cb blocks */
/* don't ask Fabrice why they inverted Cb and Cr ! */
data = s->picture.data[6 - j] + c_offset;
linesize = s->picture.linesize[6 - j];
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
do_edge_wrap = 1;
}
/* don't ask Fabrice why they inverted Cb and Cr ! */
data = s->picture.data[6 - j] + c_offset;
linesize = s->picture.linesize[6 - j];
if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
do_edge_wrap = 1;
}
/* Everything is set up -- now just copy data -> DCT block */
if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
uint8_t* d;
DCTELEM *b = block;
for (i=0;i<8;i++) {
d = data + 8 * linesize;
b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
/* Everything is set up -- now just copy data -> DCT block */
if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
uint8_t* d;
DCTELEM *b = block;
for (i=0;i<8;i++) {
d = data + 8 * linesize;
b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
data += linesize;
b += 8;
}
} else { /* Simple copy: 8x8 -> 8x8 */
s->get_pixels(block, data, linesize);
}
data += linesize;
b += 8;
}
} else { /* Simple copy: 8x8 -> 8x8 */
s->get_pixels(block, data, linesize);
}
if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
enc_blk->dct_mode = dv_guess_dct_mode(block);
else
enc_blk->dct_mode = 0;
enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
enc_blk->partial_bit_count = 0;
enc_blk->partial_bit_buffer = 0;
enc_blk->cur_ac = 0;
enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
enc_blk->partial_bit_count = 0;
enc_blk->partial_bit_buffer = 0;
enc_blk->cur_ac = 0;
s->fdct[enc_blk->dct_mode](block);
s->fdct[enc_blk->dct_mode](block);
dv_set_class_number(block, enc_blk,
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
dv_set_class_number(block, enc_blk,
enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
init_put_bits(pb, ptr, block_sizes[j]/8);
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
put_bits(pb, 1, enc_blk->dct_mode);
put_bits(pb, 2, enc_blk->cno);
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
put_bits(pb, 1, enc_blk->dct_mode);
put_bits(pb, 2, enc_blk->cno);
vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
enc_blk->bit_size[2] + enc_blk->bit_size[3];
++enc_blk;
++pb;
ptr += block_sizes[j]/8;
vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
enc_blk->bit_size[2] + enc_blk->bit_size[3];
++enc_blk;
++pb;
ptr += block_sizes[j]/8;
}
}
@ -898,7 +898,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl)
DVVideoContext *s = avctx->priv_data;
int slice = (size_t)sl;
dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
&s->sys->video_place[slice*5]);
&s->sys->video_place[slice*5]);
return 0;
}
@ -907,7 +907,7 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl)
DVVideoContext *s = avctx->priv_data;
int slice = (size_t)sl;
dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
&s->sys->video_place[slice*5]);
&s->sys->video_place[slice*5]);
return 0;
}
@ -940,7 +940,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
s->buf = buf;
avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
s->sys->difseg_size * 27);
s->sys->difseg_size * 27);
emms_c();
@ -958,7 +958,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
s->sys = dv_codec_profile(c);
if (!s->sys)
return -1;
return -1;
if(buf_size < s->sys->frame_size)
return -1;
@ -969,7 +969,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
s->buf = buf;
c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
s->sys->difseg_size * 27);
s->sys->difseg_size * 27);
emms_c();
return s->sys->frame_size;

View File

@ -192,7 +192,7 @@ static void dvb_encode_rle4(uint8_t **pq,
#define SCALEBITS 10
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define RGB_TO_Y_CCIR(r, g, b) \
((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \

View File

@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){
/**
* guess the dc of blocks which don't have an undamaged dc
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
/**
* simple horizontal deblocking filter used for error resilience
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
/**
* simple vertical deblocking filter used for error resilience
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
* @param w width in 8 pixel blocks
* @param h height in 8 pixel blocks
*/
static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
int b_x, b_y;
@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){
s->mv_type = MV_TYPE_16X16;
s->mb_skipped=0;
s->dsp.clear_blocks(s->block[0]);
s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x;
s->mb_y= mb_y;
@ -476,7 +476,7 @@ int score_sum=0;
s->mv_type = MV_TYPE_16X16;
s->mb_skipped=0;
s->dsp.clear_blocks(s->block[0]);
s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x;
s->mb_y= mb_y;
@ -582,7 +582,7 @@ static int is_intra_more_likely(MpegEncContext *s){
uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize;
is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16);
is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
}else{
if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
@ -873,7 +873,7 @@ void ff_er_frame_end(MpegEncContext *s){
s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
}
s->dsp.clear_blocks(s->block[0]);
s->dsp.clear_blocks(s->block[0]);
s->mb_x= mb_x;
s->mb_y= mb_y;

View File

@ -46,7 +46,7 @@ static int Faac_encode_init(AVCodecContext *avctx)
/* check faac version */
faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle);
if (faac_cfg->version != FAAC_CFG_VERSION) {
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version);
faacEncClose(s->faac_handle);
return -1;
}

View File

@ -47,8 +47,8 @@ static const char* libfaadname = "libfaad.so.0";
#endif
typedef struct {
void* handle; /* dlopen handle */
void* faac_handle; /* FAAD library handle */
void* handle; /* dlopen handle */
void* faac_handle; /* FAAD library handle */
int frame_size;
int sample_size;
int flags;
@ -57,36 +57,36 @@ typedef struct {
faacDecHandle FAADAPI (*faacDecOpen)(void);
faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder);
#ifndef FAAD2_VERSION
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
faacDecConfigurationPtr config);
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long *samplerate,
unsigned long *channels);
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
int FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long *samplerate,
unsigned long *channels);
int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
unsigned long SizeOfDecoderSpecificInfo,
unsigned long *samplerate, unsigned long *channels);
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long *bytesconsumed,
short *sample_buffer,
int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long *bytesconsumed,
short *sample_buffer,
unsigned long *samples);
#else
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder,
faacDecConfigurationPtr config);
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long buffer_size,
unsigned long *samplerate,
unsigned char *channels);
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
long FAADAPI (*faacDecInit)(faacDecHandle hDecoder,
unsigned char *buffer,
unsigned long buffer_size,
unsigned long *samplerate,
unsigned char *channels);
char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer,
unsigned long SizeOfDecoderSpecificInfo,
unsigned long *samplerate, unsigned char *channels);
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
faacDecFrameInfo *hInfo,
unsigned char *buffer,
unsigned long buffer_size);
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder,
faacDecFrameInfo *hInfo,
unsigned char *buffer,
unsigned long buffer_size);
char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode);
#endif
void FAADAPI (*faacDecClose)(faacDecHandle hDecoder);
@ -112,14 +112,14 @@ static int faac_init_mp4(AVCodecContext *avctx)
int r = 0;
if (avctx->extradata)
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
avctx->extradata_size,
&samplerate, &channels);
r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata,
avctx->extradata_size,
&samplerate, &channels);
// else r = s->faacDecInit(s->faac_handle ... );
if (r < 0)
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
r, samplerate, (long)channels, avctx->extradata_size);
av_log(avctx, AV_LOG_ERROR, "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n",
r, samplerate, (long)channels, avctx->extradata_size);
avctx->sample_rate = samplerate;
avctx->channels = channels;
@ -141,7 +141,7 @@ static int faac_decode_frame(AVCodecContext *avctx,
void *out;
#endif
if(buf_size == 0)
return 0;
return 0;
#ifndef FAAD2_VERSION
out = s->faacDecDecode(s->faac_handle,
(unsigned char*)buf,
@ -150,16 +150,16 @@ static int faac_decode_frame(AVCodecContext *avctx,
&samples);
samples *= s->sample_size;
if (data_size)
*data_size = samples;
*data_size = samples;
return (buf_size < (int)bytesconsumed)
? buf_size : (int)bytesconsumed;
? buf_size : (int)bytesconsumed;
#else
out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size);
if (frame_info.error > 0) {
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
s->faacDecGetErrorMessage(frame_info.error));
av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n",
s->faacDecGetErrorMessage(frame_info.error));
return 0;
}
@ -167,10 +167,10 @@ static int faac_decode_frame(AVCodecContext *avctx,
memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one
if (data_size)
*data_size = frame_info.samples;
*data_size = frame_info.samples;
return (buf_size < (int)frame_info.bytesconsumed)
? buf_size : (int)frame_info.bytesconsumed;
? buf_size : (int)frame_info.bytesconsumed;
#endif
}
@ -196,8 +196,8 @@ static int faac_decode_init(AVCodecContext *avctx)
s->handle = dlopen(libfaadname, RTLD_LAZY);
if (!s->handle)
{
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
libfaadname, dlerror());
av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! \n%s\n",
libfaadname, dlerror());
return -1;
}
#define dfaac(a, b) \
@ -209,32 +209,32 @@ static int faac_decode_init(AVCodecContext *avctx)
#endif /* CONFIG_FAADBIN */
// resolve all needed function calls
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
FAADAPI (*)(faacDecHandle)));
dfaac(Open, (faacDecHandle FAADAPI (*)(void)));
dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr
FAADAPI (*)(faacDecHandle)));
#ifndef FAAD2_VERSION
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
faacDecConfigurationPtr)));
dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle,
faacDecConfigurationPtr)));
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long*, unsigned long*)));
dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long*, unsigned long*)));
dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long, unsigned long*,
unsigned long*)));
unsigned long, unsigned long*,
unsigned long*)));
dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder)));
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long*, short*, unsigned long*)));
dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long*, short*, unsigned long*)));
#else
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
faacDecConfigurationPtr)));
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long, unsigned long*, unsigned char*)));
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long, unsigned long*,
unsigned char*)));
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
unsigned char*, unsigned long)));
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle,
faacDecConfigurationPtr)));
dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long, unsigned long*, unsigned char*)));
dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*,
unsigned long, unsigned long*,
unsigned char*)));
dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*,
unsigned char*, unsigned long)));
dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char)));
#endif
#undef dfacc
@ -243,8 +243,8 @@ static int faac_decode_init(AVCodecContext *avctx)
}
if (err) {
dlclose(s->handle);
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
err, libfaadname);
av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n",
err, libfaadname);
return -1;
}
#endif
@ -260,31 +260,31 @@ static int faac_decode_init(AVCodecContext *avctx)
faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle);
if (faac_cfg) {
switch (avctx->bits_per_sample) {
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
default:
case 16:
switch (avctx->bits_per_sample) {
case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break;
default:
case 16:
#ifdef FAAD2_VERSION
faac_cfg->outputFormat = FAAD_FMT_16BIT;
faac_cfg->outputFormat = FAAD_FMT_16BIT;
#endif
s->sample_size = 2;
break;
case 24:
s->sample_size = 2;
break;
case 24:
#ifdef FAAD2_VERSION
faac_cfg->outputFormat = FAAD_FMT_24BIT;
faac_cfg->outputFormat = FAAD_FMT_24BIT;
#endif
s->sample_size = 3;
break;
case 32:
s->sample_size = 3;
break;
case 32:
#ifdef FAAD2_VERSION
faac_cfg->outputFormat = FAAD_FMT_32BIT;
faac_cfg->outputFormat = FAAD_FMT_32BIT;
#endif
s->sample_size = 4;
break;
}
s->sample_size = 4;
break;
}
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
faac_cfg->defObjectType = LC;
faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate;
faac_cfg->defObjectType = LC;
}
s->faacDecSetConfiguration(s->faac_handle, faac_cfg);

View File

@ -204,15 +204,15 @@ void ff_faandct248(DCTELEM * data)
data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
z1 = (tmp12 + tmp13)* A1;
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
z1 = (tmp12 + tmp13)* A1;
data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
}
}

View File

@ -64,51 +64,51 @@ void init_fdct()
void fdct(block)
short *block;
{
register int i, j;
double s;
double tmp[64];
register int i, j;
double s;
double tmp[64];
for(i = 0; i < 8; i++)
for(j = 0; j < 8; j++)
{
s = 0.0;
for(i = 0; i < 8; i++)
for(j = 0; j < 8; j++)
{
s = 0.0;
/*
* for(k = 0; k < 8; k++)
* s += c[j][k] * block[8 * i + k];
* for(k = 0; k < 8; k++)
* s += c[j][k] * block[8 * i + k];
*/
s += c[j][0] * block[8 * i + 0];
s += c[j][1] * block[8 * i + 1];
s += c[j][2] * block[8 * i + 2];
s += c[j][3] * block[8 * i + 3];
s += c[j][4] * block[8 * i + 4];
s += c[j][5] * block[8 * i + 5];
s += c[j][6] * block[8 * i + 6];
s += c[j][7] * block[8 * i + 7];
s += c[j][0] * block[8 * i + 0];
s += c[j][1] * block[8 * i + 1];
s += c[j][2] * block[8 * i + 2];
s += c[j][3] * block[8 * i + 3];
s += c[j][4] * block[8 * i + 4];
s += c[j][5] * block[8 * i + 5];
s += c[j][6] * block[8 * i + 6];
s += c[j][7] * block[8 * i + 7];
tmp[8 * i + j] = s;
}
tmp[8 * i + j] = s;
}
for(j = 0; j < 8; j++)
for(i = 0; i < 8; i++)
{
s = 0.0;
for(j = 0; j < 8; j++)
for(i = 0; i < 8; i++)
{
s = 0.0;
/*
* for(k = 0; k < 8; k++)
* s += c[i][k] * tmp[8 * k + j];
* for(k = 0; k < 8; k++)
* s += c[i][k] * tmp[8 * k + j];
*/
s += c[i][0] * tmp[8 * 0 + j];
s += c[i][1] * tmp[8 * 1 + j];
s += c[i][2] * tmp[8 * 2 + j];
s += c[i][3] * tmp[8 * 3 + j];
s += c[i][4] * tmp[8 * 4 + j];
s += c[i][5] * tmp[8 * 5 + j];
s += c[i][6] * tmp[8 * 6 + j];
s += c[i][7] * tmp[8 * 7 + j];
s*=8.0;
s += c[i][0] * tmp[8 * 0 + j];
s += c[i][1] * tmp[8 * 1 + j];
s += c[i][2] * tmp[8 * 2 + j];
s += c[i][3] * tmp[8 * 3 + j];
s += c[i][4] * tmp[8 * 4 + j];
s += c[i][5] * tmp[8 * 5 + j];
s += c[i][6] * tmp[8 * 6 + j];
s += c[i][7] * tmp[8 * 7 + j];
s*=8.0;
block[8 * i + j] = (short)floor(s + 0.499999);
block[8 * i + j] = (short)floor(s + 0.499999);
/*
* reason for adding 0.499999 instead of 0.5:
* s is quite often x.5 (at least for i and/or j = 0 or 4)

View File

@ -149,8 +149,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
{
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
FFTComplex *exptab = s->exptab;
int l;

View File

@ -31,30 +31,30 @@
* instead of simply using 32bit integer arithmetic.
*/
typedef struct Float11 {
int sign; /**< 1bit sign */
int exp; /**< 4bit exponent */
int mant; /**< 6bit mantissa */
int sign; /**< 1bit sign */
int exp; /**< 4bit exponent */
int mant; /**< 6bit mantissa */
} Float11;
static inline Float11* i2f(int16_t i, Float11* f)
{
f->sign = (i < 0);
if (f->sign)
i = -i;
f->exp = av_log2_16bit(i) + !!i;
f->mant = i? (i<<6) >> f->exp :
1<<5;
return f;
f->sign = (i < 0);
if (f->sign)
i = -i;
f->exp = av_log2_16bit(i) + !!i;
f->mant = i? (i<<6) >> f->exp :
1<<5;
return f;
}
static inline int16_t mult(Float11* f1, Float11* f2)
{
int res, exp;
int res, exp;
exp = f1->exp + f2->exp;
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
return (f1->sign ^ f2->sign) ? -res : res;
exp = f1->exp + f2->exp;
res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
return (f1->sign ^ f2->sign) ? -res : res;
}
static inline int sgn(int value)
@ -63,32 +63,32 @@ static inline int sgn(int value)
}
typedef struct G726Tables {
int bits; /**< bits per sample */
int* quant; /**< quantization table */
int* iquant; /**< inverse quantization table */
int* W; /**< special table #1 ;-) */
int* F; /**< special table #2 */
int bits; /**< bits per sample */
int* quant; /**< quantization table */
int* iquant; /**< inverse quantization table */
int* W; /**< special table #1 ;-) */
int* F; /**< special table #2 */
} G726Tables;
typedef struct G726Context {
G726Tables* tbls; /**< static tables needed for computation */
G726Tables* tbls; /**< static tables needed for computation */
Float11 sr[2]; /**< prev. reconstructed samples */
Float11 dq[6]; /**< prev. difference */
int a[2]; /**< second order predictor coeffs */
int b[6]; /**< sixth order predictor coeffs */
int pk[2]; /**< signs of prev. 2 sez + dq */
Float11 sr[2]; /**< prev. reconstructed samples */
Float11 dq[6]; /**< prev. difference */
int a[2]; /**< second order predictor coeffs */
int b[6]; /**< sixth order predictor coeffs */
int pk[2]; /**< signs of prev. 2 sez + dq */
int ap; /**< scale factor control */
int yu; /**< fast scale factor */
int yl; /**< slow scale factor */
int dms; /**< short average magnitude of F[i] */
int dml; /**< long average magnitude of F[i] */
int td; /**< tone detect */
int ap; /**< scale factor control */
int yu; /**< fast scale factor */
int yl; /**< slow scale factor */
int dms; /**< short average magnitude of F[i] */
int dml; /**< long average magnitude of F[i] */
int td; /**< tone detect */
int se; /**< estimated signal for the next iteration */
int sez; /**< estimated second order prediction */
int y; /**< quantizer scaling factor for the next iteration */
int se; /**< estimated signal for the next iteration */
int sez; /**< estimated second order prediction */
int y; /**< quantizer scaling factor for the next iteration */
} G726Context;
static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */
@ -113,34 +113,34 @@ static int quant_tbl32[] = /**< 32kbit/s 4bits per sample
{ -125, 79, 177, 245, 299, 348, 399, INT_MAX };
static int iquant_tbl32[] =
{ INT_MIN, 4, 135, 213, 273, 323, 373, 425,
425, 373, 323, 273, 213, 135, 4, INT_MIN };
425, 373, 323, 273, 213, 135, 4, INT_MIN };
static int W_tbl32[] =
{ -12, 18, 41, 64, 112, 198, 355, 1122,
1122, 355, 198, 112, 64, 41, 18, -12};
1122, 355, 198, 112, 64, 41, 18, -12};
static int F_tbl32[] =
{ 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */
{ -122, -16, 67, 138, 197, 249, 297, 338,
377, 412, 444, 474, 501, 527, 552, INT_MAX };
377, 412, 444, 474, 501, 527, 552, INT_MAX };
static int iquant_tbl40[] =
{ INT_MIN, -66, 28, 104, 169, 224, 274, 318,
358, 395, 429, 459, 488, 514, 539, 566,
566, 539, 514, 488, 459, 429, 395, 358,
318, 274, 224, 169, 104, 28, -66, INT_MIN };
358, 395, 429, 459, 488, 514, 539, 566,
566, 539, 514, 488, 459, 429, 395, 358,
318, 274, 224, 169, 104, 28, -66, INT_MIN };
static int W_tbl40[] =
{ 14, 14, 24, 39, 40, 41, 58, 100,
141, 179, 219, 280, 358, 440, 529, 696,
696, 529, 440, 358, 280, 219, 179, 141,
100, 58, 41, 40, 39, 24, 14, 14 };
141, 179, 219, 280, 358, 440, 529, 696,
696, 529, 440, 358, 280, 219, 179, 141,
100, 58, 41, 40, 39, 24, 14, 14 };
static int F_tbl40[] =
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
static G726Tables G726Tables_pool[] =
{{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
{ 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
{ 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
{ 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
@ -207,20 +207,20 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I)
dq0 = dq ? sgn(dq) : 0;
if (tr) {
c->a[0] = 0;
c->a[1] = 0;
c->a[1] = 0;
for (i=0; i<6; i++)
c->b[i] = 0;
c->b[i] = 0;
} else {
/* This is a bit crazy, but it really is +255 not +256 */
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
/* This is a bit crazy, but it really is +255 not +256 */
fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
c->a[1] = clip(c->a[1], -12288, 12288);
c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
c->a[1] = clip(c->a[1], -12288, 12288);
c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
for (i=0; i<6; i++)
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
}
/* Update Dq and Sr and Pk */
@ -323,13 +323,13 @@ static int g726_init(AVCodecContext * avctx)
if (avctx->channels != 1 ||
(avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
return -1;
return -1;
}
if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
return -1;
return -1;
}
g726_reset(&c->c, avctx->bit_rate);
c->code_size = c->c.tbls->bits;
@ -384,12 +384,12 @@ static int g726_decode_frame(AVCodecContext *avctx,
init_get_bits(&gb, buf, buf_size * 8);
if (c->bits_left) {
int s = c->code_size - c->bits_left;;
code = (c->bit_buffer << s) | get_bits(&gb, s);
*samples++ = g726_decode(&c->c, code & mask);
code = (c->bit_buffer << s) | get_bits(&gb, s);
*samples++ = g726_decode(&c->c, code & mask);
}
while (get_bits_count(&gb) + c->code_size <= buf_size*8)
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
c->bits_left = buf_size*8 - get_bits_count(&gb);
c->bit_buffer = get_bits(&gb, c->bits_left);

View File

@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es
* read unsigned golomb rice code (shorten).
*/
static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
}
/**
@ -395,7 +395,7 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){
*/
static inline void set_se_golomb(PutBitContext *pb, int i){
// if (i>32767 || i<-32767)
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
#if 0
if(i<=0) i= -2*i;
else i= 2*i-1;

View File

@ -231,11 +231,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
(coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 0); /* h263 id */
put_bits(&s->pb, 1, 0); /* split screen off */
put_bits(&s->pb, 1, 0); /* camera off */
put_bits(&s->pb, 1, 0); /* freeze picture release off */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 0); /* h263 id */
put_bits(&s->pb, 1, 0); /* split screen off */
put_bits(&s->pb, 1, 0); /* camera off */
put_bits(&s->pb, 1, 0); /* freeze picture release off */
format = h263_get_picture_format(s->width, s->height);
if (!s->h263_plus) {
@ -245,12 +245,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
/* For now UMV is disabled on H.263v1, since the restrictions
of H.263v1 UMV require checking the predicted MV after
calculation of the current MB to see if we're on the limits */
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
put_bits(&s->pb, 1, 0); /* SAC: off */
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */
put_bits(&s->pb, 1, 0); /* SAC: off */
put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */
put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */
put_bits(&s->pb, 5, s->qscale);
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
} else {
int ufep=1;
/* H.263v2 */
@ -286,9 +286,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
/* This should be here if PLUSPTYPE */
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
if (format == 7) {
if (format == 7) {
/* Custom Picture Format (CPFMT) */
aspect_to_info(s, s->avctx->sample_aspect_ratio);
@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
}
}
}
if(s->custom_pcf){
if(ufep){
@ -320,7 +320,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 5, s->qscale);
}
put_bits(&s->pb, 1, 0); /* no PEI */
put_bits(&s->pb, 1, 0); /* no PEI */
if(s->h263_slice_structured){
put_bits(&s->pb, 1, 1);
@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64],
}
void mpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
DCTELEM block[6][64],
int motion_x, int motion_y)
{
int cbpc, cbpy, pred_x, pred_y;
PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb;
@ -884,7 +884,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
return;
}
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */
put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
if(cbp) put_bits(&s->pb, 6, cbp);
@ -998,7 +998,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(pic==NULL || pic->pict_type!=B_TYPE) break;
b_pic= pic->data[0] + offset + 16; //FIXME +16
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
s->mb_skipped=0;
break;
@ -1021,7 +1021,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
}
}
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
cbpc = cbp & 3;
cbpy = cbp >> 2;
cbpy ^= 0xf;
@ -1121,7 +1121,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
int dc_diff[6]; //dc values with the dc prediction subtracted
int dir[6]; //prediction direction
int zigzag_last_index[6];
uint8_t *scan_table[6];
uint8_t *scan_table[6];
int i;
for(i=0; i<6; i++){
@ -1152,7 +1152,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
intra_MCBPC_code[cbpc]);
} else {
if(s->dquant) cbpc+=8;
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb,
inter_MCBPC_bits[cbpc + 4],
inter_MCBPC_code[cbpc + 4]);
@ -1185,8 +1185,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
}
void h263_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
DCTELEM block[6][64],
int motion_x, int motion_y)
{
int cbpc, cbpy, i, cbp, pred_x, pred_y;
int16_t pred_dc;
@ -1211,7 +1211,7 @@ void h263_encode_mb(MpegEncContext * s,
return;
}
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
cbpc = cbp & 3;
cbpy = cbp >> 2;
@ -1346,14 +1346,14 @@ void h263_encode_mb(MpegEncContext * s,
intra_MCBPC_code[cbpc]);
} else {
if(s->dquant) cbpc+=8;
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb,
inter_MCBPC_bits[cbpc + 4],
inter_MCBPC_code[cbpc + 4]);
}
if (s->h263_aic) {
/* XXX: currently, we do not try to use ac prediction */
put_bits(&s->pb, 1, 0); /* no AC prediction */
put_bits(&s->pb, 1, 0); /* no AC prediction */
}
cbpy = cbp >> 2;
put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
@ -1796,7 +1796,7 @@ static void init_uni_dc_tab(void)
v = abs(level);
while (v) {
v >>= 1;
size++;
size++;
}
if (level < 0)
@ -2318,14 +2318,14 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
put_bits(&s->pb, 16, 0);
put_bits(&s->pb, 16, 0x120 + vol_number); /* video obj layer */
put_bits(&s->pb, 1, 0); /* random access vol */
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
put_bits(&s->pb, 1, 0); /* random access vol */
put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */
if(s->workaround_bugs & FF_BUG_MS) {
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
put_bits(&s->pb, 1, 0); /* is obj layer id= no */
} else {
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
put_bits(&s->pb, 3, 1); /* is obj layer priority */
put_bits(&s->pb, 1, 1); /* is obj layer id= yes */
put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
put_bits(&s->pb, 3, 1); /* is obj layer priority */
}
aspect_to_info(s, s->avctx->sample_aspect_ratio);
@ -2337,37 +2337,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
}
if(s->workaround_bugs & FF_BUG_MS) { //
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */
} else {
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
put_bits(&s->pb, 1, 1); /* vol control parameters= yes */
put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */
put_bits(&s->pb, 1, s->low_delay);
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
put_bits(&s->pb, 1, 0); /* vbv parameters= no */
}
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 16, s->avctx->time_base.den);
if (s->time_increment_bits < 1)
s->time_increment_bits = 1;
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 13, s->width); /* vol width */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 13, s->height); /* vol height */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 1, 0); /* fixed vop rate=no */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 13, s->width); /* vol width */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 13, s->height); /* vol height */
put_bits(&s->pb, 1, 1); /* marker bit */
put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
put_bits(&s->pb, 1, 1); /* obmc disable */
put_bits(&s->pb, 1, 1); /* obmc disable */
if (vo_ver_id == 1) {
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */
}else{
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */
}
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
put_bits(&s->pb, 1, 0); /* not 8 bit == false */
put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
if(s->mpeg_quant){
ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
@ -2376,27 +2376,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
if (vo_ver_id != 1)
put_bits(&s->pb, 1, s->quarter_sample);
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
s->resync_marker= s->rtp_mode;
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
if(s->data_partitioning){
put_bits(&s->pb, 1, 0); /* no rvlc */
put_bits(&s->pb, 1, 0); /* no rvlc */
}
if (vo_ver_id != 1){
put_bits(&s->pb, 1, 0); /* newpred */
put_bits(&s->pb, 1, 0); /* reduced res vop */
put_bits(&s->pb, 1, 0); /* newpred */
put_bits(&s->pb, 1, 0); /* reduced res vop */
}
put_bits(&s->pb, 1, 0); /* scalability */
put_bits(&s->pb, 1, 0); /* scalability */
ff_mpeg4_stuffing(&s->pb);
/* user data */
if(!(s->flags & CODEC_FLAG_BITEXACT)){
put_bits(&s->pb, 16, 0);
put_bits(&s->pb, 16, 0x1B2); /* user_data */
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
put_bits(&s->pb, 16, 0x1B2); /* user_data */
ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
}
}
@ -2421,9 +2421,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
put_bits(&s->pb, 16, 0); /* vop header */
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
put_bits(&s->pb, 16, 0); /* vop header */
put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */
put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */
assert(s->time>=0);
time_div= s->time/s->avctx->time_base.den;
@ -2435,15 +2435,15 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 1); /* vop coded */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
put_bits(&s->pb, 1, 1); /* marker */
put_bits(&s->pb, 1, 1); /* vop coded */
if ( s->pict_type == P_TYPE
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
}
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
if(!s->progressive_sequence){
put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
put_bits(&s->pb, 1, s->alternate_scan);
@ -2453,9 +2453,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 5, s->qscale);
if (s->pict_type != I_TYPE)
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
put_bits(&s->pb, 3, s->f_code); /* fcode_for */
if (s->pict_type == B_TYPE)
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
put_bits(&s->pb, 3, s->b_code); /* fcode_back */
// printf("****frame %d\n", picture_number);
}
@ -2492,9 +2492,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
/* find prediction */
if (n < 4) {
scale = s->y_dc_scale;
scale = s->y_dc_scale;
} else {
scale = s->c_dc_scale;
scale = s->c_dc_scale;
}
if(IS_3IV1)
scale= 8;
@ -2520,10 +2520,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
}
if (abs(a - b) < abs(b - c)) {
pred = c;
pred = c;
*dir_ptr = 1; /* top */
} else {
pred = a;
pred = a;
*dir_ptr = 0; /* left */
}
/* we assume pred is positive */
@ -2629,11 +2629,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
// if(level<-255 || level>255) printf("dc overflow\n");
level+=256;
if (n < 4) {
/* luminance */
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
/* luminance */
put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
} else {
/* chrominance */
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
/* chrominance */
put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
}
#else
int size, v;
@ -2641,25 +2641,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
size = 0;
v = abs(level);
while (v) {
v >>= 1;
size++;
v >>= 1;
size++;
}
if (n < 4) {
/* luminance */
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
/* luminance */
put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
} else {
/* chrominance */
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
/* chrominance */
put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
}
/* encode remaining bits */
if (size > 0) {
if (level < 0)
level = (-level) ^ ((1 << size) - 1);
put_bits(&s->pb, size, level);
if (size > 8)
put_bits(&s->pb, 1, 1);
if (level < 0)
level = (-level) ^ ((1 << size) - 1);
put_bits(&s->pb, size, level);
if (size > 8)
put_bits(&s->pb, 1, 1);
}
#endif
}
@ -2689,16 +2689,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
const int last_index = s->block_last_index[n];
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
/* mpeg4 based DC predictor */
mpeg4_encode_dc(dc_pb, intra_dc, n);
/* mpeg4 based DC predictor */
mpeg4_encode_dc(dc_pb, intra_dc, n);
if(last_index<1) return;
i = 1;
i = 1;
rl = &rl_intra;
bits_tab= uni_mpeg4_intra_rl_bits;
len_tab = uni_mpeg4_intra_rl_len;
} else {
if(last_index<0) return;
i = 0;
i = 0;
rl = &rl_inter;
bits_tab= uni_mpeg4_inter_rl_bits;
len_tab = uni_mpeg4_inter_rl_len;
@ -2708,9 +2708,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
last_non_zero = i - 1;
#if 1
for (; i < last_index; i++) {
int level = block[ scan_table[i] ];
if (level) {
int run = i - last_non_zero - 1;
int level = block[ scan_table[i] ];
if (level) {
int run = i - last_non_zero - 1;
level+=64;
if((level&(~127)) == 0){
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@ -2718,11 +2718,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
}else{ //ESC3
put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
}
last_non_zero = i;
}
last_non_zero = i;
}
}
/*if(i<=last_index)*/{
int level = block[ scan_table[i] ];
int level = block[ scan_table[i] ];
int run = i - last_non_zero - 1;
level+=64;
if((level&(~127)) == 0){
@ -2734,17 +2734,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
}
#else
for (; i <= last_index; i++) {
const int slevel = block[ scan_table[i] ];
if (slevel) {
const int slevel = block[ scan_table[i] ];
if (slevel) {
int level;
int run = i - last_non_zero - 1;
last = (i == last_index);
sign = 0;
level = slevel;
if (level < 0) {
sign = 1;
level = -level;
}
int run = i - last_non_zero - 1;
last = (i == last_index);
sign = 0;
level = slevel;
if (level < 0) {
sign = 1;
level = -level;
}
code = get_rl_index(rl, last, run, level);
put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
if (code == rl->n) {
@ -2786,8 +2786,8 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
} else {
put_bits(ac_pb, 1, sign);
}
last_non_zero = i;
}
last_non_zero = i;
}
}
#endif
}
@ -2802,15 +2802,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
int len=0;
if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
/* mpeg4 based DC predictor */
len += mpeg4_get_dc_length(intra_dc, n);
/* mpeg4 based DC predictor */
len += mpeg4_get_dc_length(intra_dc, n);
if(last_index<1) return len;
i = 1;
i = 1;
rl = &rl_intra;
len_tab = uni_mpeg4_intra_rl_len;
} else {
if(last_index<0) return 0;
i = 0;
i = 0;
rl = &rl_inter;
len_tab = uni_mpeg4_inter_rl_len;
}
@ -2818,9 +2818,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
/* AC coefs */
last_non_zero = i - 1;
for (; i < last_index; i++) {
int level = block[ scan_table[i] ];
if (level) {
int run = i - last_non_zero - 1;
int level = block[ scan_table[i] ];
if (level) {
int run = i - last_non_zero - 1;
level+=64;
if((level&(~127)) == 0){
const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@ -2828,11 +2828,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
}else{ //ESC3
len += 7+2+1+6+1+12+1;
}
last_non_zero = i;
}
last_non_zero = i;
}
}
/*if(i<=last_index)*/{
int level = block[ scan_table[i] ];
int level = block[ scan_table[i] ];
int run = i - last_non_zero - 1;
level+=64;
if((level&(~127)) == 0){
@ -3251,7 +3251,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
//FIXME reduced res stuff here
if (s->pict_type != I_TYPE) {
int f_code = get_bits(&s->gb, 3); /* fcode_for */
int f_code = get_bits(&s->gb, 3); /* fcode_for */
if(f_code==0){
av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
}
@ -4741,7 +4741,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
if(intra) {
if(s->qscale < s->intra_dc_threshold){
/* DC coef */
/* DC coef */
if(s->partitioned_frame){
level = s->dc_val[0][ s->block_index[n] ];
if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
@ -4898,7 +4898,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
}
}
#endif
if (level>0) level= level * qmul + qadd;
if (level>0) level= level * qmul + qadd;
else level= level * qmul - qadd;
if((unsigned)(level + 2048) > 4095){
@ -5014,18 +5014,18 @@ int h263_decode_picture_header(MpegEncContext *s)
}
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
return -1; /* h263 id */
return -1; /* h263 id */
}
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
format = get_bits(&s->gb, 3);
/*
0 forbidden
1 sub-QCIF
10 QCIF
7 extended PTYPE (PLUSPTYPE)
7 extended PTYPE (PLUSPTYPE)
*/
if (format != 7 && format != 6) {
@ -5042,17 +5042,17 @@ int h263_decode_picture_header(MpegEncContext *s)
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
return -1; /* SAC: off */
return -1; /* SAC: off */
}
s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
s->unrestricted_mv = s->h263_long_vectors || s->obmc;
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
return -1; /* not PB frame */
return -1; /* not PB frame */
}
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
s->width = width;
s->height = height;
@ -5511,17 +5511,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
}
s->low_delay= get_bits1(gb);
if(get_bits1(gb)){ /* vbv parameters */
get_bits(gb, 15); /* first_half_bitrate */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* latter_half_bitrate */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* first_half_vbv_buffer_size */
skip_bits1(gb); /* marker */
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
get_bits(gb, 11); /* first_half_vbv_occupancy */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* latter_half_vbv_occupancy */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* first_half_bitrate */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* latter_half_bitrate */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* first_half_vbv_buffer_size */
skip_bits1(gb); /* marker */
get_bits(gb, 3); /* latter_half_vbv_buffer_size */
get_bits(gb, 11); /* first_half_vbv_occupancy */
skip_bits1(gb); /* marker */
get_bits(gb, 15); /* latter_half_vbv_occupancy */
skip_bits1(gb); /* marker */
}
}else{
// set low delay flag only once the smartest? low delay detection won't be overridden
@ -5628,7 +5628,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
/* load custom intra matrix */
if(get_bits1(gb)){
int last=0;
for(i=0; i<64; i++){
for(i=0; i<64; i++){
int j;
v= get_bits(gb, 8);
if(v==0) break;
@ -5641,7 +5641,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
/* replicate last value */
for(; i<64; i++){
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->intra_matrix[j]= last;
s->chroma_intra_matrix[j]= last;
}
@ -5650,7 +5650,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
/* load custom non intra matrix */
if(get_bits1(gb)){
int last=0;
for(i=0; i<64; i++){
for(i=0; i<64; i++){
int j;
v= get_bits(gb, 8);
if(v==0) break;
@ -5663,7 +5663,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
/* replicate last value */
for(; i<64; i++){
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
s->inter_matrix[j]= last;
s->chroma_inter_matrix[j]= last;
}
@ -5794,7 +5794,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
int time_incr, time_increment;
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */
if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
s->low_delay=0;
@ -5877,9 +5877,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
/* rounding type for motion estimation */
s->no_rounding = get_bits1(gb);
s->no_rounding = get_bits1(gb);
} else {
s->no_rounding = 0;
s->no_rounding = 0;
}
//FIXME reduced res stuff
@ -5938,7 +5938,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
}
if (s->pict_type != I_TYPE) {
s->f_code = get_bits(gb, 3); /* fcode_for */
s->f_code = get_bits(gb, 3); /* fcode_for */
if(s->f_code==0){
av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
return -1; // makes no sense to continue, as the MV decoding will break very quickly
@ -6094,15 +6094,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
if (get_bits1(&s->gb) != 1) {
av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
return -1; /* marker */
return -1; /* marker */
}
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
return -1; /* h263 id */
return -1; /* h263 id */
}
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
skip_bits1(&s->gb); /* split screen off */
skip_bits1(&s->gb); /* camera off */
skip_bits1(&s->gb); /* freeze picture release off */
format = get_bits(&s->gb, 3);
if (format != 7) {
@ -6118,23 +6118,23 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
return -1; /* SAC: off */
return -1; /* SAC: off */
}
if (get_bits1(&s->gb) != 0) {
s->obmc= 1;
av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
// return -1; /* advanced prediction mode: off */
// return -1; /* advanced prediction mode: off */
}
if (get_bits1(&s->gb) != 0) {
av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
return -1; /* PB frame mode */
return -1; /* PB frame mode */
}
/* skip unknown header garbage */
skip_bits(&s->gb, 41);
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
/* PEI */
while (get_bits1(&s->gb) != 0) {
@ -6208,7 +6208,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
if (s->dropable)
s->pict_type = P_TYPE;
skip_bits1(&s->gb); /* deblocking flag */
skip_bits1(&s->gb); /* deblocking flag */
s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
s->h263_plus = 0;

View File

@ -147,15 +147,15 @@ typedef struct H264Context{
MpegEncContext s;
int nal_ref_idc;
int nal_unit_type;
#define NAL_SLICE 1
#define NAL_DPA 2
#define NAL_DPB 3
#define NAL_DPC 4
#define NAL_IDR_SLICE 5
#define NAL_SEI 6
#define NAL_SPS 7
#define NAL_PPS 8
#define NAL_AUD 9
#define NAL_SLICE 1
#define NAL_DPA 2
#define NAL_DPB 3
#define NAL_DPC 4
#define NAL_IDR_SLICE 5
#define NAL_SEI 6
#define NAL_SPS 7
#define NAL_PPS 8
#define NAL_AUD 9
#define NAL_END_SEQUENCE 10
#define NAL_END_STREAM 11
#define NAL_FILLER_DATA 12
@ -1461,7 +1461,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
int i, si, di;
uint8_t *dst;
// src[0]&0x80; //forbidden bit
// src[0]&0x80; //forbidden bit
h->nal_ref_idc= src[0]>>5;
h->nal_unit_type= src[0]&0x1F;
@ -7545,8 +7545,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
case NAL_SPS_EXT:
case NAL_AUXILIARY_SLICE:
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
default:
av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
}
}

View File

@ -15,7 +15,7 @@
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index,eax,ebx,ecx,edx)\
__asm __volatile\
("mov %%"REG_b", %%"REG_S"\n\t"\
("mov %%"REG_b", %%"REG_S"\n\t"\
"cpuid\n\t"\
"xchg %%"REG_b", %%"REG_S\
: "=a" (eax), "=S" (ebx),\
@ -89,8 +89,8 @@ int mm_support(void)
edx == 0x48727561 &&
ecx == 0x736c7561) { /* "CentaurHauls" */
/* VIA C3 */
if(ext_caps & (1<<24))
rval |= MM_MMXEXT;
if(ext_caps & (1<<24))
rval |= MM_MMXEXT;
} else if (ebx == 0x69727943 &&
edx == 0x736e4978 &&
ecx == 0x64616574) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
{
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
"lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
}
static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
MOVQ_BFE(mm6);
__asm __volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"add $8, %2 \n\t"
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"add %5, %3 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
"add $32, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"add %5, %3 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"add $8, %2 \n\t"
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"add %5, %3 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"add %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
"add $32, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"add %5, %3 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
:"memory");
:"S"((long)src1Stride), "D"((long)dstStride)
:"memory");
}
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 9(%1), %%mm1 \n\t"
"movq 8(%1, %3), %%mm2 \n\t"
"movq 9(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 9(%1), %%mm1 \n\t"
"movq 8(%1, %3), %%mm2 \n\t"
"movq 9(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
"lea (%3, %3), %%"REG_a" \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 9(%1), %%mm1 \n\t"
"movq 8(%1, %3), %%mm2 \n\t"
"movq 9(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t"
"movq 1(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"movq 8(%1), %%mm0 \n\t"
"movq 9(%1), %%mm1 \n\t"
"movq 8(%1, %3), %%mm2 \n\t"
"movq 9(%1, %3), %%mm3 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
}
// Combine two 16-byte-wide sources row by row with PAVGBP and store the
// result to dst, for h rows.
//
// Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
//               %4 = src1Stride, %5 = dstStride.
// src1 advances by src1Stride per row and dst by dstStride per row; src2 is
// read as tightly packed rows and advances by 16 bytes per row.
// PAVGBP and MOVQ_BFE are defined outside this view; from the loads and
// stores here, PAVGBP(a, b, r, c, d, p) presumably leaves avg(a, b) in r and
// avg(c, d) in p, using the constant MOVQ_BFE put in mm6 -- TODO confirm.
//
// NOTE(review): after the odd-row prologue the code falls straight into the
// two-rows-per-pass loop without testing h again, so h == 0 or h == 1 looks
// unsupported -- confirm against callers.
// NOTE(review): in this listing every changed line is shown twice (the
// original copy followed by its re-indented copy).
static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{
MOVQ_BFE(mm6);
__asm __volatile(
// odd h: process a single row first so the loop can handle rows in pairs
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
"add $16, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
// main loop: two rows per pass
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
// second row of the pair: src2 is read at +16/+24 and bumped by 32 afterwards
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"add $32, %2 \n\t"
"subl $2, %0 \n\t"
"jnz 1b \n\t"
// re-indented copy of the same instruction sequence follows
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
"add $16, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"movq 8(%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"add %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"add $32, %2 \n\t"
"subl $2, %0 \n\t"
"jnz 1b \n\t"
#ifdef PIC // Note: "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and can't be used
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
:"memory");
:"S"((long)src1Stride), "D"((long)dstStride)
:"memory");
}
// Vertical half-pel "put" for an 8-byte-wide block: each output row is the
// PAVGBP combination of a source row and the row line_size bytes below it.
//
// Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size;
// REG_a is loaded with 2 * line_size and used to step both pointers.
// Row 0 is loaded once before the loop; each pass then loads four more source
// rows and writes four output rows, alternating mm0/mm2 as the "previous row"
// register so no row is loaded twice.
// h is decremented by 4 per pass and the loop ends only when it reaches zero,
// so h is presumably a positive multiple of 4 -- TODO confirm against callers.
// PAVGBP and MOVQ_BFE are defined outside this view; PAVGBP(a, b, r, c, d, p)
// presumably yields avg(a, b) in r and avg(c, d) in p -- TODO confirm.
//
// NOTE(review): in this listing every changed line is shown twice (the
// original copy followed by its re-indented copy).
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
".balign 8 \n\t"
"1: \n\t"
// rows +1 and +2 relative to the current pixels pointer
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
// same step again with mm2 now holding the previous row and mm0 reloaded
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
// re-indented copy of the same instruction sequence follows
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
}
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
__asm __volatile(
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddusw %%mm2, %%mm0 \n\t"
"paddusw %%mm3, %%mm1 \n\t"
"paddusw %%mm6, %%mm4 \n\t"
"paddusw %%mm6, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t"
"packuswb %%mm5, %%mm4 \n\t"
"movq %%mm4, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddusw %%mm2, %%mm0 \n\t"
"paddusw %%mm3, %%mm1 \n\t"
"paddusw %%mm6, %%mm4 \n\t"
"paddusw %%mm6, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t"
"packuswb %%mm5, %%mm4 \n\t"
"movq %%mm4, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm2, %%mm4 \n\t"
"paddusw %%mm3, %%mm5 \n\t"
"paddusw %%mm6, %%mm0 \n\t"
"paddusw %%mm6, %%mm1 \n\t"
"paddusw %%mm4, %%mm0 \n\t"
"paddusw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm2, %%mm4 \n\t"
"paddusw %%mm3, %%mm5 \n\t"
"paddusw %%mm6, %%mm0 \n\t"
"paddusw %%mm6, %%mm1 \n\t"
"paddusw %%mm4, %%mm0 \n\t"
"paddusw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"subl $2, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size)
:REG_a, "memory");
"subl $2, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size)
:REG_a, "memory");
}
// avg_pixels
@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movd %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
__asm __volatile(
"movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movd %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
__asm __volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
"movq 8%0, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
__asm __volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
"movq 8%0, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
} while (--h);
}
@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
:"+m"(*dst)
:"m"(*src1), "m"(*src2)
:"memory");
dst += dstStride;
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
:"+m"(*dst)
:"m"(*src1), "m"(*src2)
:"memory");
dst += dstStride;
src1 += src1Stride;
src2 += 8;
} while (--h);
@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 9%1, %%mm1 \n\t"
"movq 8%0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 9%1, %%mm1 \n\t"
"movq 8%0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
} while (--h);
}
@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 8%2, %%mm1 \n\t"
"movq 8%0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, 8%0 \n\t"
:"+m"(*dst)
:"m"(*src1), "m"(*src2)
:"memory");
dst += dstStride;
__asm __volatile(
"movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, %0 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 8%2, %%mm1 \n\t"
"movq 8%0, %%mm3 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
"movq %%mm0, 8%0 \n\t"
:"+m"(*dst)
:"m"(*src1), "m"(*src2)
:"memory");
dst += dstStride;
src1 += src1Stride;
src2 += 16;
} while (--h);
@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
{
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t"
PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
"movq (%2, %3), %%mm3 \n\t"
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t"
PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
"movq (%2, %3), %%mm3 \n\t"
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t"
PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
"movq (%2, %3), %%mm3 \n\t"
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm2, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t"
PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
"movq (%2, %3), %%mm3 \n\t"
PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm2, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
"subl $4, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size)
:REG_a, "memory");
}
// this routine is 'slightly' suboptimal but mostly unused
@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
__asm __volatile(
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddusw %%mm2, %%mm0 \n\t"
"paddusw %%mm3, %%mm1 \n\t"
"paddusw %%mm6, %%mm4 \n\t"
"paddusw %%mm6, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"packuswb %%mm5, %%mm4 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t"
PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
"movq %%mm5, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddusw %%mm2, %%mm0 \n\t"
"paddusw %%mm3, %%mm1 \n\t"
"paddusw %%mm6, %%mm4 \n\t"
"paddusw %%mm6, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"packuswb %%mm5, %%mm4 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t"
PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
"movq %%mm5, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm2, %%mm4 \n\t"
"paddusw %%mm3, %%mm5 \n\t"
"paddusw %%mm6, %%mm0 \n\t"
"paddusw %%mm6, %%mm1 \n\t"
"paddusw %%mm4, %%mm0 \n\t"
"paddusw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t"
PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
"movq %%mm1, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
"movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm2, %%mm4 \n\t"
"paddusw %%mm3, %%mm5 \n\t"
"paddusw %%mm6, %%mm0 \n\t"
"paddusw %%mm6, %%mm1 \n\t"
"paddusw %%mm4, %%mm0 \n\t"
"paddusw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t"
PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
"movq %%mm1, (%2, %%"REG_a") \n\t"
"add %3, %%"REG_a" \n\t"
"subl $2, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size)
:REG_a, "memory");
"subl $2, %0 \n\t"
"jnz 1b \n\t"
:"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size)
:REG_a, "memory");
}
//FIXME optimize

View File

@ -30,21 +30,21 @@
//
//////////////////////////////////////////////////////////////////////
// Fixed-point scaling parameters for the forward DCT.
// BITS_FRW_ACC: number of extra accuracy bits carried through the transform.
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
// Shift named for the column pass (presumably applied there -- its use is not
// visible in this part of the file); equal to the accuracy bits.
#define SHIFT_FRW_COL BITS_FRW_ACC
// Right-shift applied by the row pass (passed to psrad in fdct_row_sse2);
// evaluates to 17 when BITS_FRW_ACC is 3.
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
// Half of (1 << SHIFT_FRW_ROW): round-to-nearest bias for that right shift.
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
#define SHIFT_FRW_COL BITS_FRW_ACC
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
// Concatenated table of tangent constants for the forward DCT.
// Each constant is replicated across four 16-bit lanes and stored as a
// fixed-point value scaled by 1<<16 (65536). The third entry is stored as
// (tan - 1) so that it fits in a signed 16-bit lane.
static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
13036, 13036, 13036, 13036, // tan(1*pi/16) * (1<<16) + 0.5
27146, 27146, 27146, 27146, // tan(2*pi/16) * (1<<16) + 0.5
-21746, -21746, -21746, -21746, // (tan(3*pi/16) - 1) * (1<<16) + 0.5
13036, 13036, 13036, 13036, // tan(1*pi/16) * (1<<16) + 0.5
27146, 27146, 27146, 27146, // tan(2*pi/16) * (1<<16) + 0.5
-21746, -21746, -21746, -21746, // (tan(3*pi/16) - 1) * (1<<16) + 0.5
};
// cos(pi/4) as a fixed-point value scaled by 1<<15 (32768), replicated
// across four 16-bit lanes.
static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
23170, 23170, 23170, 23170, // cos(pi/4) * (1<<15) + 0.5
23170, 23170, 23170, 23170, // cos(pi/4) * (1<<15) + 0.5
};
// The value 1 in each of the four 16-bit lanes of a 64-bit word.
static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
@ -351,62 +351,62 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
// SSE2 row pass of the forward DCT over eight rows of eight int16_t
// coefficients (16 bytes per row).
//
// Asm operands: %0 = in, %1 = tab_frw_01234567_sse2 table, %2 = rounding
// vector fdct_r_row_sse2, %3 = SHIFT_FRW_ROW (immediate), %4 = out.
//
// Three assembler-level macros are defined inside the asm text:
//   FDCT_ROW_SSE2_H1 i t  loads the two halves of the input row at byte
//                         offset i (xmm2, xmm0) and four coefficient vectors
//                         from table offset t (xmm4, xmm5, xmm3, xmm7).
//   FDCT_ROW_SSE2_H2 i t  loads the input row and only xmm3/xmm7; xmm4/xmm5
//                         are kept from the preceding H1, since the compute
//                         step uses them only as pmaddwd sources.
//   FDCT_ROW_SSE2 i       forms sums and differences of the first half with
//                         the word-reversed second half (pshuflw $27),
//                         multiply-accumulates them against the table, adds
//                         the rounding vector in xmm6, shifts right by %3,
//                         packs to int16 with saturation and stores the row
//                         at byte offset i of out.
// Rows are processed in pairs that share one table offset: byte offsets
// (0, 64) with table 0, (16, 112) with 64, (32, 96) with 128, (48, 80) with 192.
//
// NOTE(review): the asm statement declares no outputs and no clobbers even
// though it stores through %4 and overwrites xmm0-xmm7; consider whether a
// "memory" clobber and xmm clobbers are needed.
// NOTE(review): in this listing every changed line is shown twice (the
// original copy followed by its re-indented copy).
static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
asm volatile(
".macro FDCT_ROW_SSE2_H1 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
"movdqa \\t(%1), %%xmm4 \n\t"
"movdqa \\t+16(%1), %%xmm5 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2_H2 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2 i \n\t"
"movq %%xmm2, %%xmm1 \n\t"
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
"paddsw %%xmm0, %%xmm1 \n\t"
"psubsw %%xmm0, %%xmm2 \n\t"
"punpckldq %%xmm2, %%xmm1 \n\t"
"pshufd $78, %%xmm1, %%xmm2 \n\t"
"pmaddwd %%xmm2, %%xmm3 \n\t"
"pmaddwd %%xmm1, %%xmm7 \n\t"
"pmaddwd %%xmm5, %%xmm2 \n\t"
"pmaddwd %%xmm4, %%xmm1 \n\t"
"paddd %%xmm7, %%xmm3 \n\t"
"paddd %%xmm2, %%xmm1 \n\t"
"paddd %%xmm6, %%xmm3 \n\t"
"paddd %%xmm6, %%xmm1 \n\t"
"psrad %3, %%xmm3 \n\t"
"psrad %3, %%xmm1 \n\t"
"packssdw %%xmm3, %%xmm1 \n\t"
"movdqa %%xmm1, \\i(%4) \n\t"
".endm \n\t"
// xmm6 holds the rounding vector for every row
"movdqa (%2), %%xmm6 \n\t"
"FDCT_ROW_SSE2_H1 0 0 \n\t"
"FDCT_ROW_SSE2 0 \n\t"
"FDCT_ROW_SSE2_H2 64 0 \n\t"
"FDCT_ROW_SSE2 64 \n\t"
".macro FDCT_ROW_SSE2_H1 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
"movdqa \\t(%1), %%xmm4 \n\t"
"movdqa \\t+16(%1), %%xmm5 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2_H2 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2 i \n\t"
"movq %%xmm2, %%xmm1 \n\t"
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
"paddsw %%xmm0, %%xmm1 \n\t"
"psubsw %%xmm0, %%xmm2 \n\t"
"punpckldq %%xmm2, %%xmm1 \n\t"
"pshufd $78, %%xmm1, %%xmm2 \n\t"
"pmaddwd %%xmm2, %%xmm3 \n\t"
"pmaddwd %%xmm1, %%xmm7 \n\t"
"pmaddwd %%xmm5, %%xmm2 \n\t"
"pmaddwd %%xmm4, %%xmm1 \n\t"
"paddd %%xmm7, %%xmm3 \n\t"
"paddd %%xmm2, %%xmm1 \n\t"
"paddd %%xmm6, %%xmm3 \n\t"
"paddd %%xmm6, %%xmm1 \n\t"
"psrad %3, %%xmm3 \n\t"
"psrad %3, %%xmm1 \n\t"
"packssdw %%xmm3, %%xmm1 \n\t"
"movdqa %%xmm1, \\i(%4) \n\t"
".endm \n\t"
"movdqa (%2), %%xmm6 \n\t"
"FDCT_ROW_SSE2_H1 0 0 \n\t"
"FDCT_ROW_SSE2 0 \n\t"
"FDCT_ROW_SSE2_H2 64 0 \n\t"
"FDCT_ROW_SSE2 64 \n\t"
"FDCT_ROW_SSE2_H1 16 64 \n\t"
"FDCT_ROW_SSE2 16 \n\t"
"FDCT_ROW_SSE2_H2 112 64 \n\t"
"FDCT_ROW_SSE2 112 \n\t"
"FDCT_ROW_SSE2_H1 16 64 \n\t"
"FDCT_ROW_SSE2 16 \n\t"
"FDCT_ROW_SSE2_H2 112 64 \n\t"
"FDCT_ROW_SSE2 112 \n\t"
"FDCT_ROW_SSE2_H1 32 128 \n\t"
"FDCT_ROW_SSE2 32 \n\t"
"FDCT_ROW_SSE2_H2 96 128 \n\t"
"FDCT_ROW_SSE2 96 \n\t"
"FDCT_ROW_SSE2_H1 32 128 \n\t"
"FDCT_ROW_SSE2 32 \n\t"
"FDCT_ROW_SSE2_H2 96 128 \n\t"
"FDCT_ROW_SSE2 96 \n\t"
"FDCT_ROW_SSE2_H1 48 192 \n\t"
"FDCT_ROW_SSE2 48 \n\t"
"FDCT_ROW_SSE2_H2 80 192 \n\t"
"FDCT_ROW_SSE2 80 \n\t"
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
"FDCT_ROW_SSE2_H1 48 192 \n\t"
"FDCT_ROW_SSE2 48 \n\t"
"FDCT_ROW_SSE2_H2 80 192 \n\t"
"FDCT_ROW_SSE2 80 \n\t"
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
);
}

View File

@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a)
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
{
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
FFTComplex *cptr, *cptr1;
int k;

View File

@ -47,9 +47,9 @@
SUMSUB_BADC( d13, s02, s13, d02 )
/*
 * SBUTTERFLY(a, b, t, n): interleave the elements of registers a and b.
 * Emits three instructions: copy a into t, then "punpckl<n> b, a" (a becomes
 * the interleaved low halves) and "punpckh<n> b, t" (t becomes the
 * interleaved high halves). n is the punpck suffix that selects the element
 * width (TRANSPOSE4 passes wd); b is only read.
 */
#define SBUTTERFLY(a,b,t,n)\
"movq " #a ", " #t " \n\t" /* abcd */\
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
"movq " #a ", " #t " \n\t" /* abcd */\
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
#define TRANSPOSE4(a,b,c,d,t)\
SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
/* motion compensation */
/*
 * QPEL_H264V(A,B,C,D,E,F,OP): one output row of the vertical 6-tap filter.
 * A..E are registers holding five consecutive source rows already unpacked
 * to 16-bit words; F receives the next row, loaded from (%0) and unpacked
 * against mm7 (the callers clear mm7 with pxor before using the macro).
 * Computes, per 16-bit lane,
 *     mm6 = (((C + D) * 4 - B - E) * %4 + A + F + %5) >> 5
 * packs the result to unsigned bytes and hands it to OP with destination
 * (%1). Side effects: %0 += %2 (source stride), %1 += %3 (destination
 * stride), A is overwritten with A + F + %5, and mm6 is clobbered.
 * Operands %4 and %5 are bound at the call sites; presumably they are the
 * word constants 5 and 16 -- TODO confirm against the constraint lists.
 * Callers rotate the register arguments by one position per invocation so
 * that the freshly loaded F becomes the newest row of the next call.
 */
#define QPEL_H264V(A,B,C,D,E,F,OP)\
"movd (%0), "#F" \n\t"\
"movq "#C", %%mm6 \n\t"\
"paddw "#D", %%mm6 \n\t"\
"psllw $2, %%mm6 \n\t"\
"psubw "#B", %%mm6 \n\t"\
"psubw "#E", %%mm6 \n\t"\
"pmullw %4, %%mm6 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, "#F" \n\t"\
"paddw %5, "#A" \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"psraw $5, %%mm6 \n\t"\
"packuswb %%mm6, %%mm6 \n\t"\
"movd (%0), "#F" \n\t"\
"movq "#C", %%mm6 \n\t"\
"paddw "#D", %%mm6 \n\t"\
"psllw $2, %%mm6 \n\t"\
"psubw "#B", %%mm6 \n\t"\
"psubw "#E", %%mm6 \n\t"\
"pmullw %4, %%mm6 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, "#F" \n\t"\
"paddw %5, "#A" \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"psraw $5, %%mm6 \n\t"\
"packuswb %%mm6, %%mm6 \n\t"\
OP(%%mm6, (%1), A, d)\
"add %3, %1 \n\t"
"add %3, %1 \n\t"
/*
 * QPEL_H264HV(A,B,C,D,E,F,OF): vertical 6-tap step that keeps full 16-bit
 * precision for a later horizontal pass.
 * A..E hold five consecutive source rows unpacked to 16-bit words; F
 * receives the next row, loaded from (%0) and unpacked against mm7.
 * Computes, per 16-bit lane,
 *     mm6 = ((C + D) * 4 - B - E) * %3 + A + F
 * and stores it unrounded and unshifted as a 64-bit word at byte offset OF
 * from %1. Side effects: %0 += %2 (source stride), A is overwritten with
 * A + F, and mm6 is clobbered.
 * Operand %3 is bound at the call sites; presumably the word constant 5 --
 * TODO confirm against the constraint lists.
 */
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
"movd (%0), "#F" \n\t"\
"movq "#C", %%mm6 \n\t"\
"paddw "#D", %%mm6 \n\t"\
"psllw $2, %%mm6 \n\t"\
"psubw "#B", %%mm6 \n\t"\
"psubw "#E", %%mm6 \n\t"\
"pmullw %3, %%mm6 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, "#F" \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"movq %%mm6, "#OF"(%1) \n\t"
"movd (%0), "#F" \n\t"\
"movq "#C", %%mm6 \n\t"\
"paddw "#D", %%mm6 \n\t"\
"psllw $2, %%mm6 \n\t"\
"psubw "#B", %%mm6 \n\t"\
"psubw "#E", %%mm6 \n\t"\
"pmullw %3, %%mm6 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, "#F" \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"movq %%mm6, "#OF"(%1) \n\t"
#define QPEL_H264(OPNAME, OP, MMX)\
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=4;\
\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm4 \n\t"\
"movq %6, %%mm5 \n\t"\
"1: \n\t"\
"movd -1(%0), %%mm1 \n\t"\
"movd (%0), %%mm2 \n\t"\
"movd 1(%0), %%mm3 \n\t"\
"movd 2(%0), %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"paddw %%mm0, %%mm1 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"movd -2(%0), %%mm0 \n\t"\
"movd 3(%0), %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"paddw %%mm3, %%mm0 \n\t"\
"psllw $2, %%mm2 \n\t"\
"psubw %%mm1, %%mm2 \n\t"\
"pmullw %%mm4, %%mm2 \n\t"\
"paddw %%mm5, %%mm0 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"psraw $5, %%mm0 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm4 \n\t"\
"movq %6, %%mm5 \n\t"\
"1: \n\t"\
"movd -1(%0), %%mm1 \n\t"\
"movd (%0), %%mm2 \n\t"\
"movd 1(%0), %%mm3 \n\t"\
"movd 2(%0), %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"paddw %%mm0, %%mm1 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"movd -2(%0), %%mm0 \n\t"\
"movd 3(%0), %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"paddw %%mm3, %%mm0 \n\t"\
"psllw $2, %%mm2 \n\t"\
"psubw %%mm1, %%mm2 \n\t"\
"pmullw %%mm4, %%mm2 \n\t"\
"paddw %%mm5, %%mm0 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"psraw $5, %%mm0 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
OP(%%mm0, (%1),%%mm6, d)\
"add %3, %0 \n\t"\
"add %4, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"add %3, %0 \n\t"\
"add %4, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+m"(h)\
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
: "memory"\
@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
src -= 2*srcStride;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
src -= 2*srcStride+2;\
while(w--){\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
}\
tmp -= 3*4;\
asm volatile(\
"movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"paddw 10(%0), %%mm0 \n\t"\
"movq 2(%0), %%mm1 \n\t"\
"paddw 8(%0), %%mm1 \n\t"\
"movq 4(%0), %%mm2 \n\t"\
"paddw 6(%0), %%mm2 \n\t"\
"psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
"psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
"paddsw %%mm2, %%mm0 \n\t"\
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\
"paddw %%mm6, %%mm2 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"psraw $6, %%mm0 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
"movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"paddw 10(%0), %%mm0 \n\t"\
"movq 2(%0), %%mm1 \n\t"\
"paddw 8(%0), %%mm1 \n\t"\
"movq 4(%0), %%mm2 \n\t"\
"paddw 6(%0), %%mm2 \n\t"\
"psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
"psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
"paddsw %%mm2, %%mm0 \n\t"\
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\
"paddw %%mm6, %%mm2 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"psraw $6, %%mm0 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
OP(%%mm0, (%1),%%mm7, d)\
"add $24, %0 \n\t"\
"add %3, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"add $24, %0 \n\t"\
"add %3, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
: "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 1(%0), %%mm2 \n\t"\
"movq %%mm0, %%mm1 \n\t"\
"movq %%mm2, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpckhbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm3 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"psllw $2, %%mm0 \n\t"\
"psllw $2, %%mm1 \n\t"\
"movq -1(%0), %%mm2 \n\t"\
"movq 2(%0), %%mm4 \n\t"\
"movq %%mm2, %%mm3 \n\t"\
"movq %%mm4, %%mm5 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"punpckhbw %%mm7, %%mm5 \n\t"\
"paddw %%mm4, %%mm2 \n\t"\
"paddw %%mm3, %%mm5 \n\t"\
"psubw %%mm2, %%mm0 \n\t"\
"psubw %%mm5, %%mm1 \n\t"\
"pmullw %%mm6, %%mm0 \n\t"\
"pmullw %%mm6, %%mm1 \n\t"\
"movd -2(%0), %%mm2 \n\t"\
"movd 7(%0), %%mm5 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm5 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"movq %6, %%mm5 \n\t"\
"paddw %%mm5, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm4, %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 1(%0), %%mm2 \n\t"\
"movq %%mm0, %%mm1 \n\t"\
"movq %%mm2, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpckhbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm3 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"psllw $2, %%mm0 \n\t"\
"psllw $2, %%mm1 \n\t"\
"movq -1(%0), %%mm2 \n\t"\
"movq 2(%0), %%mm4 \n\t"\
"movq %%mm2, %%mm3 \n\t"\
"movq %%mm4, %%mm5 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpckhbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"punpckhbw %%mm7, %%mm5 \n\t"\
"paddw %%mm4, %%mm2 \n\t"\
"paddw %%mm3, %%mm5 \n\t"\
"psubw %%mm2, %%mm0 \n\t"\
"psubw %%mm5, %%mm1 \n\t"\
"pmullw %%mm6, %%mm0 \n\t"\
"pmullw %%mm6, %%mm1 \n\t"\
"movd -2(%0), %%mm2 \n\t"\
"movd 7(%0), %%mm5 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm5 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"movq %6, %%mm5 \n\t"\
"paddw %%mm5, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm4, %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
OP(%%mm0, (%1),%%mm5, q)\
"add %3, %0 \n\t"\
"add %4, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"add %3, %0 \n\t"\
"add %4, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+m"(h)\
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
: "memory"\
@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
\
while(h--){\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
src -= 2*srcStride+2;\
while(w--){\
asm volatile(\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm1 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm2 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm3 \n\t"\
"add %2, %0 \n\t"\
"movd (%0), %%mm4 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm1 \n\t"\
"punpcklbw %%mm7, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm3 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\
@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
}\
tmp -= 4*4;\
asm volatile(\
"movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm3 \n\t"\
"movq 2(%0), %%mm1 \n\t"\
"movq 10(%0), %%mm4 \n\t"\
"paddw %%mm4, %%mm0 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"paddw 18(%0), %%mm3 \n\t"\
"paddw 16(%0), %%mm4 \n\t"\
"movq 4(%0), %%mm2 \n\t"\
"movq 12(%0), %%mm5 \n\t"\
"paddw 6(%0), %%mm2 \n\t"\
"paddw 14(%0), %%mm5 \n\t"\
"psubw %%mm1, %%mm0 \n\t"\
"psubw %%mm4, %%mm3 \n\t"\
"psraw $2, %%mm0 \n\t"\
"psraw $2, %%mm3 \n\t"\
"psubw %%mm1, %%mm0 \n\t"\
"psubw %%mm4, %%mm3 \n\t"\
"paddsw %%mm2, %%mm0 \n\t"\
"paddsw %%mm5, %%mm3 \n\t"\
"psraw $2, %%mm0 \n\t"\
"psraw $2, %%mm3 \n\t"\
"paddw %%mm6, %%mm2 \n\t"\
"paddw %%mm6, %%mm5 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm5, %%mm3 \n\t"\
"psraw $6, %%mm0 \n\t"\
"psraw $6, %%mm3 \n\t"\
"packuswb %%mm3, %%mm0 \n\t"\
"movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm3 \n\t"\
"movq 2(%0), %%mm1 \n\t"\
"movq 10(%0), %%mm4 \n\t"\
"paddw %%mm4, %%mm0 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"paddw 18(%0), %%mm3 \n\t"\
"paddw 16(%0), %%mm4 \n\t"\
"movq 4(%0), %%mm2 \n\t"\
"movq 12(%0), %%mm5 \n\t"\
"paddw 6(%0), %%mm2 \n\t"\
"paddw 14(%0), %%mm5 \n\t"\
"psubw %%mm1, %%mm0 \n\t"\
"psubw %%mm4, %%mm3 \n\t"\
"psraw $2, %%mm0 \n\t"\
"psraw $2, %%mm3 \n\t"\
"psubw %%mm1, %%mm0 \n\t"\
"psubw %%mm4, %%mm3 \n\t"\
"paddsw %%mm2, %%mm0 \n\t"\
"paddsw %%mm5, %%mm3 \n\t"\
"psraw $2, %%mm0 \n\t"\
"psraw $2, %%mm3 \n\t"\
"paddw %%mm6, %%mm2 \n\t"\
"paddw %%mm6, %%mm5 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm5, %%mm3 \n\t"\
"psraw $6, %%mm0 \n\t"\
"psraw $6, %%mm3 \n\t"\
"packuswb %%mm3, %%mm0 \n\t"\
OP(%%mm0, (%1),%%mm7, q)\
"add $32, %0 \n\t"\
"add %3, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
"add $32, %0 \n\t"\
"add %3, %1 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
: "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
}\
/*
 * PUT_OP(a, b, temp, size): expands to one asm line "mov<size> a, b"
 * (AT&T operand order, so a is the source and b the destination).
 * The temp argument is accepted for signature parity with the AVG_* ops
 * but does not appear in the expansion.
 * NOTE(review): the definition is listed twice with identical text; this looks
 * like the old/new sides of a whitespace-only diff -- confirm against the
 * actual source file.
 */
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_3DNOW_OP(a, b, temp, size): expands to three asm lines that
 *   1. load b into temp            ("mov<size> b, temp"),
 *   2. average temp into a         ("pavgusb temp, a"),
 *   3. write a back to b           ("mov<size> a, b").
 * Net effect: b receives the pavgusb average of a and the previous b; both a
 * and temp are overwritten.
 * NOTE(review): the three body lines appear twice below; the second copy is not
 * reached by a line continuation, so this looks like the old/new sides of a
 * whitespace-only diff -- confirm against the actual source file.
 */
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
"mov" #size " " #b ", " #temp " \n\t"\
"pavgusb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_MMX2_OP(a, b, temp, size): same three-step load / average / store
 * sequence as AVG_3DNOW_OP, but the averaging instruction emitted is "pavgb"
 * instead of "pavgusb". Overwrites a and temp; b receives the average.
 * NOTE(review): the three body lines appear twice below; the second copy is not
 * reached by a line continuation, so this looks like the old/new sides of a
 * whitespace-only diff -- confirm against the actual source file.
 */
#define AVG_MMX2_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
"mov" #size " " #b ", " #temp " \n\t"\
"pavgb " #temp ", " #a " \n\t"\
"mov" #size " " #a ", " #b " \n\t"
/*
 * Instantiate QPEL_H264 twice with the "3dnow" suffix: once with the put_
 * prefix and PUT_OP (plain store), once with the avg_ prefix and AVG_3DNOW_OP
 * (average with the destination).
 * NOTE(review): QPEL_H264 is not defined in this excerpt; presumably it is the
 * macro that generates the OPNAME##h264_qpel*_##MMX functions -- confirm.
 */
QPEL_H264(put_, PUT_OP, 3dnow)
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)

View File

@ -38,7 +38,7 @@
#if 0
/* C row IDCT - it's just here to document the MMXEXT and MMX versions */
static inline void idct_row (int16_t * row, int offset,
int16_t * table, int32_t * rounder)
int16_t * table, int32_t * rounder)
{
int C1, C2, C3, C4, C5, C6, C7;
int a0, a1, a2, a3, b0, b1, b2, b3;
@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset,
/* MMXEXT row IDCT */
/*
 * mmxext_table(c1..c7): brace initializer holding 32 signed coefficients as
 * eight groups of four. The groups are in the order the mmxext_row_* functions
 * fetch them, four int16_t at a time, from table, table+4, ... table+28.
 * NOTE(review): the definition is listed twice with identical text; this looks
 * like the old/new sides of a whitespace-only diff -- confirm against the
 * actual source file.
 */
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
c4, c6, c4, c6, \
c1, c3, -c1, -c5, \
c5, c7, c3, -c7, \
c4, -c6, c4, -c6, \
-c4, c2, c4, -c2, \
c5, -c1, c3, -c1, \
c7, c3, c7, -c5 }
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
c4, c6, c4, c6, \
c1, c3, -c1, -c5, \
c5, c7, c3, -c7, \
c4, -c6, c4, -c6, \
-c4, c2, c4, -c2, \
c5, -c1, c3, -c1, \
c7, c3, c7, -c5 }
/*
 * Prologue of the MMXEXT row pass: loads one row and starts the first
 * multiply, leaving everything in MMX registers for mmxext_row.
 *
 *   row + offset      four int16_t loaded into mm2 (copied to mm0)
 *   row + offset + 4  four int16_t loaded into mm5 (copied to mm6)
 *   table, table + 4  first two coefficient groups, loaded into mm3 and mm4
 *
 * mm3 is then multiplied-and-added with mm0 (pmaddwd), and pshufw 0x4e swaps
 * the two 32-bit halves of mm2. Nothing is written to memory.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
{
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
}
/*
 * Core of the MMXEXT row pass. Takes no row pointer: it works on the register
 * state left by mmxext_row_head or mmxext_row_mid (mm0, mm2, mm3, mm4, mm5,
 * mm6) and reads the remaining coefficient groups from table+8 .. table+28.
 * *rounder is added to both even-part sums (mm3 and mm0).
 *
 * Register state on return, per the inline comments:
 *   mm1 = y1 y0              (already shifted right by ROW_SHIFT)
 *   mm3 = y6 y7              (already shifted right by ROW_SHIFT)
 *   mm0 = a3+b3 a2+b2 + rounder   (not yet shifted)
 *   mm4 = a3-b3 a2-b2 + rounder   (not yet shifted)
 * The last two are shifted, packed and stored by mmxext_row_tail or
 * mmxext_row_mid. Nothing is written to memory here.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
{
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
paddd_m2r (*rounder, mm3); // mm3 += rounder
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
paddd_m2r (*rounder, mm3); // mm3 += rounder
pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
paddd_r2r (mm7, mm1); // mm1 = b1 b0
pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
paddd_r2r (mm7, mm1); // mm1 = b1 b0
paddd_m2r (*rounder, mm0); // mm0 += rounder
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
paddd_m2r (*rounder, mm0); // mm0 += rounder
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
paddd_r2r (mm6, mm5); // mm5 = b3 b2
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
paddd_r2r (mm6, mm5); // mm5 = b3 b2
movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
}
/*
 * Epilogue of the MMXEXT row pass: finishes the row left in registers by
 * mmxext_row and writes it to row[store .. store+7].
 *
 * Shifts mm0 and mm4 right by ROW_SHIFT, packs the 32-bit results to signed
 * 16-bit (mm0 into mm1, mm3 into mm4), stores mm1 at row+store, reorders the
 * words of mm4 with pshufw 0xb1 (y6 y7 y4 y5 -> y7 y6 y5 y4) and stores it at
 * row+store+4.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmxext_row_tail (int16_t * row, int store)
{
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
/* slot */
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
}
/*
 * Combined epilogue + prologue for the MMXEXT row pass: performs the work of
 * mmxext_row_tail for the row just computed (written to row+store) interleaved
 * with the work of mmxext_row_head for the next row (read from row+offset,
 * coefficients from table), so the two instruction streams overlap.
 *
 * On return the register state is the one mmxext_row expects, exactly as after
 * mmxext_row_head.
 *
 * NOTE(review): lines below (including the second parameter line) repeat in
 * identical groups; this looks like the old/new sides of a whitespace-only
 * diff, not intended repetition -- confirm against the actual source file.
 */
static inline void mmxext_row_mid (int16_t * row, int store,
int offset, const int16_t * table)
int offset, const int16_t * table)
{
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
}
/* MMX row IDCT */
/*
 * mmx_table(c1..c7): brace initializer holding 32 signed coefficients as eight
 * groups of four, arranged for the plain-MMX row functions (mmx_row_*), which
 * fetch them four int16_t at a time from table, table+4, ... table+28. The
 * arrangement differs from mmxext_table.
 * NOTE(review): the definition is listed twice with identical text; this looks
 * like the old/new sides of a whitespace-only diff -- confirm against the
 * actual source file.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
c4, c6, -c4, -c2, \
c1, c3, c3, -c7, \
c5, c7, -c1, -c5, \
c4, -c6, c4, -c2, \
-c4, c2, c4, -c6, \
c5, -c1, c7, -c5, \
c7, c3, c3, -c1 }
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
c4, c6, -c4, -c2, \
c1, c3, c3, -c7, \
c5, c7, -c1, -c5, \
c4, -c6, c4, -c2, \
-c4, c2, c4, -c6, \
c5, -c1, c7, -c5, \
c7, c3, c3, -c1 }
/*
 * Prologue of the plain-MMX row pass: loads one row and starts the first
 * multiply, leaving everything in MMX registers for mmx_row.
 *
 *   row + offset      four int16_t loaded into mm2 (copied to mm0)
 *   row + offset + 4  four int16_t loaded into mm5 (copied to mm6)
 *   table, +4, +8     first three coefficient groups, into mm3, mm4, mm1
 *
 * Instead of pshufw, the low dword of mm0 and the high dword of mm2 are
 * duplicated with punpckldq / punpckhdq; mm3 is multiplied-and-added with mm0.
 * Nothing is written to memory.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
{
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
}
/*
 * Core of the plain-MMX row pass. Takes no row pointer: it works on the
 * register state left by mmx_row_head or mmx_row_mid (mm0 .. mm6) and reads the
 * remaining coefficient groups from table+12 .. table+28. *rounder is added to
 * both even-part sums (mm3 and mm0).
 *
 * Register state on return, per the inline comments:
 *   mm1 = y1 y0              (already shifted right by ROW_SHIFT)
 *   mm3 = y6 y7              (already shifted right by ROW_SHIFT)
 *   mm0 = a3+b3 a2+b2 + rounder   (not yet shifted)
 *   mm7 = a3-b3 a2-b2 + rounder   (not yet shifted; mmxext_row uses mm4 here)
 * The last two are shifted, packed and stored by mmx_row_tail or mmx_row_mid.
 * Nothing is written to memory here.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmx_row (const int16_t * table, const int32_t * rounder)
{
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
paddd_m2r (*rounder, mm3); // mm3 += rounder
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
paddd_m2r (*rounder, mm3); // mm3 += rounder
pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
paddd_r2r (mm7, mm1); // mm1 = b1 b0
pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
paddd_r2r (mm7, mm1); // mm1 = b1 b0
paddd_m2r (*rounder, mm0); // mm0 += rounder
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
paddd_m2r (*rounder, mm0); // mm0 += rounder
psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
paddd_r2r (mm6, mm5); // mm5 = b3 b2
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
paddd_r2r (mm6, mm5); // mm5 = b3 b2
movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
}
/*
 * Epilogue of the plain-MMX row pass: finishes the row left in registers by
 * mmx_row and writes it to row[store .. store+7].
 *
 * Shifts mm0 and mm7 right by ROW_SHIFT, packs to signed 16-bit (mm0 into mm1,
 * mm3 into mm7) and stores mm1 at row+store. The word swap that the MMXEXT
 * version does with pshufw is done here with a copy in mm4 plus
 * pslld 16 / psrld 16 / por, after which mm7 is stored at row+store+4.
 *
 * NOTE(review): statements below repeat in identical groups; this looks like
 * the old/new sides of a whitespace-only diff, not intended repetition --
 * confirm against the actual source file.
 */
static inline void mmx_row_tail (int16_t * row, int store)
{
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
/* slot */
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
}
/*
 * Combined epilogue + prologue for the plain-MMX row pass: performs the work
 * of mmx_row_tail for the row just computed (written to row+store) interleaved
 * with the work of mmx_row_head for the next row (read from row+offset,
 * coefficients from table). Here mm1 is reused as the scratch register for the
 * word swap once its packed result has been stored.
 *
 * On return the register state is the one mmx_row expects, exactly as after
 * mmx_row_head.
 *
 * NOTE(review): lines below (including the second parameter line) repeat in
 * identical groups; this looks like the old/new sides of a whitespace-only
 * diff, not intended repetition -- confirm against the actual source file.
 */
static inline void mmx_row_mid (int16_t * row, int store,
int offset, const int16_t * table)
int offset, const int16_t * table)
{
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
}
@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset)
/* column code adapted from peter gubanov */
/* http://www.elecard.com/peter/idct.shtml */
movq_m2r (*_T1, mm0); // mm0 = T1
movq_m2r (*_T1, mm0); // mm0 = T1
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
movq_r2r (mm0, mm2); // mm2 = T1
movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1
movq_r2r (mm0, mm2); // mm2 = T1
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7
pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
movq_m2r (*_T3, mm5); // mm5 = T3
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
movq_m2r (*_T3, mm5); // mm5 = T3
pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
movq_r2r (mm5, mm7); // mm7 = T3-1
movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5
movq_r2r (mm5, mm7); // mm7 = T3-1
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
psubsw_r2r (mm4, mm0); // mm0 = v17
movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3
psubsw_r2r (mm4, mm0); // mm0 = v17
movq_m2r (*_T2, mm4); // mm4 = T2
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
movq_m2r (*_T2, mm4); // mm4 = T2
pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
paddsw_r2r (mm2, mm1); // mm1 = u17
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
paddsw_r2r (mm2, mm1); // mm1 = u17
pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
/* slot */
movq_r2r (mm4, mm2); // mm2 = T2
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
movq_r2r (mm4, mm2); // mm2 = T2
paddsw_r2r (mm3, mm5); // mm5 = T3*x3
pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
paddsw_r2r (mm6, mm7); // mm7 = T3*x5
psubsw_r2r (mm6, mm5); // mm5 = v35
paddsw_r2r (mm3, mm7); // mm7 = u35
psubsw_r2r (mm6, mm5); // mm5 = v35
paddsw_r2r (mm3, mm7); // mm7 = u35
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
movq_r2r (mm0, mm6); // mm6 = v17
movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6
movq_r2r (mm0, mm6); // mm6 = v17
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
psubsw_r2r (mm5, mm0); // mm0 = b3
pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
psubsw_r2r (mm5, mm0); // mm0 = b3
psubsw_r2r (mm3, mm4); // mm4 = v26
paddsw_r2r (mm6, mm5); // mm5 = v12
psubsw_r2r (mm3, mm4); // mm4 = v26
paddsw_r2r (mm6, mm5); // mm5 = v12
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
movq_r2r (mm1, mm6); // mm6 = u17
movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0
movq_r2r (mm1, mm6); // mm6 = u17
paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
paddsw_r2r (mm7, mm6); // mm6 = b0
paddsw_r2r (mm7, mm6); // mm6 = b0
psubsw_r2r (mm7, mm1); // mm1 = u12
movq_r2r (mm1, mm7); // mm7 = u12
psubsw_r2r (mm7, mm1); // mm1 = u12
movq_r2r (mm1, mm7); // mm7 = u12
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0
paddsw_r2r (mm5, mm1); // mm1 = u12+v12
movq_m2r (*_C4, mm0); // mm0 = C4/2
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
movq_m2r (*_C4, mm0); // mm0 = C4/2
psubsw_r2r (mm5, mm7); // mm7 = u12-v12
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1
pmulhw_r2r (mm0, mm1); // mm1 = b1/2
movq_r2r (mm4, mm6); // mm6 = v26
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
movq_r2r (mm4, mm6); // mm6 = v26
pmulhw_r2r (mm0, mm7); // mm7 = b2/2
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
movq_r2r (mm3, mm0); // mm0 = x0
movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4
movq_r2r (mm3, mm0); // mm0 = x0
psubsw_r2r (mm5, mm3); // mm3 = v04
paddsw_r2r (mm5, mm0); // mm0 = u04
psubsw_r2r (mm5, mm3); // mm3 = v04
paddsw_r2r (mm5, mm0); // mm0 = u04
paddsw_r2r (mm3, mm4); // mm4 = a1
movq_r2r (mm0, mm5); // mm5 = u04
paddsw_r2r (mm3, mm4); // mm4 = a1
movq_r2r (mm0, mm5); // mm5 = u04
psubsw_r2r (mm6, mm3); // mm3 = a2
paddsw_r2r (mm2, mm5); // mm5 = a0
psubsw_r2r (mm6, mm3); // mm3 = a2
paddsw_r2r (mm2, mm5); // mm5 = a0
paddsw_r2r (mm1, mm1); // mm1 = b1
psubsw_r2r (mm2, mm0); // mm0 = a3
paddsw_r2r (mm1, mm1); // mm1 = b1
psubsw_r2r (mm2, mm0); // mm0 = a3
paddsw_r2r (mm7, mm7); // mm7 = b2
movq_r2r (mm3, mm2); // mm2 = a2
paddsw_r2r (mm7, mm7); // mm7 = b2
movq_r2r (mm3, mm2); // mm2 = a2
movq_r2r (mm4, mm6); // mm6 = a1
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
movq_r2r (mm4, mm6); // mm6 = a1
paddsw_r2r (mm7, mm3); // mm3 = a2+b2
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
paddsw_r2r (mm1, mm4); // mm4 = a1+b1
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
psubsw_r2r (mm1, mm6); // mm6 = a1-b1
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0
psubsw_r2r (mm7, mm2); // mm2 = a2-b2
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
movq_r2r (mm5, mm7); // mm7 = a0
psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
movq_r2r (mm5, mm7); // mm7 = a0
movq_r2m (mm4, *(col+offset+1*8)); // save y1
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
movq_r2m (mm4, *(col+offset+1*8)); // save y1
psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
movq_r2m (mm3, *(col+offset+2*8)); // save y2
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
movq_r2m (mm3, *(col+offset+2*8)); // save y2
paddsw_r2r (mm1, mm5); // mm5 = a0+b0
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3
psubsw_r2r (mm1, mm7); // mm7 = a0-b0
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
movq_r2r (mm0, mm3); // mm3 = a3
psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
movq_r2r (mm0, mm3); // mm3 = a3
movq_r2m (mm2, *(col+offset+5*8)); // save y5
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
movq_r2m (mm2, *(col+offset+5*8)); // save y5
psubsw_r2r (mm4, mm3); // mm3 = a3-b3
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
paddsw_r2r (mm0, mm4); // mm4 = a3+b3
movq_r2m (mm5, *(col+offset+0*8)); // save y0
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
movq_r2m (mm5, *(col+offset+0*8)); // save y0
psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
movq_r2m (mm6, *(col+offset+6*8)); // save y6
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
movq_r2m (mm6, *(col+offset+6*8)); // save y6
psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
movq_r2m (mm7, *(col+offset+7*8)); // save y7
movq_r2m (mm7, *(col+offset+7*8)); // save y7
movq_r2m (mm3, *(col+offset+4*8)); // save y4
movq_r2m (mm3, *(col+offset+4*8)); // save y4
movq_r2m (mm4, *(col+offset+3*8)); // save y3
movq_r2m (mm4, *(col+offset+3*8)); // save y3
#undef T1
#undef T2
@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) =
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
/*
 * Per-row rounding constants for rows 1-7. declare_idct passes rounderN to
 * idct_row while row N is being processed. Each array is built by the
 * rounder() macro, which is not defined in this excerpt; the trailing comments
 * give the expression each numeric argument was derived from.
 * NOTE(review): every initializer line appears twice with identical text; this
 * looks like the old/new sides of a whitespace-only diff -- confirm against
 * the actual source file.
 */
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
static const int32_t rounder1[] ATTR_ALIGN(8) =
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
static const int32_t rounder7[] ATTR_ALIGN(8) =
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
static const int32_t rounder2[] ATTR_ALIGN(8) =
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
static const int32_t rounder6[] ATTR_ALIGN(8) =
rounder (-0.25); /* C2 * (C6-C2)/2 */
rounder (-0.25); /* C2 * (C6-C2)/2 */
static const int32_t rounder3[] ATTR_ALIGN(8) =
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
static const int32_t rounder5[] ATTR_ALIGN(8) =
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
#undef COL_SHIFT
#undef ROW_SHIFT
/*
 * declare_idct(idct, table, idct_row_head, idct_row, idct_row_tail,
 *              idct_row_mid)
 *
 * Defines "void idct(int16_t *block)", an in-place 8x8 inverse DCT built from
 * the supplied row primitives:
 *
 *   - four static coefficient tables are built with the table() argument;
 *     rows 0/4 share table04, 1/7 table17, 2/6 table26 and 3/5 table35;
 *   - rows are processed in the order 0, 4, 1, 7, 2, 6, 3, 5. Each row gets
 *     idct_row with its own rounderN; idct_row_mid stores the finished row and
 *     loads the next one in a single call; idct_row_head / idct_row_tail
 *     bracket the sequence;
 *   - the column pass then runs as idct_col(block, 0) and idct_col(block, 4).
 *
 * NOTE(review): the macro text appears twice below and the first copy ends on
 * a line continuation; this looks like the old/new sides of a whitespace-only
 * diff rather than one definition -- confirm against the actual source file.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block) \
{ \
static const int16_t table04[] ATTR_ALIGN(16) = \
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
static const int16_t table17[] ATTR_ALIGN(16) = \
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
static const int16_t table26[] ATTR_ALIGN(16) = \
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
static const int16_t table35[] ATTR_ALIGN(16) = \
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
\
idct_row_head (block, 0*8, table04); \
idct_row (table04, rounder0); \
idct_row_mid (block, 0*8, 4*8, table04); \
idct_row (table04, rounder4); \
idct_row_mid (block, 4*8, 1*8, table17); \
idct_row (table17, rounder1); \
idct_row_mid (block, 1*8, 7*8, table17); \
idct_row (table17, rounder7); \
idct_row_mid (block, 7*8, 2*8, table26); \
idct_row (table26, rounder2); \
idct_row_mid (block, 2*8, 6*8, table26); \
idct_row (table26, rounder6); \
idct_row_mid (block, 6*8, 3*8, table35); \
idct_row (table35, rounder3); \
idct_row_mid (block, 3*8, 5*8, table35); \
idct_row (table35, rounder5); \
idct_row_tail (block, 5*8); \
\
idct_col (block, 0); \
idct_col (block, 4); \
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * block) \
{ \
static const int16_t table04[] ATTR_ALIGN(16) = \
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
static const int16_t table17[] ATTR_ALIGN(16) = \
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
static const int16_t table26[] ATTR_ALIGN(16) = \
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
static const int16_t table35[] ATTR_ALIGN(16) = \
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
\
idct_row_head (block, 0*8, table04); \
idct_row (table04, rounder0); \
idct_row_mid (block, 0*8, 4*8, table04); \
idct_row (table04, rounder4); \
idct_row_mid (block, 4*8, 1*8, table17); \
idct_row (table17, rounder1); \
idct_row_mid (block, 1*8, 7*8, table17); \
idct_row (table17, rounder7); \
idct_row_mid (block, 7*8, 2*8, table26); \
idct_row (table26, rounder2); \
idct_row_mid (block, 2*8, 6*8, table26); \
idct_row (table26, rounder6); \
idct_row_mid (block, 6*8, 3*8, table35); \
idct_row (table35, rounder3); \
idct_row_mid (block, 3*8, 5*8, table35); \
idct_row (table35, rounder5); \
idct_row_tail (block, 5*8); \
\
idct_col (block, 0); \
idct_col (block, 4); \
}
/*
 * Prototypes for the two IDCT entry points, followed by their definitions via
 * declare_idct.
 * NOTE(review): the prototypes take DCTELEM* while declare_idct emits
 * int16_t*; DCTELEM is declared outside this excerpt and is presumably a
 * 16-bit type compatible with int16_t -- confirm.
 * NOTE(review): the argument line of each declare_idct use appears twice; this
 * looks like the old/new sides of a whitespace-only diff -- confirm against
 * the actual source file.
 */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* MMXEXT variant: mmxext_table layout and the pshufw-based row primitives. */
declare_idct (ff_mmxext_idct, mmxext_table,
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
/* Plain-MMX variant: mmx_table layout and the punpck/shift-based row primitives. */
declare_idct (ff_mmx_idct, mmx_table,
mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)

View File

@ -27,257 +27,257 @@
* values by ULL, lest they be truncated by the compiler)
*/
/*
 * mmx_t: union that overlays the different views of one 64-bit MMX-sized
 * value -- a single quadword, two doublewords, four words, eight bytes (each
 * in signed and unsigned form) or two single-precision floats.
 * NOTE(review): the typedef appears twice with identical text; this looks like
 * the old/new sides of a whitespace-only diff -- confirm against the actual
 * source file.
 */
typedef union {
long long q; /* Quadword (64-bit) value */
unsigned long long uq; /* Unsigned Quadword */
int d[2]; /* 2 Doubleword (32-bit) values */
unsigned int ud[2]; /* 2 Unsigned Doubleword */
short w[4]; /* 4 Word (16-bit) values */
unsigned short uw[4]; /* 4 Unsigned Word */
char b[8]; /* 8 Byte (8-bit) values */
unsigned char ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} mmx_t; /* On an 8-byte (64-bit) boundary */
typedef union {
long long q; /* Quadword (64-bit) value */
unsigned long long uq; /* Unsigned Quadword */
int d[2]; /* 2 Doubleword (32-bit) values */
unsigned int ud[2]; /* 2 Unsigned Doubleword */
short w[4]; /* 4 Word (16-bit) values */
unsigned short uw[4]; /* 4 Unsigned Word */
char b[8]; /* 8 Byte (8-bit) values */
unsigned char ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} mmx_t; /* On an 8-byte (64-bit) boundary */
/*
 * Generic one-instruction inline-asm emitters. "op" is the mnemonic and "reg"
 * an MMX register name; both are pasted into the asm text by stringification.
 * Operands follow AT&T order (source first, destination last).
 *
 *   mmx_i2r(op, imm, reg)    immediate -> register; imm is an "i" input
 *   mmx_m2r(op, mem, reg)    memory    -> register; mem is an "m" input
 *   mmx_r2m(op, reg, mem)    register  -> memory;   mem is an "=m" output
 *   mmx_r2r(op, regs, regd)  register  -> register; no operand list at all,
 *                            hence the single '%' before the register names
 *   emms()                   emits the emms instruction
 *
 * None of these list the MMX register they modify as an output or clobber, so
 * the compiler is not told which registers change; ordering relies on every
 * statement being __volatile__.
 * NOTE(review): each definition appears twice with identical text; this looks
 * like the old/new sides of a whitespace-only diff -- confirm against the
 * actual source file.
 */
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "i" (imm) )
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "i" (imm) )
#define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))
#define mmx_m2r(op,mem,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))
#define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )
#define mmx_r2m(op,reg,mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )
#define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
#define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
#define emms() __asm__ __volatile__ ("emms")
#define emms() __asm__ __volatile__ ("emms")
/*
 * Per-instruction convenience wrappers over mmx_m2r / mmx_r2m / mmx_r2r.
 * Naming: <mnemonic>_m2r(var, reg) reads memory operand var into reg,
 * <mnemonic>_r2m(reg, var) writes reg to memory operand var, and
 * <mnemonic>_r2r(regs, regd) operates register-to-register with regd as the
 * destination.
 * NOTE(review): each group of definitions appears twice with identical text;
 * this looks like the old/new sides of a whitespace-only diff -- confirm
 * against the actual source file.
 */
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
#define por_m2r(var,reg) mmx_m2r (por, var, reg)
#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
/* 3DNOW extensions */
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
/* AMD MMX extensions - also available in intel SSE */
#define mmx_m2ri(op,mem,reg,imm) \
#define mmx_m2ri(op,mem,reg,imm) \
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
: /* nothing */ \
: "X" (mem), "X" (imm))
#define mmx_r2ri(op,regs,regd,imm) \
#define mmx_r2ri(op,regs,regd,imm) \
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \
: "X" (imm) )
/*
 * mmx_fetch: emit a prefetch instruction for `mem`.
 *   mem  - operand passed through the "X" (any operand) input constraint
 *   hint - suffix stringified directly onto "prefetch" to form the mnemonic;
 *          the wrappers in this file pass t0, t1, t2 and nta
 */
#define mmx_fetch(mem,hint) \
__asm__ __volatile__ ("prefetch" #hint " %0" \
: /* nothing */ \
: "X" (mem))
#define mmx_fetch(mem,hint) \
__asm__ __volatile__ ("prefetch" #hint " %0" \
: /* nothing */ \
: "X" (mem))
/*
 * maskmovq: emit `maskmovq %regs, %maskreg`.
 * The instruction takes two register operands and no immediate, so it is
 * built with the two-register helper mmx_r2r; the four-parameter mmx_r2ri
 * cannot be invoked with only three arguments.
 * NOTE(review): operands are emitted in the order (regs, maskreg) exactly as
 * passed; confirm this matches the assembler's expected operand order.
 */
#define maskmovq(regs,maskreg) mmx_r2r (maskmovq, regs, maskreg)
#define maskmovq(regs,maskreg) mmx_r2r (maskmovq, regs, maskreg)
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
/*
 * pmovmskb: emit `pmovmskb %mmreg, %reg`.
 *   mmreg - source register name (first operand)
 *   reg   - second operand register name
 * The mnemonic matches the macro name. The asm statement has no operand
 * lists, so register names carry a single '%'.
 */
#define pmovmskb(mmreg,reg) \
__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
#define pmovmskb(mmreg,reg) \
__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
#define prefetcht0(mem) mmx_fetch (mem, t0)
#define prefetcht1(mem) mmx_fetch (mem, t1)
#define prefetcht2(mem) mmx_fetch (mem, t2)
#define prefetchnta(mem) mmx_fetch (mem, nta)
#define prefetcht0(mem) mmx_fetch (mem, t0)
#define prefetcht1(mem) mmx_fetch (mem, t1)
#define prefetcht2(mem) mmx_fetch (mem, t2)
#define prefetchnta(mem) mmx_fetch (mem, nta)
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
/* sfence: emit a bare `sfence` instruction (no operands, no clobbers declared). */
#define sfence() __asm__ __volatile__ ("sfence\n\t")
#define sfence() __asm__ __volatile__ ("sfence\n\t")
/* SSE2 */
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
#endif /* AVCODEC_I386MMX_H */

View File

@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm4 \n\t"
"add %3, %%"REG_a" \n\t"
"psubusb %%mm0, %%mm2 \n\t"
"psubusb %%mm4, %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"movq (%2, %%"REG_a"), %%mm5 \n\t"
"psubusb %%mm1, %%mm3 \n\t"
"psubusb %%mm5, %%mm1 \n\t"
"por %%mm2, %%mm0 \n\t"
"por %%mm1, %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm3, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %3, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm4 \n\t"
"add %3, %%"REG_a" \n\t"
"psubusb %%mm0, %%mm2 \n\t"
"psubusb %%mm4, %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"movq (%2, %%"REG_a"), %%mm5 \n\t"
"psubusb %%mm1, %%mm3 \n\t"
"psubusb %%mm5, %%mm1 \n\t"
"por %%mm2, %%mm0 \n\t"
"por %%mm1, %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm3, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %3, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
);
@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"psadbw %%mm1, %%mm3 \n\t"
"paddw %%mm3, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %3, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %3, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"psadbw %%mm1, %%mm3 \n\t"
"paddw %%mm3, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %3, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
);
@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
{
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"pavgb %%mm2, %%mm0 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %4, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"pavgb %%mm1, %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm1 \n\t"
"psadbw %%mm1, %%mm3 \n\t"
"paddw %%mm3, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"pavgb %%mm2, %%mm0 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %4, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"pavgb %%mm1, %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm1 \n\t"
"psadbw %%mm1, %%mm3 \n\t"
"paddw %%mm3, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
);
@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ //FIXME reuse src
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"movq "MANGLE(bone)", %%mm5 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
"pavgb %%mm2, %%mm0 \n\t"
"pavgb %%mm1, %%mm3 \n\t"
"psubusb %%mm5, %%mm3 \n\t"
"pavgb %%mm3, %%mm0 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %4, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
"pavgb %%mm3, %%mm1 \n\t"
"pavgb %%mm4, %%mm2 \n\t"
"psubusb %%mm5, %%mm2 \n\t"
"pavgb %%mm1, %%mm2 \n\t"
"movq (%3, %%"REG_a"), %%mm1 \n\t"
"psadbw %%mm1, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"movq "MANGLE(bone)", %%mm5 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
"movq 1(%1, %%"REG_a"), %%mm1 \n\t"
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
"pavgb %%mm2, %%mm0 \n\t"
"pavgb %%mm1, %%mm3 \n\t"
"psubusb %%mm5, %%mm3 \n\t"
"pavgb %%mm3, %%mm0 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"psadbw %%mm2, %%mm0 \n\t"
"add %4, %%"REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
"pavgb %%mm3, %%mm1 \n\t"
"pavgb %%mm4, %%mm2 \n\t"
"psubusb %%mm5, %%mm2 \n\t"
"pavgb %%mm1, %%mm2 \n\t"
"movq (%3, %%"REG_a"), %%mm1 \n\t"
"psadbw %%mm1, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
);
@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
{
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddw %%mm0, %%mm1 \n\t"
"paddw %%mm2, %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"paddw %%mm5, %%mm1 \n\t"
"paddw %%mm5, %%mm3 \n\t"
"psrlw $1, %%mm1 \n\t"
"psrlw $1, %%mm3 \n\t"
"packuswb %%mm3, %%mm1 \n\t"
"psubusb %%mm1, %%mm4 \n\t"
"psubusb %%mm2, %%mm1 \n\t"
"por %%mm4, %%mm1 \n\t"
"movq %%mm1, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddw %%mm0, %%mm1 \n\t"
"paddw %%mm2, %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t"
"paddw %%mm5, %%mm1 \n\t"
"paddw %%mm5, %%mm3 \n\t"
"psrlw $1, %%mm1 \n\t"
"psrlw $1, %%mm3 \n\t"
"packuswb %%mm3, %%mm1 \n\t"
"psubusb %%mm1, %%mm4 \n\t"
"psubusb %%mm2, %%mm1 \n\t"
"por %%mm4, %%mm1 \n\t"
"movq %%mm1, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
);
@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
long len= -(stride*h);
asm volatile(
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
"movq %%mm0, %%mm4 \n\t"
"movq %%mm1, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm2, %%mm4 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
"movq %%mm2, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"paddw %%mm0, %%mm2 \n\t"
"paddw %%mm4, %%mm1 \n\t"
"movq %%mm3, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm4 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm4, %%mm1 \n\t"
"movq (%3, %%"REG_a"), %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t"
"paddw %%mm5, %%mm2 \n\t"
"paddw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm2 \n\t"
"psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm2 \n\t"
"psubusb %%mm2, %%mm3 \n\t"
"psubusb %%mm4, %%mm2 \n\t"
"por %%mm3, %%mm2 \n\t"
"movq %%mm2, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
"movq %%mm0, %%mm4 \n\t"
"movq %%mm1, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm4 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"paddw %%mm2, %%mm4 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq 1(%2, %%"REG_a"), %%mm3 \n\t"
"movq %%mm2, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm1 \n\t"
"paddw %%mm0, %%mm2 \n\t"
"paddw %%mm4, %%mm1 \n\t"
"movq %%mm3, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t"
"punpckhbw %%mm7, %%mm4 \n\t"
"paddw %%mm3, %%mm2 \n\t"
"paddw %%mm4, %%mm1 \n\t"
"movq (%3, %%"REG_a"), %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t"
"paddw %%mm5, %%mm2 \n\t"
"paddw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm2 \n\t"
"psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm2 \n\t"
"psubusb %%mm2, %%mm3 \n\t"
"psubusb %%mm4, %%mm2 \n\t"
"por %%mm3, %%mm2 \n\t"
"movq %%mm2, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (len)
: "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
);
@ -243,13 +243,13 @@ static inline int sum_mmx(void)
{
int ret;
asm volatile(
"movq %%mm6, %%mm0 \n\t"
"psrlq $32, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movq %%mm6, %%mm0 \n\t"
"psrlq $16, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movd %%mm6, %0 \n\t"
"movq %%mm6, %%mm0 \n\t"
"psrlq $32, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movq %%mm6, %%mm0 \n\t"
"psrlq $16, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t"
"movd %%mm6, %0 \n\t"
: "=r" (ret)
);
return ret&0xFFFF;
@ -259,7 +259,7 @@ static inline int sum_mmx2(void)
{
int ret;
asm volatile(
"movd %%mm6, %0 \n\t"
"movd %%mm6, %0 \n\t"
: "=r" (ret)
);
return ret;
@ -270,8 +270,8 @@ static inline int sum_mmx2(void)
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1, blk2, stride, 8);\
\
@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
\
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_1_ ## suf(blk1 , blk2 , stride, h);\
sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int
}\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \
);\
\
@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
:: "m"(round_tab[2]) \
);\
\
@ -384,15 +384,15 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
c->pix_abs[1][2] = sad8_y2_mmx;
c->pix_abs[1][3] = sad8_xy2_mmx;
c->sad[0]= sad16_mmx;
c->sad[0]= sad16_mmx;
c->sad[1]= sad8_mmx;
}
if (mm_flags & MM_MMXEXT) {
c->pix_abs[0][0] = sad16_mmx2;
c->pix_abs[1][0] = sad8_mmx2;
c->pix_abs[0][0] = sad16_mmx2;
c->pix_abs[1][0] = sad8_mmx2;
c->sad[0]= sad16_mmx2;
c->sad[1]= sad8_mmx2;
c->sad[0]= sad16_mmx2;
c->sad[1]= sad8_mmx2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->pix_abs[0][1] = sad16_x2_mmx2;

View File

@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"add $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
"add $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
block[0]= level;
}
@ -120,52 +120,52 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"add $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
"add $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
}
@ -216,54 +216,54 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
/* XXX: only mpeg1 */
quant_matrix = s->intra_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t"
"psubw %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t"
"psubw %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"add $16, %%"REG_a" \n\t"
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
"add $16, %%"REG_a" \n\t"
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
block[0]= block0;
}
@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
quant_matrix = s->inter_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $4, %%mm0 \n\t"
"psraw $4, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t"
"psubw %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
"paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1
"paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1
"pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $4, %%mm0 \n\t"
"psraw $4, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t"
"psubw %%mm7, %%mm1 \n\t"
"por %%mm7, %%mm0 \n\t"
"por %%mm7, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"add $16, %%"REG_a" \n\t"
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
"add $16, %%"REG_a" \n\t"
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
}
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
@ -351,50 +351,50 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
block0 = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"add $16, %%"REG_a" \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
"add $16, %%"REG_a" \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
);
block[0]= block0;
// Note: we don't do mismatch control for intra blocks, as errors cannot accumulate
}
@ -412,68 +412,68 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
quant_matrix = s->inter_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlq $48, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psrlw $4, %%mm0 \n\t"
"psrlw $4, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"pxor %%mm4, %%mm7 \n\t"
"pxor %%mm5, %%mm7 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlq $48, %%mm7 \n\t"
"movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t"
"pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t" // abs(block[i])
"psubw %%mm3, %%mm1 \n\t" // abs(block[i])
"paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
"paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
"pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
"paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psrlw $4, %%mm0 \n\t"
"psrlw $4, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"psubw %%mm2, %%mm0 \n\t"
"psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t"
"pxor %%mm4, %%mm7 \n\t"
"pxor %%mm5, %%mm7 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t"
"add $16, %%"REG_a" \n\t"
"jng 1b \n\t"
"movd 124(%0, %3), %%mm0 \n\t"
"movq %%mm7, %%mm6 \n\t"
"psrlq $32, %%mm7 \n\t"
"pxor %%mm6, %%mm7 \n\t"
"movq %%mm7, %%mm6 \n\t"
"psrlq $16, %%mm7 \n\t"
"pxor %%mm6, %%mm7 \n\t"
"pslld $31, %%mm7 \n\t"
"psrlq $15, %%mm7 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"movd %%mm0, 124(%0, %3) \n\t"
"add $16, %%"REG_a" \n\t"
"jng 1b \n\t"
"movd 124(%0, %3), %%mm0 \n\t"
"movq %%mm7, %%mm6 \n\t"
"psrlq $32, %%mm7 \n\t"
"pxor %%mm6, %%mm7 \n\t"
"movq %%mm7, %%mm6 \n\t"
"psrlq $16, %%mm7 \n\t"
"pxor %%mm6, %%mm7 \n\t"
"pslld $31, %%mm7 \n\t"
"psrlq $15, %%mm7 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"movd %%mm0, 124(%0, %3) \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
: "%"REG_a, "memory"
);
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
: "%"REG_a, "memory"
);
}
/* draw the edges of width 'w' of an image of size width, height
@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
ptr = buf;
if(w==8)
{
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
"movq -8(%0, %2), %%mm1 \n\t"
"punpckhbw %%mm1, %%mm1 \n\t"
"punpckhwd %%mm1, %%mm1 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movq %%mm1, (%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
"movq -8(%0, %2), %%mm1 \n\t"
"punpckhbw %%mm1, %%mm1 \n\t"
"punpckhwd %%mm1, %%mm1 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movq %%mm1, (%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
}
else
{
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
"movq %%mm0, -16(%0) \n\t"
"movq -8(%0, %2), %%mm1 \n\t"
"punpckhbw %%mm1, %%mm1 \n\t"
"punpckhwd %%mm1, %%mm1 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movq %%mm1, (%0, %2) \n\t"
"movq %%mm1, 8(%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
"punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
"movq %%mm0, -16(%0) \n\t"
"movq -8(%0, %2), %%mm1 \n\t"
"punpckhbw %%mm1, %%mm1 \n\t"
"punpckhwd %%mm1, %%mm1 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movq %%mm1, (%0, %2) \n\t"
"movq %%mm1, 8(%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
}
for(i=0;i<w;i+=4) {
/* top and bottom (and hopefully also the corners) */
ptr= buf - (i + 1) * wrap - w;
asm volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm0, (%0, %2) \n\t"
"movq %%mm0, (%0, %2, 2) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"add $8, %0 \n\t"
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
);
ptr= last_line + (i + 1) * wrap - w;
asm volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm0, (%0, %2) \n\t"
"movq %%mm0, (%0, %2, 2) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"add $8, %0 \n\t"
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
);
ptr= buf - (i + 1) * wrap - w;
asm volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm0, (%0, %2) \n\t"
"movq %%mm0, (%0, %2, 2) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"add $8, %0 \n\t"
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
);
ptr= last_line + (i + 1) * wrap - w;
asm volatile(
"1: \n\t"
"movq (%1, %0), %%mm0 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm0, (%0, %2) \n\t"
"movq %%mm0, (%0, %2, 2) \n\t"
"movq %%mm0, (%0, %3) \n\t"
"add $8, %0 \n\t"
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
);
}
}
@ -572,47 +572,47 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
s->dct_count[intra]++;
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"1: \n\t"
"pxor %%mm0, %%mm0 \n\t"
"pxor %%mm1, %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
"movq 8(%0), %%mm3 \n\t"
"pcmpgtw %%mm2, %%mm0 \n\t"
"pcmpgtw %%mm3, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"psubw %%mm0, %%mm2 \n\t"
"psubw %%mm1, %%mm3 \n\t"
"movq %%mm2, %%mm4 \n\t"
"movq %%mm3, %%mm5 \n\t"
"psubusw (%2), %%mm2 \n\t"
"psubusw 8(%2), %%mm3 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"psubw %%mm0, %%mm2 \n\t"
"psubw %%mm1, %%mm3 \n\t"
"movq %%mm2, (%0) \n\t"
"movq %%mm3, 8(%0) \n\t"
"movq %%mm4, %%mm2 \n\t"
"movq %%mm5, %%mm3 \n\t"
"punpcklwd %%mm7, %%mm4 \n\t"
"punpckhwd %%mm7, %%mm2 \n\t"
"punpcklwd %%mm7, %%mm5 \n\t"
"punpckhwd %%mm7, %%mm3 \n\t"
"paddd (%1), %%mm4 \n\t"
"paddd 8(%1), %%mm2 \n\t"
"paddd 16(%1), %%mm5 \n\t"
"paddd 24(%1), %%mm3 \n\t"
"movq %%mm4, (%1) \n\t"
"movq %%mm2, 8(%1) \n\t"
"movq %%mm5, 16(%1) \n\t"
"movq %%mm3, 24(%1) \n\t"
"add $16, %0 \n\t"
"add $32, %1 \n\t"
"add $16, %2 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
"pxor %%mm7, %%mm7 \n\t"
"1: \n\t"
"pxor %%mm0, %%mm0 \n\t"
"pxor %%mm1, %%mm1 \n\t"
"movq (%0), %%mm2 \n\t"
"movq 8(%0), %%mm3 \n\t"
"pcmpgtw %%mm2, %%mm0 \n\t"
"pcmpgtw %%mm3, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"psubw %%mm0, %%mm2 \n\t"
"psubw %%mm1, %%mm3 \n\t"
"movq %%mm2, %%mm4 \n\t"
"movq %%mm3, %%mm5 \n\t"
"psubusw (%2), %%mm2 \n\t"
"psubusw 8(%2), %%mm3 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"psubw %%mm0, %%mm2 \n\t"
"psubw %%mm1, %%mm3 \n\t"
"movq %%mm2, (%0) \n\t"
"movq %%mm3, 8(%0) \n\t"
"movq %%mm4, %%mm2 \n\t"
"movq %%mm5, %%mm3 \n\t"
"punpcklwd %%mm7, %%mm4 \n\t"
"punpckhwd %%mm7, %%mm2 \n\t"
"punpcklwd %%mm7, %%mm5 \n\t"
"punpckhwd %%mm7, %%mm3 \n\t"
"paddd (%1), %%mm4 \n\t"
"paddd 8(%1), %%mm2 \n\t"
"paddd 16(%1), %%mm5 \n\t"
"paddd 24(%1), %%mm3 \n\t"
"movq %%mm4, (%1) \n\t"
"movq %%mm2, 8(%1) \n\t"
"movq %%mm5, 16(%1) \n\t"
"movq %%mm3, 24(%1) \n\t"
"add $16, %0 \n\t"
"add $32, %1 \n\t"
"add $16, %2 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (block), "+r" (sum), "+r" (offset)
: "r"(block+64)
);
@ -626,47 +626,47 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
s->dct_count[intra]++;
asm volatile(
"pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t"
"pxor %%xmm0, %%xmm0 \n\t"
"pxor %%xmm1, %%xmm1 \n\t"
"movdqa (%0), %%xmm2 \n\t"
"movdqa 16(%0), %%xmm3 \n\t"
"pcmpgtw %%xmm2, %%xmm0 \n\t"
"pcmpgtw %%xmm3, %%xmm1 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
"movdqa %%xmm2, %%xmm4 \n\t"
"movdqa %%xmm3, %%xmm5 \n\t"
"psubusw (%2), %%xmm2 \n\t"
"psubusw 16(%2), %%xmm3 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
"movdqa %%xmm2, (%0) \n\t"
"movdqa %%xmm3, 16(%0) \n\t"
"movdqa %%xmm4, %%xmm6 \n\t"
"movdqa %%xmm5, %%xmm0 \n\t"
"punpcklwd %%xmm7, %%xmm4 \n\t"
"punpckhwd %%xmm7, %%xmm6 \n\t"
"punpcklwd %%xmm7, %%xmm5 \n\t"
"punpckhwd %%xmm7, %%xmm0 \n\t"
"paddd (%1), %%xmm4 \n\t"
"paddd 16(%1), %%xmm6 \n\t"
"paddd 32(%1), %%xmm5 \n\t"
"paddd 48(%1), %%xmm0 \n\t"
"movdqa %%xmm4, (%1) \n\t"
"movdqa %%xmm6, 16(%1) \n\t"
"movdqa %%xmm5, 32(%1) \n\t"
"movdqa %%xmm0, 48(%1) \n\t"
"add $32, %0 \n\t"
"add $64, %1 \n\t"
"add $32, %2 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
"pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t"
"pxor %%xmm0, %%xmm0 \n\t"
"pxor %%xmm1, %%xmm1 \n\t"
"movdqa (%0), %%xmm2 \n\t"
"movdqa 16(%0), %%xmm3 \n\t"
"pcmpgtw %%xmm2, %%xmm0 \n\t"
"pcmpgtw %%xmm3, %%xmm1 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
"movdqa %%xmm2, %%xmm4 \n\t"
"movdqa %%xmm3, %%xmm5 \n\t"
"psubusw (%2), %%xmm2 \n\t"
"psubusw 16(%2), %%xmm3 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm3 \n\t"
"psubw %%xmm0, %%xmm2 \n\t"
"psubw %%xmm1, %%xmm3 \n\t"
"movdqa %%xmm2, (%0) \n\t"
"movdqa %%xmm3, 16(%0) \n\t"
"movdqa %%xmm4, %%xmm6 \n\t"
"movdqa %%xmm5, %%xmm0 \n\t"
"punpcklwd %%xmm7, %%xmm4 \n\t"
"punpckhwd %%xmm7, %%xmm6 \n\t"
"punpcklwd %%xmm7, %%xmm5 \n\t"
"punpckhwd %%xmm7, %%xmm0 \n\t"
"paddd (%1), %%xmm4 \n\t"
"paddd 16(%1), %%xmm6 \n\t"
"paddd 32(%1), %%xmm5 \n\t"
"paddd 48(%1), %%xmm0 \n\t"
"movdqa %%xmm4, (%1) \n\t"
"movdqa %%xmm6, 16(%1) \n\t"
"movdqa %%xmm5, 32(%1) \n\t"
"movdqa %%xmm0, 48(%1) \n\t"
"add $32, %0 \n\t"
"add $64, %1 \n\t"
"add $32, %2 \n\t"
"cmp %3, %0 \n\t"
" jb 1b \n\t"
: "+r" (block), "+r" (sum), "+r" (offset)
: "r"(block+64)
);
@ -705,10 +705,10 @@ void MPV_common_init_mmx(MpegEncContext *s)
draw_edges = draw_edges_mmx;
if (mm_flags & MM_SSE2) {
s->denoise_dct= denoise_dct_sse2;
} else {
s->denoise_dct= denoise_dct_mmx;
}
s->denoise_dct= denoise_dct_sse2;
} else {
s->denoise_dct= denoise_dct_mmx;
}
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
if(mm_flags & MM_SSE2){

View File

@ -21,26 +21,26 @@
#undef PMAXW
#ifdef HAVE_MMX2
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
#define PMAX(a,b) \
"pshufw $0x0E," #a ", " #b " \n\t"\
PMAXW(b, a)\
"pshufw $0x01," #a ", " #b " \n\t"\
PMAXW(b, a)
"pshufw $0x0E," #a ", " #b " \n\t"\
PMAXW(b, a)\
"pshufw $0x01," #a ", " #b " \n\t"\
PMAXW(b, a)
#else
#define SPREADW(a) \
"punpcklwd " #a ", " #a " \n\t"\
"punpcklwd " #a ", " #a " \n\t"
"punpcklwd " #a ", " #a " \n\t"\
"punpcklwd " #a ", " #a " \n\t"
#define PMAXW(a,b) \
"psubusw " #a ", " #b " \n\t"\
"paddw " #a ", " #b " \n\t"
"psubusw " #a ", " #b " \n\t"\
"paddw " #a ", " #b " \n\t"
#define PMAX(a,b) \
"movq " #a ", " #b " \n\t"\
"psrlq $32, " #a " \n\t"\
PMAXW(b, a)\
"movq " #a ", " #b " \n\t"\
"psrlq $16, " #a " \n\t"\
PMAXW(b, a)
"movq " #a ", " #b " \n\t"\
"psrlq $32, " #a " \n\t"\
PMAXW(b, a)\
"movq " #a ", " #b " \n\t"\
"psrlq $16, " #a " \n\t"\
PMAXW(b, a)
#endif
@ -71,18 +71,18 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if (!s->h263_aic) {
#if 1
asm volatile (
"mul %%ecx \n\t"
: "=d" (level), "=a"(dummy)
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
"mul %%ecx \n\t"
: "=d" (level), "=a"(dummy)
: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
);
#else
asm volatile (
"xorl %%edx, %%edx \n\t"
"divw %%cx \n\t"
"movzwl %%ax, %%eax \n\t"
: "=a" (level)
: "a" ((block[0]>>2) + q), "c" (q<<1)
: "%edx"
"xorl %%edx, %%edx \n\t"
"divw %%cx \n\t"
"movzwl %%ax, %%eax \n\t"
: "=a" (level)
: "a" ((block[0]>>2) + q), "c" (q<<1)
: "%edx"
);
#endif
} else
@ -103,94 +103,94 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
asm volatile(
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
SPREADW(%%mm3)
"pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0
"movq (%2), %%mm5 \n\t" // qmat[0]
"pxor %%mm6, %%mm6 \n\t"
"psubw (%3), %%mm6 \n\t" // -bias[0]
"mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por %%mm0, %%mm4 \n\t"
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
"movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
"pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0
"movq (%2), %%mm5 \n\t" // qmat[0]
"pxor %%mm6, %%mm6 \n\t"
"psubw (%3), %%mm6 \n\t" // -bias[0]
"mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por %%mm0, %%mm4 \n\t"
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
"movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat), "r" (bias),
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);
// note: the asm is split because gcc doesn't like that many operands ...
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
"psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
"psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
: "g" (s->max_qcoeff)
);
}else{ // FMT_H263
asm volatile(
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
SPREADW(%%mm3)
"pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0
"mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por %%mm0, %%mm4 \n\t"
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
"movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
"pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0
"mov $-128, %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por %%mm0, %%mm4 \n\t"
"pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
"movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat+64), "r" (bias+64),
"r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);
// note: the asm is split because gcc doesn't like that many operands ...
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
"psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
"psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
: "g" (s->max_qcoeff)
);

File diff suppressed because it is too large Load Diff

View File

@ -257,13 +257,13 @@ enum PixelFormat avcodec_get_pix_fmt(const char* name)
for (i=0; i < PIX_FMT_NB; i++)
if (!strcmp(pix_fmt_info[i].name, name))
break;
break;
return i;
}
/* Picture fields are filled with 'ptr' addresses. Also returns the size */
int avpicture_fill(AVPicture *picture, uint8_t *ptr,
int pix_fmt, int width, int height)
int pix_fmt, int width, int height)
{
int size, w2, h2, size2;
PixFmtInfo *pinfo;
@ -373,36 +373,36 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
pix_fmt == PIX_FMT_RGB565 ||
pix_fmt == PIX_FMT_RGB555)
w = width * 2;
else if (pix_fmt == PIX_FMT_UYVY411)
w = width + width/2;
else if (pix_fmt == PIX_FMT_PAL8)
w = width;
else
w = width * (pf->depth * pf->nb_channels / 8);
else if (pix_fmt == PIX_FMT_UYVY411)
w = width + width/2;
else if (pix_fmt == PIX_FMT_PAL8)
w = width;
else
w = width * (pf->depth * pf->nb_channels / 8);
data_planes = 1;
h = height;
data_planes = 1;
h = height;
} else {
data_planes = pf->nb_channels;
w = (width*pf->depth + 7)/8;
h = height;
w = (width*pf->depth + 7)/8;
h = height;
}
for (i=0; i<data_planes; i++) {
if (i == 1) {
w = width >> pf->x_chroma_shift;
h = height >> pf->y_chroma_shift;
}
w = width >> pf->x_chroma_shift;
h = height >> pf->y_chroma_shift;
}
s = src->data[i];
for(j=0; j<h; j++) {
memcpy(dest, s, w);
dest += w;
s += src->linesize[i];
}
for(j=0; j<h; j++) {
memcpy(dest, s, w);
dest += w;
s += src->linesize[i];
}
}
if (pf->pixel_type == FF_PIXEL_PALETTE)
memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
return size;
}
@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt)
case PIX_FMT_RGB555:
bits = 16;
break;
case PIX_FMT_UYVY411:
bits = 12;
break;
case PIX_FMT_UYVY411:
bits = 12;
break;
default:
bits = pf->depth * pf->nb_channels;
break;
@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src,
case PIX_FMT_RGB555:
bits = 16;
break;
case PIX_FMT_UYVY411:
bits = 12;
break;
case PIX_FMT_UYVY411:
bits = 12;
break;
default:
bits = pf->depth * pf->nb_channels;
break;
@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src,
cr = cr1;
for(w = width; w >= 4; w -= 4) {
cb[0] = p[0];
lum[0] = p[1];
lum[0] = p[1];
lum[1] = p[2];
cr[0] = p[3];
lum[2] = p[4];
lum[3] = p[5];
lum[2] = p[4];
lum[3] = p[5];
p += 6;
lum += 4;
cb++;
@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
#define SCALEBITS 10
#define ONE_HALF (1 << (SCALEBITS - 1))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
#define YUV_TO_RGB1_CCIR(cb1, cr1)\
{\
@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
static inline int C_JPEG_TO_CCIR(int y) {
y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
if (y < 16)
y = 16;
y = 16;
return y;
}
@ -1681,7 +1681,7 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src,
typedef struct ConvertEntry {
void (*convert)(AVPicture *dst,
const AVPicture *src, int width, int height);
const AVPicture *src, int width, int height);
} ConvertEntry;
/* Add each new conversion function in this table. In order to be able
@ -1721,7 +1721,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
[PIX_FMT_RGBA32] = {
.convert = yuv420p_to_rgba32
},
[PIX_FMT_UYVY422] = {
[PIX_FMT_UYVY422] = {
.convert = yuv420p_to_uyvy422,
},
},
@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
* @return ored mask of FF_ALPHA_xxx constants
*/
int img_get_alpha_info(const AVPicture *src,
int pix_fmt, int width, int height)
int pix_fmt, int width, int height)
{
PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
int ret;
@ -2300,10 +2300,10 @@ int img_get_alpha_info(const AVPicture *src,
/* filter parameters: [-1 4 2 4 -1] // 8 */
static void deinterlace_line(uint8_t *dst,
const uint8_t *lum_m4, const uint8_t *lum_m3,
const uint8_t *lum_m2, const uint8_t *lum_m1,
const uint8_t *lum,
int size)
const uint8_t *lum_m4, const uint8_t *lum_m3,
const uint8_t *lum_m2, const uint8_t *lum_m1,
const uint8_t *lum,
int size)
{
#ifndef HAVE_MMX
uint8_t *cm = cropTbl + MAX_NEG_CROP;
@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
}
static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
int width, int height)
int width, int height)
{
uint8_t *src_m1, *src_0, *src_p1, *src_p2;
int y;
@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
if (pix_fmt != PIX_FMT_YUV420P &&
pix_fmt != PIX_FMT_YUV422P &&
pix_fmt != PIX_FMT_YUV444P &&
pix_fmt != PIX_FMT_YUV411P)
pix_fmt != PIX_FMT_YUV411P)
return -1;
if ((width & 3) != 0 || (height & 3) != 0)
return -1;

View File

@ -821,7 +821,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
#ifdef RGBA_IN
static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
int width, int height)
int width, int height)
{
const unsigned char *p;
int src_wrap, ret, x, y;

View File

@ -64,8 +64,8 @@ static inline int get_phase(int pos)
/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
int src_width, int src_start, int src_incr,
int16_t *filters)
int src_width, int src_start, int src_incr,
int16_t *filters)
{
int src_pos, phase, sum, i;
const uint8_t *s;
@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
int wrap, int16_t *filter)
{
int sum, i;
const uint8_t *s;
@ -167,7 +167,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
const uint8_t *src, int src_width,
const uint8_t *src, int src_width,
int src_start, int src_incr, int16_t *filters)
{
int src_pos, phase;
@ -212,7 +212,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
}
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
int wrap, int16_t *filter)
{
int sum, i, v;
const uint8_t *s;
@ -277,18 +277,18 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
#endif
#ifdef HAVE_ALTIVEC
typedef union {
typedef union {
vector unsigned char v;
unsigned char c[16];
} vec_uc_t;
typedef union {
typedef union {
vector signed short v;
signed short s[8];
} vec_ss_t;
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
int wrap, int16_t *filter)
{
int sum, i;
const uint8_t *s;
@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
const uint8_t *src, int src_width,
const uint8_t *src, int src_width,
int src_start, int src_incr, int16_t *filters)
{
int src_pos, phase, sum, j, v, i;
@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width,
}
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
int src_width, int src_start, int src_incr,
int16_t *filters)
int src_width, int src_start, int src_incr,
int16_t *filters)
{
int n, src_end;
@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
ImgReSampleContext *s;
if (!owidth || !oheight || !iwidth || !iheight)
return NULL;
return NULL;
s = av_mallocz(sizeof(ImgReSampleContext));
if (!s)

View File

@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s)
for (i=0; i < 128; ++i) {
s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120)
? 236 : 2*((i + 2) - ((i + 1) % 3)));
? 236 : 2*((i + 2) - ((i + 1) % 3)));
s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5));
s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6));
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7));
s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9));
}
s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
@ -84,8 +84,8 @@ static void build_modpred(Indeo3DecodeContext *s)
for (i=0; i < 24; ++i) {
for (j=0; j < 256; ++j) {
s->corrector_type[i*256+j] = (j < corrector_type_0[i])
? 1 : ((j < 248 || (i == 16 && j == 248))
? 0 : corrector_type_2[j - 248]);
? 1 : ((j < 248 || (i == 16 && j == 248))
? 0 : corrector_type_2[j - 248]);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -83,10 +83,10 @@
*/
#if CONST_BITS == 8
#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
#else
#define FIX_0_382683433 FIX(0.382683433)
#define FIX_0_541196100 FIX(0.541196100)
@ -135,7 +135,7 @@ static always_inline void row_fdct(DCTELEM * data){
/* Even part */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
@ -144,30 +144,30 @@ static always_inline void row_fdct(DCTELEM * data){
dataptr[4] = tmp10 - tmp11;
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
dataptr[2] = tmp13 + z1; /* phase 5 */
dataptr[2] = tmp13 + z1; /* phase 5 */
dataptr[6] = tmp13 - z1;
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
/* The rotator is modified from fig 4-8 to avoid extra negations. */
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
z11 = tmp7 + z3; /* phase 5 */
z11 = tmp7 + z3; /* phase 5 */
z13 = tmp7 - z3;
dataptr[5] = z13 + z2; /* phase 6 */
dataptr[5] = z13 + z2; /* phase 6 */
dataptr[3] = z13 - z2;
dataptr[1] = z11 + z4;
dataptr[7] = z11 - z4;
dataptr += DCTSIZE; /* advance pointer to next row */
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
@ -202,7 +202,7 @@ fdct_ifast (DCTELEM * data)
/* Even part */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp10 = tmp0 + tmp3; /* phase 2 */
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
@ -216,7 +216,7 @@ fdct_ifast (DCTELEM * data)
/* Odd part */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp10 = tmp4 + tmp5; /* phase 2 */
tmp11 = tmp5 + tmp6;
tmp12 = tmp6 + tmp7;
@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data)
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
z11 = tmp7 + z3; /* phase 5 */
z11 = tmp7 + z3; /* phase 5 */
z13 = tmp7 - z3;
dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data)
dataptr[DCTSIZE*1] = z11 + z4;
dataptr[DCTSIZE*7] = z11 - z4;
dataptr++; /* advance pointer to next column */
dataptr++; /* advance pointer to next column */
}
}
@ -293,7 +293,7 @@ fdct_ifast248 (DCTELEM * data)
dataptr[DCTSIZE*3] = tmp13 + z1;
dataptr[DCTSIZE*7] = tmp13 - z1;
dataptr++; /* advance pointer to next column */
dataptr++; /* advance pointer to next column */
}
}

View File

@ -92,10 +92,10 @@
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@ -106,18 +106,18 @@
*/
#if CONST_BITS == 13
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
@ -185,9 +185,9 @@ static always_inline void row_fdct(DCTELEM * data){
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS-PASS1_BITS);
CONST_BITS-PASS1_BITS);
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS-PASS1_BITS);
CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
@ -217,7 +217,7 @@ static always_inline void row_fdct(DCTELEM * data){
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
@ -267,9 +267,9 @@ ff_jpeg_fdct_islow (DCTELEM * data)
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
@ -295,15 +295,15 @@ ff_jpeg_fdct_islow (DCTELEM * data)
z4 += z5;
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr++; /* advance pointer to next column */
dataptr++; /* advance pointer to next column */
}
}
@ -350,9 +350,9 @@ ff_fdct248_islow (DCTELEM * data)
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
@ -364,10 +364,10 @@ ff_fdct248_islow (DCTELEM * data)
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
CONST_BITS+PASS1_BITS);
dataptr++; /* advance pointer to next column */
dataptr++; /* advance pointer to next column */
}
}

File diff suppressed because it is too large Load Diff

View File

@ -81,8 +81,8 @@
*/
typedef struct LclContext {
AVCodecContext *avctx;
AVFrame pic;
AVCodecContext *avctx;
AVFrame pic;
PutBitContext pb;
// Image type
@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha
*/
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
{
LclContext * const c = (LclContext *)avctx->priv_data;
unsigned char *encoded = (unsigned char *)buf;
LclContext * const c = (LclContext *)avctx->priv_data;
unsigned char *encoded = (unsigned char *)buf;
unsigned int pixel_ptr;
int row, col;
unsigned char *outptr;
@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
#endif
unsigned int len = buf_size;
if(c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);
if(c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);
c->pic.reference = 0;
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
if(avctx->get_buffer(avctx, &c->pic) < 0){
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
}
c->pic.reference = 0;
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
if(avctx->get_buffer(avctx, &c->pic) < 0){
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
}
outptr = c->pic.data[0]; // Output image pointer
@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
pixel_ptr = row * width * 3;
yq = encoded[pixel_ptr++];
uqvq = encoded[pixel_ptr++];
uqvq+=(encoded[pixel_ptr++] << 8);
uqvq+=(encoded[pixel_ptr++] << 8);
for (col = 1; col < width; col++) {
encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8));
@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
c->zstream.avail_in = avctx->width*3;
zret = deflate(&(c->zstream), Z_NO_FLUSH);
if (zret != Z_OK) {
av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
return -1;
av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
return -1;
}
}
zret = deflate(&(c->zstream), Z_FINISH);
@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx)
break;
default:
if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) {
av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
return 1;
}
av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression);
@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx)
*/
static int decode_end(AVCodecContext *avctx)
{
LclContext * const c = (LclContext *)avctx->priv_data;
LclContext * const c = (LclContext *)avctx->priv_data;
if (c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);
if (c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);
#ifdef CONFIG_ZLIB
inflateEnd(&(c->zstream));
#endif
return 0;
return 0;
}
@ -883,28 +883,28 @@ static int encode_end(AVCodecContext *avctx)
}
AVCodec mszh_decoder = {
"mszh",
CODEC_TYPE_VIDEO,
CODEC_ID_MSZH,
sizeof(LclContext),
decode_init,
NULL,
decode_end,
decode_frame,
CODEC_CAP_DR1,
"mszh",
CODEC_TYPE_VIDEO,
CODEC_ID_MSZH,
sizeof(LclContext),
decode_init,
NULL,
decode_end,
decode_frame,
CODEC_CAP_DR1,
};
AVCodec zlib_decoder = {
"zlib",
CODEC_TYPE_VIDEO,
CODEC_ID_ZLIB,
sizeof(LclContext),
decode_init,
NULL,
decode_end,
decode_frame,
CODEC_CAP_DR1,
"zlib",
CODEC_TYPE_VIDEO,
CODEC_ID_ZLIB,
sizeof(LclContext),
decode_init,
NULL,
decode_end,
decode_frame,
CODEC_CAP_DR1,
};
#ifdef CONFIG_ENCODERS

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3],
uint8_t * dst[3], int dstStride[3],
int horizontalSize, int verticalSize,
QP_STORE_T *QP_store, int QP_stride,
pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
/**

View File

@ -26,35 +26,35 @@
#endif
#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
do { \
__typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \
__typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \
__typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \
__typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \
tempA1 = vec_mergeh (src_a, src_e); \
tempB1 = vec_mergel (src_a, src_e); \
tempC1 = vec_mergeh (src_b, src_f); \
tempD1 = vec_mergel (src_b, src_f); \
tempE1 = vec_mergeh (src_c, src_g); \
tempF1 = vec_mergel (src_c, src_g); \
tempG1 = vec_mergeh (src_d, src_h); \
tempH1 = vec_mergel (src_d, src_h); \
tempA2 = vec_mergeh (tempA1, tempE1); \
tempB2 = vec_mergel (tempA1, tempE1); \
tempC2 = vec_mergeh (tempB1, tempF1); \
tempD2 = vec_mergel (tempB1, tempF1); \
tempE2 = vec_mergeh (tempC1, tempG1); \
tempF2 = vec_mergel (tempC1, tempG1); \
tempG2 = vec_mergeh (tempD1, tempH1); \
tempH2 = vec_mergel (tempD1, tempH1); \
src_a = vec_mergeh (tempA2, tempE2); \
src_b = vec_mergel (tempA2, tempE2); \
src_c = vec_mergeh (tempB2, tempF2); \
src_d = vec_mergel (tempB2, tempF2); \
src_e = vec_mergeh (tempC2, tempG2); \
src_f = vec_mergel (tempC2, tempG2); \
src_g = vec_mergeh (tempD2, tempH2); \
src_h = vec_mergel (tempD2, tempH2); \
do { \
__typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \
__typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \
__typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \
__typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \
tempA1 = vec_mergeh (src_a, src_e); \
tempB1 = vec_mergel (src_a, src_e); \
tempC1 = vec_mergeh (src_b, src_f); \
tempD1 = vec_mergel (src_b, src_f); \
tempE1 = vec_mergeh (src_c, src_g); \
tempF1 = vec_mergel (src_c, src_g); \
tempG1 = vec_mergeh (src_d, src_h); \
tempH1 = vec_mergel (src_d, src_h); \
tempA2 = vec_mergeh (tempA1, tempE1); \
tempB2 = vec_mergel (tempA1, tempE1); \
tempC2 = vec_mergeh (tempB1, tempF1); \
tempD2 = vec_mergel (tempB1, tempF1); \
tempE2 = vec_mergeh (tempC1, tempG1); \
tempF2 = vec_mergel (tempC1, tempG1); \
tempG2 = vec_mergeh (tempD1, tempH1); \
tempH2 = vec_mergel (tempD1, tempH1); \
src_a = vec_mergeh (tempA2, tempE2); \
src_b = vec_mergel (tempA2, tempE2); \
src_c = vec_mergeh (tempB2, tempF2); \
src_d = vec_mergel (tempB2, tempF2); \
src_e = vec_mergeh (tempC2, tempG2); \
src_f = vec_mergel (tempC2, tempG2); \
src_g = vec_mergeh (tempD2, tempH2); \
src_h = vec_mergel (tempD2, tempH2); \
} while (0)
@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;
#define LOAD_LINE(i) \
register int j##i = i * stride; \
vector unsigned char perm##i = vec_lvsl(j##i, src2); \
const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \
vector unsigned char v_srcA2##i; \
if (two_vectors) \
v_srcA2##i = vec_ld(j##i + 16, src2); \
const vector unsigned char v_srcA##i = \
vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \
#define LOAD_LINE(i) \
register int j##i = i * stride; \
vector unsigned char perm##i = vec_lvsl(j##i, src2); \
const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \
vector unsigned char v_srcA2##i; \
if (two_vectors) \
v_srcA2##i = vec_ld(j##i + 16, src2); \
const vector unsigned char v_srcA##i = \
vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \
v_srcAss##i = \
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_srcA##i)
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_srcA##i)
#define LOAD_LINE_ALIGNED(i) \
register int j##i = i * stride; \
const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \
v_srcAss##i = \
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_srcA##i)
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_srcA##i)
// special-casing the aligned case is worthwhile, as all calls from
// the (transposed) horizontal deblocks will be aligned, in addition
@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
#undef LOAD_LINE
#undef LOAD_LINE_ALIGNED
#define ITER(i, j) \
const vector signed short v_diff##i = \
vec_sub(v_srcAss##i, v_srcAss##j); \
const vector signed short v_sum##i = \
vec_add(v_diff##i, v_dcOffset); \
const vector signed short v_comp##i = \
(vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
v_dcThreshold); \
const vector signed short v_part##i = vec_and(mask, v_comp##i); \
#define ITER(i, j) \
const vector signed short v_diff##i = \
vec_sub(v_srcAss##i, v_srcAss##j); \
const vector signed short v_sum##i = \
vec_add(v_diff##i, v_dcOffset); \
const vector signed short v_comp##i = \
(vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \
v_dcThreshold); \
const vector signed short v_part##i = vec_and(mask, v_comp##i); \
v_numEq = vec_sum4s(v_part##i, v_numEq);
ITER(0, 1);
@ -167,13 +167,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
if (numEq > c->ppMode.flatnessThreshold)
{
const vector unsigned char mmoP1 = (const vector unsigned char)
AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
const vector unsigned char mmoP2 = (const vector unsigned char)
AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
const vector unsigned char mmoP = (const vector unsigned char)
vec_lvsl(8, (unsigned char*)0);
vec_lvsl(8, (unsigned char*)0);
vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
@ -185,9 +185,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);
if (vec_any_gt(mmoSum, v4QP))
return 0;
return 0;
else
return 1;
return 1;
}
else return 2;
}
@ -218,21 +218,21 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
#define LOAD_LINE(i) \
const vector unsigned char perml##i = \
vec_lvsl(i * stride, src2); \
const vector unsigned char perml##i = \
vec_lvsl(i * stride, src2); \
vbA##i = vec_ld(i * stride, src2); \
vbB##i = vec_ld(i * stride + 16, src2); \
vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \
vb##i = \
(vector signed short)vec_mergeh((vector unsigned char)zero, \
(vector unsigned char)vbT##i)
(vector signed short)vec_mergeh((vector unsigned char)zero, \
(vector unsigned char)vbT##i)
#define LOAD_LINE_ALIGNED(i) \
register int j##i = i * stride; \
vbT##i = vec_ld(j##i, src2); \
vb##i = \
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)vbT##i)
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)vbT##i)
// special-casing the aligned case is worthwhile, as all calls from
// the (transposed) horizontal deblocks will be aligned, in addition
@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
const vector signed short v_sumsB9 = vec_add(temp91, v_last);
#define COMPUTE_VR(i, j, k) \
const vector signed short temps1##i = \
vec_add(v_sumsB##i, v_sumsB##k); \
const vector signed short temps2##i = \
vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
#define COMPUTE_VR(i, j, k) \
const vector signed short temps1##i = \
vec_add(v_sumsB##i, v_sumsB##k); \
const vector signed short temps2##i = \
vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \
const vector signed short vr##j = vec_sra(temps2##i, v_4)
COMPUTE_VR(0, 1, 2);
@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
#define PACK_AND_STORE(i) \
const vector unsigned char perms##i = \
vec_lvsr(i * stride, src2); \
const vector unsigned char vf##i = \
vec_packsu(vr##i, (vector signed short)zero); \
const vector unsigned char vg##i = \
vec_perm(vf##i, vbT##i, permHH); \
const vector unsigned char mask##i = \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
const vector unsigned char vg2##i = \
vec_perm(vg##i, vg##i, perms##i); \
const vector unsigned char svA##i = \
vec_sel(vbA##i, vg2##i, mask##i); \
const vector unsigned char svB##i = \
vec_sel(vg2##i, vbB##i, mask##i); \
vec_st(svA##i, i * stride, src2); \
#define PACK_AND_STORE(i) \
const vector unsigned char perms##i = \
vec_lvsr(i * stride, src2); \
const vector unsigned char vf##i = \
vec_packsu(vr##i, (vector signed short)zero); \
const vector unsigned char vg##i = \
vec_perm(vf##i, vbT##i, permHH); \
const vector unsigned char mask##i = \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
const vector unsigned char vg2##i = \
vec_perm(vg##i, vg##i, perms##i); \
const vector unsigned char svA##i = \
vec_sel(vbA##i, vg2##i, mask##i); \
const vector unsigned char svB##i = \
vec_sel(vg2##i, vbB##i, mask##i); \
vec_st(svA##i, i * stride, src2); \
vec_st(svB##i, i * stride + 16, src2)
#define PACK_AND_STORE_ALIGNED(i) \
const vector unsigned char vf##i = \
vec_packsu(vr##i, (vector signed short)zero); \
const vector unsigned char vg##i = \
vec_perm(vf##i, vbT##i, permHH); \
#define PACK_AND_STORE_ALIGNED(i) \
const vector unsigned char vf##i = \
vec_packsu(vr##i, (vector signed short)zero); \
const vector unsigned char vg##i = \
vec_perm(vf##i, vbT##i, permHH); \
vec_st(vg##i, i * stride, src2)
// special-casing the aligned case is worthwhile, as all calls from
@ -398,17 +398,17 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
vqp = vec_splat(vqp, 0);
#define LOAD_LINE(i) \
const vector unsigned char perm##i = \
vec_lvsl(i * stride, src2); \
const vector unsigned char vbA##i = \
vec_ld(i * stride, src2); \
const vector unsigned char vbB##i = \
vec_ld(i * stride + 16, src2); \
const vector unsigned char vbT##i = \
vec_perm(vbA##i, vbB##i, perm##i); \
const vector signed short vb##i = \
(vector signed short)vec_mergeh((vector unsigned char)zero, \
(vector unsigned char)vbT##i)
const vector unsigned char perm##i = \
vec_lvsl(i * stride, src2); \
const vector unsigned char vbA##i = \
vec_ld(i * stride, src2); \
const vector unsigned char vbB##i = \
vec_ld(i * stride + 16, src2); \
const vector unsigned char vbT##i = \
vec_perm(vbA##i, vbB##i, perm##i); \
const vector signed short vb##i = \
(vector signed short)vec_mergeh((vector unsigned char)zero, \
(vector unsigned char)vbT##i)
src2 += stride*3;
@ -426,7 +426,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
const vector signed short v_2 = vec_splat_s16(2);
const vector signed short v_5 = vec_splat_s16(5);
const vector signed short v_32 = vec_sl(v_1,
(vector unsigned short)v_5);
(vector unsigned short)v_5);
/* middle energy */
const vector signed short l3minusl6 = vec_sub(vb3, vb6);
const vector signed short l5minusl4 = vec_sub(vb5, vb4);
@ -483,22 +483,22 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
#define STORE(i) \
const vector unsigned char perms##i = \
vec_lvsr(i * stride, src2); \
const vector unsigned char vg##i = \
vec_perm(st##i, vbT##i, permHH); \
const vector unsigned char mask##i = \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
const vector unsigned char vg2##i = \
vec_perm(vg##i, vg##i, perms##i); \
const vector unsigned char svA##i = \
vec_sel(vbA##i, vg2##i, mask##i); \
const vector unsigned char svB##i = \
vec_sel(vg2##i, vbB##i, mask##i); \
vec_st(svA##i, i * stride, src2); \
#define STORE(i) \
const vector unsigned char perms##i = \
vec_lvsr(i * stride, src2); \
const vector unsigned char vg##i = \
vec_perm(st##i, vbT##i, permHH); \
const vector unsigned char mask##i = \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
const vector unsigned char vg2##i = \
vec_perm(vg##i, vg##i, perms##i); \
const vector unsigned char svA##i = \
vec_sel(vbA##i, vg2##i, mask##i); \
const vector unsigned char svB##i = \
vec_sel(vg2##i, vbB##i, mask##i); \
vec_st(svA##i, i * stride, src2); \
vec_st(svB##i, i * stride + 16, src2)
STORE(4);
@ -522,11 +522,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
dt[0] = deringThreshold;
v_dt = vec_splat(vec_ld(0, dt), 0);
#define LOAD_LINE(i) \
const vector unsigned char perm##i = \
vec_lvsl(i * stride, srcCopy); \
vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \
vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \
#define LOAD_LINE(i) \
const vector unsigned char perm##i = \
vec_lvsl(i * stride, srcCopy); \
vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \
vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \
vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)
LOAD_LINE(0);
@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
{
const vector unsigned char trunc_perm = (vector unsigned char)
AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
#define EXTRACT(op) do { \
#define EXTRACT(op) do { \
const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \
const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \
const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \
@ -584,29 +584,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
{
const vector unsigned short mask1 = (vector unsigned short)
AVV(0x0001, 0x0002, 0x0004, 0x0008,
0x0010, 0x0020, 0x0040, 0x0080);
0x0010, 0x0020, 0x0040, 0x0080);
const vector unsigned short mask2 = (vector unsigned short)
AVV(0x0100, 0x0200, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000);
0x0000, 0x0000, 0x0000, 0x0000);
const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
const vector unsigned int vuint32_1 = vec_splat_u32(1);
#define COMPARE(i) \
vector signed int sum##i; \
do { \
const vector unsigned char cmp##i = \
(vector unsigned char)vec_cmpgt(src##i, v_avg); \
const vector unsigned short cmpHi##i = \
(vector unsigned short)vec_mergeh(cmp##i, cmp##i); \
const vector unsigned short cmpLi##i = \
(vector unsigned short)vec_mergel(cmp##i, cmp##i); \
const vector signed short cmpHf##i = \
(vector signed short)vec_and(cmpHi##i, mask1); \
const vector signed short cmpLf##i = \
(vector signed short)vec_and(cmpLi##i, mask2); \
const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \
const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
#define COMPARE(i) \
vector signed int sum##i; \
do { \
const vector unsigned char cmp##i = \
(vector unsigned char)vec_cmpgt(src##i, v_avg); \
const vector unsigned short cmpHi##i = \
(vector unsigned short)vec_mergeh(cmp##i, cmp##i); \
const vector unsigned short cmpLi##i = \
(vector unsigned short)vec_mergel(cmp##i, cmp##i); \
const vector signed short cmpHf##i = \
(vector signed short)vec_and(cmpHi##i, mask1); \
const vector signed short cmpLf##i = \
(vector signed short)vec_and(cmpLi##i, mask2); \
const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \
const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \
sum##i = vec_sums(sumq##i, zero); } while (0)
COMPARE(0);
@ -643,11 +643,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector signed int t2B = vec_or(sumB, tB);
const vector signed int t2C = vec_or(sumC, tC);
const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
vec_sl(t2A, vuint32_1));
vec_sl(t2A, vuint32_1));
const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
vec_sl(t2B, vuint32_1));
vec_sl(t2B, vuint32_1));
const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
vec_sl(t2C, vuint32_1));
vec_sl(t2C, vuint32_1));
const vector signed int yA = vec_and(t2A, t3A);
const vector signed int yB = vec_and(t2B, t3B);
const vector signed int yC = vec_and(t2C, t3C);
@ -659,15 +659,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
const vector signed int sumAp = vec_and(yA,
vec_and(sumAd4,sumAd8));
vec_and(sumAd4,sumAd8));
const vector signed int sumBp = vec_and(yB,
vec_and(sumBd4,sumBd8));
vec_and(sumBd4,sumBd8));
sumA2 = vec_or(sumAp,
vec_sra(sumAp,
vuint32_16));
vec_sra(sumAp,
vuint32_16));
sumB2 = vec_or(sumBp,
vec_sra(sumBp,
vuint32_16));
vec_sra(sumBp,
vuint32_16));
}
vec_st(sumA2, 0, S);
vec_st(sumB2, 16, S);
@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector unsigned char permA1 = (vector unsigned char)
AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
const vector unsigned char permA2 = (vector unsigned char)
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
const vector unsigned char permA1inc = (vector unsigned char)
AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
const vector unsigned char permA2inc = (vector unsigned char)
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
const vector unsigned char magic = (vector unsigned char)
AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
const vector unsigned char extractPerm = (vector unsigned char)
AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
const vector unsigned char extractPermInc = (vector unsigned char)
AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
const vector unsigned char tenRight = (vector unsigned char)
AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
const vector unsigned char eightLeft = (vector unsigned char)
AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
#define F_INIT(i) \
vector unsigned char tenRightM##i = tenRight; \
vector unsigned char permA1M##i = permA1; \
vector unsigned char permA2M##i = permA2; \
#define F_INIT(i) \
vector unsigned char tenRightM##i = tenRight; \
vector unsigned char permA1M##i = permA1; \
vector unsigned char permA2M##i = permA2; \
vector unsigned char extractPermM##i = extractPerm
#define F2(i, j, k, l) \
if (S[i] & (1 << (l+1))) { \
const vector unsigned char a_##j##_A##l = \
vec_perm(src##i, src##j, permA1M##i); \
const vector unsigned char a_##j##_B##l = \
vec_perm(a_##j##_A##l, src##k, permA2M##i); \
const vector signed int a_##j##_sump##l = \
(vector signed int)vec_msum(a_##j##_B##l, magic, \
(vector unsigned int)zero); \
vector signed int F_##j##_##l = \
vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \
F_##j##_##l = vec_splat(F_##j##_##l, 3); \
const vector signed int p_##j##_##l = \
(vector signed int)vec_perm(src##j, \
(vector unsigned char)zero, \
extractPermM##i); \
const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \
const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
vector signed int newpm_##j##_##l; \
if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \
newpm_##j##_##l = sum_##j##_##l; \
else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \
newpm_##j##_##l = diff_##j##_##l; \
else newpm_##j##_##l = F_##j##_##l; \
const vector unsigned char newpm2_##j##_##l = \
vec_splat((vector unsigned char)newpm_##j##_##l, 15); \
const vector unsigned char mask##j##l = vec_add(identity, \
tenRightM##i); \
src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \
} \
permA1M##i = vec_add(permA1M##i, permA1inc); \
permA2M##i = vec_add(permA2M##i, permA2inc); \
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \
#define F2(i, j, k, l) \
if (S[i] & (1 << (l+1))) { \
const vector unsigned char a_##j##_A##l = \
vec_perm(src##i, src##j, permA1M##i); \
const vector unsigned char a_##j##_B##l = \
vec_perm(a_##j##_A##l, src##k, permA2M##i); \
const vector signed int a_##j##_sump##l = \
(vector signed int)vec_msum(a_##j##_B##l, magic, \
(vector unsigned int)zero); \
vector signed int F_##j##_##l = \
vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \
F_##j##_##l = vec_splat(F_##j##_##l, 3); \
const vector signed int p_##j##_##l = \
(vector signed int)vec_perm(src##j, \
(vector unsigned char)zero, \
extractPermM##i); \
const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\
const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\
vector signed int newpm_##j##_##l; \
if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \
newpm_##j##_##l = sum_##j##_##l; \
else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \
newpm_##j##_##l = diff_##j##_##l; \
else newpm_##j##_##l = F_##j##_##l; \
const vector unsigned char newpm2_##j##_##l = \
vec_splat((vector unsigned char)newpm_##j##_##l, 15); \
const vector unsigned char mask##j##l = vec_add(identity, \
tenRightM##i); \
src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \
} \
permA1M##i = vec_add(permA1M##i, permA1inc); \
permA2M##i = vec_add(permA2M##i, permA2inc); \
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \
extractPermM##i = vec_add(extractPermM##i, extractPermInc)
#define ITER(i, j, k) \
F_INIT(i); \
F2(i, j, k, 0); \
F2(i, j, k, 1); \
F2(i, j, k, 2); \
F2(i, j, k, 3); \
F2(i, j, k, 4); \
F2(i, j, k, 5); \
F2(i, j, k, 6); \
#define ITER(i, j, k) \
F_INIT(i); \
F2(i, j, k, 0); \
F2(i, j, k, 1); \
F2(i, j, k, 2); \
F2(i, j, k, 3); \
F2(i, j, k, 4); \
F2(i, j, k, 5); \
F2(i, j, k, 6); \
F2(i, j, k, 7)
ITER(0, 1, 2);
@ -777,16 +777,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector signed char neg1 = vec_splat_s8(-1);
#define STORE_LINE(i) \
const vector unsigned char permST##i = \
vec_lvsr(i * stride, srcCopy); \
const vector unsigned char maskST##i = \
vec_perm((vector unsigned char)zero, \
(vector unsigned char)neg1, permST##i); \
src##i = vec_perm(src##i ,src##i, permST##i); \
sA##i= vec_sel(sA##i, src##i, maskST##i); \
sB##i= vec_sel(src##i, sB##i, maskST##i); \
vec_st(sA##i, i * stride, srcCopy); \
#define STORE_LINE(i) \
const vector unsigned char permST##i = \
vec_lvsr(i * stride, srcCopy); \
const vector unsigned char maskST##i = \
vec_perm((vector unsigned char)zero, \
(vector unsigned char)neg1, permST##i); \
src##i = vec_perm(src##i ,src##i, permST##i); \
sA##i= vec_sel(sA##i, src##i, maskST##i); \
sB##i= vec_sel(src##i, sB##i, maskST##i); \
vec_st(sA##i, i * stride, srcCopy); \
vec_st(sB##i, i * stride + 16, srcCopy)
STORE_LINE(1);
@ -808,7 +808,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
{
const vector signed int zero = vec_splat_s32(0);
const vector signed short vsint16_1 = vec_splat_s16(1);
@ -820,16 +820,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
tempBluredPast[128]= maxNoise[1];
tempBluredPast[129]= maxNoise[2];
#define LOAD_LINE(src, i) \
register int j##src##i = i * stride; \
vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
#define LOAD_LINE(src, i) \
register int j##src##i = i * stride; \
vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \
const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
const vector unsigned char v_##src##A##i = \
vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \
vector signed short v_##src##Ass##i = \
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_##src##A##i)
const vector unsigned char v_##src##A##i = \
vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \
vector signed short v_##src##Ass##i = \
(vector signed short)vec_mergeh((vector signed char)zero, \
(vector signed char)v_##src##A##i)
LOAD_LINE(src, 0);
LOAD_LINE(src, 1);
@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
LOAD_LINE(tempBlured, 7);
#undef LOAD_LINE
#define ACCUMULATE_DIFFS(i) \
vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \
v_srcAss##i); \
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
#define ACCUMULATE_DIFFS(i) \
vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \
v_srcAss##i); \
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
ACCUMULATE_DIFFS(0);
@ -916,12 +916,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_4 = vec_splat_s16(4);
const vector unsigned short vuint16_3 = vec_splat_u16(3);
#define OP(i) \
const vector signed short v_temp##i = \
vec_mladd(v_tempBluredAss##i, \
vsint16_7, v_srcAss##i); \
const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_4); \
#define OP(i) \
const vector signed short v_temp##i = \
vec_mladd(v_tempBluredAss##i, \
vsint16_7, v_srcAss##i); \
const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_4); \
v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)
OP(0);
@ -937,12 +937,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_3 = vec_splat_s16(3);
const vector signed short vsint16_2 = vec_splat_s16(2);
#define OP(i) \
const vector signed short v_temp##i = \
vec_mladd(v_tempBluredAss##i, \
vsint16_3, v_srcAss##i); \
const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_2); \
#define OP(i) \
const vector signed short v_temp##i = \
vec_mladd(v_tempBluredAss##i, \
vsint16_3, v_srcAss##i); \
const vector signed short v_temp2##i = \
vec_add(v_temp##i, vsint16_2); \
v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
OP(0);
@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
#define PACK_AND_STORE(src, i) \
const vector unsigned char perms##src##i = \
vec_lvsr(i * stride, src); \
const vector unsigned char vf##src##i = \
vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \
const vector unsigned char vg##src##i = \
vec_perm(vf##src##i, v_##src##A##i, permHH); \
const vector unsigned char mask##src##i = \
#define PACK_AND_STORE(src, i) \
const vector unsigned char perms##src##i = \
vec_lvsr(i * stride, src); \
const vector unsigned char vf##src##i = \
vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \
const vector unsigned char vg##src##i = \
vec_perm(vf##src##i, v_##src##A##i, permHH); \
const vector unsigned char mask##src##i = \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
const vector unsigned char vg2##src##i = \
vec_perm(vg##src##i, vg##src##i, perms##src##i); \
const vector unsigned char svA##src##i = \
vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \
const vector unsigned char svB##src##i = \
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
vec_st(svA##src##i, i * stride, src); \
const vector unsigned char vg2##src##i = \
vec_perm(vg##src##i, vg##src##i, perms##src##i); \
const vector unsigned char svA##src##i = \
vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \
const vector unsigned char svB##src##i = \
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
vec_st(svA##src##i, i * stride, src); \
vec_st(svB##src##i, i * stride + 16, src)
PACK_AND_STORE(src, 0);
@ -1001,14 +1001,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
const vector unsigned char zero = vec_splat_u8(0);
#define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char perm1##i = vec_lvsl(i * stride, src); \
vector unsigned char perm2##i = vec_lvsl(j * stride, src); \
vector unsigned char srcA##i = vec_ld(i * stride, src); \
#define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char perm1##i = vec_lvsl(i * stride, src); \
vector unsigned char perm2##i = vec_lvsl(j * stride, src); \
vector unsigned char srcA##i = vec_ld(i * stride, src); \
vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \
vector unsigned char srcC##i = vec_ld(j * stride, src); \
vector unsigned char srcC##i = vec_ld(j * stride, src); \
vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \
vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)
LOAD_DOUBLE_LINE(0, 1);
@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
const vector unsigned char zero = vec_splat_u8(0);
const vector unsigned char magic_perm = (const vector unsigned char)
AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
#define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char src##i = vec_ld(i * 16, src); \
#define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char src##i = vec_ld(i * 16, src); \
vector unsigned char src##j = vec_ld(j * 16, src)
LOAD_DOUBLE_LINE(0, 1);
@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
const vector signed char neg1 = vec_splat_s8(-1);
#define STORE_DOUBLE_LINE(i, j) \
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
vec_st(dstAF##i, i * stride, dst); \
vec_st(dstBF##i, i * stride + 16, dst); \
vec_st(dstAF##j, j * stride, dst); \
#define STORE_DOUBLE_LINE(i, j) \
vector unsigned char dstA##i = vec_ld(i * stride, dst); \
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \
vector unsigned char dstA##j = vec_ld(j * stride, dst); \
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \
vector unsigned char align##i = vec_lvsr(i * stride, dst); \
vector unsigned char align##j = vec_lvsr(j * stride, dst); \
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \
vec_st(dstAF##i, i * stride, dst); \
vec_st(dstBF##i, i * stride + 16, dst); \
vec_st(dstAF##j, j * stride, dst); \
vec_st(dstBF##j, j * stride + 16, dst)
STORE_DOUBLE_LINE(0,1);

View File

@ -21,42 +21,42 @@
* internal api header.
*/
#define V_DEBLOCK 0x01
#define H_DEBLOCK 0x02
#define DERING 0x04
#define LEVEL_FIX 0x08 ///< Brightness & Contrast
#define V_DEBLOCK 0x01
#define H_DEBLOCK 0x02
#define DERING 0x04
#define LEVEL_FIX 0x08 ///< Brightness & Contrast
#define LUM_V_DEBLOCK V_DEBLOCK // 1
#define LUM_H_DEBLOCK H_DEBLOCK // 2
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32
#define LUM_DERING DERING // 4
#define CHROM_DERING (DERING<<4) // 64
#define LUM_LEVEL_FIX LEVEL_FIX // 8
#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet)
#define LUM_V_DEBLOCK V_DEBLOCK // 1
#define LUM_H_DEBLOCK H_DEBLOCK // 2
#define CHROM_V_DEBLOCK (V_DEBLOCK<<4) // 16
#define CHROM_H_DEBLOCK (H_DEBLOCK<<4) // 32
#define LUM_DERING DERING // 4
#define CHROM_DERING (DERING<<4) // 64
#define LUM_LEVEL_FIX LEVEL_FIX // 8
#define CHROM_LEVEL_FIX (LEVEL_FIX<<4) // 128 (not implemented yet)
// Experimental vertical filters
#define V_X1_FILTER 0x0200 // 512
#define V_A_DEBLOCK 0x0400
#define V_X1_FILTER 0x0200 // 512
#define V_A_DEBLOCK 0x0400
// Experimental horizontal filters
#define H_X1_FILTER 0x2000 // 8192
#define H_A_DEBLOCK 0x4000
#define H_X1_FILTER 0x2000 // 8192
#define H_A_DEBLOCK 0x4000
/// select between full y range (255-0) or standart one (234-16)
#define FULL_Y_RANGE 0x8000 // 32768
#define FULL_Y_RANGE 0x8000 // 32768
//Deinterlacing Filters
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
#define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet)
#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144
#define MEDIAN_DEINT_FILTER 0x80000 // 524288
#define FFMPEG_DEINT_FILTER 0x400000
#define LOWPASS5_DEINT_FILTER 0x800000
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
#define CUBIC_BLEND_DEINT_FILTER 0x8000 // (not implemented yet)
#define CUBIC_IPOL_DEINT_FILTER 0x40000 // 262144
#define MEDIAN_DEINT_FILTER 0x80000 // 524288
#define FFMPEG_DEINT_FILTER 0x400000
#define LOWPASS5_DEINT_FILTER 0x800000
#define TEMP_NOISE_FILTER 0x100000
#define FORCE_QUANT 0x200000
#define TEMP_NOISE_FILTER 0x100000
#define FORCE_QUANT 0x200000
//use if u want a faster postprocessing code
//cant differentiate between chroma & luma filters (both on or both off)
@ -66,8 +66,8 @@
#if 1
static inline int CLIP(int a){
if(a&256) return ((a)>>31)^(-1);
else return a;
if(a&256) return ((a)>>31)^(-1);
else return a;
}
//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
#elif 0
@ -79,92 +79,92 @@ static inline int CLIP(int a){
* Postprocessng filter.
*/
struct PPFilter{
char *shortName;
char *longName;
int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated
int minLumQuality; ///< minimum quality to turn luminance filtering on
int minChromQuality; ///< minimum quality to turn chrominance filtering on
int mask; ///< Bitmask to turn this filter on
char *shortName;
char *longName;
int chromDefault; ///< is chrominance filtering on by default if this filter is manually activated
int minLumQuality; ///< minimum quality to turn luminance filtering on
int minChromQuality; ///< minimum quality to turn chrominance filtering on
int mask; ///< Bitmask to turn this filter on
};
/**
* Postprocessng mode.
*/
typedef struct PPMode{
int lumMode; ///< acivates filters for luminance
int chromMode; ///< acivates filters for chrominance
int error; ///< non zero on error
int lumMode; ///< acivates filters for luminance
int chromMode; ///< acivates filters for chrominance
int error; ///< non zero on error
int minAllowedY; ///< for brigtness correction
int maxAllowedY; ///< for brihtness correction
float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture
int minAllowedY; ///< for brigtness correction
int maxAllowedY; ///< for brihtness correction
float maxClippedThreshold; ///< amount of "black" u r willing to loose to get a brightness corrected picture
int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
int baseDcDiff;
int flatnessThreshold;
int baseDcDiff;
int flatnessThreshold;
int forcedQuant; ///< quantizer if FORCE_QUANT is used
int forcedQuant; ///< quantizer if FORCE_QUANT is used
} PPMode;
/**
* postprocess context.
*/
typedef struct PPContext{
uint8_t *tempBlocks; ///<used for the horizontal code
uint8_t *tempBlocks; ///<used for the horizontal code
/**
* luma histogram.
* we need 64bit here otherwise we'll going to have a problem
* after watching a black picture for 5 hours
*/
uint64_t *yHistogram;
/**
* luma histogram.
* we need 64bit here otherwise we'll going to have a problem
* after watching a black picture for 5 hours
*/
uint64_t *yHistogram;
uint64_t __attribute__((aligned(8))) packedYOffset;
uint64_t __attribute__((aligned(8))) packedYScale;
uint64_t __attribute__((aligned(8))) packedYOffset;
uint64_t __attribute__((aligned(8))) packedYScale;
/** Temporal noise reducing buffers */
uint8_t *tempBlured[3];
int32_t *tempBluredPast[3];
/** Temporal noise reducing buffers */
uint8_t *tempBlured[3];
int32_t *tempBluredPast[3];
/** Temporary buffers for handling the last row(s) */
uint8_t *tempDst;
uint8_t *tempSrc;
/** Temporary buffers for handling the last row(s) */
uint8_t *tempDst;
uint8_t *tempSrc;
uint8_t *deintTemp;
uint8_t *deintTemp;
uint64_t __attribute__((aligned(8))) pQPb;
uint64_t __attribute__((aligned(8))) pQPb2;
uint64_t __attribute__((aligned(8))) pQPb;
uint64_t __attribute__((aligned(8))) pQPb2;
uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
QP_STORE_T *nonBQPTable;
QP_STORE_T *forcedQPTable;
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
QP_STORE_T *nonBQPTable;
QP_STORE_T *forcedQPTable;
int QP;
int nonBQP;
int QP;
int nonBQP;
int frameNum;
int frameNum;
int cpuCaps;
int cpuCaps;
int qpStride; ///<size of qp buffers (needed to realloc them if needed)
int stride; ///<size of some buffers (needed to realloc them if needed)
int qpStride; ///<size of qp buffers (needed to realloc them if needed)
int stride; ///<size of some buffers (needed to realloc them if needed)
int hChromaSubSample;
int vChromaSubSample;
int hChromaSubSample;
int vChromaSubSample;
PPMode ppMode;
PPMode ppMode;
} PPContext;
static inline void linecpy(void *dest, void *src, int lines, int stride)
{
if (stride > 0) {
memcpy(dest, src, lines*stride);
} else {
memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
}
if (stride > 0) {
memcpy(dest, src, lines*stride);
} else {
memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -64,8 +64,8 @@ void *av_malloc(unsigned int size)
Indeed, we should align it:
on 4 for 386
on 16 for 486
on 32 for 586, PPro - k6-III
on 64 for K7 (maybe for P3 too).
on 32 for 586, PPro - k6-III
on 64 for K7 (maybe for P3 too).
Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here!
*/
@ -76,13 +76,13 @@ void *av_malloc(unsigned int size)
Why not larger? because i didnt see a difference in benchmarks ...
*/
/* benchmarks with p3
memalign(64)+1 3071,3051,3032
memalign(64)+2 3051,3032,3041
memalign(64)+4 2911,2896,2915
memalign(64)+8 2545,2554,2550
memalign(64)+16 2543,2572,2563
memalign(64)+32 2546,2545,2571
memalign(64)+64 2570,2533,2558
memalign(64)+1 3071,3051,3032
memalign(64)+2 3051,3032,3041
memalign(64)+4 2911,2896,2915
memalign(64)+8 2545,2554,2550
memalign(64)+16 2543,2572,2563
memalign(64)+32 2546,2545,2571
memalign(64)+64 2570,2533,2558
btw, malloc seems to do 8 byte alignment by default here
*/

View File

@ -54,26 +54,26 @@ typedef struct MJpegContext {
/* JPEG marker codes */
typedef enum {
/* start of frame */
SOF0 = 0xc0, /* baseline */
SOF1 = 0xc1, /* extended sequential, huffman */
SOF2 = 0xc2, /* progressive, huffman */
SOF3 = 0xc3, /* lossless, huffman */
SOF0 = 0xc0, /* baseline */
SOF1 = 0xc1, /* extended sequential, huffman */
SOF2 = 0xc2, /* progressive, huffman */
SOF3 = 0xc3, /* lossless, huffman */
SOF5 = 0xc5, /* differential sequential, huffman */
SOF6 = 0xc6, /* differential progressive, huffman */
SOF7 = 0xc7, /* differential lossless, huffman */
JPG = 0xc8, /* reserved for JPEG extension */
SOF9 = 0xc9, /* extended sequential, arithmetic */
SOF10 = 0xca, /* progressive, arithmetic */
SOF11 = 0xcb, /* lossless, arithmetic */
SOF5 = 0xc5, /* differential sequential, huffman */
SOF6 = 0xc6, /* differential progressive, huffman */
SOF7 = 0xc7, /* differential lossless, huffman */
JPG = 0xc8, /* reserved for JPEG extension */
SOF9 = 0xc9, /* extended sequential, arithmetic */
SOF10 = 0xca, /* progressive, arithmetic */
SOF11 = 0xcb, /* lossless, arithmetic */
SOF13 = 0xcd, /* differential sequential, arithmetic */
SOF14 = 0xce, /* differential progressive, arithmetic */
SOF15 = 0xcf, /* differential lossless, arithmetic */
SOF13 = 0xcd, /* differential sequential, arithmetic */
SOF14 = 0xce, /* differential progressive, arithmetic */
SOF15 = 0xcf, /* differential lossless, arithmetic */
DHT = 0xc4, /* define huffman tables */
DHT = 0xc4, /* define huffman tables */
DAC = 0xcc, /* define arithmetic-coding conditioning */
DAC = 0xcc, /* define arithmetic-coding conditioning */
/* restart with modulo 8 count "m" */
RST0 = 0xd0,
@ -85,14 +85,14 @@ typedef enum {
RST6 = 0xd6,
RST7 = 0xd7,
SOI = 0xd8, /* start of image */
EOI = 0xd9, /* end of image */
SOS = 0xda, /* start of scan */
DQT = 0xdb, /* define quantization tables */
DNL = 0xdc, /* define number of lines */
DRI = 0xdd, /* define restart interval */
DHP = 0xde, /* define hierarchical progression */
EXP = 0xdf, /* expand reference components */
SOI = 0xd8, /* start of image */
EOI = 0xd9, /* end of image */
SOS = 0xda, /* start of scan */
DQT = 0xdb, /* define quantization tables */
DNL = 0xdc, /* define number of lines */
DRI = 0xdd, /* define restart interval */
DHP = 0xde, /* define hierarchical progression */
EXP = 0xdf, /* expand reference components */
APP0 = 0xe0,
APP1 = 0xe1,
@ -118,17 +118,17 @@ typedef enum {
JPG4 = 0xf4,
JPG5 = 0xf5,
JPG6 = 0xf6,
SOF48 = 0xf7, ///< JPEG-LS
LSE = 0xf8, ///< JPEG-LS extension parameters
SOF48 = 0xf7, ///< JPEG-LS
LSE = 0xf8, ///< JPEG-LS extension parameters
JPG9 = 0xf9,
JPG10 = 0xfa,
JPG11 = 0xfb,
JPG12 = 0xfc,
JPG13 = 0xfd,
COM = 0xfe, /* comment */
COM = 0xfe, /* comment */
TEM = 0x01, /* temporary private use for arithmetic coding */
TEM = 0x01, /* temporary private use for arithmetic coding */
/* 0x02 -> 0xbf reserved */
} JPEG_MARKER;
@ -583,7 +583,7 @@ void mjpeg_picture_trailer(MpegEncContext *s)
}
static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
uint8_t *huff_size, uint16_t *huff_code)
uint8_t *huff_size, uint16_t *huff_code)
{
int mant, nbits;
@ -935,10 +935,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
{
av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
mjpeg_decode_dht(s);
/* should check for error - but dunno */
av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
mjpeg_decode_dht(s);
/* should check for error - but dunno */
}
return 0;
@ -1017,10 +1017,10 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
while (len >= 65) {
/* only 8 bit precision handled */
if (get_bits(&s->gb, 4) != 0)
{
dprintf("dqt: 16bit precision\n");
{
dprintf("dqt: 16bit precision\n");
return -1;
}
}
index = get_bits(&s->gb, 4);
if (index >= 4)
return -1;
@ -1028,14 +1028,14 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
/* read quant table */
for(i=0;i<64;i++) {
j = s->scantable.permutated[i];
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
}
//XXX FIXME finetune, and perhaps add dc too
s->qscale[index]= FFMAX(
s->quant_matrixes[index][s->scantable.permutated[1]],
s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
len -= 65;
}
@ -1132,7 +1132,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
if (s->quant_index[i] >= 4)
return -1;
dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
s->v_count[i], s->component_id[i], s->quant_index[i]);
s->v_count[i], s->component_id[i], s->quant_index[i]);
}
if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
@ -1151,7 +1151,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
s->org_height != 0 &&
s->height < ((s->org_height * 3) / 4)) {
s->interlaced = 1;
// s->bottom_field = (s->interlace_polarity) ? 1 : 0;
// s->bottom_field = (s->interlace_polarity) ? 1 : 0;
s->bottom_field = 0;
s->avctx->height *= 2;
}
@ -1202,7 +1202,7 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
if (len != (8+(3*nb_components)))
{
dprintf("decode_sof0: error, len(%d) mismatch\n", len);
dprintf("decode_sof0: error, len(%d) mismatch\n", len);
}
return 0;
@ -1214,7 +1214,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
if (code < 0)
{
dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
&s->vlcs[0][dc_index]);
return 0xffff;
}
@ -1247,7 +1247,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
ac_vlc = &s->vlcs[1][ac_index];
i = 1;
for(;;) {
code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
if (code < 0) {
dprintf("error ac\n");
@ -1452,7 +1452,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){
dprintf("error y=%d x=%d\n", mb_y, mb_x);
return -1;
}
// dprintf("mb: %d %d processed\n", mb_y, mb_x);
// dprintf("mb: %d %d processed\n", mb_y, mb_x);
ptr = s->picture.data[c] +
(((s->linesize[c] * (v * mb_y + y) * 8) +
(h * mb_x + x) * 8) >> s->avctx->lowres);
@ -1491,29 +1491,29 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
nb_components = get_bits(&s->gb, 8);
if (len != 6+2*nb_components)
{
dprintf("decode_sos: invalid len (%d)\n", len);
return -1;
dprintf("decode_sos: invalid len (%d)\n", len);
return -1;
}
/* XXX: only interleaved scan accepted */
if (nb_components != s->nb_components)
{
dprintf("decode_sos: components(%d) mismatch\n", nb_components);
dprintf("decode_sos: components(%d) mismatch\n", nb_components);
return -1;
}
vmax = 0;
hmax = 0;
for(i=0;i<nb_components;i++) {
id = get_bits(&s->gb, 8) - 1;
dprintf("component: %d\n", id);
dprintf("component: %d\n", id);
/* find component index */
for(index=0;index<s->nb_components;index++)
if (id == s->component_id[index])
break;
if (index == s->nb_components)
{
dprintf("decode_sos: index(%d) out of components\n", index);
{
dprintf("decode_sos: index(%d) out of components\n", index);
return -1;
}
}
s->comp_index[i] = index;
@ -1524,26 +1524,26 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
s->dc_index[i] = get_bits(&s->gb, 4);
s->ac_index[i] = get_bits(&s->gb, 4);
if (s->dc_index[i] < 0 || s->ac_index[i] < 0 ||
s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
goto out_of_range;
if (s->dc_index[i] < 0 || s->ac_index[i] < 0 ||
s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
goto out_of_range;
#if 0 //buggy
switch(s->start_code)
{
case SOF0:
if (dc_index[i] > 1 || ac_index[i] > 1)
goto out_of_range;
break;
case SOF1:
case SOF2:
if (dc_index[i] > 3 || ac_index[i] > 3)
goto out_of_range;
break;
case SOF3:
if (dc_index[i] > 3 || ac_index[i] != 0)
goto out_of_range;
break;
}
switch(s->start_code)
{
case SOF0:
if (dc_index[i] > 1 || ac_index[i] > 1)
goto out_of_range;
break;
case SOF1:
case SOF2:
if (dc_index[i] > 3 || ac_index[i] > 3)
goto out_of_range;
break;
case SOF3:
if (dc_index[i] > 3 || ac_index[i] != 0)
goto out_of_range;
break;
}
#endif
}
@ -1605,7 +1605,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
static int mjpeg_decode_dri(MJpegDecodeContext *s)
{
if (get_bits(&s->gb, 16) != 4)
return -1;
return -1;
s->restart_interval = get_bits(&s->gb, 16);
s->restart_count = 0;
dprintf("restart interval: %d\n", s->restart_interval);
@ -1619,7 +1619,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
len = get_bits(&s->gb, 16);
if (len < 5)
return -1;
return -1;
if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
return -1;
@ -1636,35 +1636,35 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
information, but it's always present in AVID-created files */
if (id == ff_get_fourcc("AVI1"))
{
/* structure:
4bytes AVI1
1bytes polarity
1bytes always zero
4bytes field_size
4bytes field_size_less_padding
*/
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
s->interlace_polarity = get_bits(&s->gb, 8);
/* structure:
4bytes AVI1
1bytes polarity
1bytes always zero
4bytes field_size
4bytes field_size_less_padding
*/
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
s->interlace_polarity = get_bits(&s->gb, 8);
#if 0
skip_bits(&s->gb, 8);
skip_bits(&s->gb, 32);
skip_bits(&s->gb, 32);
len -= 10;
skip_bits(&s->gb, 8);
skip_bits(&s->gb, 32);
skip_bits(&s->gb, 32);
len -= 10;
#endif
// if (s->interlace_polarity)
// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
goto out;
// if (s->interlace_polarity)
// printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
goto out;
}
// len -= 2;
if (id == ff_get_fourcc("JFIF"))
{
int t_w, t_h, v1, v2;
skip_bits(&s->gb, 8); /* the trailing zero-byte */
v1= get_bits(&s->gb, 8);
int t_w, t_h, v1, v2;
skip_bits(&s->gb, 8); /* the trailing zero-byte */
v1= get_bits(&s->gb, 8);
v2= get_bits(&s->gb, 8);
skip_bits(&s->gb, 8);
@ -1678,37 +1678,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
s->avctx->sample_aspect_ratio.den
);
t_w = get_bits(&s->gb, 8);
t_h = get_bits(&s->gb, 8);
if (t_w && t_h)
{
/* skip thumbnail */
if (len-10-(t_w*t_h*3) > 0)
len -= t_w*t_h*3;
}
len -= 10;
goto out;
t_w = get_bits(&s->gb, 8);
t_h = get_bits(&s->gb, 8);
if (t_w && t_h)
{
/* skip thumbnail */
if (len-10-(t_w*t_h*3) > 0)
len -= t_w*t_h*3;
}
len -= 10;
goto out;
}
if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
{
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
skip_bits(&s->gb, 16); /* version */
skip_bits(&s->gb, 16); /* flags0 */
skip_bits(&s->gb, 16); /* flags1 */
skip_bits(&s->gb, 8); /* transform */
len -= 7;
goto out;
skip_bits(&s->gb, 16); /* version */
skip_bits(&s->gb, 16); /* flags0 */
skip_bits(&s->gb, 16); /* flags1 */
skip_bits(&s->gb, 8); /* transform */
len -= 7;
goto out;
}
if (id == ff_get_fourcc("LJIF")){
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
skip_bits(&s->gb, 16); /* version ? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
skip_bits(&s->gb, 16); /* version ? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
skip_bits(&s->gb, 16); /* unknown, always 0? */
switch( get_bits(&s->gb, 8)){
case 1:
s->rgb= 1;
@ -1728,32 +1728,32 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
/* Apple MJPEG-A */
if ((s->start_code == APP1) && (len > (0x28 - 8)))
{
id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
id = be2me_32(id);
len -= 4;
if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
{
id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
id = be2me_32(id);
len -= 4;
if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
{
#if 0
skip_bits(&s->gb, 32); /* field size */
skip_bits(&s->gb, 32); /* pad field size */
skip_bits(&s->gb, 32); /* next off */
skip_bits(&s->gb, 32); /* quant off */
skip_bits(&s->gb, 32); /* huff off */
skip_bits(&s->gb, 32); /* image off */
skip_bits(&s->gb, 32); /* scan off */
skip_bits(&s->gb, 32); /* data off */
skip_bits(&s->gb, 32); /* field size */
skip_bits(&s->gb, 32); /* pad field size */
skip_bits(&s->gb, 32); /* next off */
skip_bits(&s->gb, 32); /* quant off */
skip_bits(&s->gb, 32); /* huff off */
skip_bits(&s->gb, 32); /* image off */
skip_bits(&s->gb, 32); /* scan off */
skip_bits(&s->gb, 32); /* data off */
#endif
if (s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
}
av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
}
}
out:
/* slow but needed for extreme adobe jpegs */
if (len < 0)
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
while(--len > 0)
skip_bits(&s->gb, 8);
skip_bits(&s->gb, 8);
return 0;
}
@ -1762,32 +1762,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
{
int len = get_bits(&s->gb, 16);
if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
uint8_t *cbuf = av_malloc(len - 1);
if (cbuf) {
int i;
for (i = 0; i < len - 2; i++)
cbuf[i] = get_bits(&s->gb, 8);
if (i > 0 && cbuf[i-1] == '\n')
cbuf[i-1] = 0;
else
cbuf[i] = 0;
uint8_t *cbuf = av_malloc(len - 1);
if (cbuf) {
int i;
for (i = 0; i < len - 2; i++)
cbuf[i] = get_bits(&s->gb, 8);
if (i > 0 && cbuf[i-1] == '\n')
cbuf[i-1] = 0;
else
cbuf[i] = 0;
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
/* buggy avid, it puts EOI only at every 10th frame */
if (!strcmp(cbuf, "AVID"))
{
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
}
/* buggy avid, it puts EOI only at every 10th frame */
if (!strcmp(cbuf, "AVID"))
{
s->buggy_avid = 1;
// if (s->first_picture)
// printf("mjpeg: workarounding buggy AVID\n");
}
else if(!strcmp(cbuf, "CS=ITU601")){
s->cs_itu601= 1;
}
av_free(cbuf);
}
av_free(cbuf);
}
}
return 0;
@ -1830,13 +1830,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
buf_ptr = *pbuf_ptr;
while (buf_ptr < buf_end) {
v = *buf_ptr++;
v2 = *buf_ptr;
v2 = *buf_ptr;
if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
val = *buf_ptr++;
goto found;
val = *buf_ptr++;
goto found;
}
#ifdef DEBUG
skipped++;
skipped++;
#endif
}
val = -1;
@ -1862,74 +1862,74 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
while (buf_ptr < buf_end) {
/* find start next marker */
start_code = find_marker(&buf_ptr, buf_end);
{
/* EOF */
{
/* EOF */
if (start_code < 0) {
goto the_end;
goto the_end;
} else {
dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);
if ((buf_end - buf_ptr) > s->buffer_size)
{
av_free(s->buffer);
s->buffer_size = buf_end-buf_ptr;
if ((buf_end - buf_ptr) > s->buffer_size)
{
av_free(s->buffer);
s->buffer_size = buf_end-buf_ptr;
s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
dprintf("buffer too small, expanding to %d bytes\n",
s->buffer_size);
}
dprintf("buffer too small, expanding to %d bytes\n",
s->buffer_size);
}
/* unescape buffer of SOS */
if (start_code == SOS)
{
uint8_t *src = buf_ptr;
uint8_t *dst = s->buffer;
/* unescape buffer of SOS */
if (start_code == SOS)
{
uint8_t *src = buf_ptr;
uint8_t *dst = s->buffer;
while (src<buf_end)
{
uint8_t x = *(src++);
while (src<buf_end)
{
uint8_t x = *(src++);
*(dst++) = x;
if (x == 0xff)
{
*(dst++) = x;
if (x == 0xff)
{
while(src<buf_end && x == 0xff)
x = *(src++);
if (x >= 0xd0 && x <= 0xd7)
*(dst++) = x;
else if (x)
break;
}
}
init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
if (x >= 0xd0 && x <= 0xd7)
*(dst++) = x;
else if (x)
break;
}
}
init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
dprintf("escaping removed %d bytes\n",
(buf_end - buf_ptr) - (dst - s->buffer));
}
else
init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
dprintf("escaping removed %d bytes\n",
(buf_end - buf_ptr) - (dst - s->buffer));
}
else
init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
s->start_code = start_code;
s->start_code = start_code;
if(s->avctx->debug & FF_DEBUG_STARTCODE){
av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
}
/* process markers */
if (start_code >= 0xd0 && start_code <= 0xd7) {
dprintf("restart marker: %d\n", start_code&0x0f);
/* APP fields */
} else if (start_code >= APP0 && start_code <= APP15) {
mjpeg_decode_app(s);
/* Comment */
} else if (start_code == COM){
mjpeg_decode_com(s);
}
/* process markers */
if (start_code >= 0xd0 && start_code <= 0xd7) {
dprintf("restart marker: %d\n", start_code&0x0f);
/* APP fields */
} else if (start_code >= APP0 && start_code <= APP15) {
mjpeg_decode_app(s);
/* Comment */
} else if (start_code == COM){
mjpeg_decode_com(s);
}
switch(start_code) {
case SOI:
s->restart_interval = 0;
s->restart_interval = 0;
reset_ls_coding_parameters(s, 1);
s->restart_count = 0;
s->restart_count = 0;
/* nothing to do on SOI */
break;
case DQT:
@ -1944,12 +1944,12 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
case SOF0:
s->lossless=0;
if (mjpeg_decode_sof(s) < 0)
return -1;
return -1;
break;
case SOF3:
s->lossless=1;
if (mjpeg_decode_sof(s) < 0)
return -1;
return -1;
break;
case SOF48:
s->lossless=1;
@ -1961,11 +1961,11 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
if (decode_lse(s) < 0)
return -1;
break;
case EOI:
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
case EOI:
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
break;
eoi_parser:
{
{
if (s->interlaced) {
s->bottom_field ^= 1;
/* if not bottom field, do not output image yet */
@ -1987,41 +1987,41 @@ eoi_parser:
goto the_end;
}
break;
break;
case SOS:
mjpeg_decode_sos(s);
/* buggy avid puts EOI every 10-20th frame */
/* if restart period is over process EOI */
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
goto eoi_parser;
/* buggy avid puts EOI every 10-20th frame */
/* if restart period is over process EOI */
if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
goto eoi_parser;
break;
case DRI:
mjpeg_decode_dri(s);
break;
case SOF1:
case SOF2:
case SOF5:
case SOF6:
case SOF7:
case SOF9:
case SOF10:
case SOF11:
case SOF13:
case SOF14:
case SOF15:
case JPG:
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
break;
// default:
// printf("mjpeg: unsupported marker (%x)\n", start_code);
// break;
case DRI:
mjpeg_decode_dri(s);
break;
case SOF1:
case SOF2:
case SOF5:
case SOF6:
case SOF7:
case SOF9:
case SOF10:
case SOF11:
case SOF13:
case SOF14:
case SOF15:
case JPG:
av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
break;
// default:
// printf("mjpeg: unsupported marker (%x)\n", start_code);
// break;
}
not_the_end:
/* eof process start code */
buf_ptr += (get_bits_count(&s->gb)+7)/8;
dprintf("marker parser used %d bytes (%d bits)\n",
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
/* eof process start code */
buf_ptr += (get_bits_count(&s->gb)+7)/8;
dprintf("marker parser used %d bytes (%d bits)\n",
(get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
}
}
}
@ -2057,8 +2057,8 @@ read_header:
if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
{
dprintf("not mjpeg-b (bad fourcc)\n");
return 0;
dprintf("not mjpeg-b (bad fourcc)\n");
return 0;
}
field_size = get_bits_long(&hgb, 32); /* field size */
@ -2067,34 +2067,34 @@ read_header:
second_field_offs = get_bits_long(&hgb, 32);
dprintf("second field offs: 0x%x\n", second_field_offs);
if (second_field_offs)
s->interlaced = 1;
s->interlaced = 1;
dqt_offs = get_bits_long(&hgb, 32);
dprintf("dqt offs: 0x%x\n", dqt_offs);
if (dqt_offs)
{
init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
s->start_code = DQT;
mjpeg_decode_dqt(s);
init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
s->start_code = DQT;
mjpeg_decode_dqt(s);
}
dht_offs = get_bits_long(&hgb, 32);
dprintf("dht offs: 0x%x\n", dht_offs);
if (dht_offs)
{
init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
s->start_code = DHT;
mjpeg_decode_dht(s);
init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
s->start_code = DHT;
mjpeg_decode_dht(s);
}
sof_offs = get_bits_long(&hgb, 32);
dprintf("sof offs: 0x%x\n", sof_offs);
if (sof_offs)
{
init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
s->start_code = SOF0;
if (mjpeg_decode_sof(s) < 0)
return -1;
init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
s->start_code = SOF0;
if (mjpeg_decode_sof(s) < 0)
return -1;
}
sos_offs = get_bits_long(&hgb, 32);
@ -2103,22 +2103,22 @@ read_header:
dprintf("sod offs: 0x%x\n", sod_offs);
if (sos_offs)
{
// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
init_get_bits(&s->gb, buf+sos_offs, field_size*8);
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
s->start_code = SOS;
mjpeg_decode_sos(s);
// init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
init_get_bits(&s->gb, buf+sos_offs, field_size*8);
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
s->start_code = SOS;
mjpeg_decode_sos(s);
}
if (s->interlaced) {
s->bottom_field ^= 1;
/* if not bottom field, do not output image yet */
if (s->bottom_field && second_field_offs)
{
buf_ptr = buf + second_field_offs;
second_field_offs = 0;
goto read_header;
}
{
buf_ptr = buf + second_field_offs;
second_field_offs = 0;
goto read_header;
}
}
//XXX FIXME factorize, this looks very similar to the EOI code
@ -2153,7 +2153,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
int i = 0, j = 0;
if (!avctx->width || !avctx->height)
return -1;
return -1;
buf_ptr = buf;
buf_end = buf + buf_size;
@ -2161,7 +2161,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
#if 1
recoded = av_mallocz(buf_size + 1024);
if (!recoded)
return -1;
return -1;
/* SOI */
recoded[j++] = 0xFF;
@ -2187,9 +2187,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
{
recoded[j++] = buf[i];
if (buf[i] == 0xff)
recoded[j++] = 0;
recoded[j++] = buf[i];
if (buf[i] == 0xff)
recoded[j++] = 0;
}
/* EOI */
@ -2229,33 +2229,33 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
if (avctx->get_buffer(avctx, &s->picture) < 0)
{
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return -1;
return -1;
}
s->picture.pict_type = I_TYPE;
s->picture.key_frame = 1;
for (i = 0; i < 3; i++)
s->linesize[i] = s->picture.linesize[i] << s->interlaced;
s->linesize[i] = s->picture.linesize[i] << s->interlaced;
/* DQT */
for (i = 0; i < 64; i++)
{
j = s->scantable.permutated[i];
s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
j = s->scantable.permutated[i];
s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
}
s->qscale[0] = FFMAX(
s->quant_matrixes[0][s->scantable.permutated[1]],
s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
s->quant_matrixes[0][s->scantable.permutated[1]],
s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
for (i = 0; i < 64; i++)
{
j = s->scantable.permutated[i];
s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
j = s->scantable.permutated[i];
s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
}
s->qscale[1] = FFMAX(
s->quant_matrixes[1][s->scantable.permutated[1]],
s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
s->quant_matrixes[1][s->scantable.permutated[1]],
s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
/* DHT */
@ -2282,7 +2282,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
s->ac_index[2] = 1;
for (i = 0; i < 3; i++)
s->last_dc[i] = 1024;
s->last_dc[i] = 1024;
s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);

View File

@ -61,7 +61,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l
/* put block, width 16 pixel, height 8/16 */
static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -78,7 +78,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -95,7 +95,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -112,7 +112,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -131,7 +131,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* put block, width 8 pixel, height 4/8/16 */
static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -152,7 +152,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -173,7 +173,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -194,7 +194,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -217,7 +217,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* average block, width 16 pixel, height 8/16 */
static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -234,7 +234,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -251,7 +251,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -268,7 +268,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 8:
@ -287,7 +287,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
/* average block, width 8 pixel, height 4/8/16 */
static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -308,7 +308,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -329,7 +329,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -350,7 +350,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
}
static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
int stride, int height)
int stride, int height)
{
switch (height) {
case 4:
@ -450,7 +450,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
void MPV_common_init_mlib(MpegEncContext *s)
{
if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
s->dsp.fdct = ff_fdct_mlib;
s->dsp.fdct = ff_fdct_mlib;
}
if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){

View File

@ -45,7 +45,7 @@
#define P_MV1 P[9]
static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h);
@ -293,25 +293,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean)
s = 0;
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j += 8) {
s += ABS(pix[0]-mean);
s += ABS(pix[1]-mean);
s += ABS(pix[2]-mean);
s += ABS(pix[3]-mean);
s += ABS(pix[4]-mean);
s += ABS(pix[5]-mean);
s += ABS(pix[6]-mean);
s += ABS(pix[7]-mean);
pix += 8;
}
pix += line_size - 16;
for (j = 0; j < 16; j += 8) {
s += ABS(pix[0]-mean);
s += ABS(pix[1]-mean);
s += ABS(pix[2]-mean);
s += ABS(pix[3]-mean);
s += ABS(pix[4]-mean);
s += ABS(pix[5]-mean);
s += ABS(pix[6]-mean);
s += ABS(pix[7]-mean);
pix += 8;
}
pix += line_size - 16;
}
return s;
}
#endif
static inline void no_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr)
int *mx_ptr, int *my_ptr)
{
*mx_ptr = 16 * s->mb_x;
*my_ptr = 16 * s->mb_y;
@ -328,35 +328,35 @@ static int full_motion_search(MpegEncContext * s,
xx = 16 * s->mb_x;
yy = 16 * s->mb_y;
x1 = xx - range + 1; /* we lose one pixel to avoid boundary problems with half-pixel prediction */
x1 = xx - range + 1; /* we lose one pixel to avoid boundary problems with half-pixel prediction */
if (x1 < xmin)
x1 = xmin;
x1 = xmin;
x2 = xx + range - 1;
if (x2 > xmax)
x2 = xmax;
x2 = xmax;
y1 = yy - range + 1;
if (y1 < ymin)
y1 = ymin;
y1 = ymin;
y2 = yy + range - 1;
if (y2 > ymax)
y2 = ymax;
y2 = ymax;
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
dmin = 0x7fffffff;
mx = 0;
my = 0;
for (y = y1; y <= y2; y++) {
for (x = x1; x <= x2; x++) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
s->linesize, 16);
if (d < dmin ||
(d == dmin &&
(abs(x - xx) + abs(y - yy)) <
(abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
for (x = x1; x <= x2; x++) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
s->linesize, 16);
if (d < dmin ||
(d == dmin &&
(abs(x - xx) + abs(y - yy)) <
(abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
}
*mx_ptr = mx;
@ -364,8 +364,8 @@ static int full_motion_search(MpegEncContext * s,
#if 0
if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
}
#endif
return dmin;
@ -386,22 +386,22 @@ static int log_motion_search(MpegEncContext * s,
/* Left limit */
x1 = xx - range;
if (x1 < xmin)
x1 = xmin;
x1 = xmin;
/* Right limit */
x2 = xx + range;
if (x2 > xmax)
x2 = xmax;
x2 = xmax;
/* Upper limit */
y1 = yy - range;
if (y1 < ymin)
y1 = ymin;
y1 = ymin;
/* Lower limit */
y2 = yy + range;
if (y2 > ymax)
y2 = ymax;
y2 = ymax;
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
dmin = 0x7fffffff;
@ -409,34 +409,34 @@ static int log_motion_search(MpegEncContext * s,
my = 0;
do {
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
}
for (y = y1; y <= y2; y += range) {
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dmin = d;
mx = x;
my = y;
}
}
}
range = range >> 1;
range = range >> 1;
x1 = mx - range;
if (x1 < xmin)
x1 = xmin;
x1 = mx - range;
if (x1 < xmin)
x1 = xmin;
x2 = mx + range;
if (x2 > xmax)
x2 = xmax;
x2 = mx + range;
if (x2 > xmax)
x2 = xmax;
y1 = my - range;
if (y1 < ymin)
y1 = ymin;
y1 = my - range;
if (y1 < ymin)
y1 = ymin;
y2 = my + range;
if (y2 > ymax)
y2 = ymax;
y2 = my + range;
if (y2 > ymax)
y2 = ymax;
} while (range >= 1);
@ -462,22 +462,22 @@ static int phods_motion_search(MpegEncContext * s,
/* Left limit */
x1 = xx - range;
if (x1 < xmin)
x1 = xmin;
x1 = xmin;
/* Right limit */
x2 = xx + range;
if (x2 > xmax)
x2 = xmax;
x2 = xmax;
/* Upper limit */
y1 = yy - range;
if (y1 < ymin)
y1 = ymin;
y1 = ymin;
/* Lower limit */
y2 = yy + range;
if (y2 > ymax)
y2 = ymax;
y2 = ymax;
pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
mx = 0;
@ -489,43 +489,43 @@ static int phods_motion_search(MpegEncContext * s,
dminx = 0x7fffffff;
dminy = 0x7fffffff;
lastx = x;
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
}
}
lastx = x;
for (x = x1; x <= x2; x += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminx = d;
mx = x;
}
}
x = lastx;
for (y = y1; y <= y2; y += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminy = d;
my = y;
}
}
x = lastx;
for (y = y1; y <= y2; y += range) {
d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
dminy = d;
my = y;
}
}
range = range >> 1;
range = range >> 1;
x = mx;
y = my;
x1 = mx - range;
if (x1 < xmin)
x1 = xmin;
x = mx;
y = my;
x1 = mx - range;
if (x1 < xmin)
x1 = xmin;
x2 = mx + range;
if (x2 > xmax)
x2 = xmax;
x2 = mx + range;
if (x2 > xmax)
x2 = xmax;
y1 = my - range;
if (y1 < ymin)
y1 = ymin;
y1 = my - range;
if (y1 < ymin)
y1 = ymin;
y2 = my + range;
if (y2 > ymax)
y2 = ymax;
y2 = my + range;
if (y2 > ymax)
y2 = ymax;
} while (range >= 1);
@ -550,7 +550,7 @@ static int phods_motion_search(MpegEncContext * s,
}
static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h)
{
@ -1190,24 +1190,24 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
switch(s->me_method) {
case ME_ZERO:
default:
no_motion_search(s, &mx, &my);
no_motion_search(s, &mx, &my);
mx-= mb_x*16;
my-= mb_y*16;
dmin = 0;
break;
#if 0
case ME_FULL:
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_LOG:
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_PHODS:
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
@ -1264,7 +1264,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
#if 0
printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
#endif
if(mb_type){
if (vard <= 64 || vard < varc)
@ -1479,24 +1479,24 @@ static int ff_estimate_motion_b(MpegEncContext * s,
switch(s->me_method) {
case ME_ZERO:
default:
no_motion_search(s, &mx, &my);
no_motion_search(s, &mx, &my);
dmin = 0;
mx-= mb_x*16;
my-= mb_y*16;
break;
#if 0
case ME_FULL:
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
dmin = full_motion_search(s, &mx, &my, range, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_LOG:
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;
case ME_PHODS:
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
mx-= mb_x*16;
my-= mb_y*16;
break;

View File

@ -45,7 +45,7 @@
#if 0
static int hpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
uint8_t *ref_data[3],
int size)
{
@ -113,7 +113,7 @@ static int hpel_motion_search)(MpegEncContext * s,
#else
static int hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h)
{
@ -271,7 +271,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
}
static int qpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h)
{
@ -1005,7 +1005,7 @@ static int epzs_motion_search4(MpegEncContext * s,
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
@ -1067,7 +1067,7 @@ static int epzs_motion_search2(MpegEncContext * s,
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)

View File

@ -28,51 +28,51 @@
#define BUFFER_SIZE (2*MPA_FRAME_SIZE)
/* Private per-codec state of the LAME MP3 encoder wrapper (reached through
 * avctx->priv_data).
 * NOTE(review): gfp and stereo are listed twice in this view; this looks like
 * the before/after lines of a whitespace-only change shown together -- the
 * real struct should declare each member once. Confirm against the file. */
typedef struct Mp3AudioContext {
/* LAME encoder handle; assigned from lame_init() during encoder init */
lame_global_flags *gfp;
/* set to 1 when avctx->channels > 1, otherwise 0 */
int stereo;
lame_global_flags *gfp;
int stereo;
/* BUFFER_SIZE-byte scratch area; presumably holds encoded bytes that have
 * not yet been returned as a complete frame -- TODO confirm */
uint8_t buffer[BUFFER_SIZE];
/* presumably the number of bytes currently held in buffer -- TODO confirm */
int buffer_index;
} Mp3AudioContext;
/* Encoder init: configures a LAME handle from the AVCodecContext settings.
 *
 * Rejects more than 2 channels, creates the LAME handle, copies sample rate,
 * channel count and bit rate into it, switches to VBR when
 * CODEC_FLAG_QSCALE is set, finalises the parameters and publishes the frame
 * size and a coded_frame on avctx.
 *
 * Returns 0 on success, -1 on failure (too many channels, lame_init() or
 * lame_init_params() failing).
 *
 * NOTE(review): many statements appear twice in this view; this looks like
 * the before/after lines of a whitespace-only change shown together rather
 * than real duplicated code -- confirm against the file. */
static int MP3lame_encode_init(AVCodecContext *avctx)
{
Mp3AudioContext *s = avctx->priv_data;
Mp3AudioContext *s = avctx->priv_data;
/* only mono and stereo input are accepted */
if (avctx->channels > 2)
return -1;
if (avctx->channels > 2)
return -1;
s->stereo = avctx->channels > 1 ? 1 : 0;
s->stereo = avctx->channels > 1 ? 1 : 0;
/* nothing to release yet if handle creation fails, hence the plain err label */
if ((s->gfp = lame_init()) == NULL)
goto err;
/* input and output rates are set to the same value: no resampling requested */
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
lame_set_num_channels(s->gfp, avctx->channels);
/* lame 3.91 dies on quality != 5 */
lame_set_quality(s->gfp, 5);
/* lame 3.91 doesn't work in mono */
lame_set_mode(s->gfp, JOINT_STEREO);
/* avctx->bit_rate is divided by 1000 before being handed to LAME */
lame_set_brate(s->gfp, avctx->bit_rate/1000);
if ((s->gfp = lame_init()) == NULL)
goto err;
lame_set_in_samplerate(s->gfp, avctx->sample_rate);
lame_set_out_samplerate(s->gfp, avctx->sample_rate);
lame_set_num_channels(s->gfp, avctx->channels);
/* lame 3.91 dies on quality != 5 */
lame_set_quality(s->gfp, 5);
/* lame 3.91 doesn't work in mono */
lame_set_mode(s->gfp, JOINT_STEREO);
lame_set_brate(s->gfp, avctx->bit_rate/1000);
/* quality-based mode: clear the fixed bit rate and select VBR, with the
 * VBR quality derived from global_quality scaled down by FF_QP2LAMBDA */
if(avctx->flags & CODEC_FLAG_QSCALE) {
lame_set_brate(s->gfp, 0);
lame_set_VBR(s->gfp, vbr_default);
lame_set_VBR_q(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
}
/* pass 0 to the VBR-tag switch; presumably disables writing the tag -- TODO confirm */
lame_set_bWriteVbrTag(s->gfp,0);
/* from here on the handle exists, so failures must go through err_close */
if (lame_init_params(s->gfp) < 0)
goto err_close;
if (lame_init_params(s->gfp) < 0)
goto err_close;
avctx->frame_size = lame_get_framesize(s->gfp);
avctx->frame_size = lame_get_framesize(s->gfp);
/* NOTE(review): the avcodec_alloc_frame() result is dereferenced on the next
 * line without a NULL check -- confirm whether allocation failure can occur */
avctx->coded_frame= avcodec_alloc_frame();
avctx->coded_frame->key_frame= 1;
return 0;
return 0;
/* failure after the handle was created: close it before reporting the error */
err_close:
lame_close(s->gfp);
lame_close(s->gfp);
err:
return -1;
return -1;
}
static const int sSampleRates[3] = {
@ -136,11 +136,11 @@ static int mp3len(void *data, int *samplesPerFrame, int *sampleRate)
int MP3lame_encode_frame(AVCodecContext *avctx,
unsigned char *frame, int buf_size, void *data)
{
Mp3AudioContext *s = avctx->priv_data;
int len;
int lame_result;
Mp3AudioContext *s = avctx->priv_data;
int len;
int lame_result;
/* lame 3.91 dies on '1-channel interleaved' data */
/* lame 3.91 dies on '1-channel interleaved' data */
if(data){
if (s->stereo) {
@ -198,12 +198,12 @@ int MP3lame_encode_frame(AVCodecContext *avctx,
/* Encoder teardown: frees avctx->coded_frame via av_freep() and closes the
 * LAME handle kept in the private context. Always returns 0.
 * NOTE(review): the declaration of s and the lame_close()/return pair appear
 * twice in this view; this looks like the before/after lines of a
 * whitespace-only change shown together -- confirm against the file. */
int MP3lame_encode_close(AVCodecContext *avctx)
{
Mp3AudioContext *s = avctx->priv_data;
Mp3AudioContext *s = avctx->priv_data;
av_freep(&avctx->coded_frame);
lame_close(s->gfp);
return 0;
lame_close(s->gfp);
return 0;
}

View File

@ -35,14 +35,14 @@
/* Start codes.
 * Every value below has the form 0x000001xx: a 0x000001 prefix followed by a
 * one-byte code. SLICE_MIN_START_CODE and SLICE_MAX_START_CODE presumably
 * bound the range of slice codes (0x01..0xaf) -- TODO confirm.
 * NOTE(review): each #define is listed twice in this view with identical
 * values; this looks like the before/after lines of a whitespace-only change
 * shown together -- confirm against the file. */
#define SEQ_END_CODE 0x000001b7
#define SEQ_START_CODE 0x000001b3
#define GOP_START_CODE 0x000001b8
#define PICTURE_START_CODE 0x00000100
#define SLICE_MIN_START_CODE 0x00000101
#define SLICE_MAX_START_CODE 0x000001af
#define EXT_START_CODE 0x000001b5
#define USER_START_CODE 0x000001b2
#define SEQ_END_CODE 0x000001b7
#define SEQ_START_CODE 0x000001b3
#define GOP_START_CODE 0x000001b8
#define PICTURE_START_CODE 0x00000100
#define SLICE_MIN_START_CODE 0x00000101
#define SLICE_MAX_START_CODE 0x000001af
#define EXT_START_CODE 0x000001b5
#define USER_START_CODE 0x000001b2
#define DC_VLC_BITS 9
#define MV_VLC_BITS 9
@ -89,7 +89,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1};
/* List of the two XvMC MPEG-2 pixel formats (IDCT first, then MC), ending in
 * -1; the -1 is presumably the list terminator -- TODO confirm.
 * NOTE(review): the closing "-1};" line appears twice in this view; this looks
 * like the before/after lines of a whitespace-only change shown together. */
const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
PIX_FMT_XVMC_MPEG2_IDCT,
PIX_FMT_XVMC_MPEG2_MC,
-1};
-1};
#ifdef CONFIG_ENCODERS
static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
static uint8_t fcode_tab[MAX_MV*2+1];
@ -166,7 +166,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni
code= rl->index_run[0][run] + alevel - 1;
if (code < 111 /* rl->n */) {
/* store the vlc & sign at once */
/* store the vlc & sign at once */
len= mpeg1_vlc[code][1]+1;
bits= (mpeg1_vlc[code][0]<<1) + sign;
} else {
@ -764,38 +764,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
if(!done){
int f_code;
int mv;
int i;
int i;
done=1;
init_rl(&rl_mpeg1, 1);
for(i=0; i<64; i++)
{
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
}
for(i=0; i<64; i++)
{
mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
}
init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len);
/* build unified dc encoding tables */
for(i=-255; i<256; i++)
{
int adiff, index;
int bits, code;
int diff=i;
/* build unified dc encoding tables */
for(i=-255; i<256; i++)
{
int adiff, index;
int bits, code;
int diff=i;
adiff = ABS(diff);
if(diff<0) diff--;
index = av_log2(2*adiff);
adiff = ABS(diff);
if(diff<0) diff--;
index = av_log2(2*adiff);
bits= vlc_dc_lum_bits[index] + index;
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
bits= vlc_dc_lum_bits[index] + index;
code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
bits= vlc_dc_chroma_bits[index] + index;
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
}
bits= vlc_dc_chroma_bits[index] + index;
code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
}
mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
@ -873,14 +873,14 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component)
}else{
if (component == 0) {
put_bits(
&s->pb,
mpeg1_lum_dc_uni[diff+255]&0xFF,
mpeg1_lum_dc_uni[diff+255]>>8);
&s->pb,
mpeg1_lum_dc_uni[diff+255]&0xFF,
mpeg1_lum_dc_uni[diff+255]>>8);
} else {
put_bits(
&s->pb,
mpeg1_chr_dc_uni[diff+255]&0xFF,
mpeg1_chr_dc_uni[diff+255]>>8);
mpeg1_chr_dc_uni[diff+255]&0xFF,
mpeg1_chr_dc_uni[diff+255]>>8);
}
}
}
@ -946,10 +946,10 @@ static void mpeg1_encode_block(MpegEncContext *s,
// code = get_rl_index(rl, 0, run, alevel);
if (alevel <= mpeg1_max_level[0][run]){
code= mpeg1_index_run[0][run] + alevel - 1;
/* store the vlc & sign at once */
/* store the vlc & sign at once */
put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign);
} else {
/* escape seems to be pretty rare <5% so i dont optimize it */
/* escape seems to be pretty rare <5% so i dont optimize it */
put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]);
/* escape: only clip in this case */
put_bits(&s->pb, 6, run);
@ -1376,8 +1376,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
return -1;
}
if(mb_block_count > 6){
cbp<<= mb_block_count-6;
cbp |= get_bits(&s->gb, mb_block_count-6);
cbp<<= mb_block_count-6;
cbp |= get_bits(&s->gb, mb_block_count-6);
}
#ifdef HAVE_XVMC
@ -2074,7 +2074,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
uint8_t old_permutation[64];
if (
(s1->mpeg_enc_ctx_allocated == 0)||
(s1->mpeg_enc_ctx_allocated == 0)||
avctx->coded_width != s->width ||
avctx->coded_height != s->height||
s1->save_aspect_info != s->aspect_ratio_info||
@ -2088,8 +2088,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
s->parse_context= pc;
}
if( (s->width == 0 )||(s->height == 0))
return -2;
if( (s->width == 0 )||(s->height == 0))
return -2;
avcodec_set_dimensions(avctx, s->width, s->height);
avctx->bit_rate = s->bit_rate;
@ -2129,7 +2129,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
mpeg2_aspect[s->aspect_ratio_info],
(AVRational){s1->pan_scan.width, s1->pan_scan.height}
);
}
}
}else{
s->avctx->sample_aspect_ratio=
mpeg2_aspect[s->aspect_ratio_info];
@ -2312,16 +2312,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
nofco = 1;
if(s->progressive_sequence){
if(s->repeat_first_field){
nofco++;
if(s->top_field_first)
nofco++;
}
nofco++;
if(s->top_field_first)
nofco++;
}
}else{
if(s->picture_structure == PICT_FRAME){
nofco++;
if(s->repeat_first_field)
nofco++;
}
if(s->repeat_first_field)
nofco++;
}
}
for(i=0; i<nofco; i++){
s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
@ -2985,8 +2985,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
if(s->avctx->debug & FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
time_code_hours, time_code_minutes, time_code_seconds,
time_code_pictures, broken_link);
time_code_hours, time_code_minutes, time_code_seconds,
time_code_pictures, broken_link);
}
/**
* finds the end of the current frame in the bitstream.
@ -3044,13 +3044,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
dprintf("fill_buffer\n");
if (buf_size == 0) {
/* special case for last picture */
if (s2->low_delay==0 && s2->next_picture_ptr) {
*picture= *(AVFrame*)s2->next_picture_ptr;
s2->next_picture_ptr= NULL;
/* special case for last picture */
if (s2->low_delay==0 && s2->next_picture_ptr) {
*picture= *(AVFrame*)s2->next_picture_ptr;
s2->next_picture_ptr= NULL;
*data_size = sizeof(AVFrame);
}
*data_size = sizeof(AVFrame);
}
return 0;
}
@ -3111,13 +3111,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
switch(start_code) {
case SEQ_START_CODE:
mpeg1_decode_sequence(avctx, buf_ptr,
input_size);
input_size);
break;
case PICTURE_START_CODE:
/* we have a complete image : we try to decompress it */
mpeg1_decode_picture(avctx,
buf_ptr, input_size);
buf_ptr, input_size);
break;
case EXT_START_CODE:
mpeg_decode_extension(avctx,

View File

@ -4,14 +4,14 @@
*/
/* 64-entry table written as 8 rows of 8 values; going by its name, presumably
 * the default intra quantisation matrix for MPEG-1 -- TODO confirm, including
 * whether the entries are in raster or scan order.
 * NOTE(review): the 8 rows are listed twice in this view (128 initialisers for
 * a 64-element array); this looks like the before/after lines of a
 * whitespace-only change shown together -- the real table should contain the
 * 8 rows once. Confirm against the file. */
const int16_t ff_mpeg1_default_intra_matrix[64] = {
8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38,
22, 22, 26, 27, 29, 34, 37, 40,
22, 26, 27, 29, 32, 35, 40, 48,
26, 27, 29, 32, 35, 40, 48, 58,
26, 27, 29, 34, 38, 46, 56, 69,
27, 29, 35, 38, 46, 56, 69, 83
8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38,
22, 22, 26, 27, 29, 34, 37, 40,
22, 26, 27, 29, 32, 35, 40, 48,
26, 27, 29, 32, 35, 40, 48, 58,
26, 27, 29, 34, 38, 46, 56, 69,
27, 29, 35, 38, 46, 56, 69, 83
};
const int16_t ff_mpeg1_default_non_intra_matrix[64] = {

View File

@ -748,7 +748,7 @@ static void encode_frame(MpegAudioContext *s,
}
static int MPA_encode_frame(AVCodecContext *avctx,
unsigned char *frame, int buf_size, void *data)
unsigned char *frame, int buf_size, void *data)
{
MpegAudioContext *s = avctx->priv_data;
short *samples = data;

View File

@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
int mpa_decode_header(AVCodecContext *avctx, uint32_t head);
void ff_mpa_synth_init(MPA_INT *window);
/* Prototype of the MPEG audio synthesis filter.
 * The frame decoder calls it once per channel and per sub-band sample block,
 * passing that channel's synth buffer and offset, the shared window, the
 * dither state, the output pointer, the channel count as incr, and one
 * sb_samples row; it then advances the output pointer by 32 * channel count.
 * incr is therefore presumably the stride between consecutive output samples
 * of one channel, with 32 samples produced per call -- TODO confirm.
 * NOTE(review): the "window, dither_state" parameter line appears twice in
 * this view; this looks like the before/after lines of a whitespace-only
 * change shown together -- confirm against the file. */
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
MPA_INT *window, int *dither_state,
MPA_INT *window, int *dither_state,
OUT_INT *samples, int incr,
int32_t sb_samples[SBLIMIT]);

View File

@ -64,7 +64,7 @@ static always_inline int MULH(int a, int b){
struct GranuleDef;
typedef struct MPADecodeContext {
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
int inbuf_index;
uint8_t *inbuf_ptr, *inbuf;
int frame_size;
@ -340,13 +340,13 @@ static int decode_init(AVCodecContext * avctx)
scale_factor_mult[i][2]);
}
ff_mpa_synth_init(window);
ff_mpa_synth_init(window);
/* huffman decode tables */
huff_code_table[0] = NULL;
for(i=1;i<16;i++) {
const HuffTable *h = &mpa_huff_tables[i];
int xsize, x, y;
int xsize, x, y;
unsigned int n;
uint8_t *code_table;
@ -378,11 +378,11 @@ static int decode_init(AVCodecContext * avctx)
band_index_long[i][22] = k;
}
/* compute n ^ (4/3) and store it in mantissa/exp format */
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
/* compute n ^ (4/3) and store it in mantissa/exp format */
table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
if(!table_4_3_exp)
return -1;
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
return -1;
table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
if(!table_4_3_value)
return -1;
@ -844,7 +844,7 @@ void ff_mpa_synth_init(MPA_INT *window)
32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
MPA_INT *window, int *dither_state,
MPA_INT *window, int *dither_state,
OUT_INT *samples, int incr,
int32_t sb_samples[SBLIMIT])
{
@ -2440,8 +2440,8 @@ static int mp_decode_frame(MPADecodeContext *s,
samples_ptr = samples + ch;
for(i=0;i<nb_frames;i++) {
ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
window, &s->dither_state,
samples_ptr, s->nb_channels,
window, &s->dither_state,
samples_ptr, s->nb_channels,
s->sb_samples[ch][i]);
samples_ptr += 32 * s->nb_channels;
}
@ -2453,8 +2453,8 @@ static int mp_decode_frame(MPADecodeContext *s,
}
static int decode_frame(AVCodecContext * avctx,
void *data, int *data_size,
uint8_t * buf, int buf_size)
void *data, int *data_size,
uint8_t * buf, int buf_size)
{
MPADecodeContext *s = avctx->priv_data;
uint32_t header;
@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx,
buf_ptr = buf;
while (buf_size > 0) {
len = s->inbuf_ptr - s->inbuf;
if (s->frame_size == 0) {
len = s->inbuf_ptr - s->inbuf;
if (s->frame_size == 0) {
/* special case for next header for first frame in free
format case (XXX: find a simpler method) */
if (s->free_format_next_header != 0) {
@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx,
s->free_format_next_header = 0;
goto got_header;
}
/* no header seen : find one. We need at least HEADER_SIZE
/* no header seen : find one. We need at least HEADER_SIZE
bytes to parse it */
len = HEADER_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len > 0) {
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
buf_size -= len;
s->inbuf_ptr += len;
}
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
len = HEADER_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len > 0) {
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
buf_size -= len;
s->inbuf_ptr += len;
}
if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
got_header:
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
(s->inbuf[2] << 8) | s->inbuf[3];
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
(s->inbuf[2] << 8) | s->inbuf[3];
if (ff_mpa_check_header(header) < 0) {
/* no sync found : move by one byte (inefficient, but simple!) */
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
if (ff_mpa_check_header(header) < 0) {
/* no sync found : move by one byte (inefficient, but simple!) */
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
dprintf("skip %x\n", header);
/* reset free format frame size to give a chance
to get a new bitrate */
s->free_format_frame_size = 0;
} else {
if (decode_header(s, header) == 1) {
} else {
if (decode_header(s, header) == 1) {
/* free format: prepare to compute frame size */
s->frame_size = -1;
s->frame_size = -1;
}
/* update codec info */
avctx->sample_rate = s->sample_rate;
@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx,
avctx->frame_size = 1152;
break;
}
}
}
}
}
} else if (s->frame_size == -1) {
/* free format : find next sync to compute frame size */
len = MPA_MAX_CODED_FRAME_SIZE - len;
if (len > buf_size)
len = buf_size;
len = MPA_MAX_CODED_FRAME_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len == 0) {
/* frame too long: resync */
/* frame too long: resync */
s->frame_size = 0;
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
} else {
uint8_t *p, *pend;
uint32_t header1;
@ -2580,17 +2580,17 @@ static int decode_frame(AVCodecContext * avctx,
s->inbuf_ptr += len;
buf_size -= len;
}
} else if (len < s->frame_size) {
} else if (len < s->frame_size) {
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
len = s->frame_size - len;
if (len > buf_size)
len = buf_size;
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
s->inbuf_ptr += len;
buf_size -= len;
}
len = s->frame_size - len;
if (len > buf_size)
len = buf_size;
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
s->inbuf_ptr += len;
buf_size -= len;
}
next_data:
if (s->frame_size > 0 &&
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx,
} else {
out_size = mp_decode_frame(s, out_samples);
}
s->inbuf_ptr = s->inbuf;
s->frame_size = 0;
s->inbuf_ptr = s->inbuf;
s->frame_size = 0;
if(out_size>=0)
*data_size = out_size;
*data_size = out_size;
else
av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
break;
}
break;
}
}
return buf_ptr - buf;
}
static int decode_frame_adu(AVCodecContext * avctx,
void *data, int *data_size,
uint8_t * buf, int buf_size)
void *data, int *data_size,
uint8_t * buf, int buf_size)
{
MPADecodeContext *s = avctx->priv_data;
uint32_t header;
@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx)
static int decode_frame_mp3on4(AVCodecContext * avctx,
void *data, int *data_size,
uint8_t * buf, int buf_size)
void *data, int *data_size,
uint8_t * buf, int buf_size)
{
MP3On4DecodeContext *s = avctx->priv_data;
MPADecodeContext *m;

View File

@ -354,7 +354,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
if(r<0 || !pic->age || !pic->type || !pic->data[0]){
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
return -1;
}
@ -913,7 +913,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->width = avctx->width;
s->height = avctx->height;
if(avctx->gop_size > 600){
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
avctx->gop_size=600;
}
s->gop_size = avctx->gop_size;
@ -1120,7 +1120,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->out_format = FMT_MJPEG;
s->intra_only = 1; /* force intra only for jpeg */
s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
s->mjpeg_data_only_frames = 0; /* write all the needed headers */
s->mjpeg_vsample[0] = 1<<chroma_v_shift;
s->mjpeg_vsample[1] = 1;
s->mjpeg_vsample[2] = 1;
@ -1143,24 +1143,24 @@ int MPV_encode_init(AVCodecContext *avctx)
return -1;
}
s->out_format = FMT_H263;
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
avctx->delay=0;
s->low_delay=1;
break;
case CODEC_ID_H263P:
s->out_format = FMT_H263;
s->h263_plus = 1;
/* Fx */
/* Fx */
s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
s->modified_quant= s->h263_aic;
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
s->modified_quant= s->h263_aic;
s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
/* /Fx */
/* /Fx */
/* These are just to be sure */
avctx->delay=0;
s->low_delay=1;
@ -2473,7 +2473,7 @@ static inline void gmc1_motion(MpegEncContext *s,
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
if (s->no_rounding){
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
}else{
s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
}
@ -4148,7 +4148,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
}
}
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
s->dsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
@ -4157,7 +4157,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
skip_dct[4]= 1;
skip_dct[5]= 1;
}else{
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
}
}else{
@ -4170,7 +4170,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
dest_cr = s->dest[2];
if ((!s->no_rounding) || s->pict_type==B_TYPE){
op_pix = s->dsp.put_pixels_tab;
op_pix = s->dsp.put_pixels_tab;
op_qpix= s->dsp.put_qpel_pixels_tab;
}else{
op_pix = s->dsp.put_no_rnd_pixels_tab;
@ -4208,7 +4208,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
}
}
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
@ -4223,7 +4223,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
/* pre quantization */
if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
//FIXME optimize
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
@ -6265,7 +6265,7 @@ static int dct_quantize_c(MpegEncContext *s,
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
return last_non_zero;
}

View File

@ -126,7 +126,7 @@ typedef struct ScanTable{
uint8_t permutated[64];
uint8_t raster_end[64];
#ifdef ARCH_POWERPC
/** Used by dct_quantise_alitvec to find last-non-zero */
/** Used by dct_quantise_alitvec to find last-non-zero */
uint8_t __align8 inverse[64];
#endif
} ScanTable;
@ -181,7 +181,7 @@ typedef struct Picture{
uint16_t *mb_var; ///< Table for MB variances
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
uint8_t *mb_mean; ///< Table for MB luminance
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
int32_t *mb_cmp_score; ///< Table for MB cmp scores, for mb decision FIXME remove
int b_frame_score; /* */
} Picture;
@ -245,7 +245,7 @@ typedef struct MotionEstContext{
uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV
uint8_t *current_mv_penalty;
int (*sub_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int *mx_ptr, int *my_ptr, int dmin,
int src_index, int ref_index,
int size, int h);
}MotionEstContext;

View File

@ -544,24 +544,24 @@ void msmpeg4_encode_mb(MpegEncContext * s,
handle_slices(s);
if (!s->mb_intra) {
/* compute cbp */
/* compute cbp */
set_stat(ST_INTER_MB);
cbp = 0;
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0)
cbp |= 1 << (5 - i);
}
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
/* skip macroblock */
put_bits(&s->pb, 1, 1);
cbp = 0;
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0)
cbp |= 1 << (5 - i);
}
if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
/* skip macroblock */
put_bits(&s->pb, 1, 1);
s->last_bits++;
s->misc_bits++;
s->misc_bits++;
s->skip_count++;
return;
}
return;
}
if (s->use_skip_mb_code)
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
if(s->msmpeg4_version<=2){
put_bits(&s->pb,
@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
}
s->p_tex_bits += get_bits_diff(s);
} else {
/* compute cbp */
cbp = 0;
/* compute cbp */
cbp = 0;
coded_cbp = 0;
for (i = 0; i < 6; i++) {
for (i = 0; i < 6; i++) {
int val, pred;
val = (s->block_last_index[i] >= 1);
cbp |= val << (5 - i);
@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
val = val ^ pred;
}
coded_cbp |= val << (5 - i);
}
}
#if 0
if (coded_cbp)
printf("cbp=%x %x\n", cbp, coded_cbp);
@ -625,12 +625,12 @@ void msmpeg4_encode_mb(MpegEncContext * s,
v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
} else {
if (s->use_skip_mb_code)
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb,
v2_mb_type[(cbp&3) + 4][1],
v2_mb_type[(cbp&3) + 4][0]);
}
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
put_bits(&s->pb,
cbpy_tab[cbp>>2][1],
cbpy_tab[cbp>>2][0]);
@ -641,13 +641,13 @@ void msmpeg4_encode_mb(MpegEncContext * s,
ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
} else {
if (s->use_skip_mb_code)
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb,
table_mb_non_intra[cbp][1],
table_mb_non_intra[cbp][0]);
}
set_stat(ST_INTRA_MB);
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
if(s->inter_intra_pred){
s->h263_aic_dir=0;
put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
@ -702,9 +702,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
/* find prediction */
if (n < 4) {
scale = s->y_dc_scale;
scale = s->y_dc_scale;
} else {
scale = s->c_dc_scale;
scale = s->c_dc_scale;
}
wrap = s->block_wrap[n];
@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
to problems if Q could vary !) */
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC
asm volatile(
"movl %3, %%eax \n\t"
"shrl $1, %%eax \n\t"
"addl %%eax, %2 \n\t"
"addl %%eax, %1 \n\t"
"addl %0, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %0 \n\t"
"movl %1, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %1 \n\t"
"movl %2, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %2 \n\t"
: "+b" (a), "+c" (b), "+D" (c)
: "g" (scale), "S" (inverse[scale])
: "%eax", "%edx"
"movl %3, %%eax \n\t"
"shrl $1, %%eax \n\t"
"addl %%eax, %2 \n\t"
"addl %%eax, %1 \n\t"
"addl %0, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %0 \n\t"
"movl %1, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %1 \n\t"
"movl %2, %%eax \n\t"
"mull %4 \n\t"
"movl %%edx, %2 \n\t"
: "+b" (a), "+c" (b), "+D" (c)
: "g" (scale), "S" (inverse[scale])
: "%eax", "%edx"
);
#else
/* #elif defined (ARCH_ALPHA) */
@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
common case. But they are costly everywhere...
*/
if (scale == 8) {
a = (a + (8 >> 1)) / 8;
b = (b + (8 >> 1)) / 8;
c = (c + (8 >> 1)) / 8;
a = (a + (8 >> 1)) / 8;
b = (b + (8 >> 1)) / 8;
c = (c + (8 >> 1)) / 8;
} else {
a = FASTDIV((a + (scale >> 1)), scale);
b = FASTDIV((b + (scale >> 1)), scale);
c = FASTDIV((c + (scale >> 1)), scale);
a = FASTDIV((a + (scale >> 1)), scale);
b = FASTDIV((b + (scale >> 1)), scale);
c = FASTDIV((c + (scale >> 1)), scale);
}
#endif
/* XXX: WARNING: they did not choose the same test as MPEG4. This
@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
/* AC coefs */
last_non_zero = i - 1;
for (; i <= last_index; i++) {
j = scantable[i];
level = block[j];
if (level) {
run = i - last_non_zero - 1;
last = (i == last_index);
sign = 0;
slevel = level;
if (level < 0) {
sign = 1;
level = -level;
}
j = scantable[i];
level = block[j];
if (level) {
run = i - last_non_zero - 1;
last = (i == last_index);
sign = 0;
slevel = level;
if (level < 0) {
sign = 1;
level = -level;
}
if(level<=MAX_LEVEL && run<=MAX_RUN){
s->ac_stats[s->mb_intra][n>3][level][run][last]++;
@ -1030,8 +1030,8 @@ else
} else {
put_bits(&s->pb, 1, sign);
}
last_non_zero = i;
}
last_non_zero = i;
}
}
}
@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void)
v = abs(level);
while (v) {
v >>= 1;
size++;
size++;
}
if (level < 0)
@ -1301,11 +1301,11 @@ return -1;
}
s->no_rounding = 1;
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
s->qscale,
s->rl_chroma_table_index,
s->rl_table_index,
s->dc_table_index,
av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d \n",
s->qscale,
s->rl_chroma_table_index,
s->rl_table_index,
s->dc_table_index,
s->per_mb_rl_table,
s->slice_height);
} else {
@ -1349,20 +1349,20 @@ return -1;
}
if(s->avctx->debug&FF_DEBUG_PICT_INFO)
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
s->use_skip_mb_code,
s->rl_table_index,
s->rl_chroma_table_index,
s->dc_table_index,
s->mv_table_index,
av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d \n",
s->use_skip_mb_code,
s->rl_table_index,
s->rl_chroma_table_index,
s->dc_table_index,
s->mv_table_index,
s->per_mb_rl_table,
s->qscale);
if(s->flipflop_rounding){
s->no_rounding ^= 1;
}else{
s->no_rounding = 0;
}
if(s->flipflop_rounding){
s->no_rounding ^= 1;
}else{
s->no_rounding = 0;
}
}
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
s->dsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
{
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
}
}
return 0;
}
@ -1593,8 +1593,8 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
if (code < 0)
return -1;
//s->mb_intra = (code & 0x40) ? 0 : 1;
s->mb_intra = (~code & 0x40) >> 6;
//s->mb_intra = (code & 0x40) ? 0 : 1;
s->mb_intra = (~code & 0x40) >> 6;
cbp = code & 0x3f;
} else {
@ -1650,10 +1650,10 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
s->dsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
{
av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
}
return 0;
@ -1672,7 +1672,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
qmul=1;
qadd=0;
/* DC coef */
/* DC coef */
set_stat(ST_DC);
level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
}
}
#endif
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
if (level>0) level= level * qmul + qadd;
//level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
if (level>0) level= level * qmul + qadd;
else level= level * qmul - qadd;
#if 0 // waste of time too :(
if(level>2048 || level<-2048){

View File

@ -45,7 +45,7 @@ Theora_decode_frame(AVCodecContext *ctx, void *outdata, int *outdata_size,
thc->op.bytes = buf_size;
if(theora_decode_packetin(&thc->state, &thc->op))
return -1;
return -1;
theora_decode_YUVout(&thc->state, &yuv);
@ -78,7 +78,7 @@ Theora_decode_init(AVCodecContext *ctx)
uint8_t *cdp;
if(ctx->extradata_size < 6)
return -1;
return -1;
theora_info_init(&thc->info);
@ -87,25 +87,25 @@ Theora_decode_init(AVCodecContext *ctx)
size = ctx->extradata_size;
for(i = 0; i < 3; i++){
hs = *cdp++ << 8;
hs += *cdp++;
size -= 2;
hs = *cdp++ << 8;
hs += *cdp++;
size -= 2;
if(hs > size){
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
if(hs > size){
av_log(ctx, AV_LOG_ERROR, "extradata too small: %i > %i\n",
hs, size);
return -1;
}
return -1;
}
op.packet = cdp;
op.bytes = hs;
op.b_o_s = !i;
if(theora_decode_header(&thc->info, &thc->comment, &op))
return -1;
op.packetno++;
op.packet = cdp;
op.bytes = hs;
op.b_o_s = !i;
if(theora_decode_header(&thc->info, &thc->comment, &op))
return -1;
op.packetno++;
cdp += hs;
size -= hs;
cdp += hs;
size -= hs;
}
theora_decode_init(&thc->state, &thc->info);

View File

@ -40,13 +40,13 @@ static int oggvorbis_init_encoder(vorbis_info *vi, AVCodecContext *avccontext) {
return (vorbis_encode_setup_managed(vi, avccontext->channels,
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ||
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
vorbis_encode_setup_init(vi)) ;
vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE_AVG, NULL) ||
vorbis_encode_setup_init(vi)) ;
#else
/* constant bitrate */
return vorbis_encode_init(vi, avccontext->channels,
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
avccontext->sample_rate, -1, avccontext->bit_rate, -1) ;
#endif
}
@ -58,8 +58,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
vorbis_info_init(&context->vi) ;
if(oggvorbis_init_encoder(&context->vi, avccontext) < 0) {
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
return -1 ;
av_log(avccontext, AV_LOG_ERROR, "oggvorbis_encode_init: init_encoder failed") ;
return -1 ;
}
vorbis_analysis_init(&context->vd, &context->vi) ;
vorbis_block_init(&context->vd, &context->vb) ;
@ -101,8 +101,8 @@ static int oggvorbis_encode_init(AVCodecContext *avccontext) {
static int oggvorbis_encode_frame(AVCodecContext *avccontext,
unsigned char *packets,
int buf_size, void *data)
unsigned char *packets,
int buf_size, void *data)
{
OggVorbisContext *context = avccontext->priv_data ;
float **buffer ;
@ -113,22 +113,22 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
buffer = vorbis_analysis_buffer(&context->vd, samples) ;
if(context->vi.channels == 1) {
for(l = 0 ; l < samples ; l++)
buffer[0][l]=audio[l]/32768.f;
for(l = 0 ; l < samples ; l++)
buffer[0][l]=audio[l]/32768.f;
} else {
for(l = 0 ; l < samples ; l++){
buffer[0][l]=audio[l*2]/32768.f;
buffer[1][l]=audio[l*2+1]/32768.f;
}
for(l = 0 ; l < samples ; l++){
buffer[0][l]=audio[l*2]/32768.f;
buffer[1][l]=audio[l*2+1]/32768.f;
}
}
vorbis_analysis_wrote(&context->vd, samples) ;
while(vorbis_analysis_blockout(&context->vd, &context->vb) == 1) {
vorbis_analysis(&context->vb, NULL);
vorbis_bitrate_addblock(&context->vb) ;
vorbis_analysis(&context->vb, NULL);
vorbis_bitrate_addblock(&context->vb) ;
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
while(vorbis_bitrate_flushpacket(&context->vd, &op)) {
if(op.bytes==1) //id love to say this is a hack, bad sadly its not, appearently the end of stream decission is in libogg
continue;
memcpy(context->buffer + context->buffer_index, &op, sizeof(ogg_packet));
@ -136,7 +136,7 @@ static int oggvorbis_encode_frame(AVCodecContext *avccontext,
memcpy(context->buffer + context->buffer_index, op.packet, op.bytes);
context->buffer_index += op.bytes;
// av_log(avccontext, AV_LOG_DEBUG, "e%d / %d\n", context->buffer_index, op.bytes);
}
}
}
l=0;
@ -268,19 +268,19 @@ static inline int conv(int samples, float **pcm, char *buf, int channels) {
float *mono ;
for(i = 0 ; i < channels ; i++){
ptr = &data[i];
mono = pcm[i] ;
ptr = &data[i];
mono = pcm[i] ;
for(j = 0 ; j < samples ; j++) {
for(j = 0 ; j < samples ; j++) {
val = mono[j] * 32767.f;
val = mono[j] * 32767.f;
if(val > 32767) val = 32767 ;
if(val < -32768) val = -32768 ;
if(val > 32767) val = 32767 ;
if(val < -32768) val = -32768 ;
*ptr = val ;
ptr += channels;
}
*ptr = val ;
ptr += channels;
}
}
return 0 ;
@ -311,15 +311,15 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext,
av_log(avccontext, AV_LOG_DEBUG, "\n");*/
if(vorbis_synthesis(&context->vb, op) == 0)
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
vorbis_synthesis_blockin(&context->vd, &context->vb) ;
total_samples = 0 ;
total_bytes = 0 ;
while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
total_bytes += samples * 2 * context->vi.channels ;
total_samples += samples ;
conv(samples, pcm, (char*)data + total_bytes, context->vi.channels) ;
total_bytes += samples * 2 * context->vi.channels ;
total_samples += samples ;
vorbis_synthesis_read(&context->vd, samples) ;
}

View File

@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s)
//#define END_NOT_FOUND (-100)
#define PICTURE_START_CODE 0x00000100
#define SEQ_START_CODE 0x000001b3
#define EXT_START_CODE 0x000001b5
#define SLICE_MIN_START_CODE 0x00000101
#define SLICE_MAX_START_CODE 0x000001af
#define PICTURE_START_CODE 0x00000100
#define SEQ_START_CODE 0x000001b3
#define EXT_START_CODE 0x000001b5
#define SLICE_MIN_START_CODE 0x00000101
#define SLICE_MAX_START_CODE 0x000001af
typedef struct ParseContext1{
ParseContext pc;
@ -571,7 +571,7 @@ static int mpeg4video_split(AVCodecContext *avctx,
/*************************/
typedef struct MpegAudioParseContext {
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */
uint8_t *inbuf_ptr;
int frame_size;
int free_format_frame_size;
@ -608,8 +608,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
*poutbuf_size = 0;
buf_ptr = buf;
while (buf_size > 0) {
len = s->inbuf_ptr - s->inbuf;
if (s->frame_size == 0) {
len = s->inbuf_ptr - s->inbuf;
if (s->frame_size == 0) {
/* special case for next header for first frame in free
format case (XXX: find a simpler method) */
if (s->free_format_next_header != 0) {
@ -621,34 +621,34 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
s->free_format_next_header = 0;
goto got_header;
}
/* no header seen : find one. We need at least MPA_HEADER_SIZE
/* no header seen : find one. We need at least MPA_HEADER_SIZE
bytes to parse it */
len = MPA_HEADER_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len > 0) {
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
buf_size -= len;
s->inbuf_ptr += len;
}
if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
len = MPA_HEADER_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len > 0) {
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
buf_size -= len;
s->inbuf_ptr += len;
}
if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
got_header:
sr= avctx->sample_rate;
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
(s->inbuf[2] << 8) | s->inbuf[3];
header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
(s->inbuf[2] << 8) | s->inbuf[3];
ret = mpa_decode_header(avctx, header);
if (ret < 0) {
s->header_count= -2;
/* no sync found : move by one byte (inefficient, but simple!) */
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
/* no sync found : move by one byte (inefficient, but simple!) */
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
dprintf("skip %x\n", header);
/* reset free format frame size to give a chance
to get a new bitrate */
s->free_format_frame_size = 0;
} else {
} else {
if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
s->header_count= -3;
s->header= header;
@ -657,26 +657,26 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
#if 0
/* free format: prepare to compute frame size */
if (decode_header(s, header) == 1) {
s->frame_size = -1;
if (decode_header(s, header) == 1) {
s->frame_size = -1;
}
#endif
}
}
if(s->header_count <= 0)
avctx->sample_rate= sr; //FIXME ugly
}
}
} else
#if 0
if (s->frame_size == -1) {
/* free format : find next sync to compute frame size */
len = MPA_MAX_CODED_FRAME_SIZE - len;
if (len > buf_size)
len = buf_size;
len = MPA_MAX_CODED_FRAME_SIZE - len;
if (len > buf_size)
len = buf_size;
if (len == 0) {
/* frame too long: resync */
/* frame too long: resync */
s->frame_size = 0;
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
s->inbuf_ptr--;
} else {
uint8_t *p, *pend;
uint32_t header1;
@ -720,19 +720,19 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
s->inbuf_ptr += len;
buf_size -= len;
}
} else
} else
#endif
if (len < s->frame_size) {
if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
len = s->frame_size - len;
if (len > buf_size)
len = buf_size;
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
s->inbuf_ptr += len;
buf_size -= len;
}
len = s->frame_size - len;
if (len > buf_size)
len = buf_size;
memcpy(s->inbuf_ptr, buf_ptr, len);
buf_ptr += len;
s->inbuf_ptr += len;
buf_size -= len;
}
// next_data:
if (s->frame_size > 0 &&
(s->inbuf_ptr - s->inbuf) >= s->frame_size) {
@ -740,10 +740,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
*poutbuf = s->inbuf;
*poutbuf_size = s->inbuf_ptr - s->inbuf;
}
s->inbuf_ptr = s->inbuf;
s->frame_size = 0;
break;
}
s->inbuf_ptr = s->inbuf;
s->frame_size = 0;
break;
}
}
return buf_ptr - buf;
}
@ -783,7 +783,7 @@ static int ac3_parse(AVCodecParserContext *s1,
const uint8_t *buf_ptr;
int len, sample_rate, bit_rate;
static const int ac3_channels[8] = {
2, 1, 2, 3, 3, 4, 4, 5
2, 1, 2, 3, 3, 4, 4, 5
};
*poutbuf = NULL;
@ -812,7 +812,7 @@ static int ac3_parse(AVCodecParserContext *s1,
memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
s->inbuf_ptr--;
} else {
s->frame_size = len;
s->frame_size = len;
/* update codec info */
avctx->sample_rate = sample_rate;
/* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
@ -821,7 +821,7 @@ static int ac3_parse(AVCodecParserContext *s1,
if (s->flags & A52_LFE)
avctx->channels++;
}
avctx->bit_rate = bit_rate;
avctx->bit_rate = bit_rate;
avctx->frame_size = 6 * 256;
}
}

View File

@ -27,48 +27,48 @@
/* from g711.c by SUN microsystems (unrestricted use) */
#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
#define QUANT_MASK (0xf) /* Quantization field mask. */
#define NSEGS (8) /* Number of A-law segments. */
#define SEG_SHIFT (4) /* Left shift for segment number. */
#define SEG_MASK (0x70) /* Segment field mask. */
#define SIGN_BIT (0x80) /* Sign bit for a A-law byte. */
#define QUANT_MASK (0xf) /* Quantization field mask. */
#define NSEGS (8) /* Number of A-law segments. */
#define SEG_SHIFT (4) /* Left shift for segment number. */
#define SEG_MASK (0x70) /* Segment field mask. */
#define BIAS (0x84) /* Bias for linear code. */
#define BIAS (0x84) /* Bias for linear code. */
/*
* alaw2linear() - Convert an A-law value to 16-bit linear PCM
*
*/
static int alaw2linear(unsigned char a_val)
static int alaw2linear(unsigned char a_val)
{
int t;
int seg;
int t;
int seg;
a_val ^= 0x55;
a_val ^= 0x55;
t = a_val & QUANT_MASK;
seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
if(seg) t= (t + t + 1 + 32) << (seg + 2);
else t= (t + t + 1 ) << 3;
t = a_val & QUANT_MASK;
seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
if(seg) t= (t + t + 1 + 32) << (seg + 2);
else t= (t + t + 1 ) << 3;
return ((a_val & SIGN_BIT) ? t : -t);
return ((a_val & SIGN_BIT) ? t : -t);
}
static int ulaw2linear(unsigned char u_val)
static int ulaw2linear(unsigned char u_val)
{
int t;
int t;
/* Complement to obtain normal u-law value. */
u_val = ~u_val;
/* Complement to obtain normal u-law value. */
u_val = ~u_val;
/*
* Extract and bias the quantization bits. Then
* shift up by the segment number and subtract out the bias.
*/
t = ((u_val & QUANT_MASK) << 3) + BIAS;
t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
/*
* Extract and bias the quantization bits. Then
* shift up by the segment number and subtract out the bias.
*/
t = ((u_val & QUANT_MASK) << 3) + BIAS;
t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
}
/* 16384 entries per table */
@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us,
}
static int pcm_encode_frame(AVCodecContext *avctx,
unsigned char *frame, int buf_size, void *data)
unsigned char *frame, int buf_size, void *data)
{
int n, sample_size, v;
short *samples;
@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us,
}
static int pcm_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
uint8_t *buf, int buf_size)
void *data, int *data_size,
uint8_t *buf, int buf_size)
{
PCMDecode *s = avctx->priv_data;
int n;
@ -509,9 +509,9 @@ AVCodec name ## _encoder = { \
CODEC_TYPE_AUDIO, \
id, \
0, \
pcm_encode_init, \
pcm_encode_frame, \
pcm_encode_close, \
pcm_encode_init, \
pcm_encode_frame, \
pcm_encode_close, \
NULL, \
}; \
AVCodec name ## _decoder = { \
@ -519,7 +519,7 @@ AVCodec name ## _decoder = { \
CODEC_TYPE_AUDIO, \
id, \
sizeof(PCMDecode), \
pcm_decode_init, \
pcm_decode_init, \
NULL, \
NULL, \
pcm_decode_frame, \

View File

@ -67,7 +67,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
/*
Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15]
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
*/
tv = (vector unsigned char *) pix1;
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
fact to avoid a potentially expensive unaligned read, as well
as some splitting, and vector addition each time around the loop.
Read unaligned pixels into our vectors. The vectors are as follows:
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
Split the pixel vectors into shorts
*/
tv = (vector unsigned char *) &pix2[0];
@ -204,7 +204,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
/*
Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15]
pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16]
*/
tv = (vector unsigned char *) pix1;
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
@ -273,7 +273,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2 */
/* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
perm2 = vec_lvsl(0, pix2);
@ -281,12 +281,12 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
t1 = vec_perm(pix1v[0], pix1v[1], perm1);
t2 = vec_perm(pix2v[0], pix2v[1], perm2);
/* Calculate a sum of abs differences vector */
/* Calculate a sum of abs differences vector */
t3 = vec_max(t1, t2);
t4 = vec_min(t1, t2);
t5 = vec_sub(t3, t4);
/* Add each 4 pixel group together and put 4 results into sad */
/* Add each 4 pixel group together and put 4 results into sad */
sad = vec_sum4s(t5, sad);
pix1 += line_size;
@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
perm2 = vec_lvsl(0, pix2);
@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
/* Calculate a sum of abs differences vector */
/* Calculate a sum of abs differences vector */
t3 = vec_max(t1, t2);
t4 = vec_min(t1, t2);
t5 = vec_sub(t3, t4);
/* Add each 4 pixel group together and put 4 results into sad */
/* Add each 4 pixel group together and put 4 results into sad */
sad = vec_sum4s(t5, sad);
pix1 += line_size;
@ -398,9 +398,9 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
/* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
perm2 = vec_lvsl(0, pix2);
@ -413,7 +413,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
of the fact that abs(a-b)^2 = (a-b)^2.
*/
/* Calculate abs differences vector */
/* Calculate abs differences vector */
t3 = vec_max(t1, t2);
t4 = vec_min(t1, t2);
t5 = vec_sub(t3, t4);
@ -451,7 +451,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
sum = (vector unsigned int)vec_splat_u32(0);
for(i=0;i<h;i++) {
/* Read potentially unaligned pixels into t1 and t2 */
/* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1);
pix1v = (vector unsigned char *) pix1;
perm2 = vec_lvsl(0, pix2);
@ -464,7 +464,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
of the fact that abs(a-b)^2 = (a-b)^2.
*/
/* Calculate abs differences vector */
/* Calculate abs differences vector */
t3 = vec_max(t1, t2);
t4 = vec_min(t1, t2);
t5 = vec_sub(t3, t4);
@ -498,12 +498,12 @@ int pix_sum_altivec(uint8_t * pix, int line_size)
sad = (vector unsigned int)vec_splat_u32(0);
for (i = 0; i < 16; i++) {
/* Read the potentially unaligned 16 pixels into t1 */
/* Read the potentially unaligned 16 pixels into t1 */
perm = vec_lvsl(0, pix);
pixv = (vector unsigned char *) pix;
t1 = vec_perm(pixv[0], pixv[1], perm);
/* Add each 4 pixel group together and put 4 results into sad */
/* Add each 4 pixel group together and put 4 results into sad */
sad = vec_sum4s(t1, sad);
pix += line_size;
@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07);
#define ONEITERBUTTERFLY(i, res) \
{ \
register vector unsigned char src1, src2, srcO; \
register vector unsigned char dst1, dst2, dstO; \
src1 = vec_ld(stride * i, src); \
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
src2 = vec_ld((stride * i) + 16, src); \
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
dst2 = vec_ld((stride * i) + 16, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* promote the unsigned chars to signed shorts */ \
/* we're in the 8x8 function, we only care for the first 8 */ \
register vector signed short srcV = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstV = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
/* substractions inside the first butterfly */ \
register vector signed short but0 = vec_sub(srcV, dstV); \
register vector signed short op1 = vec_perm(but0, but0, perm1); \
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
register vector signed short op2 = vec_perm(but1, but1, perm2); \
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
register vector signed short op3 = vec_perm(but2, but2, perm3); \
res = vec_mladd(but2, vprod3, op3); \
#define ONEITERBUTTERFLY(i, res) \
{ \
register vector unsigned char src1, src2, srcO; \
register vector unsigned char dst1, dst2, dstO; \
src1 = vec_ld(stride * i, src); \
if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \
src2 = vec_ld((stride * i) + 16, src); \
srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \
if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8) \
dst2 = vec_ld((stride * i) + 16, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* promote the unsigned chars to signed shorts */ \
/* we're in the 8x8 function, we only care for the first 8 */ \
register vector signed short srcV = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstV = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
/* substractions inside the first butterfly */ \
register vector signed short but0 = vec_sub(srcV, dstV); \
register vector signed short op1 = vec_perm(but0, but0, perm1); \
register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
register vector signed short op2 = vec_perm(but1, but1, perm2); \
register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
register vector signed short op3 = vec_perm(but2, but2, perm3); \
res = vec_mladd(but2, vprod3, op3); \
}
ONEITERBUTTERFLY(0, temp0);
ONEITERBUTTERFLY(1, temp1);
@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07);
#define ONEITERBUTTERFLY(i, res1, res2) \
{ \
#define ONEITERBUTTERFLY(i, res1, res2) \
{ \
register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
src1 = vec_ld(stride * i, src); \
src2 = vec_ld((stride * i) + 16, src); \
src1 = vec_ld(stride * i, src); \
src2 = vec_ld((stride * i) + 16, src); \
register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
dst1 = vec_ld(stride * i, dst); \
dst2 = vec_ld((stride * i) + 16, dst); \
dst1 = vec_ld(stride * i, dst); \
dst2 = vec_ld((stride * i) + 16, dst); \
register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* promote the unsigned chars to signed shorts */ \
/* promote the unsigned chars to signed shorts */ \
register vector signed short srcV asm ("v24") = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstV asm ("v25") = \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
register vector signed short srcW asm ("v26") = \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
register vector signed short dstW asm ("v27") = \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
/* substractions inside the first butterfly */ \
(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
/* substractions inside the first butterfly */ \
register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
res1 = vec_mladd(but2, vprod3, op3); \
res1 = vec_mladd(but2, vprod3, op3); \
register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
res2 = vec_mladd(but2S, vprod3, op3S); \
res2 = vec_mladd(but2S, vprod3, op3S); \
}
ONEITERBUTTERFLY(0, temp0, temp0S);
ONEITERBUTTERFLY(1, temp1, temp1S);
@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
int has_altivec(void)
{
#ifdef __AMIGAOS4__
ULONG result = 0;
extern struct ExecIFace *IExec;
ULONG result = 0;
extern struct ExecIFace *IExec;
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
if (result == VECTORTYPE_ALTIVEC) return 1;
return 0;
IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
if (result == VECTORTYPE_ALTIVEC) return 1;
return 0;
#else /* __AMIGAOS4__ */
#ifdef CONFIG_DARWIN

View File

@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
/* from dsputil.c */
/*
 * put_pixels8_l2: for each of the h rows, combine the 8 bytes at
 * src1 + i*src_stride1 with the 8 bytes at src2 + i*src_stride2 using
 * rnd_avg32() and write the result to dst + i*dst_stride.  Each row is
 * processed as two 32-bit words (byte offsets 0 and 4).  The sources are read
 * through struct unaligned_32, while dst is written through a plain
 * uint32_t pointer.
 * rnd_avg32() is defined elsewhere -- presumably a rounding per-byte
 * average of two packed 32-bit words; confirm against dsputil.
 */
static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
int i;
for (i = 0; i < h; i++) {
uint32_t a, b;
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
}
int i;
for (i = 0; i < h; i++) {
uint32_t a, b;
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
}
} static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
/* avg_pixels8_l2: same row/word walk as put_pixels8_l2, but the
 * rnd_avg32() of the two sources is combined once more, via rnd_avg32(),
 * with the word already present in dst before being written back. */
int i;
for (i = 0; i < h; i++) {
uint32_t a, b;
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
}
int i;
for (i = 0; i < h; i++) {
uint32_t a, b;
a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
}
} static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
/* put_pixels16_l2: 16-byte-wide rows handled as two 8-byte-wide calls,
 * the second one shifted by 8 bytes in all three buffers. */
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
} static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
/* avg_pixels16_l2: 16-byte-wide rows handled as two avg_pixels8_l2 calls,
 * the second one shifted by 8 bytes in all three buffers. */
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
}
/* UNIMPLEMENTED YET !! */

View File

@ -87,16 +87,16 @@ void powerpc_display_perf_report(void)
{
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
{
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
av_log(NULL, AV_LOG_INFO,
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
perfname[i],
j+1,
perfdata[j][i][powerpc_data_min],
perfdata[j][i][powerpc_data_max],
(double)perfdata[j][i][powerpc_data_sum] /
(double)perfdata[j][i][powerpc_data_num],
perfdata[j][i][powerpc_data_num]);
if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
av_log(NULL, AV_LOG_INFO,
" Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
perfname[i],
j+1,
perfdata[j][i][powerpc_data_min],
perfdata[j][i][powerpc_data_max],
(double)perfdata[j][i][powerpc_data_sum] /
(double)perfdata[j][i][powerpc_data_num],
perfdata[j][i][powerpc_data_num]);
}
}
}
@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
}
else
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
}
#else
memset(blocks, 0, sizeof(DCTELEM)*6*64);
@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
c->gmc1 = gmc1_altivec;
c->gmc1 = gmc1_altivec;
#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
#endif
#ifdef CONFIG_ENCODERS
if (avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_ALTIVEC)
{
c->fdct = fdct_altivec;
}
if (avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_ALTIVEC)
{
c->fdct = fdct_altivec;
}
#endif //CONFIG_ENCODERS
if (avctx->lowres==0)
@ -325,14 +325,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
int i, j;
for (i = 0 ; i < powerpc_perf_total ; i++)
{
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
{
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
}
}
for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
{
perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
}
}
}
#endif /* POWERPC_PERFORMANCE_REPORT */
} else

View File

@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#endif /* POWERPC_MODE_64BITS */
/*
 * Declares the locals used by the performance-counter macros at a call site:
 * pmc_start[] and pmc_stop[] (POWERPC_NUM_PMC_ENABLED entries each) and
 * pmc_loop_index, all of type POWERP_PMC_DATATYPE.  The expansion ends in
 * ';' and is a declaration.  Neither parameter (a, cond) appears in the
 * expansion.
 * NOTE(review): the type macro is spelled POWERP_... (no 'C'); it is used
 * with the same spelling in the STOP_COUNT macro, so it is presumably
 * defined under that name -- confirm.
 */
#define POWERPC_PERF_DECLARE(a, cond) \
POWERP_PMC_DATATYPE \
pmc_start[POWERPC_NUM_PMC_ENABLED], \
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
#define POWERPC_PERF_DECLARE(a, cond) \
POWERP_PMC_DATATYPE \
pmc_start[POWERPC_NUM_PMC_ENABLED], \
pmc_stop[POWERPC_NUM_PMC_ENABLED], \
pmc_loop_index;
#define POWERPC_PERF_START_COUNT(a, cond) do { \
POWERPC_GET_PMC6(pmc_start[5]); \
@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
pmc_loop_index++) \
{ \
if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
{ \
POWERP_PMC_DATATYPE diff = \
{ \
POWERP_PMC_DATATYPE diff = \
pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \

View File

@ -65,8 +65,8 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
FFTComplex *exptab = s->exptab;
int l;
@ -147,8 +147,8 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
#endif
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
FFTComplex *cptr, *cptr1;
int k;

View File

@ -30,31 +30,31 @@
*/
/*
 * Byte merge-low built on vec_perm: the permute table picks bytes
 * 8..15 of A (indices 0x08..0x0f) and bytes 8..15 of B (indices 0x18..0x1f)
 * alternately, giving A[8], B[8], A[9], B[9], ..., A[15], B[15].
 */
static inline vector signed char ff_vmrglb (vector signed char const A,
vector signed char const B)
vector signed char const B)
{
static const vector unsigned char lowbyte = {
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b, 0x1b,
0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
};
return vec_perm (A, B, lowbyte);
}
/*
 * Halfword merge-low built on vec_perm: the permute table selects byte
 * pairs 8-9, 10-11, 12-13, 14-15 of A (indices 0x08..0x0f) alternating with
 * the same byte pairs of B (indices 0x18..0x1f), i.e. 16-bit elements
 * A[4], B[4], A[5], B[5], A[6], B[6], A[7], B[7].
 */
static inline vector signed short ff_vmrglh (vector signed short const A,
vector signed short const B)
vector signed short const B)
{
static const vector unsigned char lowhalf = {
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
};
return vec_perm (A, B, lowhalf);
}
/*
 * Word merge-low built on vec_perm: the permute table selects bytes
 * 8-11 and 12-15 of A (indices 0x08..0x0f) alternating with the same byte
 * groups of B (indices 0x18..0x1f), i.e. 32-bit elements
 * A[2], B[2], A[3], B[3].
 */
static inline vector signed int ff_vmrglw (vector signed int const A,
vector signed int const B)
vector signed int const B)
{
static const vector unsigned char lowword = {
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
};
return vec_perm (A, B, lowword);
}

View File

@ -51,108 +51,108 @@
#define vector_s32_t vector signed int
#define vector_u32_t vector unsigned int
/*
 * IDCT_HALF: one four-stage pass that reads vx0..vx7 and writes vy0..vy7.
 * It is a bare statement sequence (no do/while wrapper, no declarations):
 * the expansion site must already have vx0..vx7, vy0..vy7, the temporaries
 * t0..t8 and the constants a0, a1, a2, ma2, c4, mc4 and zero in scope (the
 * IDCT macro declares and initialises all of them).
 * Only vec_adds, vec_subs and vec_mradds are used; t1, t3, t7 and t8 are
 * reassigned between stages, so statement order matters.
 */
#define IDCT_HALF \
/* 1st stage */ \
t1 = vec_mradds (a1, vx7, vx1 ); \
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
t7 = vec_mradds (a2, vx5, vx3); \
t3 = vec_mradds (ma2, vx3, vx5); \
\
/* 2nd stage */ \
t5 = vec_adds (vx0, vx4); \
t0 = vec_subs (vx0, vx4); \
t2 = vec_mradds (a0, vx6, vx2); \
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
t6 = vec_adds (t8, t3); \
t3 = vec_subs (t8, t3); \
t8 = vec_subs (t1, t7); \
t1 = vec_adds (t1, t7); \
\
/* 3rd stage */ \
t7 = vec_adds (t5, t2); \
t2 = vec_subs (t5, t2); \
t5 = vec_adds (t0, t4); \
t0 = vec_subs (t0, t4); \
t4 = vec_subs (t8, t3); \
t3 = vec_adds (t8, t3); \
\
/* 4th stage */ \
vy0 = vec_adds (t7, t1); \
vy7 = vec_subs (t7, t1); \
vy1 = vec_mradds (c4, t3, t5); \
vy6 = vec_mradds (mc4, t3, t5); \
vy2 = vec_mradds (c4, t4, t0); \
vy5 = vec_mradds (mc4, t4, t0); \
vy3 = vec_adds (t2, t6); \
#define IDCT_HALF \
/* 1st stage */ \
t1 = vec_mradds (a1, vx7, vx1 ); \
t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
t7 = vec_mradds (a2, vx5, vx3); \
t3 = vec_mradds (ma2, vx3, vx5); \
\
/* 2nd stage */ \
t5 = vec_adds (vx0, vx4); \
t0 = vec_subs (vx0, vx4); \
t2 = vec_mradds (a0, vx6, vx2); \
t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
t6 = vec_adds (t8, t3); \
t3 = vec_subs (t8, t3); \
t8 = vec_subs (t1, t7); \
t1 = vec_adds (t1, t7); \
\
/* 3rd stage */ \
t7 = vec_adds (t5, t2); \
t2 = vec_subs (t5, t2); \
t5 = vec_adds (t0, t4); \
t0 = vec_subs (t0, t4); \
t4 = vec_subs (t8, t3); \
t3 = vec_adds (t8, t3); \
\
/* 4th stage */ \
vy0 = vec_adds (t7, t1); \
vy7 = vec_subs (t7, t1); \
vy1 = vec_mradds (c4, t3, t5); \
vy6 = vec_mradds (mc4, t3, t5); \
vy2 = vec_mradds (c4, t4, t0); \
vy5 = vec_mradds (mc4, t4, t0); \
vy3 = vec_adds (t2, t6); \
vy4 = vec_subs (t2, t6);
/*
 * IDCT: complete 8x8 inverse DCT body.  Expects `block` (eight vectors of
 * 16-bit values, block[0..7]) and `constants` (constants[0..4]) to be in
 * scope at the expansion site.  The expansion begins with declarations
 * (vx*, vy*, the scalar-splat constants, t0..t8, shift), so it has to be
 * placed where declarations are allowed.  Steps, in order:
 *   1. splat c4, a0, a1, a2, mc4, ma2 and bias out of constants[0];
 *   2. prescale each input row: shift left by 4, then vec_mradds with
 *      constants[1..4] (rows 0/4 use [1], 1/7 use [2], 2/6 use [3],
 *      3/5 use [4]);
 *   3. IDCT_HALF (first pass);
 *   4. three rounds of vec_mergeh/vec_mergel to reorder the data between
 *      passes (presumably an 8x8 transpose -- confirm); bias is added to
 *      vx0 in the last round;
 *   5. IDCT_HALF (second pass);
 *   6. arithmetic shift right by 6, leaving the final rows in vx0..vx7.
 */
#define IDCT \
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vector_u16_t shift; \
\
c4 = vec_splat (constants[0], 0); \
a0 = vec_splat (constants[0], 1); \
a1 = vec_splat (constants[0], 2); \
a2 = vec_splat (constants[0], 3); \
mc4 = vec_splat (constants[0], 4); \
ma2 = vec_splat (constants[0], 5); \
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
\
zero = vec_splat_s16 (0); \
shift = vec_splat_u16 (4); \
\
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
\
IDCT_HALF \
\
vx0 = vec_mergeh (vy0, vy4); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
\
vy0 = vec_mergeh (vx0, vx4); \
vy1 = vec_mergel (vx0, vx4); \
vy2 = vec_mergeh (vx1, vx5); \
vy3 = vec_mergel (vx1, vx5); \
vy4 = vec_mergeh (vx2, vx6); \
vy5 = vec_mergel (vx2, vx6); \
vy6 = vec_mergeh (vx3, vx7); \
vy7 = vec_mergel (vx3, vx7); \
\
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
\
IDCT_HALF \
\
shift = vec_splat_u16 (6); \
vx0 = vec_sra (vy0, shift); \
vx1 = vec_sra (vy1, shift); \
vx2 = vec_sra (vy2, shift); \
vx3 = vec_sra (vy3, shift); \
vx4 = vec_sra (vy4, shift); \
vx5 = vec_sra (vy5, shift); \
vx6 = vec_sra (vy6, shift); \
#define IDCT \
vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vector_u16_t shift; \
\
c4 = vec_splat (constants[0], 0); \
a0 = vec_splat (constants[0], 1); \
a1 = vec_splat (constants[0], 2); \
a2 = vec_splat (constants[0], 3); \
mc4 = vec_splat (constants[0], 4); \
ma2 = vec_splat (constants[0], 5); \
bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
\
zero = vec_splat_s16 (0); \
shift = vec_splat_u16 (4); \
\
vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \
vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \
vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \
vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \
vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \
vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \
vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \
vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \
\
IDCT_HALF \
\
vx0 = vec_mergeh (vy0, vy4); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
\
vy0 = vec_mergeh (vx0, vx4); \
vy1 = vec_mergel (vx0, vx4); \
vy2 = vec_mergeh (vx1, vx5); \
vy3 = vec_mergel (vx1, vx5); \
vy4 = vec_mergeh (vx2, vx6); \
vy5 = vec_mergel (vx2, vx6); \
vy6 = vec_mergeh (vx3, vx7); \
vy7 = vec_mergel (vx3, vx7); \
\
vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \
vx1 = vec_mergel (vy0, vy4); \
vx2 = vec_mergeh (vy1, vy5); \
vx3 = vec_mergel (vy1, vy5); \
vx4 = vec_mergeh (vy2, vy6); \
vx5 = vec_mergel (vy2, vy6); \
vx6 = vec_mergeh (vy3, vy7); \
vx7 = vec_mergel (vy3, vy7); \
\
IDCT_HALF \
\
shift = vec_splat_u16 (6); \
vx0 = vec_sra (vy0, shift); \
vx1 = vec_sra (vy1, shift); \
vx2 = vec_sra (vy2, shift); \
vx3 = vec_sra (vy3, shift); \
vx4 = vec_sra (vy4, shift); \
vx5 = vec_sra (vy5, shift); \
vx6 = vec_sra (vy6, shift); \
vx7 = vec_sra (vy7, shift);
@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
#endif
IDCT
/*
 * COPY(dest, src): pack the 16-bit vector src to unsigned bytes with
 * vec_packsu (src is used for both halves) into `tmp`, then issue two
 * vec_ste element stores of tmp viewed as 32-bit words, at byte offsets 0
 * and 4 from dest -- one 8-byte output row.  `tmp` must be declared by the
 * enclosing function.  The expansion is a bare statement sequence ending in
 * ';', which is why the call sites carry no extra semicolon.
 */
#define COPY(dest,src) \
tmp = vec_packsu (src, src); \
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
#define COPY(dest,src) \
tmp = vec_packsu (src, src); \
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
COPY (dest, vx0) dest += stride;
COPY (dest, vx1) dest += stride;
COPY (dest, vx2) dest += stride;
COPY (dest, vx3) dest += stride;
COPY (dest, vx4) dest += stride;
COPY (dest, vx5) dest += stride;
COPY (dest, vx6) dest += stride;
COPY (dest, vx0) dest += stride;
COPY (dest, vx1) dest += stride;
COPY (dest, vx2) dest += stride;
COPY (dest, vx3) dest += stride;
COPY (dest, vx4) dest += stride;
COPY (dest, vx5) dest += stride;
COPY (dest, vx6) dest += stride;
COPY (dest, vx7)
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
perm0 = vec_mergeh (p, p0);
perm1 = vec_mergeh (p, p1);
/*
 * ADD(dest, src, perm): load the vector at dest with vec_ld, use vec_perm
 * with the `zero` vector and the given permute table to build a 16-bit
 * vector from it (tmp2), add src with saturation (vec_adds), pack back to
 * unsigned bytes (vec_packsu) and store 8 bytes to dest with two vec_ste
 * element stores at byte offsets 0 and 4.  `tmp`, `tmp2`, `tmp3` and `zero`
 * must be in scope at the expansion site.  The callers alternate perm0 and
 * perm1 from row to row.
 */
#define ADD(dest,src,perm) \
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
tmp = vec_ld (0, dest); \
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
tmp3 = vec_adds (tmp2, src); \
tmp = vec_packsu (tmp3, tmp3); \
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
#define ADD(dest,src,perm) \
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
tmp = vec_ld (0, dest); \
tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
tmp3 = vec_adds (tmp2, src); \
tmp = vec_packsu (tmp3, tmp3); \
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
ADD (dest, vx0, perm0) dest += stride;
ADD (dest, vx1, perm1) dest += stride;
ADD (dest, vx2, perm0) dest += stride;
ADD (dest, vx3, perm1) dest += stride;
ADD (dest, vx4, perm0) dest += stride;
ADD (dest, vx5, perm1) dest += stride;
ADD (dest, vx6, perm0) dest += stride;
ADD (dest, vx0, perm0) dest += stride;
ADD (dest, vx1, perm1) dest += stride;
ADD (dest, vx2, perm0) dest += stride;
ADD (dest, vx3, perm1) dest += stride;
ADD (dest, vx4, perm0) dest += stride;
ADD (dest, vx5, perm1) dest += stride;
ADD (dest, vx6, perm0) dest += stride;
ADD (dest, vx7, perm1)
POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);

View File

@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s,
}
// The following block could exist as a separate altivec dct
// function. However, if we put it inline, the DCT data can remain
// in the vector local variables, as floats, which we'll use during the
// quantize step...
// function. However, if we put it inline, the DCT data can remain
// in the vector local variables, as floats, which we'll use during the
// quantize step...
{
const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s,
z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
// dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
// CONST_BITS-PASS1_BITS);
// CONST_BITS-PASS1_BITS);
row2 = vec_madd(tmp13, vec_0_765366865, z1);
// dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
// CONST_BITS-PASS1_BITS);
// CONST_BITS-PASS1_BITS);
row6 = vec_madd(tmp12, vec_1_847759065, z1);
z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s,
}
// Load the bias vector (We add 0.5 to the bias so that we're
// rounding when we convert to int, instead of flooring.)
// rounding when we convert to int, instead of flooring.)
{
vector signed int biasInt;
const vector float negOneFloat = (vector float)FOUROF(-1.0f);

View File

@ -80,7 +80,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
"pextlb $10, $0, $10 \n\t"
"sq $10, 80(%1) \n\t"
"pextlb $8, $0, $8 \n\t"
"sq $8, 96(%1) \n\t"
"sq $8, 96(%1) \n\t"
"pextlb $9, $0, $9 \n\t"
"sq $9, 112(%1) \n\t"
".set pop \n\t"
@ -112,7 +112,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
asm volatile (
".set push \n\t"
".set mips3 \n\t"
"1: \n\t"
"1: \n\t"
"ldr $8, 0(%1) \n\t"
"add $11, %1, %3 \n\t"
"ldl $8, 7(%1) \n\t"
@ -133,7 +133,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
"bgtz %2, 1b \n\t"
".set pop \n\t"
: "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
: "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
}

View File

@ -15,32 +15,32 @@
#include "../dsputil.h"
#include "mmi.h"
/* Accumulator precision and the right-shift amounts derived from it for the
 * row pass (SHIFT_INV_ROW) and the column pass (SHIFT_INV_COL). */
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
#define BITS_INV_ACC 5 // 4 or 5 for IEEE
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
#define SHIFT_INV_COL (1 + BITS_INV_ACC)
#define TG1 6518
#define TG2 13573
#define TG3 21895
#define CS4 23170
#define TG1 6518
#define TG2 13573
#define TG3 21895
#define CS4 23170
/* The names below are used as offsets in lq() loads relative to $24, which
 * the idct entry points set to the address of consttable[0]; they are
 * presumably byte offsets into consttable (lq is defined in mmi.h --
 * confirm).  ROUNDER_*: rounding vectors passed as `rnd` to the row pass. */
#define ROUNDER_0 0
#define ROUNDER_1 16
#define ROUNDER_0 0
#define ROUNDER_1 16
/* Row-pass weight tables, passed as `taboff` to DCT_8_INV_ROW1. */
#define TAB_i_04 (32+0)
#define TAB_i_17 (32+64)
#define TAB_i_26 (32+128)
#define TAB_i_35 (32+192)
#define TAB_i_04 (32+0)
#define TAB_i_17 (32+64)
#define TAB_i_26 (32+128)
#define TAB_i_35 (32+192)
/* Column-pass multiplier vectors, loaded by DCT_8_INV_COL8. */
#define TG_1_16 (32+256+0)
#define TG_2_16 (32+256+16)
#define TG_3_16 (32+256+32)
#define COS_4_16 (32+256+48)
#define TG_1_16 (32+256+0)
#define TG_2_16 (32+256+16)
#define TG_3_16 (32+256+32)
#define COS_4_16 (32+256+48)
/* Upper clamp vector, loaded into $11 before the PUT/ADD output macros. */
#define CLIPMAX (32+256+64+0)
#define CLIPMAX (32+256+64+0)
static short consttable[] align16 = {
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
/* rounder 0*/ // assume SHIFT_INV_ROW == 11
0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
/* rounder 1*/
0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
@ -75,274 +75,274 @@ static short consttable[] align16 = {
/*
 * DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg): row pass for one row.
 *   blk    - register holding the coefficient block address
 *   rowoff - offset of the row inside the block
 *   taboff - offset of this row's four weight vectors (taboff+0/16/32/48)
 *            relative to $24
 *   rnd    - register holding the rounding vector added to the a-terms
 *   outreg - register receiving the eight 16-bit results
 * Per the inline comments: the row is loaded into $16, a half-word-reversed
 * copy goes to $2, four phmadh multiply-adds plus two paddw build the
 * a0..a3 and b0..b3 sums, then (a+b) and (a-b) are shifted right by
 * SHIFT_INV_ROW and packed into outreg.  The trailing prevh/pcpyud/pcpyld
 * rewrite outreg once more, presumably to put the packed results
 * ("y4 y5 y6 y7 y3 y2 y1 y0 Note order") into natural order -- confirm.
 * Scratch registers written: $2, $16, $17, $18, $19, $20.
 */
#define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
/*slot*/ \
lq($24, 0+taboff, $17); /* r17 = w */ \
/*delay slot $16*/ \
lq($24, 16+taboff, $18);/* r18 = w */ \
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
lq($24, 32+taboff, $19);/* r19 = w */ \
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
lq($24, 48+taboff, $20);/* r20 = w */ \
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
lq(blk, rowoff, $16); /* r16 = x7 x5 x3 x1 x6 x4 x2 x0 */ \
/*slot*/ \
lq($24, 0+taboff, $17); /* r17 = w */ \
/*delay slot $16*/ \
lq($24, 16+taboff, $18);/* r18 = w */ \
prevh($16, $2); /* r2 = x1 x3 x5 x7 x0 x2 x4 x6 */ \
lq($24, 32+taboff, $19);/* r19 = w */ \
phmadh($17, $16, $17); /* r17 = b1"b0'a1"a0' */ \
lq($24, 48+taboff, $20);/* r20 = w */ \
phmadh($18, $2, $18); /* r18 = b1'b0"a1'a0" */ \
phmadh($19, $16, $19); /* r19 = b3"b2'a3"a2' */ \
phmadh($20, $2, $20); /* r20 = b3'b2"a3'a2" */ \
paddw($17, $18, $17); /* r17 = (b1)(b0)(a1)(a0) */ \
paddw($19, $20, $19); /* r19 = (b3)(b2)(a3)(a2) */ \
pcpyld($19, $17, $18); /* r18 = (a3)(a2)(a1)(a0) */ \
pcpyud($17, $19, $20); /* r20 = (b3)(b2)(b1)(b0) */ \
paddw($18, rnd, $18); /* r18 = (a3)(a2)(a1)(a0) */\
paddw($18, $20, $17); /* r17 = ()()()(a0+b0) */ \
psubw($18, $20, $20); /* r20 = ()()()(a0-b0) */ \
psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
psraw($20, SHIFT_INV_ROW, $20); /* r20 = (y4 y5 y6 y7) */ \
ppach($20, $17, outreg);/* out = y4 y5 y6 y7 y3 y2 y1 y0 Note order */ \
\
prevh(outreg, $2); \
pcpyud($2, $2, $2); \
pcpyld($2, outreg, outreg); \
prevh(outreg, $2); \
pcpyud($2, $2, $2); \
pcpyld($2, outreg, outreg); \
}
/*
 * DCT_8_INV_COL8(): first half of the column pass.
 * Inputs: the eight row-pass results in $8..$15 (the comments call them
 * x0..x7: x0=$8, x1=$9, ... x7=$15) and the constant table address in $24.
 * Outputs: even-part terms a0..a3 in $16, $17, $18, $19 and odd-part terms
 * b0..b3 in $20, $21, $22, $23.  $2 and $3 are scratch.
 * Every 16-bit multiply follows the same five-instruction pattern: pmulth
 * yields the products for lanes 6,4,2,0, pmfhl_uw fetches lanes 7,5,3,1
 * into $3, both are shifted right by 15, and pinteh interleaves them back
 * into one vector.
 * Neither DCT_8_INV_COL8_STORE nor DCT_8_INV_COL8_PMS reloads these
 * registers, so one of them must follow directly.
 */
#define DCT_8_INV_COL8() \
\
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
lq($24, TG_3_16, $2); /* r2 = tn3 */ \
\
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
psraw($17, 15, $17); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
psubh($17, $13, $17); /* r17 = tm35 */ \
pmulth($11, $2, $17); /* r17 = x3 * tn3 (6420) */ \
psraw($17, 15, $17); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $17, $17); /* r17 = x3 * tn3 */ \
psubh($17, $13, $17); /* r17 = tm35 */ \
\
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
psraw($18, 15, $18); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
paddh($18, $11, $18); /* r18 = tp35 */ \
pmulth($13, $2, $18); /* r18 = x5 * tn3 (6420) */ \
psraw($18, 15, $18); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $18, $18); /* r18 = x5 * tn3 */ \
paddh($18, $11, $18); /* r18 = tp35 */ \
\
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
lq($24, TG_1_16, $2); /* r2 = tn1 */ \
\
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
psraw($19, 15, $19); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
paddh($19, $9, $19); /* r19 = tp17 */ \
pmulth($15, $2, $19); /* r19 = x7 * tn1 (6420) */ \
psraw($19, 15, $19); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $19, $19); /* r19 = x7 * tn1 */ \
paddh($19, $9, $19); /* r19 = tp17 */ \
\
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
psraw($20, 15, $20); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
psubh($20, $15, $20); /* r20 = tm17 */ \
pmulth($9, $2, $20); /* r20 = x1 * tn1 (6420) */ \
psraw($20, 15, $20); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $20, $20); /* r20 = x1 * tn1 */ \
psubh($20, $15, $20); /* r20 = tm17 */ \
\
psubh($19, $18, $3); /* r3 = t1 */ \
paddh($20, $17, $16); /* r16 = t2 */ \
psubh($20, $17, $23); /* r23 = b3 */ \
paddh($19, $18, $20); /* r20 = b0 */ \
psubh($19, $18, $3); /* r3 = t1 */ \
paddh($20, $17, $16); /* r16 = t2 */ \
psubh($20, $17, $23); /* r23 = b3 */ \
paddh($19, $18, $20); /* r20 = b0 */ \
\
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
lq($24, COS_4_16, $2); /* r2 = cs4 */ \
\
paddh($3, $16, $21); /* r21 = t1+t2 */ \
psubh($3, $16, $22); /* r22 = t1-t2 */ \
paddh($3, $16, $21); /* r21 = t1+t2 */ \
psubh($3, $16, $22); /* r22 = t1-t2 */ \
\
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
psraw($21, 15, $21); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $21, $21); /* r21 = b1 */ \
pmulth($21, $2, $21); /* r21 = cs4 * (t1+t2) 6420 */ \
psraw($21, 15, $21); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $21, $21); /* r21 = b1 */ \
\
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
psraw($22, 15, $22); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $22, $22); /* r22 = b2 */ \
pmulth($22, $2, $22); /* r22 = cs4 * (t1-t2) 6420 */ \
psraw($22, 15, $22); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $22, $22); /* r22 = b2 */ \
\
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
lq($24, TG_2_16, $2); /* r2 = tn2 */ \
\
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
psraw($17, 15, $17); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \
psubh($17, $14, $17); /* r17 = tm26 */ \
pmulth($10, $2, $17); /* r17 = x2 * tn2 (6420) */ \
psraw($17, 15, $17); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $17, $17); /* r17 = x2 * tn2 */ \
psubh($17, $14, $17); /* r17 = tm26 */ \
\
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
psraw($18, 15, $18); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
paddh($18, $10, $18); /* r18 = tp26 */ \
pmulth($14, $2, $18); /* r18 = x6 * tn2 (6420) */ \
psraw($18, 15, $18); \
pmfhl_uw($3); /* r3 = 7531 */ \
psraw($3, 15, $3); \
pinteh($3, $18, $18); /* r18 = x6 * tn2 */ \
paddh($18, $10, $18); /* r18 = tp26 */ \
\
paddh($8, $12, $2); /* r2 = tp04 */ \
psubh($8, $12, $3); /* r3 = tm04 */ \
paddh($8, $12, $2); /* r2 = tp04 */ \
psubh($8, $12, $3); /* r3 = tm04 */ \
\
paddh($2, $18, $16); /* r16 = a0 */ \
psubh($2, $18, $19); /* r19 = a3 */ \
psubh($3, $17, $18); /* r18 = a2 */ \
paddh($3, $17, $17); /* r17 = a1 */
paddh($2, $18, $16); /* r16 = a0 */ \
psubh($2, $18, $19); /* r19 = a3 */ \
psubh($3, $17, $18); /* r18 = a2 */ \
paddh($3, $17, $17); /* r17 = a1 */
/*
 * DCT_8_INV_COL8_STORE(blk): final column butterflies, written to memory.
 * Takes a0..a3 in $16..$19 and b0..b3 in $20..$23 (as left by
 * DCT_8_INV_COL8) and, for k = 0..3, computes
 *   y[k]   = (a_k + b_k) >> SHIFT_INV_COL
 *   y[7-k] = (a_k - b_k) >> SHIFT_INV_COL
 * storing the rows with sq at offsets 0, 16, 32, 48 (y0..y3) and
 * 112, 96, 80, 64 (y7..y4) from blk.  $2 and $3 hold the sums, and the
 * a-registers are overwritten with the differences.
 */
#define DCT_8_INV_COL8_STORE(blk) \
\
paddh($16, $20, $2); /* y0 a0+b0 */ \
psubh($16, $20, $16); /* y7 a0-b0 */ \
psrah($2, SHIFT_INV_COL, $2); \
psrah($16, SHIFT_INV_COL, $16); \
sq($2, 0, blk); \
sq($16, 112, blk); \
paddh($16, $20, $2); /* y0 a0+b0 */ \
psubh($16, $20, $16); /* y7 a0-b0 */ \
psrah($2, SHIFT_INV_COL, $2); \
psrah($16, SHIFT_INV_COL, $16); \
sq($2, 0, blk); \
sq($16, 112, blk); \
\
paddh($17, $21, $3); /* y1 a1+b1 */ \
psubh($17, $21, $17); /* y6 a1-b1 */ \
psrah($3, SHIFT_INV_COL, $3); \
psrah($17, SHIFT_INV_COL, $17); \
sq($3, 16, blk); \
sq($17, 96, blk); \
paddh($17, $21, $3); /* y1 a1+b1 */ \
psubh($17, $21, $17); /* y6 a1-b1 */ \
psrah($3, SHIFT_INV_COL, $3); \
psrah($17, SHIFT_INV_COL, $17); \
sq($3, 16, blk); \
sq($17, 96, blk); \
\
paddh($18, $22, $2); /* y2 a2+b2 */ \
psubh($18, $22, $18); /* y5 a2-b2 */ \
psrah($2, SHIFT_INV_COL, $2); \
psrah($18, SHIFT_INV_COL, $18); \
sq($2, 32, blk); \
sq($18, 80, blk); \
paddh($18, $22, $2); /* y2 a2+b2 */ \
psubh($18, $22, $18); /* y5 a2-b2 */ \
psrah($2, SHIFT_INV_COL, $2); \
psrah($18, SHIFT_INV_COL, $18); \
sq($2, 32, blk); \
sq($18, 80, blk); \
\
paddh($19, $23, $3); /* y3 a3+b3 */ \
psubh($19, $23, $19); /* y4 a3-b3 */ \
psrah($3, SHIFT_INV_COL, $3); \
psrah($19, SHIFT_INV_COL, $19); \
sq($3, 48, blk); \
sq($19, 64, blk);
paddh($19, $23, $3); /* y3 a3+b3 */ \
psubh($19, $23, $19); /* y4 a3-b3 */ \
psrah($3, SHIFT_INV_COL, $3); \
psrah($19, SHIFT_INV_COL, $19); \
sq($3, 48, blk); \
sq($19, 64, blk);
/*
 * DCT_8_INV_COL8_PMS(): same final butterflies as DCT_8_INV_COL8_STORE, but
 * the results stay in registers instead of being stored:
 *   y0..y3 -> $16, $17, $18, $19   ((a_k + b_k) >> SHIFT_INV_COL)
 *   y7..y4 -> $20, $21, $22, $23   ((a_k - b_k) >> SHIFT_INV_COL)
 * $2 and $3 are scratch.  DCT_8_INV_COL8_PUT / DCT_8_INV_COL8_ADD consume
 * the registers in the order $16,$17,$18,$19,$23,$22,$21,$20 (y0..y7).
 */
#define DCT_8_INV_COL8_PMS() \
paddh($16, $20, $2); /* y0 a0+b0 */ \
psubh($16, $20, $20); /* y7 a0-b0 */ \
psrah($2, SHIFT_INV_COL, $16); \
psrah($20, SHIFT_INV_COL, $20); \
paddh($16, $20, $2); /* y0 a0+b0 */ \
psubh($16, $20, $20); /* y7 a0-b0 */ \
psrah($2, SHIFT_INV_COL, $16); \
psrah($20, SHIFT_INV_COL, $20); \
\
paddh($17, $21, $3); /* y1 a1+b1 */ \
psubh($17, $21, $21); /* y6 a1-b1 */ \
psrah($3, SHIFT_INV_COL, $17); \
psrah($21, SHIFT_INV_COL, $21); \
paddh($17, $21, $3); /* y1 a1+b1 */ \
psubh($17, $21, $21); /* y6 a1-b1 */ \
psrah($3, SHIFT_INV_COL, $17); \
psrah($21, SHIFT_INV_COL, $21); \
\
paddh($18, $22, $2); /* y2 a2+b2 */ \
psubh($18, $22, $22); /* y5 a2-b2 */ \
psrah($2, SHIFT_INV_COL, $18); \
psrah($22, SHIFT_INV_COL, $22); \
paddh($18, $22, $2); /* y2 a2+b2 */ \
psubh($18, $22, $22); /* y5 a2-b2 */ \
psrah($2, SHIFT_INV_COL, $18); \
psrah($22, SHIFT_INV_COL, $22); \
\
paddh($19, $23, $3); /* y3 a3+b3 */ \
psubh($19, $23, $23); /* y4 a3-b3 */ \
psrah($3, SHIFT_INV_COL, $19); \
psrah($23, SHIFT_INV_COL, $23);
paddh($19, $23, $3); /* y3 a3+b3 */ \
psubh($19, $23, $23); /* y4 a3-b3 */ \
psrah($3, SHIFT_INV_COL, $19); \
psrah($23, SHIFT_INV_COL, $23);
/*
 * PUT(rs): emit one output row from the 16-bit results in register rs.
 * Clamps against $11 with pminh (the caller loads CLIPMAX into $11) and
 * against $0 with pmaxh, packs to bytes with ppacb, stores through sd3
 * using registers 2 and 4 (presumably an 8-byte store of $2 to the address
 * in $4 -- sd3 is defined in mmi.h, confirm), then adds $5 to $4.  The idct
 * entry points document $4 = dest and $5 = line_size, so the last step moves
 * to the next row.  $2 is scratch.
 */
#define PUT(rs) \
pminh(rs, $11, $2); \
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
__asm__ __volatile__ ("add $4, $5, $4");
#define PUT(rs) \
pminh(rs, $11, $2); \
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
__asm__ __volatile__ ("add $4, $5, $4");
/*
 * DCT_8_INV_COL8_PUT(): write all eight rows with PUT.  The register order
 * $16,$17,$18,$19,$23,$22,$21,$20 matches y0..y7 as left by
 * DCT_8_INV_COL8_PMS.
 */
#define DCT_8_INV_COL8_PUT() \
PUT($16); \
PUT($17); \
PUT($18); \
PUT($19); \
PUT($23); \
PUT($22); \
PUT($21); \
PUT($20);
PUT($16); \
PUT($17); \
PUT($18); \
PUT($19); \
PUT($23); \
PUT($22); \
PUT($21); \
PUT($20);
/*
 * ADD(rs): add one row of 16-bit results in register rs to the existing
 * destination row.  Loads through ld3 using registers 4 and 2 (presumably
 * 8 bytes from the address in $4 into $2 -- ld3 is defined in mmi.h,
 * confirm), widens the bytes with pextlb against $0, adds rs, clamps
 * against $11 (pminh) and $0 (pmaxh), packs back to bytes, stores through
 * sd3 and then adds $5 to $4.  The idct entry points document $4 = dest and
 * $5 = line_size.  $2 is scratch.
 */
#define ADD(rs) \
ld3(4, 0, 2); \
pextlb($0, $2, $2); \
paddh($2, rs, $2); \
pminh($2, $11, $2); \
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
__asm__ __volatile__ ("add $4, $5, $4");
#define ADD(rs) \
ld3(4, 0, 2); \
pextlb($0, $2, $2); \
paddh($2, rs, $2); \
pminh($2, $11, $2); \
pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \
sd3(2, 0, 4); \
__asm__ __volatile__ ("add $4, $5, $4");
/*fixme: schedule*/
/*
 * DCT_8_INV_COL8_ADD(): add all eight rows to the destination with ADD.
 * The register order $16,$17,$18,$19,$23,$22,$21,$20 matches y0..y7 as
 * left by DCT_8_INV_COL8_PMS.
 */
#define DCT_8_INV_COL8_ADD() \
ADD($16); \
ADD($17); \
ADD($18); \
ADD($19); \
ADD($23); \
ADD($22); \
ADD($21); \
ADD($20);
ADD($16); \
ADD($17); \
ADD($18); \
ADD($19); \
ADD($23); \
ADD($22); \
ADD($21); \
ADD($20);
/*
 * In-place 8x8 inverse DCT of `block`.
 * `block` is never passed to the asm as an operand: the code uses register
 * $4 directly, as the "$4 = block" comment states, so it depends on the
 * argument still being in that register.
 * Sequence: point $24 at consttable, load the two rounders ($8 for row 0,
 * $7 for rows 1-7), run the row pass on the eight rows (offsets 0..112,
 * results in $8..$15; tables in the order 04,17,26,35,04,35,26,17), then the
 * column pass, and store the result back over the block.
 * The empty asm statement at the end only carries a clobber list for
 * $16-$23, so that the compiler treats those registers as modified.
 */
void ff_mmi_idct(int16_t * block)
{
/* $4 = block */
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
DCT_8_INV_COL8();
DCT_8_INV_COL8_STORE($4);
/* $4 = block */
__asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
DCT_8_INV_COL8();
DCT_8_INV_COL8_STORE($4);
//let savedtemp regs be saved
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
//let savedtemp regs be saved
__asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}
/**
 * Inverse DCT of one coefficient block, with the result written to a
 * byte destination.
 *
 * The inline-assembly macros address the arguments through fixed machine
 * registers, as listed in the comment at the top of the body.
 *
 * @param dest      destination pointer ($4)
 * @param line_size distance in bytes between destination lines ($5)
 * @param block     input coefficients ($6)
 *
 * The sequence is executed exactly once. It previously appeared twice back
 * to back; the second copy would have run with the destination register
 * already modified by the first store pass.
 */
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* $4 = dest, $5 = line_size, $6 = block */
    /* $24 = base address of the constant table used by all lq() loads below */
    __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
    lq($24, ROUNDER_0, $8);
    lq($24, ROUNDER_1, $7);

    /*
     * Row pass over the eight rows of the block (16 bytes apart), results
     * left in $8..$15. NOTE(review): DCT_8_INV_ROW1 is defined outside this
     * view; argument roles are inferred from the call pattern -- confirm.
     */
    DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
    DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
    DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
    DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
    DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
    DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
    DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
    DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);

    DCT_8_INV_COL8();

    /* $11 is free again after the column pass; reuse it for the clip bound. */
    lq($24, CLIPMAX, $11);
    DCT_8_INV_COL8_PMS();
    DCT_8_INV_COL8_PUT();

    //let savedtemp regs be saved
    /* Empty asm whose clobber list makes the compiler preserve $16-$23. */
    __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}
/**
 * Inverse DCT of one coefficient block, with the result added to the
 * existing bytes at the destination (clamped by the ADD() macro).
 *
 * The inline-assembly macros address the arguments through fixed machine
 * registers, as listed in the comment at the top of the body.
 *
 * @param dest      destination pointer ($4); read, updated and advanced
 *                  line by line by DCT_8_INV_COL8_ADD()
 * @param line_size distance in bytes between destination lines ($5)
 * @param block     input coefficients ($6)
 *
 * The sequence is executed exactly once. It previously appeared twice back
 * to back; the second copy would have added a second time through the
 * already-advanced destination pointer.
 */
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* $4 = dest, $5 = line_size, $6 = block */
    /* $24 = base address of the constant table used by all lq() loads below */
    __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
    lq($24, ROUNDER_0, $8);
    lq($24, ROUNDER_1, $7);

    /*
     * Row pass over the eight rows of the block (16 bytes apart), results
     * left in $8..$15. NOTE(review): DCT_8_INV_ROW1 is defined outside this
     * view; argument roles are inferred from the call pattern -- confirm.
     */
    DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
    DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
    DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
    DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
    DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
    DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
    DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
    DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);

    DCT_8_INV_COL8();

    /* $11 is free again after the column pass; ADD() uses it as clip bound. */
    lq($24, CLIPMAX, $11);
    DCT_8_INV_COL8_PMS();
    DCT_8_INV_COL8_ADD();

    //let savedtemp regs be saved
    /* Empty asm whose clobber list makes the compiler preserve $16-$23. */
    __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
}

View File

@ -5,148 +5,148 @@
/*
#define r0 $zero
#define r1 $at //assembler!
#define r2 $v0 //return
#define r3 $v1 //return
#define r4 $a0 //arg
#define r5 $a1 //arg
#define r6 $a2 //arg
#define r7 $a3 //arg
#define r8 $t0 //temp
#define r9 $t1 //temp
#define r10 $t2 //temp
#define r11 $t3 //temp
#define r12 $t4 //temp
#define r13 $t5 //temp
#define r14 $t6 //temp
#define r15 $t7 //temp
#define r16 $s0 //saved temp
#define r17 $s1 //saved temp
#define r18 $s2 //saved temp
#define r19 $s3 //saved temp
#define r20 $s4 //saved temp
#define r21 $s5 //saved temp
#define r22 $s6 //saved temp
#define r23 $s7 //saved temp
#define r24 $t8 //temp
#define r25 $t9 //temp
#define r26 $k0 //kernel
#define r27 $k1 //kernel
#define r28 $gp //global ptr
#define r29 $sp //stack ptr
#define r30 $fp //frame ptr
#define r31 $ra //return addr
#define r1 $at //assembler!
#define r2 $v0 //return
#define r3 $v1 //return
#define r4 $a0 //arg
#define r5 $a1 //arg
#define r6 $a2 //arg
#define r7 $a3 //arg
#define r8 $t0 //temp
#define r9 $t1 //temp
#define r10 $t2 //temp
#define r11 $t3 //temp
#define r12 $t4 //temp
#define r13 $t5 //temp
#define r14 $t6 //temp
#define r15 $t7 //temp
#define r16 $s0 //saved temp
#define r17 $s1 //saved temp
#define r18 $s2 //saved temp
#define r19 $s3 //saved temp
#define r20 $s4 //saved temp
#define r21 $s5 //saved temp
#define r22 $s6 //saved temp
#define r23 $s7 //saved temp
#define r24 $t8 //temp
#define r25 $t9 //temp
#define r26 $k0 //kernel
#define r27 $k1 //kernel
#define r28 $gp //global ptr
#define r29 $sp //stack ptr
#define r30 $fp //frame ptr
#define r31 $ra //return addr
*/
/*
 * Load/store wrappers.
 *
 * Macros that take register names (e.g. $24) stringify them straight into
 * the assembler template; none of them declares outputs or clobbers, so
 * the compiler is unaware of the registers they touch.
 */

/* Quadword load: emits "lq reg, off(base)"; `off` must be a constant. */
#define lq(base, off, reg) \
    __asm__ __volatile__ ("lq " #reg ", %0("#base ")" : : "i" (off) )

/*
 * Quadword load through a C pointer expression.
 * NOTE(review): the operand uses an "r" constraint, so %0 expands to a bare
 * register name, whereas sq2() uses "m" to get a real memory operand --
 * confirm the assembler accepts this form before relying on lq2().
 */
#define lq2(mem, reg) \
    __asm__ __volatile__ ("lq " #reg ", %0" : : "r" (mem))

/* Quadword store: emits "sq reg, off(base)"; `off` must be a constant. */
#define sq(reg, off, base) \
    __asm__ __volatile__ ("sq " #reg ", %0("#base ")" : : "i" (off) )

/*
#define ld(base, off, reg) \
    __asm__ __volatile__ ("ld " #reg ", " #off "("#base ")")
*/

/*
 * Hand-encoded doubleword loads. `base` and `reg` are plain register
 * NUMBERS here (4, not $4), placed in bits 21-25 and 16-20 of the
 * instruction word; `off` fills the low 16 bits. The offset is masked so
 * that a negative value cannot spill into the register/opcode fields.
 */
#define ld3(base, off, reg) \
    __asm__ __volatile__ (".word %0" : : "i" ( 0xdc000000 | ((base)<<21) | ((reg)<<16) | ((off) & 0xffff)))

#define ldr3(base, off, reg) \
    __asm__ __volatile__ (".word %0" : : "i" ( 0x6c000000 | ((base)<<21) | ((reg)<<16) | ((off) & 0xffff)))

#define ldl3(base, off, reg) \
    __asm__ __volatile__ (".word %0" : : "i" ( 0x68000000 | ((base)<<21) | ((reg)<<16) | ((off) & 0xffff)))

/*
#define sd(reg, off, base) \
    __asm__ __volatile__ ("sd " #reg ", " #off "("#base ")")
*/
//seems assembler has bug encoding mnemonic 'sd', so DIY
/* Same field layout and offset masking as ld3() above. */
#define sd3(reg, off, base) \
    __asm__ __volatile__ (".word %0" : : "i" ( 0xfc000000 | ((base)<<21) | ((reg)<<16) | ((off) & 0xffff)))

/* Word store: emits "sw reg, off(base)" with `off` pasted in textually. */
#define sw(reg, off, base) \
    __asm__ __volatile__ ("sw " #reg ", " #off "("#base ")")

/* Quadword store through a C pointer; `mem` is dereferenced for the "m" operand. */
#define sq2(reg, mem) \
    __asm__ __volatile__ ("sq " #reg ", %0" : : "m" (*(mem)))
/*
 * One-instruction wrappers for the packed (multimedia) instructions.
 *
 * Argument order: the macro takes its SOURCE registers first and the
 * DESTINATION register last, but emits the usual assembler order
 * "op rd, rs, rt". For example pextlb($0, $2, $2) emits "pextlb $2, $0, $2".
 * Registers are passed by name ($N) and stringified into the template.
 * The shift wrappers (psraw, psrah) take the shift amount `sa` as a
 * compile-time constant.
 *
 * Each macro is defined once; the identical second definition that
 * followed every entry was redundant and has been dropped.
 */
#define pinth(rs, rt, rd) \
    __asm__ __volatile__ ("pinth  " #rd ", " #rs ", " #rt )

#define phmadh(rs, rt, rd) \
    __asm__ __volatile__ ("phmadh " #rd ", " #rs ", " #rt )

#define pcpyud(rs, rt, rd) \
    __asm__ __volatile__ ("pcpyud " #rd ", " #rs ", " #rt )

#define pcpyld(rs, rt, rd) \
    __asm__ __volatile__ ("pcpyld " #rd ", " #rs ", " #rt )

#define pcpyh(rt, rd) \
    __asm__ __volatile__ ("pcpyh  " #rd ", " #rt )

#define paddw(rs, rt, rd) \
    __asm__ __volatile__ ("paddw  " #rd ", " #rs ", " #rt )

#define pextlw(rs, rt, rd) \
    __asm__ __volatile__ ("pextlw " #rd ", " #rs ", " #rt )

#define pextuw(rs, rt, rd) \
    __asm__ __volatile__ ("pextuw " #rd ", " #rs ", " #rt )

#define pextlh(rs, rt, rd) \
    __asm__ __volatile__ ("pextlh " #rd ", " #rs ", " #rt )

#define pextuh(rs, rt, rd) \
    __asm__ __volatile__ ("pextuh " #rd ", " #rs ", " #rt )

#define psubw(rs, rt, rd) \
    __asm__ __volatile__ ("psubw  " #rd ", " #rs ", " #rt )

#define psraw(rt, sa, rd) \
    __asm__ __volatile__ ("psraw  " #rd ", " #rt ", %0" : : "i"(sa) )

#define ppach(rs, rt, rd) \
    __asm__ __volatile__ ("ppach  " #rd ", " #rs ", " #rt )

#define ppacb(rs, rt, rd) \
    __asm__ __volatile__ ("ppacb  " #rd ", " #rs ", " #rt )

#define prevh(rt, rd) \
    __asm__ __volatile__ ("prevh  " #rd ", " #rt )

#define pmulth(rs, rt, rd) \
    __asm__ __volatile__ ("pmulth " #rd ", " #rs ", " #rt )

#define pmaxh(rs, rt, rd) \
    __asm__ __volatile__ ("pmaxh " #rd ", " #rs ", " #rt )

#define pminh(rs, rt, rd) \
    __asm__ __volatile__ ("pminh " #rd ", " #rs ", " #rt )

#define pinteh(rs, rt, rd) \
    __asm__ __volatile__ ("pinteh  " #rd ", " #rs ", " #rt )

#define paddh(rs, rt, rd) \
    __asm__ __volatile__ ("paddh  " #rd ", " #rs ", " #rt )

#define psubh(rs, rt, rd) \
    __asm__ __volatile__ ("psubh  " #rd ", " #rs ", " #rt )

#define psrah(rt, sa, rd) \
    __asm__ __volatile__ ("psrah  " #rd ", " #rt ", %0" : : "i"(sa) )

/* Takes only a destination register: emits "pmfhl.uw rd". */
#define pmfhl_uw(rd) \
    __asm__ __volatile__ ("pmfhl.uw  " #rd)

#define pextlb(rs, rt, rd) \
    __asm__ __volatile__ ("pextlb  " #rd ", " #rs ", " #rt )
#endif

View File

@ -41,7 +41,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
level = block[0] * s->c_dc_scale;
}else {
qadd = 0;
level = block[0];
level = block[0];
}
nCoeffs= 63; //does not allways use zigzag table
} else {
@ -49,29 +49,29 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
}
asm volatile(
"add $14, $0, %3 \n\t"
"pcpyld $8, %0, %0 \n\t"
"pcpyh $8, $8 \n\t" //r8 = qmul
"pcpyld $9, %1, %1 \n\t"
"pcpyh $9, $9 \n\t" //r9 = qadd
"add $14, $0, %3 \n\t"
"pcpyld $8, %0, %0 \n\t"
"pcpyh $8, $8 \n\t" //r8 = qmul
"pcpyld $9, %1, %1 \n\t"
"pcpyh $9, $9 \n\t" //r9 = qadd
".p2align 2 \n\t"
"1: \n\t"
"lq $10, 0($14) \n\t" //r10 = level
"addi $14, $14, 16 \n\t" //block+=8
"addi %2, %2, -8 \n\t"
"pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0
"pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0
"por $12, $11, $12 \n\t"
"pmulth $10, $10, $8 \n\t"
"paddh $13, $9, $11 \n\t"
"1: \n\t"
"lq $10, 0($14) \n\t" //r10 = level
"addi $14, $14, 16 \n\t" //block+=8
"addi %2, %2, -8 \n\t"
"pcgth $11, $0, $10 \n\t" //r11 = level < 0 ? -1 : 0
"pcgth $12, $10, $0 \n\t" //r12 = level > 0 ? -1 : 0
"por $12, $11, $12 \n\t"
"pmulth $10, $10, $8 \n\t"
"paddh $13, $9, $11 \n\t"
"pxor $13, $13, $11 \n\t" //r13 = level < 0 ? -qadd : qadd
"pmfhl.uw $11 \n\t"
"pinteh $10, $11, $10 \n\t" //r10 = level * qmul
"paddh $10, $10, $13 \n\t"
"pmfhl.uw $11 \n\t"
"pinteh $10, $11, $10 \n\t" //r10 = level * qmul
"paddh $10, $10, $13 \n\t"
"pand $10, $10, $12 \n\t"
"sq $10, -16($14) \n\t"
"bgez %2, 1b \n\t"
:: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
"sq $10, -16($14) \n\t"
"bgez %2, 1b \n\t"
:: "r"(qmul), "r" (qadd), "r" (nCoeffs), "r" (block) : "$8", "$9", "$10", "$11", "$12", "$13", "$14", "memory" );
if(s->mb_intra)
block[0]= level;

Some files were not shown because too many files have changed in this diff Show More