Compare commits
396 Commits
experiment
...
stable-vp9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
422da555b0 | ||
|
|
29611db9f8 | ||
|
|
cbf394574d | ||
|
|
23e1a29fc7 | ||
|
|
eeae6f946d | ||
|
|
c73e4412b3 | ||
|
|
9ae985b23a | ||
|
|
2d58761993 | ||
|
|
e8a967d960 | ||
|
|
c4826c5941 | ||
|
|
0c3038234d | ||
|
|
040ffb6326 | ||
|
|
5d8642354e | ||
|
|
8fc95a1b11 | ||
|
|
1407cf8588 | ||
|
|
9c9a3b2775 | ||
|
|
324ebb704a | ||
|
|
86fb12b600 | ||
|
|
25655e5794 | ||
|
|
182366c736 | ||
|
|
9ee9918dad | ||
|
|
e7f2aa0fb8 | ||
|
|
cddde51ec5 | ||
|
|
4d018be950 | ||
|
|
66755abff4 | ||
|
|
7fb42d909e | ||
|
|
6a501462f8 | ||
|
|
b964646756 | ||
|
|
23845947c4 | ||
|
|
d09abfa9f7 | ||
|
|
d22a504d11 | ||
|
|
69fe840ec4 | ||
|
|
ac6093d179 | ||
|
|
079183c1a8 | ||
|
|
65fe7d7605 | ||
|
|
db487188b1 | ||
|
|
321c2fd178 | ||
|
|
cb24406da5 | ||
|
|
5d93feb6ad | ||
|
|
93ffd371eb | ||
|
|
3d22d3ae0c | ||
|
|
09830aa0ea | ||
|
|
0607abc3dd | ||
|
|
f6bc783d63 | ||
|
|
939791a129 | ||
|
|
2873d5608b | ||
|
|
87bc705fb5 | ||
|
|
f4a6f936b5 | ||
|
|
4f660cc018 | ||
|
|
901c495482 | ||
|
|
563c273738 | ||
|
|
fc5ec206a7 | ||
|
|
37705a3bc5 | ||
|
|
20abe595ec | ||
|
|
8f92a7efdb | ||
|
|
01b35c3c16 | ||
|
|
18c780a0ff | ||
|
|
c1913c9cf4 | ||
|
|
54a03e20dd | ||
|
|
132ef4295a | ||
|
|
b19126b291 | ||
|
|
740acd6891 | ||
|
|
65c2444e15 | ||
|
|
9faa7e8186 | ||
|
|
e378566060 | ||
|
|
09bc942b47 | ||
|
|
fb550ee620 | ||
|
|
d1268c5921 | ||
|
|
f15cdc7451 | ||
|
|
e378a89bd6 | ||
|
|
afffa3d9b0 | ||
|
|
dae17734ec | ||
|
|
e4e864586c | ||
|
|
3476404912 | ||
|
|
736114f44b | ||
|
|
170be56a74 | ||
|
|
4ad52a8f18 | ||
|
|
1c263d6918 | ||
|
|
2156ccaa4a | ||
|
|
458c2833c0 | ||
|
|
9fc3d32a50 | ||
|
|
9158b8956f | ||
|
|
7bc775d93d | ||
|
|
2e4ca9d1a5 | ||
|
|
e8feb2932f | ||
|
|
e5deed06c0 | ||
|
|
1ee66933c1 | ||
|
|
01c4e04424 | ||
|
|
e494df1a37 | ||
|
|
72872d3d8c | ||
|
|
3c05bda058 | ||
|
|
3b8614a8f6 | ||
|
|
890eee3b47 | ||
|
|
76a437a31b | ||
|
|
872c6d85c0 | ||
|
|
bb2313db28 | ||
|
|
9fd2767200 | ||
|
|
c3c21e3c14 | ||
|
|
79401542f7 | ||
|
|
532179e845 | ||
|
|
d6606d1ea7 | ||
|
|
5dda1d2394 | ||
|
|
1cf2272347 | ||
|
|
49317cddad | ||
|
|
010c0ad0eb | ||
|
|
948aaab4ca | ||
|
|
3cf46fa591 | ||
|
|
0ca7855f67 | ||
|
|
2b9baca4f0 | ||
|
|
e22bb0dc8e | ||
|
|
e326cecf18 | ||
|
|
1d44fc0c49 | ||
|
|
bc50961a74 | ||
|
|
ec4b2742e7 | ||
|
|
c86c5443eb | ||
|
|
1f4bf79d65 | ||
|
|
b6dbf11ed5 | ||
|
|
e83e8f0426 | ||
|
|
ee961599e1 | ||
|
|
d765df2796 | ||
|
|
aa05321262 | ||
|
|
924d74516a | ||
|
|
e80bf802a9 | ||
|
|
abff678866 | ||
|
|
aaa7b44460 | ||
|
|
22dc946a7e | ||
|
|
b71807082c | ||
|
|
db20806710 | ||
|
|
b62ddd5f8b | ||
|
|
e02dc84c1a | ||
|
|
851a2fd72c | ||
|
|
eb7acb5524 | ||
|
|
1d3f94efe2 | ||
|
|
7d058ef86c | ||
|
|
f1560ce035 | ||
|
|
a93992e725 | ||
|
|
3a679e56b2 | ||
|
|
ce04b1aa62 | ||
|
|
7b95f9bf39 | ||
|
|
ba10aed86d | ||
|
|
12e5931a9a | ||
|
|
f77c6973a1 | ||
|
|
f389ca2acc | ||
|
|
bfebe7e927 | ||
|
|
78e670fcf8 | ||
|
|
2d6aadd7e2 | ||
|
|
45125ee573 | ||
|
|
9482c07953 | ||
|
|
3e43e49ffd | ||
|
|
44b7854c84 | ||
|
|
36e9b82080 | ||
|
|
ba8fc71979 | ||
|
|
657ee2d719 | ||
|
|
69384f4fad | ||
|
|
bbb490f6a3 | ||
|
|
a5cb05c45d | ||
|
|
242460cb66 | ||
|
|
af13fbb70f | ||
|
|
b25589c6bb | ||
|
|
4505e8accb | ||
|
|
aa823f8667 | ||
|
|
6c5433c836 | ||
|
|
642696b678 | ||
|
|
45870619f3 | ||
|
|
4681197a58 | ||
|
|
5eed6e2224 | ||
|
|
166dc85bed | ||
|
|
66ccf5ddcf | ||
|
|
8b970da40d | ||
|
|
b19babe5e6 | ||
|
|
55b5a68d72 | ||
|
|
c8ba8c513c | ||
|
|
2c6ba737f8 | ||
|
|
5724b7e292 | ||
|
|
50ee61db4c | ||
|
|
480dd8ffbe | ||
|
|
e6c435b506 | ||
|
|
7194da2167 | ||
|
|
13930cf569 | ||
|
|
cd2cc27af1 | ||
|
|
8e04257bc5 | ||
|
|
78debf246b | ||
|
|
fb481913f0 | ||
|
|
11e3ac62a5 | ||
|
|
21d8e8590b | ||
|
|
656632b776 | ||
|
|
3f10831308 | ||
|
|
1c159c470a | ||
|
|
bef320aa07 | ||
|
|
b85367a608 | ||
|
|
aa5b67add0 | ||
|
|
f76f52df61 | ||
|
|
735b3a710a | ||
|
|
9655c2c7a6 | ||
|
|
33104cdd42 | ||
|
|
711aff9d9d | ||
|
|
d843ac5132 | ||
|
|
84f3b76e1c | ||
|
|
53f6f8ac93 | ||
|
|
4205d79273 | ||
|
|
4082bf9d7c | ||
|
|
604022d40b | ||
|
|
335b1d360b | ||
|
|
3c42657207 | ||
|
|
40ae02c247 | ||
|
|
13eed79c77 | ||
|
|
09858c239b | ||
|
|
a5726ac453 | ||
|
|
640dea4d9d | ||
|
|
8adc20ce35 | ||
|
|
da9a6ac9e7 | ||
|
|
01a37177d1 | ||
|
|
610642c130 | ||
|
|
8b810c7a78 | ||
|
|
f39bf458e5 | ||
|
|
94bfbaa84e | ||
|
|
96a1a59d21 | ||
|
|
a33f178491 | ||
|
|
359b571448 | ||
|
|
596c51087b | ||
|
|
cb05a451c6 | ||
|
|
64c0f5c592 | ||
|
|
fcb890d751 | ||
|
|
ccb6bdca75 | ||
|
|
42ab401fd3 | ||
|
|
85640f1c9d | ||
|
|
4172d7c584 | ||
|
|
6167355309 | ||
|
|
be60924f29 | ||
|
|
c43da352ab | ||
|
|
048ccb2849 | ||
|
|
3286abd82e | ||
|
|
687891238c | ||
|
|
a2f7619860 | ||
|
|
ac12f3926b | ||
|
|
2f1a0a0e2c | ||
|
|
0d8723f8d5 | ||
|
|
27a984fbd3 | ||
|
|
a3ae4c87fd | ||
|
|
ce28d0ca89 | ||
|
|
5b63963573 | ||
|
|
ae455fabd8 | ||
|
|
90027be251 | ||
|
|
7f814c6bf8 | ||
|
|
27de4fe922 | ||
|
|
62a2cd9ed2 | ||
|
|
381d3b8b7d | ||
|
|
d19ac4b66d | ||
|
|
37cda6dc4c | ||
|
|
1bf1428654 | ||
|
|
246381faf2 | ||
|
|
5826407f2a | ||
|
|
5baf510f74 | ||
|
|
039b0c4c9e | ||
|
|
2ffe64ad5c | ||
|
|
bb64c9a355 | ||
|
|
be5dc2321b | ||
|
|
f167433d9c | ||
|
|
e8923fe492 | ||
|
|
2612b99cc7 | ||
|
|
d8286dd56d | ||
|
|
c4048dbdd3 | ||
|
|
f70330a906 | ||
|
|
569ca37d09 | ||
|
|
3275ad701a | ||
|
|
82d4d9a008 | ||
|
|
31c97c2bdf | ||
|
|
5dc0b309ab | ||
|
|
2e3478a593 | ||
|
|
5a1a269f67 | ||
|
|
b34ce04378 | ||
|
|
f67919ae86 | ||
|
|
26e5b5e25d | ||
|
|
367cb10fcf | ||
|
|
1462433370 | ||
|
|
d514b778c4 | ||
|
|
65aa89af1a | ||
|
|
bdc785e976 | ||
|
|
df0715204c | ||
|
|
60ecd60c9a | ||
|
|
bba68342ce | ||
|
|
79f4c1b9a4 | ||
|
|
3e340880a8 | ||
|
|
4fa93bcef4 | ||
|
|
afd9bd3e3c | ||
|
|
5e80a49307 | ||
|
|
d5bec522da | ||
|
|
9451e8d37e | ||
|
|
939b1e4a8c | ||
|
|
a9aa7d07d0 | ||
|
|
63e140eaa7 | ||
|
|
68369ca897 | ||
|
|
c2ff1882ff | ||
|
|
ca983f34f7 | ||
|
|
bb3b817c1e | ||
|
|
6f4fa44c42 | ||
|
|
81d7bd50f5 | ||
|
|
67e53716e0 | ||
|
|
89a1fcf884 | ||
|
|
cefaaa86c7 | ||
|
|
b7616e387e | ||
|
|
b0646f9e98 | ||
|
|
4d73416099 | ||
|
|
24856b6abc | ||
|
|
ec01f52ffa | ||
|
|
1a3641d91b | ||
|
|
adfc54a464 | ||
|
|
eb2fbea621 | ||
|
|
ab21378a2e | ||
|
|
20395189cd | ||
|
|
8cb09719a3 | ||
|
|
39f42c8713 | ||
|
|
cf6beea661 | ||
|
|
bb072000e8 | ||
|
|
f2c073efaa | ||
|
|
0f1deccf86 | ||
|
|
635ba269be | ||
|
|
26fead7ecf | ||
|
|
54979b4350 | ||
|
|
3526f1cd5e | ||
|
|
fc50477082 | ||
|
|
bcc8e9d9c6 | ||
|
|
ecb78b3e0c | ||
|
|
7e0f88b6be | ||
|
|
8105ce6dce | ||
|
|
dc70fbe42d | ||
|
|
5459f68d71 | ||
|
|
8e35263bed | ||
|
|
902f9c7cbd | ||
|
|
39fe235032 | ||
|
|
2c7ae8c29a | ||
|
|
4417c04531 | ||
|
|
4cabbca4ce | ||
|
|
32006aadd8 | ||
|
|
78136edcdc | ||
|
|
14cc7b319f | ||
|
|
b89eef8f82 | ||
|
|
b214cd0dab | ||
|
|
98e3d73e16 | ||
|
|
1a5e6ffb02 | ||
|
|
9d5885b0ab | ||
|
|
c66320b3e4 | ||
|
|
bd1bc1d303 | ||
|
|
9a31d05e24 | ||
|
|
1aedfc992a | ||
|
|
76d166e413 | ||
|
|
a72e269318 | ||
|
|
c3b5ef7600 | ||
|
|
3984b41c87 | ||
|
|
8b0e6035a2 | ||
|
|
ad7021dd6c | ||
|
|
097046ae28 | ||
|
|
b84dc949c8 | ||
|
|
3c43ec206c | ||
|
|
58b07a6f9d | ||
|
|
67fe9d17cb | ||
|
|
e7c5ca8983 | ||
|
|
ef101af8ae | ||
|
|
f1559bdeaf | ||
|
|
f295774d43 | ||
|
|
125146034e | ||
|
|
cd0629fe68 | ||
|
|
ff7df102d9 | ||
|
|
816d6c989c | ||
|
|
8ffe85ad00 | ||
|
|
ace93a175d | ||
|
|
fa0cd61087 | ||
|
|
41251ae558 | ||
|
|
0eef1acbef | ||
|
|
12eb2d0267 | ||
|
|
6ec2b85bad | ||
|
|
6a7a4ba753 | ||
|
|
c7c9901845 | ||
|
|
e3c92bd21e | ||
|
|
6fd2407035 | ||
|
|
6a8ec3eac2 | ||
|
|
bc484ebf06 | ||
|
|
ee40e1a637 | ||
|
|
2158909fc3 | ||
|
|
9e3bcdd135 | ||
|
|
47fad4c2d7 | ||
|
|
ac008f0030 | ||
|
|
1ba91a84ad | ||
|
|
83ee80c045 | ||
|
|
aae6a4c895 | ||
|
|
6bfcce8c7a | ||
|
|
61c33d0ad5 | ||
|
|
a766d8918e | ||
|
|
82d7c6fb3c | ||
|
|
1492698ed3 | ||
|
|
debb9c68c8 | ||
|
|
8db2675b97 | ||
|
|
ea2348ca29 | ||
|
|
78182538d6 | ||
|
|
1c552e79bd | ||
|
|
5edc65d00d |
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
@@ -13,20 +13,20 @@
|
||||
verbose=0
|
||||
set -- $*
|
||||
for i; do
|
||||
if [ "$i" == "-o" ]; then
|
||||
if [ "$i" = "-o" ]; then
|
||||
on_of=1
|
||||
elif [ "$i" == "-v" ]; then
|
||||
elif [ "$i" = "-v" ]; then
|
||||
verbose=1
|
||||
elif [ "$i" == "-g" ]; then
|
||||
elif [ "$i" = "-g" ]; then
|
||||
args="${args} --debug"
|
||||
elif [ "$on_of" == "1" ]; then
|
||||
elif [ "$on_of" = "1" ]; then
|
||||
outfile=$i
|
||||
on_of=0
|
||||
elif [ -f "$i" ]; then
|
||||
infiles="$infiles $i"
|
||||
elif [ "${i:0:2}" == "-l" ]; then
|
||||
elif [ "${i#-l}" != "$i" ]; then
|
||||
libs="$libs ${i#-l}"
|
||||
elif [ "${i:0:2}" == "-L" ]; then
|
||||
elif [ "${i#-L}" != "$i" ]; then
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
else
|
||||
args="${args} ${i}"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## configure.sh
|
||||
##
|
||||
@@ -198,11 +198,11 @@ add_extralibs() {
|
||||
#
|
||||
# Boolean Manipulation Functions
|
||||
#
|
||||
enable(){
|
||||
enable_feature(){
|
||||
set_all yes $*
|
||||
}
|
||||
|
||||
disable(){
|
||||
disable_feature(){
|
||||
set_all no $*
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ soft_enable() {
|
||||
for var in $*; do
|
||||
if ! disabled $var; then
|
||||
log_echo " enabling $var"
|
||||
enable $var
|
||||
enable_feature $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -228,7 +228,7 @@ soft_disable() {
|
||||
for var in $*; do
|
||||
if ! enabled $var; then
|
||||
log_echo " disabling $var"
|
||||
disable $var
|
||||
disable_feature $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -251,10 +251,10 @@ tolower(){
|
||||
# Temporary File Functions
|
||||
#
|
||||
source_path=${0%/*}
|
||||
enable source_path_used
|
||||
enable_feature source_path_used
|
||||
if test -z "$source_path" -o "$source_path" = "." ; then
|
||||
source_path="`pwd`"
|
||||
disable source_path_used
|
||||
disable_feature source_path_used
|
||||
fi
|
||||
|
||||
if test ! -z "$TMPDIR" ; then
|
||||
@@ -264,12 +264,13 @@ elif test ! -z "$TEMPDIR" ; then
|
||||
else
|
||||
TMPDIRx="/tmp"
|
||||
fi
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
|
||||
RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RAND}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RAND}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RAND}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RAND}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RAND}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm"
|
||||
|
||||
clean_temp_files() {
|
||||
rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
|
||||
@@ -316,8 +317,8 @@ check_header(){
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable $var
|
||||
check_cpp "$@" <<EOF && enable $var
|
||||
disable_feature $var
|
||||
check_cpp "$@" <<EOF && enable_feature $var
|
||||
#include "$header"
|
||||
int x;
|
||||
EOF
|
||||
@@ -479,7 +480,7 @@ process_common_cmdline() {
|
||||
for opt in "$@"; do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--child) enable child
|
||||
--child) enable_feature child
|
||||
;;
|
||||
--log*)
|
||||
logging="$optval"
|
||||
@@ -491,7 +492,7 @@ process_common_cmdline() {
|
||||
;;
|
||||
--target=*) toolchain="${toolchain:-${optval}}"
|
||||
;;
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable force_toolchain
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable_feature force_toolchain
|
||||
;;
|
||||
--cpu)
|
||||
;;
|
||||
@@ -511,7 +512,7 @@ process_common_cmdline() {
|
||||
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
|
||||
die_unknown $opt
|
||||
fi
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
;;
|
||||
--require-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
@@ -523,11 +524,11 @@ process_common_cmdline() {
|
||||
;;
|
||||
--force-enable-?*|--force-disable-?*)
|
||||
eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'`
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
;;
|
||||
--libc=*)
|
||||
[ -d "${optval}" ] || die "Not a directory: ${optval}"
|
||||
disable builtin_libc
|
||||
disable_feature builtin_libc
|
||||
alt_libc="${optval}"
|
||||
;;
|
||||
--as=*)
|
||||
@@ -696,13 +697,13 @@ process_common_toolchain() {
|
||||
|
||||
# Mark the specific ISA requested as enabled
|
||||
soft_enable ${tgt_isa}
|
||||
enable ${tgt_os}
|
||||
enable ${tgt_cc}
|
||||
enable_feature ${tgt_os}
|
||||
enable_feature ${tgt_cc}
|
||||
|
||||
# Enable the architecture family
|
||||
case ${tgt_isa} in
|
||||
arm*) enable arm;;
|
||||
mips*) enable mips;;
|
||||
arm*) enable_feature arm;;
|
||||
mips*) enable_feature mips;;
|
||||
esac
|
||||
|
||||
# PIC is probably what we want when building shared libs
|
||||
@@ -765,7 +766,7 @@ process_common_toolchain() {
|
||||
case ${toolchain} in
|
||||
sparc-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
*-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
@@ -790,7 +791,7 @@ process_common_toolchain() {
|
||||
;;
|
||||
armv5te)
|
||||
soft_enable edsp
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -805,7 +806,7 @@ process_common_toolchain() {
|
||||
arch_int=${arch_int%%te}
|
||||
check_add_asflags --defsym ARCHITECTURE=${arch_int}
|
||||
tune_cflags="-mtune="
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if [ -z "${float_abi}" ]; then
|
||||
check_cpp <<EOF && float_abi=hard || float_abi=softfp
|
||||
#ifndef __ARM_PCS_VFP
|
||||
@@ -842,8 +843,8 @@ EOF
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
|
||||
AS_SFX=.s
|
||||
msvs_arch_dir=arm-msvs
|
||||
disable multithread
|
||||
disable unit_tests
|
||||
disable_feature multithread
|
||||
disable_feature unit_tests
|
||||
;;
|
||||
rvct)
|
||||
CC=armcc
|
||||
@@ -855,7 +856,7 @@ EOF
|
||||
tune_cflags="--cpu="
|
||||
tune_asflags="--cpu="
|
||||
if [ -z "${tune_cpu}" ]; then
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if enabled neon
|
||||
then
|
||||
check_add_cflags --fpu=softvfp+vfpv3
|
||||
@@ -880,8 +881,8 @@ EOF
|
||||
|
||||
case ${tgt_os} in
|
||||
none*)
|
||||
disable multithread
|
||||
disable os_support
|
||||
disable_feature multithread
|
||||
disable_feature os_support
|
||||
;;
|
||||
|
||||
android*)
|
||||
@@ -913,9 +914,9 @@ EOF
|
||||
# Cortex-A8 implementations (NDK Dev Guide)
|
||||
add_ldflags "-Wl,--fix-cortex-a8"
|
||||
|
||||
enable pic
|
||||
enable_feature pic
|
||||
soft_enable realtime_only
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
soft_enable runtime_cpu_detect
|
||||
fi
|
||||
if enabled runtime_cpu_detect; then
|
||||
@@ -969,7 +970,7 @@ EOF
|
||||
;;
|
||||
|
||||
linux*)
|
||||
enable linux
|
||||
enable_feature linux
|
||||
if enabled rvct; then
|
||||
# Check if we have CodeSourcery GCC in PATH. Needed for
|
||||
# libraries
|
||||
@@ -1000,14 +1001,14 @@ EOF
|
||||
tune_cflags="-mtune="
|
||||
if enabled dspr2; then
|
||||
check_add_cflags -mips32r2 -mdspr2
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
fi
|
||||
check_add_cflags -march=${tgt_isa}
|
||||
check_add_asflags -march=${tgt_isa}
|
||||
check_add_asflags -KPIC
|
||||
;;
|
||||
ppc*)
|
||||
enable ppc
|
||||
enable_feature ppc
|
||||
bits=${tgt_isa##ppc}
|
||||
link_with_cc=gcc
|
||||
setup_gnu_toolchain
|
||||
@@ -1155,7 +1156,7 @@ EOF
|
||||
;;
|
||||
universal*|*-gcc|generic-gnu)
|
||||
link_with_cc=gcc
|
||||
enable gcc
|
||||
enable_feature gcc
|
||||
setup_gnu_toolchain
|
||||
;;
|
||||
esac
|
||||
@@ -1191,7 +1192,7 @@ EOF
|
||||
|
||||
# default use_x86inc to yes if pic is no or 64bit or we are not on darwin
|
||||
echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
|
||||
if [ ${tgt_isa} = x86_64 -o ! "$pic" == "yes" -o ! ${tgt_os:0:6} = darwin ]; then
|
||||
if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o "${tgt_os#darwin}" = "${tgt_os}" ]; then
|
||||
soft_enable use_x86inc
|
||||
fi
|
||||
|
||||
@@ -1204,14 +1205,14 @@ EOF
|
||||
enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
|
||||
|
||||
# Check for strip utility variant
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip
|
||||
|
||||
# Try to determine target endianness
|
||||
check_cc <<EOF
|
||||
unsigned int e = 'O'<<24 | '2'<<16 | 'B'<<8 | 'E';
|
||||
EOF
|
||||
[ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable_feature big_endian
|
||||
|
||||
# Try to find which inline keywords are supported
|
||||
check_cc <<EOF && INLINE="inline"
|
||||
@@ -1236,7 +1237,7 @@ EOF
|
||||
if enabled dspr2; then
|
||||
if enabled big_endian; then
|
||||
echo "dspr2 optimizations are available only for little endian platforms"
|
||||
disable dspr2
|
||||
disable_feature dspr2
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
@@ -1287,8 +1288,8 @@ print_config_h() {
|
||||
|
||||
print_webm_license() {
|
||||
local destination=$1
|
||||
local prefix=$2
|
||||
local suffix=$3
|
||||
local prefix="$2"
|
||||
local suffix="$3"
|
||||
shift 3
|
||||
cat <<EOF > ${destination}
|
||||
${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix}
|
||||
@@ -1309,7 +1310,7 @@ process_detect() {
|
||||
true;
|
||||
}
|
||||
|
||||
enable logging
|
||||
enable_feature logging
|
||||
logfile="config.log"
|
||||
self=$0
|
||||
process() {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
||||
85
configure
vendored
85
configure
vendored
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## configure
|
||||
##
|
||||
@@ -38,6 +38,7 @@ Advanced options:
|
||||
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
|
||||
${toggle_mem_tracker} track memory usage
|
||||
${toggle_postproc} postprocessing
|
||||
${toggle_vp9_postproc} vp9 specific postprocessing
|
||||
${toggle_multithread} multithreaded encoding and decoding
|
||||
${toggle_spatial_resampling} spatial sampling (scaling) support
|
||||
${toggle_realtime_only} enable this option while building for real-time encoding
|
||||
@@ -153,7 +154,7 @@ all_targets="libs examples docs"
|
||||
|
||||
# all targets available are enabled, by default.
|
||||
for t in ${all_targets}; do
|
||||
[ -f ${source_path}/${t}.mk ] && enable ${t}
|
||||
[ -f ${source_path}/${t}.mk ] && enable_feature ${t}
|
||||
done
|
||||
|
||||
# check installed doxygen version
|
||||
@@ -164,30 +165,30 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
||||
doxy_minor=${doxy_version%%.*}
|
||||
doxy_patch=${doxy_version##*.}
|
||||
|
||||
[ $doxy_major -gt 1 ] && enable doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
|
||||
[ $doxy_major -gt 1 ] && enable_feature doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable_feature doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
|
||||
fi
|
||||
|
||||
# install everything except the sources, by default. sources will have
|
||||
# to be enabled when doing dist builds, since that's no longer a common
|
||||
# case.
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
|
||||
enable install_bins
|
||||
enable install_libs
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
|
||||
enable_feature install_bins
|
||||
enable_feature install_libs
|
||||
|
||||
enable static
|
||||
enable optimizations
|
||||
enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
enable temporal_denoising
|
||||
enable_feature static
|
||||
enable_feature optimizations
|
||||
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable_feature md5
|
||||
enable_feature spatial_resampling
|
||||
enable_feature multithread
|
||||
enable_feature os_support
|
||||
enable_feature temporal_denoising
|
||||
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
[ -d ${source_path}/../include ] && enable_feature alt_tree_layout
|
||||
for d in vp8 vp9; do
|
||||
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
|
||||
[ -d ${source_path}/${d} ] && disable_feature alt_tree_layout;
|
||||
done
|
||||
|
||||
if ! enabled alt_tree_layout; then
|
||||
@@ -200,10 +201,10 @@ else
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable_feature vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable_feature vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable_feature vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable_feature vp9_decoder
|
||||
|
||||
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
|
||||
fi
|
||||
@@ -279,6 +280,7 @@ CONFIG_LIST="
|
||||
dc_recon
|
||||
runtime_cpu_detect
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -333,6 +335,7 @@ CMDLINE_SELECT="
|
||||
dequant_tokens
|
||||
dc_recon
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -358,12 +361,12 @@ process_cmdline() {
|
||||
for opt do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
|
||||
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
|
||||
if enabled experimental; then
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
else
|
||||
log_echo "Ignoring $opt -- not in experimental mode."
|
||||
fi
|
||||
@@ -384,8 +387,8 @@ post_process_cmdline() {
|
||||
# If the codec family is enabled, enable all components of that family.
|
||||
log_echo "Configuring selected codecs"
|
||||
for c in ${CODECS}; do
|
||||
disabled ${c%%_*} && disable ${c}
|
||||
enabled ${c%%_*} && enable ${c}
|
||||
disabled ${c%%_*} && disable_feature ${c}
|
||||
enabled ${c%%_*} && enable_feature ${c}
|
||||
done
|
||||
|
||||
# Enable all detected codecs, if they haven't been disabled
|
||||
@@ -393,12 +396,12 @@ post_process_cmdline() {
|
||||
|
||||
# Enable the codec family if any component of that family is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled $c && enable ${c%_*}
|
||||
enabled $c && enable_feature ${c%_*}
|
||||
done
|
||||
|
||||
# Set the {en,de}coders variable if any algorithm in that class is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
enabled ${c} && enable_feature ${c##*_}s
|
||||
done
|
||||
}
|
||||
|
||||
@@ -438,7 +441,7 @@ process_targets() {
|
||||
done
|
||||
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
|
||||
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
|
||||
! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
|
||||
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
|
||||
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
|
||||
@@ -508,13 +511,13 @@ process_detect() {
|
||||
fi
|
||||
if [ -z "$CC" ] || enabled external_build; then
|
||||
echo "Bypassing toolchain for environment detection."
|
||||
enable external_build
|
||||
enable_feature external_build
|
||||
check_header() {
|
||||
log fake_check_header "$@"
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable $var
|
||||
disable_feature $var
|
||||
# Headers common to all environments
|
||||
case $header in
|
||||
stdio.h)
|
||||
@@ -526,7 +529,7 @@ process_detect() {
|
||||
[ -f "${d##-I}/$header" ] && result=true && break
|
||||
done
|
||||
${result:-true}
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
|
||||
# Specialize windows and POSIX environments.
|
||||
case $toolchain in
|
||||
@@ -534,7 +537,7 @@ process_detect() {
|
||||
case $header-$toolchain in
|
||||
stdint*-gcc) true;;
|
||||
*) false;;
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
;;
|
||||
*)
|
||||
case $header in
|
||||
@@ -543,7 +546,7 @@ process_detect() {
|
||||
sys/mman.h) true;;
|
||||
unistd.h) true;;
|
||||
*) false;;
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
esac
|
||||
enabled $var
|
||||
}
|
||||
@@ -561,7 +564,7 @@ EOF
|
||||
check_header sys/mman.h
|
||||
check_header unistd.h # for sysconf(3) and friends.
|
||||
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
|
||||
}
|
||||
|
||||
process_toolchain() {
|
||||
@@ -643,14 +646,18 @@ process_toolchain() {
|
||||
# ccache only really works on gcc toolchains
|
||||
enabled gcc || soft_disable ccache
|
||||
if enabled mips; then
|
||||
enable dequant_tokens
|
||||
enable dc_recon
|
||||
enable_feature dequant_tokens
|
||||
enable_feature dc_recon
|
||||
fi
|
||||
|
||||
if enabled internal_stats; then
|
||||
enable_feature vp9_postproc
|
||||
fi
|
||||
|
||||
# Enable the postbuild target if building for visual studio.
|
||||
case "$tgt_cc" in
|
||||
vs*) enable msvs
|
||||
enable solution
|
||||
vs*) enable_feature msvs
|
||||
enable_feature solution
|
||||
vs_version=${tgt_cc##vs}
|
||||
case $vs_version in
|
||||
[789])
|
||||
|
||||
@@ -49,6 +49,9 @@ vpxenc.DESCRIPTION = Full featured encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c
|
||||
vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
|
||||
|
||||
# Clean up old ivfenc, ivfdec binaries.
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#define LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#ifndef TEST_ACM_RANDOM_H_
|
||||
#define TEST_ACM_RANDOM_H_
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
@@ -59,4 +59,4 @@ class ACMRandom {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#endif // TEST_ACM_RANDOM_H_
|
||||
|
||||
@@ -29,8 +29,8 @@ class BordersTest : public ::libvpx_test::EncoderTest,
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if ( video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 0);
|
||||
if (video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 1);
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
|
||||
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef TEST_CLEAR_SYSTEM_STATE_H_
|
||||
#define TEST_CLEAR_SYSTEM_STATE_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
# include "vpx_ports/x86.h"
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "test/acm_random.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
@@ -187,7 +188,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
|
||||
protected:
|
||||
static const int kDataAlignment = 16;
|
||||
static const int kOuterBlockSize = 128;
|
||||
static const int kOuterBlockSize = 256;
|
||||
static const int kInputStride = kOuterBlockSize;
|
||||
static const int kOutputStride = kOuterBlockSize;
|
||||
static const int kMaxDimension = 64;
|
||||
@@ -224,6 +225,10 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
input_[i] = prng.Rand8Extremes();
|
||||
}
|
||||
|
||||
void SetConstantInput(int value) {
|
||||
memset(input_, value, kInputBufferSize);
|
||||
}
|
||||
|
||||
void CheckGuardBlocks() {
|
||||
for (int i = 0; i < kOutputBufferSize; ++i) {
|
||||
if (IsIndexInBorder(i))
|
||||
@@ -456,45 +461,86 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
|
||||
{ 128}
|
||||
};
|
||||
|
||||
/* This test exercises the horizontal and vertical filter functions. */
|
||||
TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
/* Assume that the first input sample is at the 8/16th position. */
|
||||
const int kInitialSubPelOffset = 8;
|
||||
|
||||
/* Filters are 8-tap, so the first filter tap will be applied to the pixel
|
||||
* at position -3 with respect to the current filtering position. Since
|
||||
* kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
|
||||
* which is non-zero only in the last tap. So, applying the filter at the
|
||||
* current input position will result in an output equal to the pixel at
|
||||
* offset +4 (-3 + 7) with respect to the current filtering position.
|
||||
*/
|
||||
const int kPixelSelected = 4;
|
||||
|
||||
/* Assume that each output pixel requires us to step on by 17/16th pixels in
|
||||
* the input.
|
||||
*/
|
||||
const int kInputPixelStep = 17;
|
||||
|
||||
/* The filters are setup in such a way that the expected output produces
|
||||
* sets of 8 identical output samples. As the filter position moves to the
|
||||
* next 1/16th pixel position the only active (=128) filter tap moves one
|
||||
* position to the left, resulting in the same input pixel being replicated
|
||||
* in to the output for 8 consecutive samples. After each set of 8 positions
|
||||
* the filters select a different input pixel. kFilterPeriodAdjust below
|
||||
* computes which input pixel is written to the output for a specified
|
||||
* x or y position.
|
||||
*/
|
||||
|
||||
/* Test the horizontal filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[8], 17, kChangeFilters[4], 16,
|
||||
Width(), Height()));
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, NULL, 0, Width(), Height()));
|
||||
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjust = x >> 4;
|
||||
const int kFilterPeriodAdjust = (x >> 3) << 3;
|
||||
const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();
|
||||
}
|
||||
|
||||
/* Test the vertical filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[4], 16, kChangeFilters[8], 17,
|
||||
Width(), Height()));
|
||||
NULL, 0, kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjust = y >> 4;
|
||||
const int kFilterPeriodAdjust = (y >> 3) << 3;
|
||||
const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
|
||||
}
|
||||
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[8], 17, kChangeFilters[8], 17,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjustY = y >> 4;
|
||||
const int kFilterPeriodAdjustY = (y >> 3) << 3;
|
||||
const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustY * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjustX = x >> 4;
|
||||
const int kFilterPeriodAdjustX = (x >> 3) << 3;
|
||||
const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustX * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
|
||||
ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y;
|
||||
@@ -502,6 +548,34 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
/* This test exercises that enough rows and columns are filtered with every
|
||||
possible initial fractional positions and scaling steps. */
|
||||
TEST_P(ConvolveTest, CheckScalingFiltering) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
SetConstantInput(127);
|
||||
|
||||
for (int frac = 0; frac < 16; ++frac) {
|
||||
for (int step = 1; step <= 32; ++step) {
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
Width(), Height()));
|
||||
|
||||
CheckGuardBlocks();
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y
|
||||
<< ", frac == " << frac << ", step == " << step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
|
||||
@@ -108,5 +108,5 @@ using std::tr1::make_tuple;
|
||||
VP9_INSTANTIATE_TEST_CASE(
|
||||
CpuSpeedTest,
|
||||
::testing::Values(::libvpx_test::kTwoPassGood),
|
||||
::testing::Range(0, 3));
|
||||
::testing::Range(0, 5));
|
||||
} // namespace
|
||||
|
||||
@@ -75,7 +75,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
|
||||
// Update the running total of bits for end of test datarate checks.
|
||||
bits_total_ += frame_size_in_bits ;
|
||||
bits_total_ += frame_size_in_bits;
|
||||
|
||||
// If first drop not set and we have a drop set it to this time.
|
||||
if (!first_drop_ && duration > 1)
|
||||
|
||||
@@ -13,15 +13,16 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -31,12 +32,13 @@ namespace {
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
const int kNumCoeffs = 256;
|
||||
const double PI = 3.1415926535898;
|
||||
void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
@@ -45,7 +47,9 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
|
||||
x = cos(PI * j * (l + 0.5) / 16.0) *
|
||||
cos(PI * i * (k + 0.5) / 16.0) *
|
||||
input[i * 16 + j] / 256;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
@@ -59,23 +63,23 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
}
|
||||
|
||||
|
||||
static const double C1 = 0.995184726672197;
|
||||
static const double C2 = 0.98078528040323;
|
||||
static const double C3 = 0.956940335732209;
|
||||
static const double C4 = 0.923879532511287;
|
||||
static const double C5 = 0.881921264348355;
|
||||
static const double C6 = 0.831469612302545;
|
||||
static const double C7 = 0.773010453362737;
|
||||
static const double C8 = 0.707106781186548;
|
||||
static const double C9 = 0.634393284163646;
|
||||
static const double C10 = 0.555570233019602;
|
||||
static const double C11 = 0.471396736825998;
|
||||
static const double C12 = 0.38268343236509;
|
||||
static const double C13 = 0.290284677254462;
|
||||
static const double C14 = 0.195090322016128;
|
||||
static const double C15 = 0.098017140329561;
|
||||
const double C1 = 0.995184726672197;
|
||||
const double C2 = 0.98078528040323;
|
||||
const double C3 = 0.956940335732209;
|
||||
const double C4 = 0.923879532511287;
|
||||
const double C5 = 0.881921264348355;
|
||||
const double C6 = 0.831469612302545;
|
||||
const double C7 = 0.773010453362737;
|
||||
const double C8 = 0.707106781186548;
|
||||
const double C9 = 0.634393284163646;
|
||||
const double C10 = 0.555570233019602;
|
||||
const double C11 = 0.471396736825998;
|
||||
const double C12 = 0.38268343236509;
|
||||
const double C13 = 0.290284677254462;
|
||||
const double C14 = 0.195090322016128;
|
||||
const double C15 = 0.098017140329561;
|
||||
|
||||
static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
double step[16];
|
||||
double intermediate[16];
|
||||
double temp1, temp2;
|
||||
@@ -108,36 +112,36 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[6] = step[1] - step[6];
|
||||
output[7] = step[0] - step[7];
|
||||
|
||||
temp1 = step[ 8]*C7;
|
||||
temp2 = step[15]*C9;
|
||||
temp1 = step[ 8] * C7;
|
||||
temp2 = step[15] * C9;
|
||||
output[ 8] = temp1 + temp2;
|
||||
|
||||
temp1 = step[ 9]*C11;
|
||||
temp2 = step[14]*C5;
|
||||
temp1 = step[ 9] * C11;
|
||||
temp2 = step[14] * C5;
|
||||
output[ 9] = temp1 - temp2;
|
||||
|
||||
temp1 = step[10]*C3;
|
||||
temp2 = step[13]*C13;
|
||||
temp1 = step[10] * C3;
|
||||
temp2 = step[13] * C13;
|
||||
output[10] = temp1 + temp2;
|
||||
|
||||
temp1 = step[11]*C15;
|
||||
temp2 = step[12]*C1;
|
||||
temp1 = step[11] * C15;
|
||||
temp2 = step[12] * C1;
|
||||
output[11] = temp1 - temp2;
|
||||
|
||||
temp1 = step[11]*C1;
|
||||
temp2 = step[12]*C15;
|
||||
temp1 = step[11] * C1;
|
||||
temp2 = step[12] * C15;
|
||||
output[12] = temp2 + temp1;
|
||||
|
||||
temp1 = step[10]*C13;
|
||||
temp2 = step[13]*C3;
|
||||
temp1 = step[10] * C13;
|
||||
temp2 = step[13] * C3;
|
||||
output[13] = temp2 - temp1;
|
||||
|
||||
temp1 = step[ 9]*C5;
|
||||
temp2 = step[14]*C11;
|
||||
temp1 = step[ 9] * C5;
|
||||
temp2 = step[14] * C11;
|
||||
output[14] = temp2 + temp1;
|
||||
|
||||
temp1 = step[ 8]*C9;
|
||||
temp2 = step[15]*C7;
|
||||
temp1 = step[ 8] * C9;
|
||||
temp2 = step[15] * C7;
|
||||
output[15] = temp2 - temp1;
|
||||
|
||||
// step 3
|
||||
@@ -146,20 +150,20 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
step[ 2] = output[1] - output[2];
|
||||
step[ 3] = output[0] - output[3];
|
||||
|
||||
temp1 = output[4]*C14;
|
||||
temp2 = output[7]*C2;
|
||||
temp1 = output[4] * C14;
|
||||
temp2 = output[7] * C2;
|
||||
step[ 4] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5]*C10;
|
||||
temp2 = output[6]*C6;
|
||||
temp1 = output[5] * C10;
|
||||
temp2 = output[6] * C6;
|
||||
step[ 5] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5]*C6;
|
||||
temp2 = output[6]*C10;
|
||||
temp1 = output[5] * C6;
|
||||
temp2 = output[6] * C10;
|
||||
step[ 6] = temp2 - temp1;
|
||||
|
||||
temp1 = output[4]*C2;
|
||||
temp2 = output[7]*C14;
|
||||
temp1 = output[4] * C2;
|
||||
temp2 = output[7] * C14;
|
||||
step[ 7] = temp2 - temp1;
|
||||
|
||||
step[ 8] = output[ 8] + output[11];
|
||||
@@ -176,18 +180,18 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[ 0] = (step[ 0] + step[ 1]);
|
||||
output[ 8] = (step[ 0] - step[ 1]);
|
||||
|
||||
temp1 = step[2]*C12;
|
||||
temp2 = step[3]*C4;
|
||||
temp1 = step[2] * C12;
|
||||
temp2 = step[3] * C4;
|
||||
temp1 = temp1 + temp2;
|
||||
output[ 4] = 2*(temp1*C8);
|
||||
output[ 4] = 2*(temp1 * C8);
|
||||
|
||||
temp1 = step[2]*C4;
|
||||
temp2 = step[3]*C12;
|
||||
temp1 = step[2] * C4;
|
||||
temp2 = step[3] * C12;
|
||||
temp1 = temp2 - temp1;
|
||||
output[12] = 2*(temp1*C8);
|
||||
output[12] = 2 * (temp1 * C8);
|
||||
|
||||
output[ 2] = 2*((step[4] + step[ 5])*C8);
|
||||
output[14] = 2*((step[7] - step[ 6])*C8);
|
||||
output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
|
||||
output[14] = 2 * ((step[7] - step[ 6]) * C8);
|
||||
|
||||
temp1 = step[4] - step[5];
|
||||
temp2 = step[6] + step[7];
|
||||
@@ -197,17 +201,17 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
intermediate[8] = step[8] + step[14];
|
||||
intermediate[9] = step[9] + step[15];
|
||||
|
||||
temp1 = intermediate[8]*C12;
|
||||
temp2 = intermediate[9]*C4;
|
||||
temp1 = intermediate[8] * C12;
|
||||
temp2 = intermediate[9] * C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[3] = 2*(temp1*C8);
|
||||
output[3] = 2 * (temp1 * C8);
|
||||
|
||||
temp1 = intermediate[8]*C4;
|
||||
temp2 = intermediate[9]*C12;
|
||||
temp1 = intermediate[8] * C4;
|
||||
temp2 = intermediate[9] * C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[13] = 2*(temp1*C8);
|
||||
output[13] = 2 * (temp1 * C8);
|
||||
|
||||
output[ 9] = 2*((step[10] + step[11])*C8);
|
||||
output[ 9] = 2 * ((step[10] + step[11]) * C8);
|
||||
|
||||
intermediate[11] = step[10] - step[11];
|
||||
intermediate[12] = step[12] + step[13];
|
||||
@@ -218,207 +222,300 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[15] = (intermediate[11] + intermediate[12]);
|
||||
output[ 1] = -(intermediate[11] - intermediate[12]);
|
||||
|
||||
output[ 7] = 2*(intermediate[13]*C8);
|
||||
output[ 7] = 2 * (intermediate[13] * C8);
|
||||
|
||||
temp1 = intermediate[14]*C12;
|
||||
temp2 = intermediate[15]*C4;
|
||||
temp1 = intermediate[14] * C12;
|
||||
temp2 = intermediate[15] * C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[11] = -2*(temp1*C8);
|
||||
output[11] = -2 * (temp1 * C8);
|
||||
|
||||
temp1 = intermediate[14]*C4;
|
||||
temp2 = intermediate[15]*C12;
|
||||
temp1 = intermediate[14] * C4;
|
||||
temp2 = intermediate[15] * C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[ 5] = 2*(temp1*C8);
|
||||
output[ 5] = 2 * (temp1 * C8);
|
||||
}
|
||||
|
||||
static void reference_16x16_dct_1d(double in[16], double out[16]) {
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 16; k++) {
|
||||
out[k] = 0.0;
|
||||
for (int n = 0; n < 16; n++)
|
||||
out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
|
||||
if (k == 0)
|
||||
out[k] = out[k]*kInvSqrt2;
|
||||
}
|
||||
}
|
||||
|
||||
void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
|
||||
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j*16 + i];
|
||||
temp_in[j] = input[j * 16 + i];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j*16 + i] = temp_out[j];
|
||||
output[j * 16 + i] = temp_out[j];
|
||||
}
|
||||
// Then transform rows
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = output[j + i*16];
|
||||
temp_in[j] = output[j + i * 16];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
// Scale by some magic number
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j + i*16] = temp_out[j]/2;
|
||||
output[j + i * 16] = temp_out[j]/2;
|
||||
}
|
||||
}
|
||||
|
||||
void fdct16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fdct16x16_c(in, out, stride);
|
||||
}
|
||||
void idct16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_idct16x16_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
// FIXME(jingning): need to test both SSE2 and c
|
||||
#if HAVE_SSE2
|
||||
vp9_short_fht16x16_sse2(in, out, stride >> 1, tx_type);
|
||||
#else
|
||||
vp9_short_fht16x16_c(in, out, stride >> 1, tx_type);
|
||||
#endif
|
||||
}
|
||||
void iht16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht16x16_add_c(out, dst, stride >> 1, tx_type);
|
||||
|
||||
void fht16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht16x16_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans16x16Test : public ::testing::TestWithParam<int> {
|
||||
class Trans16x16TestBase {
|
||||
public:
|
||||
virtual ~FwdTrans16x16Test() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm = fdct16x16;
|
||||
inv_txfm = idct16x16_add;
|
||||
} else {
|
||||
fwd_txfm = fht16x16;
|
||||
inv_txfm = iht16x16_add;
|
||||
}
|
||||
}
|
||||
virtual ~Trans16x16TestBase() {}
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
|
||||
|
||||
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
|
||||
|
||||
void RunAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
|
||||
test_temp_block, pitch_));
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
|
||||
}
|
||||
|
||||
void RunCoeffCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
}
|
||||
}
|
||||
|
||||
void RunMemCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
if (i == 1)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
|
||||
fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
|
||||
output_block, pitch_));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RunInvAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
coeff[j] = round(out_r[j]);
|
||||
|
||||
const int pitch = 32;
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_GE(1u, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
}
|
||||
}
|
||||
int pitch_;
|
||||
int tx_type_;
|
||||
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
fht_t fwd_txfm_ref;
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 256);
|
||||
class Trans16x16DCT : public Trans16x16TestBase,
|
||||
public PARAMS(fdct_t, idct_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16DCT() {}
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 32;
|
||||
fwd_txfm_ref = fdct16x16_ref;
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
const int pitch = 32;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride >> 1);
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
|
||||
fdct_t fwd_txfm_;
|
||||
idct_t inv_txfm_;
|
||||
};
|
||||
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
|
||||
TEST_P(Trans16x16DCT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans16x16Test, CoeffSizeCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_extreme_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
|
||||
TEST_P(Trans16x16DCT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < 256; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
TEST_P(Trans16x16DCT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
const int pitch = 32;
|
||||
RunFwdTxfm(input_block, output_block, dst, pitch, tx_type_);
|
||||
RunFwdTxfm(input_extreme_block, output_extreme_block, dst, pitch, tx_type_);
|
||||
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
|
||||
RunInvAccuracyCheck();
|
||||
}
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_extreme_block[j]))
|
||||
<< "Error: 16x16 FDCT extreme has coefficient larger "
|
||||
<< "than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
class Trans16x16HT : public Trans16x16TestBase,
|
||||
public PARAMS(fht_t, iht_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16HT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 16;
|
||||
fwd_txfm_ref = fht16x16_ref;
|
||||
}
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans16x16Test, ::testing::Range(0, 4));
|
||||
|
||||
TEST(VP9Idct16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t in[256], coeff[256];
|
||||
uint8_t dst[256], src[256];
|
||||
double out_r[256];
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j)
|
||||
in[j] = src[j] - dst[j];
|
||||
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < 256; j++)
|
||||
coeff[j] = round(out_r[j]);
|
||||
vp9_short_idct16x16_add_c(coeff, dst, 16);
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride, tx_type_);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride, tx_type_);
|
||||
}
|
||||
|
||||
fht_t fwd_txfm_;
|
||||
iht_t inv_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(Trans16x16HT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
TEST_P(Trans16x16HT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
TEST_P(Trans16x16HT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 0),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 1),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 2),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
@@ -13,15 +13,17 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
|
||||
void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -30,35 +32,15 @@ namespace {
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
static const double kPi = 3.141592653589793238462643383279502884;
|
||||
static void reference2_32x32_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
for (int l = 0; l < 32; ++l) {
|
||||
for (int k = 0; k < 32; ++k) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
for (int j = 0; j < 32; ++j) {
|
||||
x = cos(kPi * j * (l + 0.5) / 32.0) *
|
||||
cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
x *= sqrt(2.0);
|
||||
s += x;
|
||||
}
|
||||
}
|
||||
output[k * 32 + l] = s / 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
|
||||
const int kNumCoeffs = 1024;
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 32; k++) {
|
||||
out[k] = 0.0;
|
||||
@@ -69,7 +51,8 @@ static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
|
||||
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
|
||||
double output[kNumCoeffs]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
double temp_in[32], temp_out[32];
|
||||
@@ -91,27 +74,165 @@ static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(VP9Idct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t in[1024], coeff[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
double out_r[1024];
|
||||
typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
// Value-parameterized fixture for the 32x32 transform tests.
// The parameter tuple is (forward transform, inverse transform, version).
class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
 public:
  virtual ~Trans32x32Test() {}

  // Unpacks the test parameters into the fixture members.
  virtual void SetUp() {
    // version_ selects the forward transform flavour under test:
    //   0: high precision forward transform
    //   1: low precision version for rd loop
    version_ = GET_PARAM(2);
    inv_txfm_ = GET_PARAM(1);
    fwd_txfm_ = GET_PARAM(0);
  }

  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
  int version_;          // Precision variant, see SetUp().
  fwd_txfm_t fwd_txfm_;  // Forward 32x32 transform under test.
  inv_txfm_t inv_txfm_;  // Inverse 32x32 transform under test.
};
|
||||
|
||||
// Round-trips random residual blocks through the forward and inverse
// transforms under test and bounds both the worst single-pixel squared error
// and the accumulated squared error.
TEST_P(Trans32x32Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  int64_t total_error = 0;
  uint32_t max_error = 0;
  DECLARE_ALIGNED_ARRAY(16, int16_t, residual, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, coeffs, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, recon, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint8_t, source, kNumCoeffs);

  for (int block = 0; block < count_test_block; ++block) {
    // Residual = source - prediction, so every sample is in [-255, 255].
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      source[idx] = rnd.Rand8();
      recon[idx] = rnd.Rand8();
      residual[idx] = source[idx] - recon[idx];
    }

    const int pitch = 64;
    REGISTER_STATE_CHECK(fwd_txfm_(residual, coeffs, pitch));
    REGISTER_STATE_CHECK(inv_txfm_(coeffs, recon, 32));

    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      // The difference of two uint8_t values may be negative; squaring it in
      // uint32_t still yields the true square because |diff| <= 255.
      const uint32_t diff = recon[idx] - source[idx];
      const uint32_t error = diff * diff;
      if (error > max_error) {
        max_error = error;
      }
      total_error += error;
    }
  }

  // The low precision (version 1) transform gets relaxed error budgets.
  if (version_ == 1) {
    max_error /= 2;
    total_error /= 45;
  }

  EXPECT_GE(1u, max_error)
      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";

  EXPECT_GE(count_test_block, total_error)
      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
}
|
||||
|
||||
// Compares the forward transform under test against vp9_short_fdct32x32_c on
// random input. Version 0 must match coefficient-for-coefficient; any other
// version may differ by at most 6 per coefficient.
TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;

  DECLARE_ALIGNED_ARRAY(16, int16_t, residual, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, expected, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, actual, kNumCoeffs);

  for (int block = 0; block < count_test_block; ++block) {
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      residual[idx] = rnd.Rand8() - rnd.Rand8();
    }

    const int pitch = 64;
    vp9_short_fdct32x32_c(residual, expected, pitch);
    REGISTER_STATE_CHECK(fwd_txfm_(residual, actual, pitch));

    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      if (version_ == 0) {
        EXPECT_EQ(actual[idx], expected[idx])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(actual[idx] - expected[idx]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
    }
  }
}
|
||||
|
||||
TEST_P(Trans32x32Test, MemCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 2000;
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
if (i == 1)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
if (version_ == 0) {
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j])
|
||||
<< "Error: 32x32 FDCT versions have mismatched coefficients";
|
||||
} else {
|
||||
EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
|
||||
<< "Error: 32x32 FDCT rd has mismatched coefficients";
|
||||
}
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
|
||||
<< "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 32x32 FDCT has coefficient larger than "
|
||||
<< "4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Trans32x32Test, InverseAccuracy) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255]
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_32x32_dct_2d(in, out_r);
|
||||
for (int j = 0; j < 1024; j++)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
coeff[j] = round(out_r[j]);
|
||||
vp9_short_idct32x32_add_c(coeff, dst, 32);
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
@@ -121,72 +242,21 @@ TEST(VP9Idct32x32Test, AccuracyCheck) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(VP9Fdct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
unsigned int max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t test_input_block[1024];
|
||||
int16_t test_temp_block[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
|
||||
vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
const unsigned diff = dst[j] - src[j];
|
||||
const unsigned error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
|
||||
}
|
||||
|
||||
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t input_block[1024], input_extreme_block[1024];
|
||||
int16_t output_block[1024], output_extreme_block[1024];
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_block, output_block, pitch);
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
|
||||
<< "Error: 32x32 FDCT extreme has coefficient larger than "
|
||||
"4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 1)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#define TEST_DECODE_TEST_DRIVER_H_
|
||||
#include <cstring>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_decoder.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
@@ -36,9 +36,8 @@ class DxDataIterator {
|
||||
};
|
||||
|
||||
// Provides a simplified interface to manage one video decoding.
|
||||
//
|
||||
// TODO: similar to Encoder class, the exact services should be
|
||||
// added as more tests are added.
|
||||
// Similar to Encoder class, the exact services should be added
|
||||
// as more tests are added.
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
@@ -114,19 +114,19 @@ static bool compare_img(const vpx_image_t *img1,
|
||||
const unsigned int height_y = img1->d_h;
|
||||
unsigned int i;
|
||||
for (i = 0; i < height_y; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
const unsigned int width_uv = (img1->d_w + 1) >> 1;
|
||||
const unsigned int height_uv = (img1->d_h + 1) >> 1;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
return match;
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
|
||||
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
|
||||
bool again;
|
||||
for (again = true, video->Begin(); again; video->Next()) {
|
||||
again = video->img() != NULL;
|
||||
again = (video->img() != NULL);
|
||||
|
||||
PreEncodeFrameHook(video);
|
||||
PreEncodeFrameHook(video, encoder);
|
||||
|
||||
@@ -62,7 +62,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
|
||||
if (droppable_nframes_ > 0 &&
|
||||
(cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
|
||||
for (unsigned int i = 0; i < droppable_nframes_; ++i) {
|
||||
if (droppable_frames_[i] == nframes_) {
|
||||
if (droppable_frames_[i] == video->frame()) {
|
||||
std::cout << " Encoding droppable frame: "
|
||||
<< droppable_frames_[i] << "\n";
|
||||
frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
|
||||
@@ -148,7 +148,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 2000;
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.g_lag_in_frames = 10;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
@@ -179,6 +179,9 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 500;
|
||||
// FIXME(debargha): Fix this to work for any lag.
|
||||
// Currently this test only works for lag = 0
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
@@ -136,7 +136,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 1000000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
||||
@@ -156,7 +156,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
if(test_temp_block[j] > 0) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
|
||||
@@ -13,14 +13,16 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -62,6 +64,7 @@ class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
|
||||
inv_txfm = iht8x8_add;
|
||||
}
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
@@ -92,8 +95,9 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -121,8 +125,9 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -148,7 +153,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
@@ -165,9 +170,11 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
for (int j = 0; j < 64; ++j){
|
||||
if(test_temp_block[j] > 0) {
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
@@ -177,7 +184,9 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
@@ -199,7 +208,7 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
@@ -216,8 +225,12 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#define TEST_I420_VIDEO_SOURCE_H_
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "test/video_source.h"
|
||||
|
||||
@@ -34,7 +35,6 @@ class I420VideoSource : public VideoSource {
|
||||
height_(0),
|
||||
framerate_numerator_(rate_numerator),
|
||||
framerate_denominator_(rate_denominator) {
|
||||
|
||||
// This initializes raw_sz_, width_, height_ and allocates an img.
|
||||
SetSize(width, height);
|
||||
}
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -27,10 +27,10 @@ namespace {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if(x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
if (x < 0)
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -16,7 +16,9 @@ extern "C" {
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
typedef void (*idct_fn_t)(int16_t *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
namespace {
|
||||
@@ -34,7 +36,7 @@ class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
idct_fn_t UUT;
|
||||
short input[16];
|
||||
int16_t input[16];
|
||||
unsigned char output[256];
|
||||
unsigned char predict[256];
|
||||
};
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -106,9 +106,9 @@ class IntraPredBase {
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
} else
|
||||
} else {
|
||||
expected = 0x80;
|
||||
|
||||
}
|
||||
// check that all subsequent lines are equal to the first
|
||||
for (int y = 1; y < block_size_; ++y)
|
||||
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
|
||||
|
||||
@@ -28,7 +28,7 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
|
||||
// so that we can do actual file decodes.
|
||||
class IVFVideoSource : public CompressedVideoSource {
|
||||
public:
|
||||
IVFVideoSource(const std::string &file_name)
|
||||
explicit IVFVideoSource(const std::string &file_name)
|
||||
: file_name_(file_name),
|
||||
input_file_(NULL),
|
||||
compressed_frame_buf_(NULL),
|
||||
|
||||
@@ -132,7 +132,6 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
|
||||
// Verify that keyframes match the file keyframes in the file.
|
||||
for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
|
||||
iter != kf_pts_list_.end(); ++iter) {
|
||||
|
||||
if (deadline_ == VPX_DL_REALTIME && *iter > 0)
|
||||
EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
|
||||
<< *iter;
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_MD5_HELPER_H_
|
||||
#define LIBVPX_TEST_MD5_HELPER_H_
|
||||
#ifndef TEST_MD5_HELPER_H_
|
||||
#define TEST_MD5_HELPER_H_
|
||||
|
||||
extern "C" {
|
||||
#include "./md5_utils.h"
|
||||
@@ -25,9 +25,15 @@ class MD5 {
|
||||
|
||||
void Add(const vpx_image_t *img) {
|
||||
for (int plane = 0; plane < 3; ++plane) {
|
||||
uint8_t *buf = img->planes[plane];
|
||||
const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
|
||||
const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
|
||||
const uint8_t *buf = img->planes[plane];
|
||||
// Calculate the width and height to do the md5 check. For the chroma
|
||||
// plane, we never want to round down and thus skip a pixel so if
|
||||
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
|
||||
// This works only for chroma_shift of 0 and 1.
|
||||
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
|
||||
img->y_chroma_shift : img->d_h;
|
||||
const int w = plane ? (img->d_w + img->x_chroma_shift) >>
|
||||
img->x_chroma_shift : img->d_w;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
MD5Update(&md5_, buf, w);
|
||||
@@ -61,4 +67,4 @@ class MD5 {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // LIBVPX_TEST_MD5_HELPER_H_
|
||||
#endif // TEST_MD5_HELPER_H_
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -63,7 +63,8 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
|
||||
// Pointers to top-left pixel of block in the input and output images.
|
||||
uint8_t *const src_image_ptr = src_image + (input_stride << 1);
|
||||
uint8_t *const dst_image_ptr = dst_image + 8;
|
||||
uint8_t *const flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
uint8_t *const flimits =
|
||||
reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
(void)vpx_memset(flimits, 255, block_width);
|
||||
|
||||
// Initialize pixels in the input:
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#ifndef TEST_REGISTER_STATE_CHECK_H_
|
||||
#define TEST_REGISTER_STATE_CHECK_H_
|
||||
|
||||
#ifdef _WIN64
|
||||
|
||||
@@ -92,4 +92,4 @@ class RegisterStateCheck {};
|
||||
|
||||
#endif // _WIN64
|
||||
|
||||
#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#endif // TEST_REGISTER_STATE_CHECK_H_
|
||||
|
||||
@@ -16,8 +16,68 @@
|
||||
#include "test/video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
// Enable(1) or Disable(0) writing of the compressed bitstream.
|
||||
#define WRITE_COMPRESSED_STREAM 0
|
||||
|
||||
namespace {
|
||||
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
// Stores the low 16 bits of |val| at |mem| in little-endian byte order
// (least significant byte first). |mem| must have room for 2 bytes.
static void mem_put_le16(char *const mem, const unsigned int val) {
  for (int byte = 0; byte < 2; ++byte) {
    mem[byte] = val >> (8 * byte);
  }
}
|
||||
|
||||
// Stores the low 32 bits of |val| at |mem| in little-endian byte order
// (least significant byte first). |mem| must have room for 4 bytes.
static void mem_put_le32(char *const mem, const unsigned int val) {
  for (int byte = 0; byte < 4; ++byte) {
    mem[byte] = val >> (8 * byte);
  }
}
|
||||
|
||||
// Writes the 32-byte IVF file header to |outfile| at the current position.
//
// cfg       - supplies frame width/height and the timebase.
// frame_cnt - value stored in the header's length field.
// outfile   - destination stream; the fwrite result is deliberately ignored.
static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
                                  int frame_cnt, FILE *const outfile) {
  // Signature bytes first; every remaining byte is filled in below.
  char header[32] = { 'D', 'K', 'I', 'F' };

  mem_put_le16(&header[4], 0);                     /* version */
  mem_put_le16(&header[6], 32);                    /* headersize */
  mem_put_le32(&header[8], 0x30395056);            /* fourcc (vp9) */
  mem_put_le16(&header[12], cfg->g_w);             /* width */
  mem_put_le16(&header[14], cfg->g_h);             /* height */
  mem_put_le32(&header[16], cfg->g_timebase.den);  /* rate */
  mem_put_le32(&header[20], cfg->g_timebase.num);  /* scale */
  mem_put_le32(&header[24], frame_cnt);            /* length */
  mem_put_le32(&header[28], 0);                    /* unused */

  (void)fwrite(header, 1, sizeof(header), outfile);
}
|
||||
|
||||
// Writes |size|, truncated to unsigned int, to |outfile| as a 4-byte
// little-endian field. The fwrite result is deliberately ignored.
static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
  char size_field[4];
  const unsigned int size32 = static_cast<unsigned int>(size);
  mem_put_le32(size_field, size32);
  (void)fwrite(size_field, 1, sizeof(size_field), outfile);
}
|
||||
|
||||
static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
|
||||
FILE *const outfile) {
|
||||
char header[12];
|
||||
vpx_codec_pts_t pts;
|
||||
|
||||
if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
|
||||
return;
|
||||
|
||||
pts = pkt->data.frame.pts;
|
||||
mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
|
||||
mem_put_le32(header + 4, pts & 0xFFFFFFFF);
|
||||
mem_put_le32(header + 8, pts >> 32);
|
||||
|
||||
(void)fwrite(header, 1, 12, outfile);
|
||||
}
|
||||
#endif // WRITE_COMPRESSED_STREAM
|
||||
|
||||
const unsigned int kInitialWidth = 320;
|
||||
const unsigned int kInitialHeight = 240;
|
||||
|
||||
@@ -42,6 +102,8 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
|
||||
limit_ = 60;
|
||||
}
|
||||
|
||||
virtual ~ResizingVideoSource() {}
|
||||
|
||||
protected:
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
@@ -56,13 +118,15 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
|
||||
protected:
|
||||
ResizeTest() : EncoderTest(GET_PARAM(0)) {}
|
||||
|
||||
virtual ~ResizeTest() {}
|
||||
|
||||
struct FrameInfo {
|
||||
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
|
||||
: pts(_pts), w(_w), h(_h) {}
|
||||
|
||||
vpx_codec_pts_t pts;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
};
|
||||
|
||||
virtual void SetUp() {
|
||||
@@ -95,17 +159,47 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned int kStepDownFrame = 3;
|
||||
const unsigned int kStepUpFrame = 6;
|
||||
|
||||
class ResizeInternalTest : public ResizeTest {
|
||||
protected:
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
ResizeInternalTest()
|
||||
: ResizeTest(),
|
||||
frame0_psnr_(0.0),
|
||||
outfile_(NULL),
|
||||
out_frames_(0) {}
|
||||
#else
|
||||
ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
|
||||
#endif
|
||||
|
||||
virtual ~ResizeInternalTest() {}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
outfile_ = fopen("vp90-2-05-resize.ivf", "wb");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void EndPassHook() {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
if (outfile_) {
|
||||
if (!fseek(outfile_, 0, SEEK_SET))
|
||||
write_ivf_file_header(&cfg_, out_frames_, outfile_);
|
||||
fclose(outfile_);
|
||||
outfile_ = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 3) {
|
||||
if (video->frame() == kStepDownFrame) {
|
||||
struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
if (video->frame() == 6) {
|
||||
if (video->frame() == kStepUpFrame) {
|
||||
struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
@@ -117,21 +211,46 @@ class ResizeInternalTest : public ResizeTest {
|
||||
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
++out_frames_;
|
||||
|
||||
// Write initial file header if first frame.
|
||||
if (pkt->data.frame.pts == 0)
|
||||
write_ivf_file_header(&cfg_, 0, outfile_);
|
||||
|
||||
// Write frame header and data.
|
||||
write_ivf_frame_header(pkt, outfile_);
|
||||
(void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
|
||||
#endif
|
||||
}
|
||||
|
||||
double frame0_psnr_;
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
FILE *outfile_;
|
||||
unsigned int out_frames_;
|
||||
#endif
|
||||
};
|
||||
|
||||
TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 10);
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
// q picked such that initial keyframe on this clip is ~30dB PSNR
|
||||
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
|
||||
|
||||
// If the number of frames being encoded is smaller than g_lag_in_frames
|
||||
// the encoded frame is unavailable using the current API. Comparing
|
||||
// frames to detect mismatch would then not be possible. Set
|
||||
// g_lag_in_frames = 0 to get around this.
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
|
||||
info != frame_info_list_.end(); ++info) {
|
||||
const vpx_codec_pts_t pts = info->pts;
|
||||
if (pts >= 3 && pts < 6) {
|
||||
if (pts >= kStepDownFrame && pts < kStepUpFrame) {
|
||||
ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
|
||||
ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
|
||||
} else {
|
||||
|
||||
@@ -17,7 +17,6 @@ extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "./vp8_rtcd.h"
|
||||
//#include "vp8/common/blockd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#include "./vp9_rtcd.h"
|
||||
|
||||
@@ -17,15 +17,19 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
extern "C" {
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
|
||||
@@ -121,10 +125,10 @@ TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
int rand_deltas[4];
|
||||
int deltas_valid;
|
||||
rand_deltas[0] = (rand() % 160) - 80;
|
||||
rand_deltas[1] = (rand() % 160) - 80;
|
||||
rand_deltas[2] = (rand() % 160) - 80;
|
||||
rand_deltas[3] = (rand() % 160) - 80;
|
||||
rand_deltas[0] = rnd(160) - 80;
|
||||
rand_deltas[1] = rnd(160) - 80;
|
||||
rand_deltas[2] = rnd(160) - 80;
|
||||
rand_deltas[3] = rnd(160) - 80;
|
||||
|
||||
deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
|
||||
(abs(rand_deltas[1]) <= 63) &&
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
@@ -51,7 +51,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
|
||||
bd.predictor = reinterpret_cast<unsigned char*>(
|
||||
vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
|
||||
|
||||
for(int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
for (int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
// start at block0
|
||||
be.src = 0;
|
||||
be.base_src = &source;
|
||||
|
||||
@@ -520,3 +520,12 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f vp90-2-03-size-226x202.webm
|
||||
83c6d8f2969b759e10e5c6542baca1265c874c29 vp90-2-03-size-226x224.webm.md5
|
||||
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce vp90-2-03-size-226x226.webm
|
||||
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 vp90-2-03-size-226x226.webm.md5
|
||||
b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
|
||||
65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
|
||||
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba vp90-2-05-resize.ivf
|
||||
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
|
||||
bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe vp90-2-06-bilinear.webm
|
||||
f6235f937552e11d8eb331ec55da6b3aa596b9ac vp90-2-06-bilinear.webm.md5
|
||||
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
|
||||
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
|
||||
|
||||
|
||||
10
test/test.mk
10
test/test.mk
@@ -24,7 +24,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
|
||||
|
||||
@@ -629,3 +629,11 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <string>
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
@@ -48,7 +48,9 @@ int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
/* Shared library builds don't support whitebox tests that exercise internal symbols. */
|
||||
// Shared library builds don't support whitebox tests
|
||||
// that exercise internal symbols.
|
||||
|
||||
#if CONFIG_VP8
|
||||
vp8_rtcd();
|
||||
#endif
|
||||
|
||||
@@ -159,7 +159,11 @@ const char *kVP9TestVectors[] = {
|
||||
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
|
||||
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
|
||||
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
|
||||
"vp90-2-03-size-226x226.webm"
|
||||
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
|
||||
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
|
||||
#if CONFIG_NON420
|
||||
"vp91-2-04-yv444.webm"
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
@@ -16,16 +16,16 @@
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
# include "vp8/common/variance.h"
|
||||
# include "vp8_rtcd.h"
|
||||
# include "./vp8_rtcd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
# include "vp9/encoder/vp9_variance.h"
|
||||
# include "vp9_rtcd.h"
|
||||
# include "./vp9_rtcd.h"
|
||||
#endif
|
||||
}
|
||||
#include "test/acm_random.h"
|
||||
@@ -107,8 +107,8 @@ static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
}
|
||||
|
||||
template<typename VarianceFunctionType>
|
||||
class VarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
class VarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
|
||||
@@ -191,9 +191,9 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
|
||||
}
|
||||
|
||||
template<typename SubpelVarianceFunctionType>
|
||||
class SubpelVarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
class SubpelVarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, SubpelVarianceFunctionType>& params =
|
||||
|
||||
@@ -8,10 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
@@ -24,6 +20,11 @@ extern "C" {
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
const int num_tests = 10;
|
||||
|
||||
@@ -44,7 +45,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
int offset = input - (uint8_t *)decrypt_state;
|
||||
int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
|
||||
for (int i = 0; i < count; i++) {
|
||||
output[i] = input[i] ^ secret_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -58,10 +59,10 @@ TEST(VP8, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -76,14 +77,14 @@ TEST(VP8, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int buffer_size = 10000;
|
||||
const int kBufferSize = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
BOOL_CODER bw;
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -98,19 +99,20 @@ TEST(VP8, TestBitIO) {
|
||||
#if CONFIG_DECRYPT
|
||||
encrypt_buffer(bw_buffer, buffer_size);
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size,
|
||||
test_decrypt_cb, (void *)bw_buffer);
|
||||
test_decrypt_cb,
|
||||
reinterpret_cast<void *>(bw_buffer));
|
||||
#else
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size, NULL, NULL);
|
||||
vp8dx_start_decode(&br, bw_buffer, kBufferSize, NULL, NULL);
|
||||
#endif
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
|
||||
<< "pos: "<< i << " / " << bits_to_test
|
||||
<< "pos: "<< i << " / " << kBitsToTest
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,8 @@ const uint8_t test_key[16] = {
|
||||
0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
|
||||
};
|
||||
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0) {
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst,
|
||||
int size, int offset = 0) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
dst[i] = src[i] ^ test_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -34,10 +35,11 @@ void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0)
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
encrypt_buffer(input, output, count, input - (uint8_t *)decrypt_state);
|
||||
encrypt_buffer(input, output, count,
|
||||
input - reinterpret_cast<uint8_t *>(decrypt_state));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
|
||||
@@ -19,7 +19,7 @@ extern "C" {
|
||||
#include "vp9/decoder/vp9_dboolhuff.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -32,10 +32,10 @@ TEST(VP9, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -50,14 +50,14 @@ TEST(VP9, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int buffer_size = 10000;
|
||||
const int kBufferSize = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
vp9_writer bw;
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
vp9_start_encode(&bw, bw_buffer);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -72,16 +72,16 @@ TEST(VP9, TestBitIO) {
|
||||
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
|
||||
|
||||
vp9_reader br;
|
||||
vp9_reader_init(&br, bw_buffer, buffer_size);
|
||||
vp9_reader_init(&br, bw_buffer, kBufferSize);
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
|
||||
<< "pos: " << i << " / " << bits_to_test
|
||||
<< "pos: " << i << " / " << kBitsToTest
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
||||
@@ -39,8 +39,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
// FIXME(rbultje) split in its own file
|
||||
for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES;
|
||||
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
|
||||
for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
|
||||
bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
|
||||
const int block_width = 4 << b_width_log2(bsize);
|
||||
const int block_height = 4 << b_height_log2(bsize);
|
||||
int16_t *diff = reinterpret_cast<int16_t *>(
|
||||
|
||||
@@ -41,7 +41,8 @@ extern "C"
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
/* Get baseline error score */
|
||||
|
||||
/* Copy the unfiltered / processed recon buffer to the new buffer */
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
|
||||
vp8cx_set_alt_lf_level(cpi, filt_mid);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
|
||||
@@ -339,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
if(ss_err[filt_low] == 0)
|
||||
{
|
||||
/* Get Low filter error score */
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_low);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
|
||||
|
||||
@@ -367,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
{
|
||||
if(ss_err[filt_high] == 0)
|
||||
{
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_high);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
|
||||
|
||||
|
||||
@@ -153,7 +153,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
#else
|
||||
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
|
||||
#endif
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
|
||||
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
|
||||
@@ -204,7 +204,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
|
||||
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
|
||||
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
|
||||
if(finalize && cfg->rc_end_usage == VPX_CQ)
|
||||
if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
|
||||
RANGE_CHECK(vp8_cfg, cq_level,
|
||||
cfg->rc_min_quantizer, cfg->rc_max_quantizer);
|
||||
|
||||
@@ -327,17 +327,14 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
oxcf->resample_up_water_mark = cfg.rc_resize_up_thresh;
|
||||
oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh;
|
||||
|
||||
if (cfg.rc_end_usage == VPX_VBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CQ)
|
||||
{
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
if (cfg.rc_end_usage == VPX_VBR) {
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
} else if (cfg.rc_end_usage == VPX_CBR) {
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
} else if (cfg.rc_end_usage == VPX_CQ) {
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
} else if (cfg.rc_end_usage == VPX_Q) {
|
||||
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
|
||||
}
|
||||
|
||||
oxcf->target_bandwidth = cfg.rc_target_bitrate;
|
||||
@@ -1272,7 +1269,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
||||
1, /* g_delete_first_pass_file */
|
||||
"vp8.fpf" /* first pass filename */
|
||||
#endif
|
||||
|
||||
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
|
||||
1, /* ts_number_layers */
|
||||
{0}, /* ts_target_bitrate */
|
||||
{0}, /* ts_rate_decimator */
|
||||
|
||||
116
vp9/common/arm/neon/vp9_avg_neon.asm
Normal file
116
vp9/common/arm/neon/vp9_avg_neon.asm
Normal file
@@ -0,0 +1,116 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_avg_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_convolve_avg_neon| PROC
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #32]
|
||||
mov r6, r2
|
||||
|
||||
cmp r4, #32
|
||||
bgt avg64
|
||||
beq avg32
|
||||
cmp r4, #8
|
||||
bgt avg16
|
||||
beq avg8
|
||||
b avg4
|
||||
|
||||
avg64
|
||||
sub lr, r1, #32
|
||||
sub r4, r3, #32
|
||||
avg64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
pld [r2, r3]
|
||||
vld1.8 {q8-q9}, [r6@128]!
|
||||
vld1.8 {q10-q11}, [r6@128], r4
|
||||
vrhadd.u8 q0, q0, q8
|
||||
vrhadd.u8 q1, q1, q9
|
||||
vrhadd.u8 q2, q2, q10
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r4
|
||||
subs r5, r5, #1
|
||||
bgt avg64_h
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg32
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vld1.8 {q8-q9}, [r6@128], r3
|
||||
vld1.8 {q10-q11}, [r6@128], r3
|
||||
pld [r0]
|
||||
vrhadd.u8 q0, q0, q8
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q1, q1, q9
|
||||
pld [r6]
|
||||
vrhadd.u8 q2, q2, q10
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg32
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg16
|
||||
vld1.8 {q0}, [r0], r1
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vld1.8 {q2}, [r6@128], r3
|
||||
vld1.8 {q3}, [r6@128], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q2
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q1, q1, q3
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg16
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg8
|
||||
vld1.8 {d0}, [r0], r1
|
||||
vld1.8 {d1}, [r0], r1
|
||||
vld1.8 {d2}, [r6@64], r3
|
||||
vld1.8 {d3}, [r6@64], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q1
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d1}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg8
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg4
|
||||
vld1.32 {d0[0]}, [r0], r1
|
||||
vld1.32 {d0[1]}, [r0], r1
|
||||
vld1.32 {d2[0]}, [r6@32], r3
|
||||
vld1.32 {d2[1]}, [r6@32], r3
|
||||
vrhadd.u8 d0, d0, d2
|
||||
vst1.32 {d0[0]}, [r2@32], r3
|
||||
vst1.32 {d0[1]}, [r2@32], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg4
|
||||
pop {r4-r6, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -66,46 +66,64 @@
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
add r8, r1, r1, lsl #1 ; src_stride * 3
|
||||
add r8, r8, #4 ; src_stride * 3 + 4
|
||||
rsb r8, r8, #0 ; reset for src
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
add r4, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r4, r4, #4 ; dst_stride * 3 - 4
|
||||
rsb r4, r4, #0 ; reset for dst
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
sub r9, r1, #8 ; post increment for src load
|
||||
|
||||
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz
|
||||
vld1.8 {d24}, [r0]!
|
||||
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
|
||||
|
||||
vld1.8 {d25}, [r0]!
|
||||
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
|
||||
|
||||
vld1.8 {d26}, [r0]!
|
||||
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
|
||||
|
||||
vld1.8 {d27}, [r0]!
|
||||
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
; extract to s16
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
vtrn.32 d28, d29 ; only the first half is populated
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d30
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; slightly out of order load to match the existing data
|
||||
vld1.u32 {d6[0]}, [r2], r3
|
||||
@@ -116,10 +134,12 @@ loop_horiz
|
||||
sub r2, r2, r3, lsl #2 ; reset for store
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
|
||||
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -135,24 +155,29 @@ loop_horiz
|
||||
vtrn.16 d2, d3
|
||||
vtrn.32 d2, d3
|
||||
vtrn.8 d2, d3
|
||||
|
||||
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r4
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r1 ; src += src_stride * 4 - w
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
@@ -163,66 +188,77 @@ loop_horiz
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_avg_vert_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r7, [sp, #40] ; filter_y
|
||||
ldr r8, [sp, #48] ; w
|
||||
ldr r9, [sp, #52] ; h
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r7] ; filter_y
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
mov r5, r1, lsl #1 ; src_stride * 2
|
||||
add r5, r5, r1, lsl #3 ; src_stride * 10
|
||||
sub r5, r5, #4 ; src_stride * 10 + 4
|
||||
rsb r5, r5, #0 ; reset for src
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
add r6, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r6, r6, #4 ; dst_stride * 3 - 4
|
||||
rsb r6, r6, #0 ; reset for dst
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
mov r10, r8 ; w loop counter
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d16[0]}, [r0], r1
|
||||
vld1.u32 {d16[1]}, [r0], r1
|
||||
vld1.u32 {d18[0]}, [r0], r1
|
||||
vld1.u32 {d18[1]}, [r0], r1
|
||||
vld1.u32 {d20[0]}, [r0], r1
|
||||
vld1.u32 {d20[1]}, [r0], r1
|
||||
vld1.u32 {d22[0]}, [r0], r1
|
||||
vld1.u32 {d22[1]}, [r0], r1
|
||||
vld1.u32 {d24[0]}, [r0], r1
|
||||
vld1.u32 {d24[1]}, [r0], r1
|
||||
vld1.u32 {d26[0]}, [r0], r5
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
vld1.u32 {d6[0]}, [r2], r3
|
||||
vld1.u32 {d6[1]}, [r2], r3
|
||||
vld1.u32 {d7[0]}, [r2], r3
|
||||
vld1.u32 {d7[1]}, [r2], r3
|
||||
vld1.u32 {d6[0]}, [r5@32], r3
|
||||
vld1.u32 {d6[1]}, [r8@32], r3
|
||||
vld1.u32 {d7[0]}, [r5@32], r3
|
||||
vld1.u32 {d7[1]}, [r8@32], r3
|
||||
|
||||
sub r2, r2, r3, lsl #2 ; reset for store
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -237,22 +273,30 @@ loop_vert
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r6
|
||||
sub r5, r5, r3, lsl #1 ; reset for store
|
||||
sub r8, r8, r3, lsl #1
|
||||
|
||||
subs r8, r8, #4 ; w -= 4
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
mov r8, r10 ; restore w counter
|
||||
add r0, r0, r7 ; src += 4 * src_stride - w
|
||||
add r2, r2, r12 ; dst += 4 * dst_stride - w
|
||||
subs r9, r9, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r10, pc}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
||||
|
||||
@@ -66,52 +66,72 @@
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
add r8, r1, r1, lsl #1 ; src_stride * 3
|
||||
add r8, r8, #4 ; src_stride * 3 + 4
|
||||
rsb r8, r8, #0 ; reset for src
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
add r4, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r4, r4, #4 ; dst_stride * 3 - 4
|
||||
rsb r4, r4, #0 ; reset for dst
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
sub r9, r1, #8 ; post increment for src load
|
||||
|
||||
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz
|
||||
vld1.8 {d24}, [r0]!
|
||||
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
|
||||
|
||||
vld1.8 {d25}, [r0]!
|
||||
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
|
||||
|
||||
vld1.8 {d26}, [r0]!
|
||||
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
|
||||
|
||||
vld1.8 {d27}, [r0]!
|
||||
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
; extract to s16
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
vtrn.32 d28, d29 ; only the first half is populated
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d30
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
|
||||
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -128,20 +148,25 @@ loop_horiz
|
||||
vtrn.32 d2, d3
|
||||
vtrn.8 d2, d3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r4
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r1 ; src += src_stride * 4 - w
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
@@ -152,59 +177,72 @@ loop_horiz
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_vert_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r7, [sp, #40] ; filter_y
|
||||
ldr r8, [sp, #48] ; w
|
||||
ldr r9, [sp, #52] ; h
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r7] ; filter_y
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
mov r5, r1, lsl #1 ; src_stride * 2
|
||||
add r5, r5, r1, lsl #3 ; src_stride * 10
|
||||
sub r5, r5, #4 ; src_stride * 10 + 4
|
||||
rsb r5, r5, #0 ; reset for src
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
add r6, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r6, r6, #4 ; dst_stride * 3 - 4
|
||||
rsb r6, r6, #0 ; reset for dst
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
mov r10, r8 ; w loop counter
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d16[0]}, [r0], r1
|
||||
vld1.u32 {d16[1]}, [r0], r1
|
||||
vld1.u32 {d18[0]}, [r0], r1
|
||||
vld1.u32 {d18[1]}, [r0], r1
|
||||
vld1.u32 {d20[0]}, [r0], r1
|
||||
vld1.u32 {d20[1]}, [r0], r1
|
||||
vld1.u32 {d22[0]}, [r0], r1
|
||||
vld1.u32 {d22[1]}, [r0], r1
|
||||
vld1.u32 {d24[0]}, [r0], r1
|
||||
vld1.u32 {d24[1]}, [r0], r1
|
||||
vld1.u32 {d26[0]}, [r0], r5
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -216,22 +254,27 @@ loop_vert
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r6
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
subs r8, r8, #4 ; w -= 4
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
mov r8, r10 ; restore w counter
|
||||
add r0, r0, r7 ; src += 4 * src_stride - w
|
||||
add r2, r2, r12 ; dst += 4 * dst_stride - w
|
||||
subs r9, r9, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r10, pc}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
@@ -19,7 +20,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
||||
* maximum buffer size to 64 * (64 + 7) (+ 1 row to make the height divisible by 4).
|
||||
*/
|
||||
uint8_t temp[64 * 72];
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
|
||||
// Account for the vertical phase needing 3 lines prior and 4 lines post
|
||||
int intermediate_height = h + 7;
|
||||
@@ -53,7 +54,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
uint8_t temp[64 * 72];
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16)
|
||||
|
||||
84
vp9/common/arm/neon/vp9_copy_neon.asm
Normal file
84
vp9/common/arm/neon/vp9_copy_neon.asm
Normal file
@@ -0,0 +1,84 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_copy_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Row-by-row block copy with a width-specialised inner loop.
; Register use as visible in the code below:
;   r0  source pointer, advanced by r1 after each row
;   r1  source row stride in bytes
;   r2  destination pointer, advanced by r3 after each row
;   r3  destination row stride in bytes
;   r4  value compared against 32 and 8 to pick the copy loop
;   r5  remaining-row counter, decremented by each loop
; NOTE(review): r4/r5 are presumably the w and h arguments of the convolve
; prototype -- confirm against the C declaration.
|vp9_convolve_copy_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
; After the 12-byte push, sp+28 and sp+32 hold the 5th and 6th stack arguments.
ldrd r4, r5, [sp, #28]
|
||||
|
||||
; Dispatch: r4 > 32 -> copy64, == 32 -> copy32, 8 < r4 < 32 -> copy16,
; == 8 -> copy8, anything smaller -> copy4.
cmp r4, #32
|
||||
bgt copy64
|
||||
beq copy32
|
||||
cmp r4, #8
|
||||
bgt copy16
|
||||
beq copy8
|
||||
b copy4
|
||||
|
||||
; 64 bytes per row, one row per iteration. The first 32-byte load/store uses
; writeback (!), so the strides are pre-reduced by 32 for the second half.
copy64
|
||||
sub lr, r1, #32
|
||||
sub r3, r3, #32
|
||||
copy64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
; The @128 qualifier asserts a 16-byte aligned destination address.
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy64_h
|
||||
pop {r4-r5, pc}
|
||||
|
||||
; 32 bytes per row, two rows per iteration (row counter drops by 2).
copy32
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy32
|
||||
pop {r4-r5, pc}
|
||||
|
||||
; 16 bytes per row, two rows per iteration.
copy16
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy16
|
||||
pop {r4-r5, pc}
|
||||
|
||||
; 8 bytes per row, two rows per iteration; @64 asserts an 8-byte aligned
; destination address.
copy8
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d2}, [r0], r1
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d2}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy8
|
||||
pop {r4-r5, pc}
|
||||
|
||||
; 4 bytes per row through a core register, one row per iteration.
copy4
|
||||
ldr r12, [r0], r1
|
||||
str r12, [r2], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy4
|
||||
pop {r4-r5, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
169
vp9/common/arm/neon/vp9_idct16x16_neon.c
Normal file
169
vp9/common/arm/neon/vp9_idct16x16_neon.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void save_neon_registers();
|
||||
extern void restore_neon_registers();
|
||||
|
||||
|
||||
// Driver for the NEON 16x16 inverse transform. It issues four pass1/pass2
// pairs of the assembly helpers: two over `input` (upper and lower 8 rows),
// which write into row_idct_output with the fourth pass2 argument set to 0,
// and two over row_idct_output (left and right 8 columns), which pass 1 and
// target dest and dest + 8. The helper calls are bracketed by
// save_neon_registers() / restore_neon_registers().
//
// input:       coefficient block; read at input, input+1, input+8*16 and
//              input+8*16+1.
// dest:        destination pixels handed to the pass2 helper.
// dest_stride: forwarded unchanged to the pass2 helper.
//
// NOTE(review): the fourth pass2 parameter is declared as `skip_adding`, yet
// the passes documented below as adding to the destination pass 1 -- the name
// may be inverted relative to its meaning; confirm against the assembly.
void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the lower 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Reduced-work variant of the NEON 16x16 inverse transform driver. Only the
// upper 8 rows of `input` go through a row pass, using the idct10 pass1/pass2
// helpers; the lower-8-row pass is skipped, so the part of row_idct_output
// that pass would have written (starting at row_idct_output+8) keeps its zero
// initialization. Both column passes then use the full
// vp9_short_idct16x16_add_neon_pass1/pass2 helpers, targeting dest and
// dest + 8. The helper calls are bracketed by save_neon_registers() /
// restore_neon_registers().
//
// input:       coefficient block; only input and input+1 are passed on.
// dest:        destination pixels handed to the pass2 helper.
// dest_stride: forwarded unchanged to the pass2 helper.
//
// NOTE(review): presumably callers guarantee that every coefficient outside
// the region consumed by the idct10 helpers is zero -- confirm at call sites.
void vp9_short_idct10_16x16_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
save_neon_registers();
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct10_16x16_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Skip Parallel idct on the lower 8 rows as they are all 0s */
|
||||
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
restore_neon_registers();
|
||||
|
||||
return;
|
||||
}
|
||||
47
vp9/common/arm/neon/vp9_idct32x32_neon.c
Normal file
47
vp9/common/arm/neon/vp9_idct32x32_neon.c
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
|
||||
extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
|
||||
int16_t *output, int16_t *input);
|
||||
extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
|
||||
|
||||
|
||||
// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
|
||||
extern void save_neon_registers();
|
||||
extern void restore_neon_registers();
|
||||
|
||||
// Driver for the NEON 32x32 inverse transform. It calls the assembly
// transpose-and-transform helper twice (input -> pass1 for rows, then
// pass1 -> pass2 for columns) and finally hands pass2 to idct32_combine_add
// together with dest and dest_stride. The helper calls are bracketed by
// save_neon_registers() / restore_neon_registers().
//
// NOTE(review): transpose_buffer, pass1 and pass2 are uninitialized stack
// arrays; presumably the helpers fully write them before reading -- confirm
// against the assembly.
void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
// TODO(cd): move the creation of these buffers within the ASM file
|
||||
// internal buffer used to transpose 8 lines into before transforming them
|
||||
int16_t transpose_buffer[32 * 8];
|
||||
// results of the first pass (transpose and transform rows)
|
||||
int16_t pass1[32 * 32];
|
||||
// results of the second pass (transpose and transform columns)
|
||||
int16_t pass2[32 * 32];
|
||||
|
||||
// save the registers we need to preserve
|
||||
save_neon_registers();
|
||||
// process rows
|
||||
idct32_transpose_and_transform(transpose_buffer, pass1, input);
|
||||
// process columns
|
||||
// TODO(cd): do these two steps/passes within the ASM file
|
||||
idct32_transpose_and_transform(transpose_buffer, pass2, pass1);
|
||||
// combine and add to dest
|
||||
// TODO(cd): integrate this within the last storage step of the second pass
|
||||
idct32_combine_add(dest, pass2, dest_stride);
|
||||
// restore the registers we need to preserve
|
||||
restore_neon_registers();
|
||||
}
|
||||
|
||||
// TODO(cd): Eliminate this file altogether when everything is in ASM file
|
||||
@@ -361,8 +361,6 @@ v_end
|
||||
|
||||
vand d16, d20, d19 ; flat && mask
|
||||
vmov r5, r6, d16
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #1 ; Only do filter branch
|
||||
|
||||
; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
|
||||
vabd.u8 d22, d3, d7 ; abs(p4 - p0)
|
||||
@@ -388,10 +386,11 @@ v_end
|
||||
|
||||
vmov.u8 d22, #0x80
|
||||
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #1 ; Only do filter branch
|
||||
|
||||
vand d17, d18, d16 ; flat2 && flat && mask
|
||||
vmov r5, r6, d17
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #2 ; Only do mbfilter branch
|
||||
|
||||
; mbfilter() function
|
||||
|
||||
@@ -405,15 +404,10 @@ v_end
|
||||
vmov.u8 d27, #3
|
||||
|
||||
vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
|
||||
|
||||
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
|
||||
|
||||
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
|
||||
|
||||
vand d29, d29, d21 ; filter &= hev
|
||||
|
||||
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
|
||||
|
||||
vmov.u8 d29, #4
|
||||
|
||||
; filter = clamp(filter + 3 * ( qs0 - ps0))
|
||||
@@ -452,37 +446,37 @@ v_end
|
||||
vaddl.u8 q15, d7, d8 ; op2 = p0 + q0
|
||||
vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3
|
||||
vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2
|
||||
vaddl.u8 q10, d4, d5
|
||||
vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
|
||||
vaddl.u8 q14, d6, d9
|
||||
vqrshrn.u16 d18, q15, #3 ; r_op2
|
||||
|
||||
vsubw.u8 q15, d4 ; op1 = op2 - p3
|
||||
vsubw.u8 q15, d5 ; op1 -= p2
|
||||
vaddw.u8 q15, d6 ; op1 += p1
|
||||
vaddw.u8 q15, d9 ; op1 += q1
|
||||
vsub.i16 q15, q10
|
||||
vaddl.u8 q10, d4, d6
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d7, d10
|
||||
vqrshrn.u16 d19, q15, #3 ; r_op1
|
||||
|
||||
vsubw.u8 q15, d4 ; op0 = op1 - p3
|
||||
vsubw.u8 q15, d6 ; op0 -= p1
|
||||
vaddw.u8 q15, d7 ; op0 += p0
|
||||
vaddw.u8 q15, d10 ; op0 += q2
|
||||
vsub.i16 q15, q10
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d8, d11
|
||||
vqrshrn.u16 d20, q15, #3 ; r_op0
|
||||
|
||||
vsubw.u8 q15, d4 ; oq0 = op0 - p3
|
||||
vsubw.u8 q15, d7 ; oq0 -= p0
|
||||
vaddw.u8 q15, d8 ; oq0 += q0
|
||||
vaddw.u8 q15, d11 ; oq0 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d9, d11
|
||||
vqrshrn.u16 d21, q15, #3 ; r_oq0
|
||||
|
||||
vsubw.u8 q15, d5 ; oq1 = oq0 - p2
|
||||
vsubw.u8 q15, d8 ; oq1 -= q0
|
||||
vaddw.u8 q15, d9 ; oq1 += q1
|
||||
vaddw.u8 q15, d11 ; oq1 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d10, d11
|
||||
vqrshrn.u16 d22, q15, #3 ; r_oq1
|
||||
|
||||
vsubw.u8 q15, d6 ; oq2 = oq0 - p1
|
||||
vsubw.u8 q15, d9 ; oq2 -= q1
|
||||
vaddw.u8 q15, d10 ; oq2 += q2
|
||||
vaddw.u8 q15, d11 ; oq2 += q3
|
||||
vadd.i16 q15, q14
|
||||
vqrshrn.u16 d27, q15, #3 ; r_oq2
|
||||
|
||||
; Filter does not set op2 or oq2, so use p2 and q2.
|
||||
@@ -501,113 +495,104 @@ v_end
|
||||
; wide_mbfilter flat2 && flat && mask branch
|
||||
vmov.u8 d16, #7
|
||||
vaddl.u8 q15, d7, d8 ; op6 = p0 + q0
|
||||
vaddl.u8 q12, d2, d3
|
||||
vaddl.u8 q13, d4, d5
|
||||
vaddl.u8 q14, d1, d6
|
||||
vmlal.u8 q15, d0, d16 ; op6 += p7 * 3
|
||||
vmlal.u8 q15, d1, d29 ; op6 += p6 * 2
|
||||
vaddw.u8 q15, d2 ; op6 += p5
|
||||
vaddw.u8 q15, d3 ; op6 += p4
|
||||
vaddw.u8 q15, d4 ; op6 += p3
|
||||
vaddw.u8 q15, d5 ; op6 += p2
|
||||
vaddw.u8 q15, d6 ; op6 += p1
|
||||
vadd.i16 q12, q13
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d2, d9
|
||||
vadd.i16 q15, q12
|
||||
vaddl.u8 q12, d0, d1
|
||||
vaddw.u8 q15, d1
|
||||
vaddl.u8 q13, d0, d2
|
||||
vadd.i16 q14, q15, q14
|
||||
vqrshrn.u16 d16, q15, #4 ; w_op6
|
||||
|
||||
vsubw.u8 q15, d0 ; op5 = op6 - p7
|
||||
vsubw.u8 q15, d1 ; op5 -= p6
|
||||
vaddw.u8 q15, d2 ; op5 += p5
|
||||
vaddw.u8 q15, d9 ; op5 += q1
|
||||
vsub.i16 q15, q14, q12
|
||||
vaddl.u8 q14, d3, d10
|
||||
vqrshrn.u16 d24, q15, #4 ; w_op5
|
||||
|
||||
vsubw.u8 q15, d0 ; op4 = op5 - p7
|
||||
vsubw.u8 q15, d2 ; op4 -= p5
|
||||
vaddw.u8 q15, d3 ; op4 += p4
|
||||
vaddw.u8 q15, d10 ; op4 += q2
|
||||
vsub.i16 q15, q13
|
||||
vaddl.u8 q13, d0, d3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d4, d11
|
||||
vqrshrn.u16 d25, q15, #4 ; w_op4
|
||||
|
||||
vsubw.u8 q15, d0 ; op3 = op4 - p7
|
||||
vsubw.u8 q15, d3 ; op3 -= p4
|
||||
vaddw.u8 q15, d4 ; op3 += p3
|
||||
vaddw.u8 q15, d11 ; op3 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d4
|
||||
vsub.i16 q15, q13
|
||||
vsub.i16 q14, q15, q14
|
||||
vqrshrn.u16 d26, q15, #4 ; w_op3
|
||||
|
||||
vsubw.u8 q15, d0 ; op2 = op3 - p7
|
||||
vsubw.u8 q15, d4 ; op2 -= p3
|
||||
vaddw.u8 q15, d5 ; op2 += p2
|
||||
vaddw.u8 q15, q14, d5 ; op2 += p2
|
||||
vaddl.u8 q14, d0, d5
|
||||
vaddw.u8 q15, d12 ; op2 += q4
|
||||
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d27, q15, #4 ; w_op2
|
||||
|
||||
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; op1 = op2 - p7
|
||||
vsubw.u8 q15, d5 ; op1 -= p2
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d6
|
||||
vaddw.u8 q15, d6 ; op1 += p1
|
||||
vaddw.u8 q15, d13 ; op1 += q5
|
||||
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d18, q15, #4 ; w_op1
|
||||
|
||||
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; op0 = op1 - p7
|
||||
vsubw.u8 q15, d6 ; op0 -= p1
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d7
|
||||
vaddw.u8 q15, d7 ; op0 += p0
|
||||
vaddw.u8 q15, d14 ; op0 += q6
|
||||
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d19, q15, #4 ; w_op0
|
||||
|
||||
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; oq0 = op0 - p7
|
||||
vsubw.u8 q15, d7 ; oq0 -= p0
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d1, d8
|
||||
vaddw.u8 q15, d8 ; oq0 += q0
|
||||
vaddw.u8 q15, d15 ; oq0 += q7
|
||||
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d20, q15, #4 ; w_oq0
|
||||
|
||||
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d1 ; oq1 = oq0 - p6
|
||||
vsubw.u8 q15, d8 ; oq1 -= q0
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d2, d9
|
||||
vaddw.u8 q15, d9 ; oq1 += q1
|
||||
vaddl.u8 q4, d10, d15
|
||||
vaddw.u8 q15, d15 ; oq1 += q7
|
||||
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d21, q15, #4 ; w_oq1
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d3, d10
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d11, d15
|
||||
vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d2 ; oq2 = oq1 - p5
|
||||
vsubw.u8 q15, d9 ; oq2 -= q1
|
||||
vaddw.u8 q15, d10 ; oq2 += q2
|
||||
vaddw.u8 q15, d15 ; oq2 += q7
|
||||
vqrshrn.u16 d22, q15, #4 ; w_oq2
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d4, d11
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d12, d15
|
||||
vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d3 ; oq3 = oq2 - p4
|
||||
vsubw.u8 q15, d10 ; oq3 -= q2
|
||||
vaddw.u8 q15, d11 ; oq3 += q3
|
||||
vaddw.u8 q15, d15 ; oq3 += q7
|
||||
vqrshrn.u16 d23, q15, #4 ; w_oq3
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d5, d12
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d13, d15
|
||||
vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d4 ; oq4 = oq3 - p3
|
||||
vsubw.u8 q15, d11 ; oq4 -= q3
|
||||
vaddw.u8 q15, d12 ; oq4 += q4
|
||||
vaddw.u8 q15, d15 ; oq4 += q7
|
||||
vqrshrn.u16 d1, q15, #4 ; w_oq4
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d6, d13
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d14, d15
|
||||
vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d5 ; oq5 = oq4 - p2
|
||||
vsubw.u8 q15, d12 ; oq5 -= q4
|
||||
vaddw.u8 q15, d13 ; oq5 += q5
|
||||
vaddw.u8 q15, d15 ; oq5 += q7
|
||||
vqrshrn.u16 d2, q15, #4 ; w_oq5
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d6 ; oq6 = oq5 - p1
|
||||
vsubw.u8 q15, d13 ; oq6 -= q5
|
||||
vaddw.u8 q15, d14 ; oq6 += q6
|
||||
vaddw.u8 q15, d15 ; oq6 += q7
|
||||
vqrshrn.u16 d3, q15, #4 ; w_oq6
|
||||
|
||||
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
|
||||
vadd.i16 q15, q4
|
||||
vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d3, q15, #4 ; w_oq6
|
||||
vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m)
|
||||
vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m)
|
||||
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
|
||||
|
||||
198
vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
Normal file
198
vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
Normal file
@@ -0,0 +1,198 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct16x16_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct16x16_1_add_neon| PROC
|
||||
; DC-only 16x16 inverse transform: a single value a1 is derived from
; input[0] and added to every pixel of the 16x16 block at dest, with the
; result saturated to 8 bits. The block is processed as four groups of four
; rows; each 16-byte row is handled as two 8-byte halves.
; r1 is the load pointer and r12 the store pointer; both walk the block with
; the same alternating post-increments (r0 = 8, r2 = dest_stride - 8).
ldrsh r0, [r0] ; r0 = input[0], sign-extended
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest: r12 becomes the store pointer
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 6)
|
||||
add r0, r0, #32 ; + (1 <<((6) - 1))
|
||||
asr r0, r0, #6 ; >> 6
|
||||
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
mov r0, #8 ; step from the left half of a row to the right half
|
||||
sub r2, #8 ; step from the right half to the start of the next row
|
||||
|
||||
|
||||
; load destination data row0 - row3
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
; load destination data row4 - row7
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
; load destination data row8 - row11
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
; load destination data row12 - row15
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_1_add_neon|
|
||||
|
||||
END
|
||||
1191
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
Normal file
1191
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
Normal file
File diff suppressed because it is too large
Load Diff
1013
vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
Normal file
1013
vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
Normal file
File diff suppressed because it is too large
Load Diff
68
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
Normal file
68
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
Normal file
@@ -0,0 +1,68 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct4x4_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct4x4_1_add_neon| PROC
|
||||
; DC-only 4x4 inverse transform: a single value a1 is derived from input[0]
; and added to every pixel of the 4x4 block at dest, with the result
; saturated to 8 bits. r1 is the load pointer and r12 the store pointer;
; both advance by dest_stride (r2) per row.
ldrsh r0, [r0] ; r0 = input[0], sign-extended
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest: r12 becomes the store pointer
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 4)
|
||||
add r0, r0, #8 ; + (1 <<((4) - 1))
|
||||
asr r0, r0, #4 ; >> 4
|
||||
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
|
||||
; load the four 4-byte rows: rows 0-1 into d2, rows 2-3 into d4
|
||||
vld1.32 {d2[0]}, [r1], r2
|
||||
vld1.32 {d2[1]}, [r1], r2
|
||||
vld1.32 {d4[0]}, [r1], r2
|
||||
vld1.32 {d4[1]}, [r1]
|
||||
|
||||
|
||||
vaddw.u8 q8, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q9, q0, d4 ; dest[x] + a1
|
||||
|
||||
|
||||
vqmovun.s16 d6, q8 ; clip_pixel
|
||||
vqmovun.s16 d7, q9 ; clip_pixel
|
||||
|
||||
|
||||
; store the rows back in the same order they were loaded
|
||||
vst1.32 {d6[0]}, [r12], r2
|
||||
vst1.32 {d6[1]}, [r12], r2
|
||||
vst1.32 {d7[0]}, [r12], r2
|
||||
vst1.32 {d7[1]}, [r12]
|
||||
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_1_add_neon|
|
||||
|
||||
END
|
||||
190
vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
Normal file
190
vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
Normal file
@@ -0,0 +1,190 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct4x4_add_neon| PROC
|
||||
|
||||
; Full 4x4 inverse DCT of the 16 coefficients at input; the rounded result
; is added to the 4x4 pixel block at dest and saturated to 8 bits.
; The 2D transform is done with two passes which are actually pretty
|
||||
; similar. We first transform the rows. This is done by transposing
|
||||
; the inputs, doing an SIMD column transform (the columns are the
|
||||
; transposed rows) and then transpose the results (so that it goes back
|
||||
; in normal/row positions). Then, we transform the columns by doing
|
||||
; another SIMD column transform.
|
||||
; So, two passes of a transpose followed by a column transform.
|
||||
|
||||
; load the inputs into q8-q9, d16-d19
|
||||
vld1.s16 {q8,q9}, [r0]! ; r0 is reused for a constant below
|
||||
|
||||
; generate scalar constants
|
||||
; cospi_8_64 = 15137 = 0x3b21
|
||||
mov r0, #0x3b00
|
||||
add r0, #0x21
|
||||
; cospi_16_64 = 11585 = 0x2d41
|
||||
mov r3, #0x2d00
|
||||
add r3, #0x41
|
||||
; cospi_24_64 = 6270 = 0x187e
|
||||
mov r12, #0x1800
|
||||
add r12, #0x7e
|
||||
|
||||
; transpose the input data
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
|
||||
; generate constant vectors
|
||||
vdup.16 d20, r0 ; replicate cospi_8_64
|
||||
vdup.16 d21, r3 ; replicate cospi_16_64
|
||||
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
vdup.16 d22, r12 ; replicate cospi_24_64
|
||||
|
||||
; do the transform on transposed rows
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2]
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3]
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14
|
||||
vsub.s16 q9, q13, q14
|
||||
vswp d18, d19 ; q9 held (output[3], output[2]); restore natural order
|
||||
|
||||
; transpose the results
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
; do the transform on columns
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2]
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3]
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14
|
||||
vsub.s16 q9, q13, q14
|
||||
|
||||
; The results are in two registers, one of them being swapped. This will
|
||||
; be taken care of by loading the 'dest' value in a swapped fashion and
|
||||
; also storing them in the same swapped fashion.
|
||||
; temp_out[0, 1] = d16, d17 = q8
|
||||
; temp_out[2, 3] = d19, d18 = q9 swapped
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4)
|
||||
vrshr.s16 q8, q8, #4
|
||||
vrshr.s16 q9, q9, #4
|
||||
|
||||
; rows 0 and 1 go to d26[0] and d26[1]; row 2 goes to d27[1] and row 3 to
; d27[0] so that d27 lines up with the swapped halves of q9
vld1.32 {d26[0]}, [r1], r2
|
||||
vld1.32 {d26[1]}, [r1], r2
|
||||
vld1.32 {d27[1]}, [r1], r2
|
||||
vld1.32 {d27[0]}, [r1] ; no post-increment
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d26
|
||||
vaddw.u8 q9, q9, d27
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d26, q8
|
||||
vqmovun.s16 d27, q9
|
||||
|
||||
; do the stores in reverse order with negative post-increment, by changing
|
||||
; the sign of the stride
|
||||
rsb r2, r2, #0
|
||||
vst1.32 {d27[0]}, [r1], r2
|
||||
vst1.32 {d27[1]}, [r1], r2
|
||||
vst1.32 {d26[1]}, [r1], r2
|
||||
vst1.32 {d26[0]}, [r1] ; no post-increment
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_add_neon|
|
||||
|
||||
END
|
||||
88
vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
Normal file
88
vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
Normal file
@@ -0,0 +1,88 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct8x8_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct8x8_1_add_neon| PROC
|
||||
; DC-only 8x8 inverse transform: a single value a1 is derived from input[0]
; and added to every pixel of the 8x8 block at dest, with the result
; saturated to 8 bits. All eight rows are loaded through r1 first, then
; written back through r12 in two groups of four rows.
ldrsh r0, [r0] ; r0 = input[0], sign-extended
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest: r12 becomes the store pointer
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 5)
|
||||
add r0, r0, #16 ; + (1 <<((5) - 1))
|
||||
asr r0, r0, #5 ; >> 5
|
||||
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
|
||||
; load destination data
|
||||
vld1.64 {d2}, [r1], r2
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r2
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r2
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r2
|
||||
vld1.64 {d17}, [r1]
|
||||
|
||||
|
||||
; rows 0-3
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r2
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r2
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
; rows 4-7
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r2
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r2
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_1_add_neon|
|
||||
|
||||
END
|
||||
@@ -9,6 +9,7 @@
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct8x8_add_neon|
|
||||
EXPORT |vp9_short_idct10_8x8_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -24,191 +25,149 @@
|
||||
; stage 1
|
||||
vdup.16 d0, r3 ; duplicate cospi_28_64
|
||||
vdup.16 d1, r4 ; duplicate cospi_4_64
|
||||
vdup.16 d2, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d3, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[1] * cospi_28_64
|
||||
vmull.s16 q2, d18, d0
|
||||
vmull.s16 q3, d19, d0
|
||||
|
||||
; input[7] * cospi_4_64
|
||||
vmull.s16 q4, d30, d1
|
||||
vmull.s16 q5, d31, d1
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q5, d26, d2
|
||||
vmull.s16 q6, d27, d2
|
||||
|
||||
; input[1]*cospi_28_64-input[7]*cospi_4_64
|
||||
vsub.s32 q6, q2, q4
|
||||
vsub.s32 q7, q3, q5
|
||||
vmlsl.s16 q2, d30, d1
|
||||
vmlsl.s16 q3, d31, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vmlsl.s16 q5, d22, d3
|
||||
vmlsl.s16 q6, d23, d3
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d8, q6, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q7, #14 ; >> 14
|
||||
vqrshrn.s32 d8, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_4_64
|
||||
vmull.s16 q2, d18, d1
|
||||
vmull.s16 q3, d19, d1
|
||||
|
||||
; input[7] * cospi_28_64
|
||||
vmull.s16 q1, d30, d0
|
||||
vmull.s16 q5, d31, d0
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q9, d26, d3
|
||||
vmull.s16 q13, d27, d3
|
||||
|
||||
; input[1]*cospi_4_64+input[7]*cospi_28_64
|
||||
vadd.s32 q2, q2, q1
|
||||
vadd.s32 q3, q3, q5
|
||||
vmlal.s16 q2, d30, d0
|
||||
vmlal.s16 q3, d31, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vmlal.s16 q9, d22, d2
|
||||
vmlal.s16 q13, d23, d2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d14, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q3, #14 ; >> 14
|
||||
|
||||
vdup.16 d0, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d1, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q2, d26, d0
|
||||
vmull.s16 q3, d27, d0
|
||||
|
||||
; input[3] * cospi_20_64
|
||||
vmull.s16 q5, d22, d1
|
||||
vmull.s16 q6, d23, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vsub.s32 q2, q2, q5
|
||||
vsub.s32 q3, q3, q6
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q3, #14 ; >> 14
|
||||
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q2, d26, d1
|
||||
vmull.s16 q3, d27, d1
|
||||
|
||||
; input[3] * cospi_12_64
|
||||
vmull.s16 q9, d22, d0
|
||||
vmull.s16 q15, d23, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vadd.s32 q0, q2, q9
|
||||
vadd.s32 q1, q3, q15
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q0, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q1, #14 ; >> 14
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
vdup.16 d0, r7 ; duplicate cospi_16_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q13, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[2] * cospi_16_64
|
||||
vmull.s16 q9, d24, d0
|
||||
vmull.s16 q11, d25, d0
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q13, d16, d0
|
||||
vmull.s16 q15, d17, d0
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64
|
||||
vadd.s32 q9, q2, q9
|
||||
vadd.s32 q11, q3, q11
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q11, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[2] * cospi_16_64
|
||||
vmull.s16 q13, d24, d0
|
||||
vmull.s16 q15, d25, d0
|
||||
vmlal.s16 q2, d24, d0
|
||||
vmlal.s16 q3, d25, d0
|
||||
|
||||
; (input[0] - input[2]) * cospi_16_64
|
||||
vsub.s32 q2, q2, q13
|
||||
vsub.s32 q3, q3, q15
|
||||
vmlsl.s16 q13, d24, d0
|
||||
vmlsl.s16 q15, d25, d0
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q3, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vdup.16 d0, r8 ; duplicate cospi_24_64
|
||||
vdup.16 d1, r9 ; duplicate cospi_8_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q15, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
; input[1] * cospi_24_64
|
||||
vmull.s16 q2, d20, d0
|
||||
vmull.s16 q3, d21, d0
|
||||
|
||||
; input[3] * cospi_8_64
|
||||
vmull.s16 q13, d28, d1
|
||||
vmull.s16 q15, d29, d1
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q8, d20, d1
|
||||
vmull.s16 q12, d21, d1
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vsub.s32 q2, q2, q13
|
||||
vsub.s32 q3, q3, q15
|
||||
vmlsl.s16 q2, d28, d1
|
||||
vmlsl.s16 q3, d29, d1
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vmlal.s16 q8, d28, d0
|
||||
vmlal.s16 q12, d29, d0
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d26, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d27, q3, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q2, d20, d1
|
||||
vmull.s16 q3, d21, d1
|
||||
|
||||
; input[3] * cospi_24_64
|
||||
vmull.s16 q8, d28, d0
|
||||
vmull.s16 q10, d29, d0
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vadd.s32 q0, q2, q8
|
||||
vadd.s32 q1, q3, q10
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d30, q0, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q1, #14 ; >> 14
|
||||
|
||||
vqrshrn.s32 d30, q8, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q12, #14 ; >> 14
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[5] * cospi_16_64
|
||||
vmull.s16 q11, d26, d16
|
||||
vmull.s16 q12, d27, d16
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vsub.s32 q9, q9, q11
|
||||
vsub.s32 q10, q10, q12
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[5] * cospi_16_64
|
||||
vmull.s16 q11, d26, d16
|
||||
vmull.s16 q12, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vadd.s32 q9, q9, q11
|
||||
vadd.s32 q10, q10, q12
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q10, #14 ; >> 14
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
@@ -247,14 +206,11 @@
|
||||
|
||||
|vp9_short_idct8x8_add_neon| PROC
|
||||
push {r4-r9}
|
||||
vld1.s16 {q8}, [r0]!
|
||||
vld1.s16 {q9}, [r0]!
|
||||
vld1.s16 {q10}, [r0]!
|
||||
vld1.s16 {q11}, [r0]!
|
||||
vld1.s16 {q12}, [r0]!
|
||||
vld1.s16 {q13}, [r0]!
|
||||
vld1.s16 {q14}, [r0]!
|
||||
vld1.s16 {q15}, [r0]!
|
||||
vpush {d8-d15}
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
vld1.s16 {q10,q11}, [r0]!
|
||||
vld1.s16 {q12,q13}, [r0]!
|
||||
vld1.s16 {q14,q15}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE8X8
|
||||
@@ -349,8 +305,215 @@
|
||||
vst1.64 {d6}, [r0], r2
|
||||
vst1.64 {d7}, [r0], r2
|
||||
|
||||
vpop {d8-d15}
|
||||
pop {r4-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_add_neon|
|
||||
|
||||
;void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct10_8x8_add_neon| PROC
|
||||
; Reduced 8x8 inverse DCT plus add to dest. The row pass below reads only
; q8-q11 (input[0..3] after the transpose) and multiplies each by a single
; constant with vqrdmulh; the column pass uses the IDCT8x8_1D macro.
; NOTE(review): presumably valid only when the remaining coefficients are
; zero (the "10" in the name) - confirm against the C reference and callers.
push {r4-r9}
|
||||
vpush {d8-d15} ; q4-q7 are overwritten below
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
vld1.s16 {q10,q11}, [r0]!
|
||||
vld1.s16 {q12,q13}, [r0]!
|
||||
vld1.s16 {q14,q15}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE8X8
|
||||
|
||||
; generate cospi_28_64 = 3196
|
||||
mov r3, #0x0c00
|
||||
add r3, #0x7c
|
||||
|
||||
; generate cospi_4_64 = 16069
|
||||
mov r4, #0x3e00
|
||||
add r4, #0xc5
|
||||
|
||||
; generate cospi_12_64 = 13623
|
||||
mov r5, #0x3500
|
||||
add r5, #0x37
|
||||
|
||||
; generate cospi_20_64 = 9102
|
||||
mov r6, #0x2300
|
||||
add r6, #0x8e
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r7, #0x2d00
|
||||
add r7, #0x41
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r8, #0x1800
|
||||
add r8, #0x7e
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r9, #0x3b00
|
||||
add r9, #0x21
|
||||
|
||||
; First transform rows
|
||||
; stage 1
|
||||
; The following instructions use vqrdmulh to do the
|
||||
; dct_const_round_shift(input[1] * cospi_28_64). vqrdmulh will do doubling
|
||||
; multiply and shift the result by 16 bits instead of 14 bits. So we need
|
||||
; to double the constants before multiplying to compensate this.
|
||||
mov r12, r3, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_28_64*2
|
||||
mov r12, r4, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_4_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_28_64)
|
||||
vqrdmulh.s16 q4, q9, q0
|
||||
|
||||
mov r12, r6, lsl #1
|
||||
rsb r12, #0
|
||||
vdup.16 q0, r12 ; duplicate -cospi_20_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_4_64)
|
||||
vqrdmulh.s16 q7, q9, q1
|
||||
|
||||
mov r12, r5, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_12_64*2
|
||||
|
||||
; dct_const_round_shift(- input[3] * cospi_20_64)
|
||||
vqrdmulh.s16 q5, q11, q0
|
||||
|
||||
mov r12, r7, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_16_64*2
|
||||
|
||||
; dct_const_round_shift(input[3] * cospi_12_64)
|
||||
vqrdmulh.s16 q6, q11, q1
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
mov r12, r8, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_24_64*2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrdmulh.s16 q9, q8, q0
|
||||
|
||||
mov r12, r9, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_8_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_24_64)
|
||||
vqrdmulh.s16 q13, q10, q1
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_8_64)
|
||||
vqrdmulh.s16 q15, q10, q0
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64 (q8 was already consumed above)
|
||||
|
||||
; q9 serves as both step[0] and step[1] in the four lines below
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q9, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q9, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift((step2[6] - step2[5]) * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift((step2[5] + step2[6]) * cospi_16_64)
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
|
||||
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
|
||||
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
|
||||
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
|
||||
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
|
||||
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
|
||||
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
||||
|
||||
; Transpose the matrix
|
||||
TRANSPOSE8X8
|
||||
|
||||
; Then transform columns
|
||||
IDCT8x8_1D
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 5)
|
||||
vrshr.s16 q8, q8, #5
|
||||
vrshr.s16 q9, q9, #5
|
||||
vrshr.s16 q10, q10, #5
|
||||
vrshr.s16 q11, q11, #5
|
||||
vrshr.s16 q12, q12, #5
|
||||
vrshr.s16 q13, q13, #5
|
||||
vrshr.s16 q14, q14, #5
|
||||
vrshr.s16 q15, q15, #5
|
||||
|
||||
; save dest pointer
|
||||
mov r0, r1
|
||||
|
||||
; load destination data
|
||||
vld1.64 {d0}, [r1], r2
|
||||
vld1.64 {d1}, [r1], r2
|
||||
vld1.64 {d2}, [r1], r2
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r2
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r2
|
||||
vld1.64 {d7}, [r1]
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d0
|
||||
vaddw.u8 q9, q9, d1
|
||||
vaddw.u8 q10, q10, d2
|
||||
vaddw.u8 q11, q11, d3
|
||||
vaddw.u8 q12, q12, d4
|
||||
vaddw.u8 q13, q13, d5
|
||||
vaddw.u8 q14, q14, d6
|
||||
vaddw.u8 q15, q15, d7
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d0, q8
|
||||
vqmovun.s16 d1, q9
|
||||
vqmovun.s16 d2, q10
|
||||
vqmovun.s16 d3, q11
|
||||
vqmovun.s16 d4, q12
|
||||
vqmovun.s16 d5, q13
|
||||
vqmovun.s16 d6, q14
|
||||
vqmovun.s16 d7, q15
|
||||
|
||||
; store the data
|
||||
vst1.64 {d0}, [r0], r2
|
||||
vst1.64 {d1}, [r0], r2
|
||||
vst1.64 {d2}, [r0], r2
|
||||
vst1.64 {d3}, [r0], r2
|
||||
vst1.64 {d4}, [r0], r2
|
||||
vst1.64 {d5}, [r0], r2
|
||||
vst1.64 {d6}, [r0], r2
|
||||
vst1.64 {d7}, [r0], r2
|
||||
|
||||
vpop {d8-d15}
|
||||
pop {r4-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct10_8x8_add_neon|
|
||||
|
||||
END
|
||||
|
||||
237
vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
Normal file
237
vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
Normal file
@@ -0,0 +1,237 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Parallel 1D IDCT on all the columns of a 4x4 16bits data matrix which are
|
||||
; loaded in d16-d19. d0 must contain cospi_8_64. d1 must contain
|
||||
; cospi_16_64. d2 must contain cospi_24_64. The output will be stored back
|
||||
; into d16-d19 registers. This macro will touch q10- q15 registers and use
|
||||
; them as buffer during calculation.
|
||||
MACRO
|
||||
IDCT4x4_1D
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d2 ; input[1] * cospi_24_64
|
||||
vmull.s16 q10, d17, d0 ; input[1] * cospi_8_64
|
||||
vmull.s16 q13, d23, d1 ; (input[0] + input[2]) * cospi_16_64
|
||||
vmull.s16 q14, d24, d1 ; (input[0] - input[2]) * cospi_16_64
|
||||
vmlsl.s16 q15, d19, d0 ; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vmlal.s16 q10, d19, d2 ; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14
|
||||
vqrshrn.s32 d27, q14, #14
|
||||
vqrshrn.s32 d29, q15, #14
|
||||
vqrshrn.s32 d28, q10, #14
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14
|
||||
vsub.s16 q9, q13, q14
|
||||
vswp d18, d19
|
||||
MEND
|
||||
|
||||
; Parallel 1D IADST on all the columns of a 4x4 16bits data matrix which is
|
||||
; loaded in d16-d19. d3 must contain sinpi_1_9. d4 must contain sinpi_2_9.
|
||||
; d5 must contain sinpi_4_9. d6 must contain sinpi_3_9. The output will be
|
||||
; stored back into d16-d19 registers. This macro will touch q10,q11,q12,q13,
|
||||
; q14,q15 registers and use them as buffer during calculation. r0 must also hold sinpi_3_9 (it is broadcast into q8 inside the macro).
|
||||
MACRO
|
||||
IADST4x4_1D
|
||||
vmull.s16 q10, d3, d16 ; s0 = sinpi_1_9 * x0
|
||||
vmull.s16 q11, d4, d16 ; s1 = sinpi_2_9 * x0
|
||||
vmull.s16 q12, d6, d17 ; s2 = sinpi_3_9 * x1
|
||||
vmull.s16 q13, d5, d18 ; s3 = sinpi_4_9 * x2
|
||||
vmull.s16 q14, d3, d18 ; s4 = sinpi_1_9 * x2
|
||||
vmovl.s16 q15, d16 ; expand x0 from 16 bit to 32 bit
|
||||
vaddw.s16 q15, q15, d19 ; x0 + x3
|
||||
vmull.s16 q8, d4, d19 ; s5 = sinpi_2_9 * x3
|
||||
vsubw.s16 q15, q15, d18 ; s7 = x0 + x3 - x2
|
||||
vmull.s16 q9, d5, d19 ; s6 = sinpi_4_9 * x3
|
||||
|
||||
vadd.s32 q10, q10, q13 ; x0 = s0 + s3 + s5
|
||||
vadd.s32 q10, q10, q8
|
||||
vsub.s32 q11, q11, q14 ; x1 = s1 - s4 - s6
|
||||
vdup.32 q8, r0 ; duplicate sinpi_3_9
|
||||
vsub.s32 q11, q11, q9
|
||||
vmul.s32 q15, q15, q8 ; x2 = sinpi_3_9 * s7
|
||||
|
||||
vadd.s32 q13, q10, q12 ; s0 = x0 + x3
|
||||
vadd.s32 q10, q10, q11 ; x0 + x1
|
||||
vadd.s32 q14, q11, q12 ; s1 = x1 + x3
|
||||
vsub.s32 q10, q10, q12 ; s3 = x0 + x1 - x3
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d16, q13, #14
|
||||
vqrshrn.s32 d17, q14, #14
|
||||
vqrshrn.s32 d18, q15, #14
|
||||
vqrshrn.s32 d19, q10, #14
|
||||
MEND
|
||||
|
||||
; Generate cosine constants in d0 - d2 for the IDCT (d0 = cospi_8_64, d1 = cospi_16_64, d2 = cospi_24_64). Overwrites r0, r3 and r12.
|
||||
MACRO
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
; cospi_8_64 = 15137 = 0x3b21 (built in two steps: high byte, then low byte)
|
||||
mov r0, #0x3b00
|
||||
add r0, #0x21
|
||||
; cospi_16_64 = 11585 = 0x2d41
|
||||
mov r3, #0x2d00
|
||||
add r3, #0x41
|
||||
; cospi_24_64 = 6270 = 0x187e
|
||||
mov r12, #0x1800
|
||||
add r12, #0x7e
|
||||
|
||||
; generate constant vectors: broadcast each 16-bit constant to all four lanes of a d register
|
||||
vdup.16 d0, r0 ; duplicate cospi_8_64
|
||||
vdup.16 d1, r3 ; duplicate cospi_16_64
|
||||
vdup.16 d2, r12 ; duplicate cospi_24_64
|
||||
MEND
|
||||
|
||||
; Generate sine constants in d3 - d6 for the IADST (d3 = sinpi_1_9, d4 = sinpi_2_9, d5 = sinpi_4_9, q3 i.e. d6/d7 = sinpi_3_9). Overwrites r0, r3 and r12; on exit r0 holds sinpi_3_9.
|
||||
MACRO
|
||||
GENERATE_SINE_CONSTANTS
|
||||
; sinpi_1_9 = 5283 = 0x14A3
|
||||
mov r0, #0x1400
|
||||
add r0, #0xa3
|
||||
; sinpi_2_9 = 9929 = 0x26C9
|
||||
mov r3, #0x2600
|
||||
add r3, #0xc9
|
||||
; sinpi_4_9 = 15212 = 0x3B6C
|
||||
mov r12, #0x3b00
|
||||
add r12, #0x6c
|
||||
|
||||
|
||||
; generate constant vectors (sinpi_1_9 is broadcast first so that r0 can be reused below)
|
||||
vdup.16 d3, r0 ; duplicate sinpi_1_9
|
||||
|
||||
|
||||
; sinpi_3_9 = 13377 = 0x3441 (left in r0 after the macro ends)
|
||||
mov r0, #0x3400
|
||||
add r0, #0x41
|
||||
|
||||
|
||||
vdup.16 d4, r3 ; duplicate sinpi_2_9
|
||||
vdup.16 d5, r12 ; duplicate sinpi_4_9
|
||||
vdup.16 q3, r0 ; duplicate sinpi_3_9 (fills both d6 and d7)
|
||||
MEND
|
||||
|
||||
; Transpose a 4x4 16bits data matrix. Data is loaded in d16-d19.
|
||||
MACRO
|
||||
TRANSPOSE4X4
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
vtrn.32 q8, q9
|
||||
MEND
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride, int tx_type)
|
||||
;
|
||||
; r0 int16_t input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
; r3 int tx_type)
|
||||
; This function will only handle tx_type of 1,2,3.
|
||||
|vp9_short_iht4x4_add_neon| PROC
|
||||
|
||||
; load the inputs into d16-d19
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE4X4
|
||||
|
||||
; decide the type of transform
|
||||
cmp r3, #2
|
||||
beq idct_iadst
|
||||
cmp r3, #3
|
||||
beq iadst_iadst
|
||||
|
||||
iadst_idct
|
||||
; generate constants
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IDCT4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IADST4x4_1D
|
||||
|
||||
b end_vp9_short_iht4x4_add_neon
|
||||
|
||||
idct_iadst
|
||||
; generate constants
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IADST4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IDCT4x4_1D
|
||||
|
||||
b end_vp9_short_iht4x4_add_neon
|
||||
|
||||
iadst_iadst
|
||||
; generate constants
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IADST4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IADST4x4_1D
|
||||
|
||||
end_vp9_short_iht4x4_add_neon
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4)
|
||||
vrshr.s16 q8, q8, #4
|
||||
vrshr.s16 q9, q9, #4
|
||||
|
||||
vld1.32 {d26[0]}, [r1], r2
|
||||
vld1.32 {d26[1]}, [r1], r2
|
||||
vld1.32 {d27[0]}, [r1], r2
|
||||
vld1.32 {d27[1]}, [r1]
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d26
|
||||
vaddw.u8 q9, q9, d27
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d26, q8
|
||||
vqmovun.s16 d27, q9
|
||||
|
||||
; do the stores in reverse order with negative post-increment, by changing
|
||||
; the sign of the stride
|
||||
rsb r2, r2, #0
|
||||
vst1.32 {d27[1]}, [r1], r2
|
||||
vst1.32 {d27[0]}, [r1], r2
|
||||
vst1.32 {d26[1]}, [r1], r2
|
||||
vst1.32 {d26[0]}, [r1] ; no post-increment
|
||||
bx lr
|
||||
ENDP ; |vp9_short_iht4x4_add_neon|
|
||||
|
||||
END
|
||||
696
vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
Normal file
696
vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
Normal file
@@ -0,0 +1,696 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht8x8_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Generate IADST constants in r0 - r12 for the IADST.
|
||||
MACRO
|
||||
GENERATE_IADST_CONSTANTS
|
||||
; generate cospi_2_64 = 16305
|
||||
mov r0, #0x3f00
|
||||
add r0, #0xb1
|
||||
|
||||
; generate cospi_30_64 = 1606
|
||||
mov r1, #0x600
|
||||
add r1, #0x46
|
||||
|
||||
; generate cospi_10_64 = 14449
|
||||
mov r2, #0x3800
|
||||
add r2, #0x71
|
||||
|
||||
; generate cospi_22_64 = 7723
|
||||
mov r3, #0x1e00
|
||||
add r3, #0x2b
|
||||
|
||||
; generate cospi_18_64 = 10394
|
||||
mov r4, #0x2800
|
||||
add r4, #0x9a
|
||||
|
||||
; generate cospi_14_64 = 12665
|
||||
mov r5, #0x3100
|
||||
add r5, #0x79
|
||||
|
||||
; generate cospi_26_64 = 4756
|
||||
mov r6, #0x1200
|
||||
add r6, #0x94
|
||||
|
||||
; generate cospi_6_64 = 15679
|
||||
mov r7, #0x3d00
|
||||
add r7, #0x3f
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r8, #0x3b00
|
||||
add r8, #0x21
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r9, #0x1800
|
||||
add r9, #0x7e
|
||||
|
||||
; generate 0
|
||||
mov r10, #0
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
MEND
|
||||
|
||||
; Generate IDCT constants in r3 - r9 for the IDCT.
|
||||
MACRO
|
||||
GENERATE_IDCT_CONSTANTS
|
||||
; generate cospi_28_64 = 3196
|
||||
mov r3, #0x0c00
|
||||
add r3, #0x7c
|
||||
|
||||
; generate cospi_4_64 = 16069
|
||||
mov r4, #0x3e00
|
||||
add r4, #0xc5
|
||||
|
||||
; generate cospi_12_64 = 13623
|
||||
mov r5, #0x3500
|
||||
add r5, #0x37
|
||||
|
||||
; generate cospi_20_64 = 9102
|
||||
mov r6, #0x2300
|
||||
add r6, #0x8e
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r7, #0x2d00
|
||||
add r7, #0x41
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r8, #0x1800
|
||||
add r8, #0x7e
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r9, #0x3b00
|
||||
add r9, #0x21
|
||||
MEND
|
||||
|
||||
; Transpose a 8x8 16bits data matrix. Data is loaded in q8-q15.
|
||||
MACRO
|
||||
TRANSPOSE8X8
|
||||
vswp d17, d24
|
||||
vswp d23, d30
|
||||
vswp d21, d28
|
||||
vswp d19, d26
|
||||
vtrn.32 q8, q10
|
||||
vtrn.32 q9, q11
|
||||
vtrn.32 q12, q14
|
||||
vtrn.32 q13, q15
|
||||
vtrn.16 q8, q9
|
||||
vtrn.16 q10, q11
|
||||
vtrn.16 q12, q13
|
||||
vtrn.16 q14, q15
|
||||
MEND
|
||||
|
||||
; Parallel 1D IDCT on all the columns of a 8x8 16bits data matrix which are
|
||||
; loaded in q8-q15. The IDCT constants are loaded in r3 - r9. The output
|
||||
; will be stored back into q8-q15 registers. This macro will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
MACRO
|
||||
IDCT8x8_1D
|
||||
; stage 1
|
||||
vdup.16 d0, r3 ; duplicate cospi_28_64
|
||||
vdup.16 d1, r4 ; duplicate cospi_4_64
|
||||
vdup.16 d2, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d3, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[1] * cospi_28_64
|
||||
vmull.s16 q2, d18, d0
|
||||
vmull.s16 q3, d19, d0
|
||||
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q5, d26, d2
|
||||
vmull.s16 q6, d27, d2
|
||||
|
||||
; input[1]*cospi_28_64-input[7]*cospi_4_64
|
||||
vmlsl.s16 q2, d30, d1
|
||||
vmlsl.s16 q3, d31, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vmlsl.s16 q5, d22, d3
|
||||
vmlsl.s16 q6, d23, d3
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_28_64 - input[7] * cospi_4_64)
|
||||
vqrshrn.s32 d8, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input[5] * cospi_12_64 - input[3] * cospi_20_64)
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_4_64
|
||||
vmull.s16 q2, d18, d1
|
||||
vmull.s16 q3, d19, d1
|
||||
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q9, d26, d3
|
||||
vmull.s16 q13, d27, d3
|
||||
|
||||
; input[1]*cospi_4_64+input[7]*cospi_28_64
|
||||
vmlal.s16 q2, d30, d0
|
||||
vmlal.s16 q3, d31, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vmlal.s16 q9, d22, d2
|
||||
vmlal.s16 q13, d23, d2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_4_64 + input[7] * cospi_28_64)
|
||||
vqrshrn.s32 d14, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q3, #14 ; >> 14
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
vdup.16 d0, r7 ; duplicate cospi_16_64
|
||||
|
||||
; dct_const_round_shift(input[5] * cospi_20_64 + input[3] * cospi_12_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q13, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q13, d16, d0
|
||||
vmull.s16 q15, d17, d0
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64
|
||||
vmlal.s16 q2, d24, d0
|
||||
vmlal.s16 q3, d25, d0
|
||||
|
||||
; (input[0] - input[2]) * cospi_16_64
|
||||
vmlsl.s16 q13, d24, d0
|
||||
vmlsl.s16 q15, d25, d0
|
||||
|
||||
vdup.16 d0, r8 ; duplicate cospi_24_64
|
||||
vdup.16 d1, r9 ; duplicate cospi_8_64
|
||||
|
||||
; dct_const_round_shift((input[0] + input[2]) * cospi_16_64)
|
||||
vqrshrn.s32 d18, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift((input[0] - input[2]) * cospi_16_64)
|
||||
vqrshrn.s32 d22, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q15, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64
|
||||
vmull.s16 q2, d20, d0
|
||||
vmull.s16 q3, d21, d0
|
||||
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q8, d20, d1
|
||||
vmull.s16 q12, d21, d1
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vmlsl.s16 q2, d28, d1
|
||||
vmlsl.s16 q3, d29, d1
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vmlal.s16 q8, d28, d0
|
||||
vmlal.s16 q12, d29, d0
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_24_64 - input[3] * cospi_8_64)
|
||||
vqrshrn.s32 d26, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d27, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_8_64 + input[3] * cospi_24_64)
|
||||
vqrshrn.s32 d30, q8, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q12, #14 ; >> 14
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift((step2[6] - step2[5]) * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift((step2[5] + step2[6]) * cospi_16_64)
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
|
||||
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
|
||||
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
|
||||
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
|
||||
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
|
||||
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
|
||||
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
||||
MEND
|
||||
|
||||
; Parallel 1D IADST on all the columns of a 8x8 16bits data matrix which is
|
||||
; loaded in q8-q15. IADST constants are loaded in r0 - r12 registers. The
|
||||
; output will be stored back into q8-q15 registers. This macro will touch
|
||||
; q0 - q7 registers and use them as buffer during calculation.
|
||||
MACRO
|
||||
IADST8X8_1D
|
||||
vdup.16 d14, r0 ; duplicate cospi_2_64
|
||||
vdup.16 d15, r1 ; duplicate cospi_30_64
|
||||
|
||||
; cospi_2_64 * x0
|
||||
vmull.s16 q1, d30, d14
|
||||
vmull.s16 q2, d31, d14
|
||||
|
||||
; cospi_30_64 * x0
|
||||
vmull.s16 q3, d30, d15
|
||||
vmull.s16 q4, d31, d15
|
||||
|
||||
vdup.16 d30, r4 ; duplicate cospi_18_64
|
||||
vdup.16 d31, r5 ; duplicate cospi_14_64
|
||||
|
||||
; s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
||||
vmlal.s16 q1, d16, d15
|
||||
vmlal.s16 q2, d17, d15
|
||||
|
||||
; s1 = cospi_30_64 * x0 - cospi_2_64 * x1
|
||||
vmlsl.s16 q3, d16, d14
|
||||
vmlsl.s16 q4, d17, d14
|
||||
|
||||
; cospi_18_64 * x4
|
||||
vmull.s16 q5, d22, d30
|
||||
vmull.s16 q6, d23, d30
|
||||
|
||||
; cospi_14_64 * x4
|
||||
vmull.s16 q7, d22, d31
|
||||
vmull.s16 q8, d23, d31
|
||||
|
||||
; s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
|
||||
vmlal.s16 q5, d24, d31
|
||||
vmlal.s16 q6, d25, d31
|
||||
|
||||
; s5 = cospi_14_64 * x4 - cospi_18_64 * x5
|
||||
vmlsl.s16 q7, d24, d30
|
||||
vmlsl.s16 q8, d25, d30
|
||||
|
||||
; (s0 + s4)
|
||||
vadd.s32 q11, q1, q5
|
||||
vadd.s32 q12, q2, q6
|
||||
|
||||
vdup.16 d0, r2 ; duplicate cospi_10_64
|
||||
vdup.16 d1, r3 ; duplicate cospi_22_64
|
||||
|
||||
; (s0 - s4)
|
||||
vsub.s32 q1, q1, q5
|
||||
vsub.s32 q2, q2, q6
|
||||
|
||||
; x0 = dct_const_round_shift(s0 + s4);
|
||||
vqrshrn.s32 d22, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q12, #14 ; >> 14
|
||||
|
||||
; (s1 + s5)
|
||||
vadd.s32 q12, q3, q7
|
||||
vadd.s32 q15, q4, q8
|
||||
|
||||
; (s1 - s5)
|
||||
vsub.s32 q3, q3, q7
|
||||
vsub.s32 q4, q4, q8
|
||||
|
||||
; x4 = dct_const_round_shift(s0 - s4);
|
||||
vqrshrn.s32 d2, q1, #14 ; >> 14
|
||||
vqrshrn.s32 d3, q2, #14 ; >> 14
|
||||
|
||||
; x1 = dct_const_round_shift(s1 + s5);
|
||||
vqrshrn.s32 d24, q12, #14 ; >> 14
|
||||
vqrshrn.s32 d25, q15, #14 ; >> 14
|
||||
|
||||
; x5 = dct_const_round_shift(s1 - s5);
|
||||
vqrshrn.s32 d6, q3, #14 ; >> 14
|
||||
vqrshrn.s32 d7, q4, #14 ; >> 14
|
||||
|
||||
; cospi_10_64 * x2
|
||||
vmull.s16 q4, d26, d0
|
||||
vmull.s16 q5, d27, d0
|
||||
|
||||
; cospi_22_64 * x2
|
||||
vmull.s16 q2, d26, d1
|
||||
vmull.s16 q6, d27, d1
|
||||
|
||||
vdup.16 d30, r6 ; duplicate cospi_26_64
|
||||
vdup.16 d31, r7 ; duplicate cospi_6_64
|
||||
|
||||
; s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
|
||||
vmlal.s16 q4, d20, d1
|
||||
vmlal.s16 q5, d21, d1
|
||||
|
||||
; s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
|
||||
vmlsl.s16 q2, d20, d0
|
||||
vmlsl.s16 q6, d21, d0
|
||||
|
||||
; cospi_26_64 * x6
|
||||
vmull.s16 q0, d18, d30
|
||||
vmull.s16 q13, d19, d30
|
||||
|
||||
; s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
||||
vmlal.s16 q0, d28, d31
|
||||
vmlal.s16 q13, d29, d31
|
||||
|
||||
; cospi_6_64 * x6
|
||||
vmull.s16 q10, d18, d31
|
||||
vmull.s16 q9, d19, d31
|
||||
|
||||
; s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
||||
vmlsl.s16 q10, d28, d30
|
||||
vmlsl.s16 q9, d29, d30
|
||||
|
||||
; (s3 + s7)
|
||||
vadd.s32 q14, q2, q10
|
||||
vadd.s32 q15, q6, q9
|
||||
|
||||
; (s3 - s7)
|
||||
vsub.s32 q2, q2, q10
|
||||
vsub.s32 q6, q6, q9
|
||||
|
||||
; x3 = dct_const_round_shift(s3 + s7);
|
||||
vqrshrn.s32 d28, q14, #14 ; >> 14
|
||||
vqrshrn.s32 d29, q15, #14 ; >> 14
|
||||
|
||||
; x7 = dct_const_round_shift(s3 - s7);
|
||||
vqrshrn.s32 d4, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d5, q6, #14 ; >> 14
|
||||
|
||||
; (s2 + s6)
|
||||
vadd.s32 q9, q4, q0
|
||||
vadd.s32 q10, q5, q13
|
||||
|
||||
; (s2 - s6)
|
||||
vsub.s32 q4, q4, q0
|
||||
vsub.s32 q5, q5, q13
|
||||
|
||||
vdup.16 d30, r8 ; duplicate cospi_8_64
|
||||
vdup.16 d31, r9 ; duplicate cospi_24_64
|
||||
|
||||
; x2 = dct_const_round_shift(s2 + s6);
|
||||
vqrshrn.s32 d18, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q10, #14 ; >> 14
|
||||
|
||||
; x6 = dct_const_round_shift(s2 - s6);
|
||||
vqrshrn.s32 d8, q4, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q5, #14 ; >> 14
|
||||
|
||||
; cospi_8_64 * x4
|
||||
vmull.s16 q5, d2, d30
|
||||
vmull.s16 q6, d3, d30
|
||||
|
||||
; cospi_24_64 * x4
|
||||
vmull.s16 q7, d2, d31
|
||||
vmull.s16 q0, d3, d31
|
||||
|
||||
; s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
|
||||
vmlal.s16 q5, d6, d31
|
||||
vmlal.s16 q6, d7, d31
|
||||
|
||||
; s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
|
||||
vmlsl.s16 q7, d6, d30
|
||||
vmlsl.s16 q0, d7, d30
|
||||
|
||||
; cospi_8_64 * x7
|
||||
vmull.s16 q1, d4, d30
|
||||
vmull.s16 q3, d5, d30
|
||||
|
||||
; cospi_24_64 * x7
|
||||
vmull.s16 q10, d4, d31
|
||||
vmull.s16 q2, d5, d31
|
||||
|
||||
; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
||||
vmlsl.s16 q1, d8, d31
|
||||
vmlsl.s16 q3, d9, d31
|
||||
|
||||
; s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
||||
vmlal.s16 q10, d8, d30
|
||||
vmlal.s16 q2, d9, d30
|
||||
|
||||
vadd.s16 q8, q11, q9 ; x0 = s0 + s2;
|
||||
|
||||
vsub.s16 q11, q11, q9 ; x2 = s0 - s2;
|
||||
|
||||
vadd.s16 q4, q12, q14 ; x1 = s1 + s3;
|
||||
|
||||
vsub.s16 q12, q12, q14 ; x3 = s1 - s3;
|
||||
|
||||
; (s4 + s6)
|
||||
vadd.s32 q14, q5, q1
|
||||
vadd.s32 q15, q6, q3
|
||||
|
||||
; (s4 - s6)
|
||||
vsub.s32 q5, q5, q1
|
||||
vsub.s32 q6, q6, q3
|
||||
|
||||
; x4 = dct_const_round_shift(s4 + s6);
|
||||
vqrshrn.s32 d18, q14, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q15, #14 ; >> 14
|
||||
|
||||
; x6 = dct_const_round_shift(s4 - s6);
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; (s5 + s7)
|
||||
vadd.s32 q1, q7, q10
|
||||
vadd.s32 q3, q0, q2
|
||||
|
||||
; (s5 - s7)
|
||||
vsub.s32 q7, q7, q10
|
||||
vsub.s32 q0, q0, q2
|
||||
|
||||
; x5 = dct_const_round_shift(s5 + s7);
|
||||
vqrshrn.s32 d28, q1, #14 ; >> 14
|
||||
vqrshrn.s32 d29, q3, #14 ; >> 14
|
||||
|
||||
; x7 = dct_const_round_shift(s5 - s7);
|
||||
vqrshrn.s32 d14, q7, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q0, #14 ; >> 14
|
||||
|
||||
vdup.16 d30, r12 ; duplicate cospi_16_64
|
||||
|
||||
; cospi_16_64 * x2
|
||||
vmull.s16 q2, d22, d30
|
||||
vmull.s16 q3, d23, d30
|
||||
|
||||
; cospi_16_64 * x2
|
||||
vmull.s16 q13, d22, d30
|
||||
vmull.s16 q1, d23, d30
|
||||
|
||||
; cospi_16_64 * x2 + cospi_16_64 * x3;
|
||||
vmlal.s16 q2, d24, d30
|
||||
vmlal.s16 q3, d25, d30
|
||||
|
||||
; cospi_16_64 * x2 - cospi_16_64 * x3;
|
||||
vmlsl.s16 q13, d24, d30
|
||||
vmlsl.s16 q1, d25, d30
|
||||
|
||||
; x2 = dct_const_round_shift(s2);
|
||||
vqrshrn.s32 d4, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d5, q3, #14 ; >> 14
|
||||
|
||||
;x3 = dct_const_round_shift(s3);
|
||||
vqrshrn.s32 d24, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d25, q1, #14 ; >> 14
|
||||
|
||||
; cospi_16_64 * x6
|
||||
vmull.s16 q13, d10, d30
|
||||
vmull.s16 q1, d11, d30
|
||||
|
||||
; cospi_16_64 * x6
|
||||
vmull.s16 q11, d10, d30
|
||||
vmull.s16 q0, d11, d30
|
||||
|
||||
; cospi_16_64 * x6 + cospi_16_64 * x7;
|
||||
vmlal.s16 q13, d14, d30
|
||||
vmlal.s16 q1, d15, d30
|
||||
|
||||
; cospi_16_64 * x6 - cospi_16_64 * x7;
|
||||
vmlsl.s16 q11, d14, d30
|
||||
vmlsl.s16 q0, d15, d30
|
||||
|
||||
; x6 = dct_const_round_shift(s6);
|
||||
vqrshrn.s32 d20, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d21, q1, #14 ; >> 14
|
||||
|
||||
;x7 = dct_const_round_shift(s7);
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q0, #14 ; >> 14
|
||||
|
||||
vdup.16 q5, r10 ; duplicate 0
|
||||
|
||||
vsub.s16 q9, q5, q9 ; output[1] = -x4;
|
||||
vsub.s16 q11, q5, q2 ; output[3] = -x2;
|
||||
vsub.s16 q13, q5, q6 ; output[5] = -x7;
|
||||
vsub.s16 q15, q5, q4 ; output[7] = -x1;
|
||||
MEND
|
||||
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest,
;                               int dest_stride, int tx_type)
;
; r0  int16_t *input
; r1  uint8_t *dest
; r2  int dest_stride
; r3  int tx_type
;
; Runs two 1-D 8x8 inverse transform passes (with a transpose before each)
; over the 64 coefficients at input, rounds the result by 5 bits, adds it to
; the 8x8 block of pixels at dest and stores the clipped sum back to dest.
; tx_type == 2 takes the idct_iadst path and tx_type == 3 the iadst_iadst
; path; every other value falls through to the iadst_idct path.
; This function will only handle tx_type of 1,2,3.
;
; The constant-generation macros overwrite r0-r10 and r12, and the 1-D
; transform macros use q0-q7 as scratch. r4-r10 and d8-d15 (q4-q7) are
; callee-saved under the AAPCS, so both sets are saved on entry and restored
; before the result is written out.
|vp9_short_iht8x8_add_neon| PROC

    ; load the inputs into q8-q15
    vld1.s16        {q8,q9}, [r0]!
    vld1.s16        {q10,q11}, [r0]!
    vld1.s16        {q12,q13}, [r0]!
    vld1.s16        {q14,q15}, [r0]!

    ; r1 (dest) and r2 (dest_stride) are needed again after the transforms,
    ; and r4-r10 must be preserved for the caller
    push            {r0-r10}

    ; q4-q7 are clobbered by IDCT8x8_1D / IADST8X8_1D but are callee-saved
    vpush           {d8-d15}

    ; transpose the input data
    TRANSPOSE8X8

    ; decide the type of transform
    cmp         r3, #2
    beq         idct_iadst
    cmp         r3, #3
    beq         iadst_iadst

iadst_idct
    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; first transform rows
    IDCT8x8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; then transform columns
    IADST8X8_1D

    b end_vp9_short_iht8x8_add_neon

idct_iadst
    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; then transform columns
    IDCT8x8_1D

    b end_vp9_short_iht8x8_add_neon

iadst_iadst
    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; then transform columns
    IADST8X8_1D

end_vp9_short_iht8x8_add_neon
    ; the transform output lives in q8-q15, so restoring q4-q7 here is safe
    vpop            {d8-d15}
    pop             {r0-r10}

    ; ROUND_POWER_OF_TWO(temp_out[j], 5)
    vrshr.s16       q8, q8, #5
    vrshr.s16       q9, q9, #5
    vrshr.s16       q10, q10, #5
    vrshr.s16       q11, q11, #5
    vrshr.s16       q12, q12, #5
    vrshr.s16       q13, q13, #5
    vrshr.s16       q14, q14, #5
    vrshr.s16       q15, q15, #5

    ; save dest pointer
    mov             r0, r1

    ; load destination data
    vld1.64         {d0}, [r1], r2
    vld1.64         {d1}, [r1], r2
    vld1.64         {d2}, [r1], r2
    vld1.64         {d3}, [r1], r2
    vld1.64         {d4}, [r1], r2
    vld1.64         {d5}, [r1], r2
    vld1.64         {d6}, [r1], r2
    vld1.64         {d7}, [r1]

    ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
    vaddw.u8        q8, q8, d0
    vaddw.u8        q9, q9, d1
    vaddw.u8        q10, q10, d2
    vaddw.u8        q11, q11, d3
    vaddw.u8        q12, q12, d4
    vaddw.u8        q13, q13, d5
    vaddw.u8        q14, q14, d6
    vaddw.u8        q15, q15, d7

    ; clip_pixel
    vqmovun.s16     d0, q8
    vqmovun.s16     d1, q9
    vqmovun.s16     d2, q10
    vqmovun.s16     d3, q11
    vqmovun.s16     d4, q12
    vqmovun.s16     d5, q13
    vqmovun.s16     d6, q14
    vqmovun.s16     d7, q15

    ; store the data
    vst1.64         {d0}, [r0], r2
    vst1.64         {d1}, [r0], r2
    vst1.64         {d2}, [r0], r2
    vst1.64         {d3}, [r0], r2
    vst1.64         {d4}, [r0], r2
    vst1.64         {d5}, [r0], r2
    vst1.64         {d6}, [r0], r2
    vst1.64         {d7}, [r0], r2
    bx              lr
    ENDP  ; |vp9_short_iht8x8_add_neon|
|
||||
|
||||
END
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
void vp9_machine_specific_config(VP9_COMMON *ctx) {
|
||||
void vp9_machine_specific_config(VP9_COMMON *cm) {
|
||||
(void)cm;
|
||||
vp9_rtcd();
|
||||
}
|
||||
|
||||
@@ -31,40 +31,30 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
|
||||
vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO));
|
||||
}
|
||||
|
||||
void vp9_update_mode_info_in_image(VP9_COMMON *cm, MODE_INFO *mi) {
|
||||
int i, j;
|
||||
|
||||
// For each in image mode_info element set the in image flag to 1
|
||||
for (i = 0; i < cm->mi_rows; i++) {
|
||||
MODE_INFO *ptr = mi;
|
||||
for (j = 0; j < cm->mi_cols; j++) {
|
||||
ptr->mbmi.mb_in_image = 1;
|
||||
ptr++; // Next element in the row
|
||||
}
|
||||
|
||||
// Step over border element at start of next row
|
||||
mi += cm->mode_info_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_free_frame_buffers(VP9_COMMON *oci) {
|
||||
void vp9_free_frame_buffers(VP9_COMMON *cm) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp9_free_frame_buffer(&oci->yv12_fb[i]);
|
||||
vp9_free_frame_buffer(&cm->yv12_fb[i]);
|
||||
|
||||
vp9_free_frame_buffer(&oci->post_proc_buffer);
|
||||
vp9_free_frame_buffer(&cm->post_proc_buffer);
|
||||
|
||||
vpx_free(oci->mip);
|
||||
vpx_free(oci->prev_mip);
|
||||
vpx_free(oci->above_seg_context);
|
||||
vpx_free(cm->mip);
|
||||
vpx_free(cm->prev_mip);
|
||||
vpx_free(cm->above_seg_context);
|
||||
vpx_free(cm->last_frame_seg_map);
|
||||
vpx_free(cm->mi_grid_base);
|
||||
vpx_free(cm->prev_mi_grid_base);
|
||||
|
||||
vpx_free(oci->above_context[0]);
|
||||
vpx_free(cm->above_context[0]);
|
||||
for (i = 0; i < MAX_MB_PLANE; i++)
|
||||
oci->above_context[i] = 0;
|
||||
oci->mip = NULL;
|
||||
oci->prev_mip = NULL;
|
||||
oci->above_seg_context = NULL;
|
||||
cm->above_context[i] = 0;
|
||||
cm->mip = NULL;
|
||||
cm->prev_mip = NULL;
|
||||
cm->above_seg_context = NULL;
|
||||
cm->last_frame_seg_map = NULL;
|
||||
cm->mi_grid_base = NULL;
|
||||
cm->prev_mi_grid_base = NULL;
|
||||
}
|
||||
|
||||
static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
|
||||
@@ -72,112 +62,125 @@ static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
|
||||
cm->mb_rows = (aligned_height + 8) >> 4;
|
||||
cm->MBs = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
cm->mi_cols = aligned_width >> LOG2_MI_SIZE;
|
||||
cm->mi_rows = aligned_height >> LOG2_MI_SIZE;
|
||||
cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
|
||||
cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
|
||||
cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
static void setup_mi(VP9_COMMON *cm) {
|
||||
cm->mi = cm->mip + cm->mode_info_stride + 1;
|
||||
cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
|
||||
cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1;
|
||||
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
|
||||
|
||||
vpx_memset(cm->mip, 0,
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->mi);
|
||||
vpx_memset(cm->mi_grid_base, 0,
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) *
|
||||
sizeof(*cm->mi_grid_base));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_border(cm, cm->prev_mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->prev_mi);
|
||||
}
|
||||
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
|
||||
int i, mi_cols;
|
||||
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);
|
||||
const int ss_x = oci->subsampling_x;
|
||||
const int ss_y = oci->subsampling_y;
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
|
||||
const int ss_x = cm->subsampling_x;
|
||||
const int ss_y = cm->subsampling_y;
|
||||
int mi_size;
|
||||
|
||||
vp9_free_frame_buffers(oci);
|
||||
vp9_free_frame_buffers(cm);
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++) {
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
if (vp9_alloc_frame_buffer(&oci->yv12_fb[i], width, height, ss_x, ss_y,
|
||||
cm->fb_idx_ref_cnt[i] = 0;
|
||||
if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
|
||||
VP9BORDERINPIXELS) < 0)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
oci->new_fb_idx = NUM_YV12_BUFFERS - 1;
|
||||
oci->fb_idx_ref_cnt[oci->new_fb_idx] = 1;
|
||||
cm->new_fb_idx = NUM_YV12_BUFFERS - 1;
|
||||
cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
|
||||
|
||||
for (i = 0; i < ALLOWED_REFS_PER_FRAME; i++)
|
||||
oci->active_ref_idx[i] = i;
|
||||
cm->active_ref_idx[i] = i;
|
||||
|
||||
for (i = 0; i < NUM_REF_FRAMES; i++) {
|
||||
oci->ref_frame_map[i] = i;
|
||||
oci->fb_idx_ref_cnt[i] = 1;
|
||||
cm->ref_frame_map[i] = i;
|
||||
cm->fb_idx_ref_cnt[i] = 1;
|
||||
}
|
||||
|
||||
if (vp9_alloc_frame_buffer(&oci->post_proc_buffer, width, height, ss_x, ss_y,
|
||||
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
|
||||
VP9BORDERINPIXELS) < 0)
|
||||
goto fail;
|
||||
|
||||
set_mb_mi(oci, aligned_width, aligned_height);
|
||||
set_mb_mi(cm, aligned_width, aligned_height);
|
||||
|
||||
// Allocation
|
||||
mi_size = oci->mode_info_stride * (oci->mi_rows + MI_BLOCK_SIZE);
|
||||
mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE);
|
||||
|
||||
oci->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!oci->mip)
|
||||
cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!cm->mip)
|
||||
goto fail;
|
||||
|
||||
oci->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!oci->prev_mip)
|
||||
cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!cm->prev_mip)
|
||||
goto fail;
|
||||
|
||||
setup_mi(oci);
|
||||
cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
|
||||
if (!cm->mi_grid_base)
|
||||
goto fail;
|
||||
|
||||
cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
|
||||
if (!cm->prev_mi_grid_base)
|
||||
goto fail;
|
||||
|
||||
setup_mi(cm);
|
||||
|
||||
// FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
|
||||
// information is exposed at this level
|
||||
mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);
|
||||
mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
|
||||
|
||||
// 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
|
||||
// block where mi unit size is 8x8.
|
||||
# if CONFIG_ALPHA
|
||||
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 8 * mi_cols, 1);
|
||||
#else
|
||||
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1);
|
||||
#endif
|
||||
if (!oci->above_context[0])
|
||||
cm->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE *
|
||||
(2 * mi_cols), 1);
|
||||
if (!cm->above_context[0])
|
||||
goto fail;
|
||||
|
||||
oci->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
|
||||
if (!oci->above_seg_context)
|
||||
cm->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
|
||||
if (!cm->above_seg_context)
|
||||
goto fail;
|
||||
|
||||
// Create the segmentation map structure and set to 0.
|
||||
cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
|
||||
if (!cm->last_frame_seg_map)
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
vp9_free_frame_buffers(oci);
|
||||
vp9_free_frame_buffers(cm);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp9_create_common(VP9_COMMON *oci) {
|
||||
vp9_machine_specific_config(oci);
|
||||
void vp9_create_common(VP9_COMMON *cm) {
|
||||
vp9_machine_specific_config(cm);
|
||||
|
||||
vp9_init_mbmode_probs(oci);
|
||||
vp9_init_mbmode_probs(cm);
|
||||
|
||||
oci->tx_mode = ONLY_4X4;
|
||||
oci->comp_pred_mode = HYBRID_PREDICTION;
|
||||
cm->tx_mode = ONLY_4X4;
|
||||
cm->comp_pred_mode = HYBRID_PREDICTION;
|
||||
|
||||
// Initialize reference frame sign bias structure to defaults
|
||||
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias));
|
||||
}
|
||||
|
||||
void vp9_remove_common(VP9_COMMON *oci) {
|
||||
vp9_free_frame_buffers(oci);
|
||||
void vp9_remove_common(VP9_COMMON *cm) {
|
||||
vp9_free_frame_buffers(cm);
|
||||
}
|
||||
|
||||
void vp9_initialize_common() {
|
||||
@@ -188,8 +191,8 @@ void vp9_initialize_common() {
|
||||
|
||||
void vp9_update_frame_size(VP9_COMMON *cm) {
|
||||
int i, mi_cols;
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2);
|
||||
|
||||
set_mb_mi(cm, aligned_width, aligned_height);
|
||||
setup_mi(cm);
|
||||
@@ -198,4 +201,8 @@ void vp9_update_frame_size(VP9_COMMON *cm) {
|
||||
for (i = 1; i < MAX_MB_PLANE; i++)
|
||||
cm->above_context[i] =
|
||||
cm->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
|
||||
|
||||
// Initialize the previous frame segment map to 0.
|
||||
if (cm->last_frame_seg_map)
|
||||
vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
|
||||
}
|
||||
|
||||
@@ -16,14 +16,13 @@
|
||||
|
||||
void vp9_initialize_common();
|
||||
|
||||
void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi);
|
||||
void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi);
|
||||
void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
|
||||
|
||||
void vp9_create_common(VP9_COMMON *oci);
|
||||
void vp9_remove_common(VP9_COMMON *oci);
|
||||
void vp9_create_common(VP9_COMMON *cm);
|
||||
void vp9_remove_common(VP9_COMMON *cm);
|
||||
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height);
|
||||
void vp9_free_frame_buffers(VP9_COMMON *oci);
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height);
|
||||
void vp9_free_frame_buffers(VP9_COMMON *cm);
|
||||
|
||||
|
||||
void vp9_update_frame_size(VP9_COMMON *cm);
|
||||
|
||||
@@ -19,9 +19,9 @@
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_common_data.h"
|
||||
#include "vp9/common/vp9_convolve.h"
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
#include "vp9/common/vp9_mv.h"
|
||||
#include "vp9/common/vp9_scale.h"
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
|
||||
@@ -71,7 +71,7 @@ typedef enum {
|
||||
D135_PRED, // Directional 135 deg = 180 - 45
|
||||
D117_PRED, // Directional 117 deg = 180 - 63
|
||||
D153_PRED, // Directional 153 deg = 180 - 27
|
||||
D27_PRED, // Directional 27 deg = round(arctan(1/2) * 180/pi)
|
||||
D207_PRED, // Directional 207 deg = 180 + 27
|
||||
D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
|
||||
TM_PRED, // True-motion
|
||||
NEARESTMV,
|
||||
@@ -89,9 +89,9 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
|
||||
return mode >= NEARESTMV && mode <= NEWMV;
|
||||
}
|
||||
|
||||
#define VP9_INTRA_MODES (TM_PRED + 1)
|
||||
#define INTRA_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
|
||||
#define INTER_MODES (1 + NEWMV - NEARESTMV)
|
||||
|
||||
static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
|
||||
return (mode - NEARESTMV);
|
||||
@@ -115,45 +115,41 @@ typedef enum {
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int b_width_log2(BLOCK_SIZE sb_type) {
|
||||
return b_width_log2_lookup[sb_type];
|
||||
}
|
||||
static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int b_height_log2(BLOCK_SIZE sb_type) {
|
||||
return b_height_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int mi_width_log2(BLOCK_SIZE sb_type) {
|
||||
return mi_width_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int mi_height_log2(BLOCK_SIZE sb_type) {
|
||||
return mi_height_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
// This structure now relates to 8x8 block regions.
|
||||
typedef struct {
|
||||
MB_PREDICTION_MODE mode, uv_mode;
|
||||
MV_REFERENCE_FRAME ref_frame[2];
|
||||
TX_SIZE txfm_size;
|
||||
int_mv mv[2]; // for each reference frame used
|
||||
TX_SIZE tx_size;
|
||||
int_mv mv[2]; // for each reference frame used
|
||||
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
|
||||
int_mv best_mv, best_second_mv;
|
||||
|
||||
uint8_t mb_mode_context[MAX_REF_FRAMES];
|
||||
uint8_t mode_context[MAX_REF_FRAMES];
|
||||
|
||||
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
unsigned char segment_id; // Segment id for current frame
|
||||
unsigned char skip_coeff; // 0=need to decode coeffs, 1=no coefficients
|
||||
unsigned char segment_id; // Segment id for this block.
|
||||
|
||||
// Flags used for prediction status of various bistream signals
|
||||
// Flags used for prediction status of various bit-stream signals
|
||||
unsigned char seg_id_predicted;
|
||||
|
||||
// Indicates if the mb is part of the image (1) vs border (0)
|
||||
// This can be useful in determining whether the MB provides
|
||||
// a valid predictor
|
||||
unsigned char mb_in_image;
|
||||
|
||||
INTERPOLATIONFILTERTYPE interp_filter;
|
||||
|
||||
BLOCK_SIZE_TYPE sb_type;
|
||||
BLOCK_SIZE sb_type;
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct {
|
||||
@@ -161,36 +157,19 @@ typedef struct {
|
||||
union b_mode_info bmi[4];
|
||||
} MODE_INFO;
|
||||
|
||||
static int is_inter_block(const MB_MODE_INFO *mbmi) {
|
||||
static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
|
||||
return mbmi->ref_frame[0] > INTRA_FRAME;
|
||||
}
|
||||
|
||||
static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
|
||||
return mbmi->ref_frame[1] > INTRA_FRAME;
|
||||
}
|
||||
|
||||
enum mv_precision {
|
||||
MV_PRECISION_Q3,
|
||||
MV_PRECISION_Q4
|
||||
};
|
||||
|
||||
#define VP9_REF_SCALE_SHIFT 14
|
||||
#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
|
||||
|
||||
struct scale_factors {
|
||||
int x_scale_fp; // horizontal fixed point scale factor
|
||||
int y_scale_fp; // vertical fixed point scale factor
|
||||
int x_offset_q4;
|
||||
int x_step_q4;
|
||||
int y_offset_q4;
|
||||
int y_step_q4;
|
||||
|
||||
int (*scale_value_x)(int val, const struct scale_factors *scale);
|
||||
int (*scale_value_y)(int val, const struct scale_factors *scale);
|
||||
void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col);
|
||||
MV32 (*scale_mv_q3_to_q4)(const MV *mv, const struct scale_factors *scale);
|
||||
MV32 (*scale_mv_q4)(const MV *mv, const struct scale_factors *scale);
|
||||
|
||||
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
|
||||
};
|
||||
|
||||
#if CONFIG_ALPHA
|
||||
enum { MAX_MB_PLANE = 4 };
|
||||
#else
|
||||
@@ -216,45 +195,27 @@ struct macroblockd_plane {
|
||||
ENTROPY_CONTEXT *left_context;
|
||||
};
|
||||
|
||||
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 2
|
||||
|
||||
struct loopfilter {
|
||||
int filter_level;
|
||||
|
||||
int sharpness_level;
|
||||
int last_sharpness_level;
|
||||
|
||||
uint8_t mode_ref_delta_enabled;
|
||||
uint8_t mode_ref_delta_update;
|
||||
|
||||
// 0 = Intra, Last, GF, ARF
|
||||
signed char ref_deltas[MAX_REF_LF_DELTAS];
|
||||
signed char last_ref_deltas[MAX_REF_LF_DELTAS];
|
||||
|
||||
// 0 = ZERO_MV, MV
|
||||
signed char mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
};
|
||||
#define BLOCK_OFFSET(x, i) ((x) + (i) * 16)
|
||||
|
||||
typedef struct macroblockd {
|
||||
struct macroblockd_plane plane[MAX_MB_PLANE];
|
||||
|
||||
struct scale_factors scale_factor[2];
|
||||
|
||||
MODE_INFO *prev_mode_info_context;
|
||||
MODE_INFO *mode_info_context;
|
||||
MODE_INFO *last_mi;
|
||||
MODE_INFO *this_mi;
|
||||
int mode_info_stride;
|
||||
|
||||
MODE_INFO *mic_stream_ptr;
|
||||
|
||||
// A NULL indicates that the 8x8 is not part of the image
|
||||
MODE_INFO **mi_8x8;
|
||||
MODE_INFO **prev_mi_8x8;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
int right_available;
|
||||
|
||||
struct segmentation seg;
|
||||
struct loopfilter lf;
|
||||
|
||||
// partition contexts
|
||||
PARTITION_CONTEXT *above_seg_context;
|
||||
PARTITION_CONTEXT *left_seg_context;
|
||||
@@ -286,7 +247,7 @@ typedef struct macroblockd {
|
||||
|
||||
} MACROBLOCKD;
|
||||
|
||||
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
|
||||
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
|
||||
switch (subsize) {
|
||||
case BLOCK_64X64:
|
||||
case BLOCK_64X32:
|
||||
@@ -311,9 +272,8 @@ static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsi
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void update_partition_context(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE sb_type,
|
||||
BLOCK_SIZE_TYPE sb_size) {
|
||||
static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type,
|
||||
BLOCK_SIZE sb_size) {
|
||||
const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
|
||||
const int bwl = b_width_log2(sb_type);
|
||||
const int bhl = b_height_log2(sb_type);
|
||||
@@ -331,8 +291,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
|
||||
vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
|
||||
}
|
||||
|
||||
static INLINE int partition_plane_context(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type) {
|
||||
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
|
||||
int above = 0, left = 0, i;
|
||||
int boffset = mi_width_log2(BLOCK_64X64) - bsl;
|
||||
@@ -352,10 +311,9 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
|
||||
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
|
||||
}
|
||||
|
||||
static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
|
||||
PARTITION_TYPE partition) {
|
||||
BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize];
|
||||
assert(subsize != BLOCK_SIZE_TYPES);
|
||||
static BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) {
|
||||
const BLOCK_SIZE subsize = subsize_lookup[partition][bsize];
|
||||
assert(subsize < BLOCK_SIZES);
|
||||
return subsize;
|
||||
}
|
||||
|
||||
@@ -363,7 +321,7 @@ extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT];
|
||||
|
||||
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd, int ib) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MODE_INFO *const mi = xd->this_mi;
|
||||
const MB_MODE_INFO *const mbmi = &mi->mbmi;
|
||||
|
||||
if (plane_type != PLANE_TYPE_Y_WITH_DC ||
|
||||
@@ -378,13 +336,13 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
|
||||
static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd) {
|
||||
return plane_type == PLANE_TYPE_Y_WITH_DC ?
|
||||
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
|
||||
mode2txfm_map[xd->this_mi->mbmi.mode] : DCT_DCT;
|
||||
}
|
||||
|
||||
static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd) {
|
||||
return plane_type == PLANE_TYPE_Y_WITH_DC ?
|
||||
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
|
||||
mode2txfm_map[xd->this_mi->mbmi.mode] : DCT_DCT;
|
||||
}
|
||||
|
||||
static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
|
||||
@@ -404,259 +362,147 @@ static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
|
||||
|
||||
|
||||
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
|
||||
return MIN(mbmi->txfm_size, max_uv_txsize_lookup[mbmi->sb_type]);
|
||||
return MIN(mbmi->tx_size, max_uv_txsize_lookup[mbmi->sb_type]);
|
||||
}
|
||||
|
||||
struct plane_block_idx {
|
||||
int plane;
|
||||
int block;
|
||||
};
|
||||
|
||||
// TODO(jkoleszar): returning a struct so it can be used in a const context,
|
||||
// expect to refactor this further later.
|
||||
static INLINE struct plane_block_idx plane_block_idx(int y_blocks,
|
||||
int b_idx) {
|
||||
const int v_offset = y_blocks * 5 / 4;
|
||||
struct plane_block_idx res;
|
||||
|
||||
if (b_idx < y_blocks) {
|
||||
res.plane = 0;
|
||||
res.block = b_idx;
|
||||
} else if (b_idx < v_offset) {
|
||||
res.plane = 1;
|
||||
res.block = b_idx - y_blocks;
|
||||
} else {
|
||||
assert(b_idx < y_blocks * 3 / 2);
|
||||
res.plane = 2;
|
||||
res.block = b_idx - v_offset;
|
||||
}
|
||||
return res;
|
||||
static BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane *pd) {
|
||||
BLOCK_SIZE bs = ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
|
||||
assert(bs < BLOCK_SIZES);
|
||||
return bs;
|
||||
}
|
||||
|
||||
static INLINE int plane_block_width(BLOCK_SIZE_TYPE bsize,
|
||||
static INLINE int plane_block_width(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane* plane) {
|
||||
return 4 << (b_width_log2(bsize) - plane->subsampling_x);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize,
|
||||
static INLINE int plane_block_height(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane* plane) {
|
||||
return 4 << (b_height_log2(bsize) - plane->subsampling_y);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_width_log2by4(
|
||||
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
||||
return (b_width_log2(bsize) - plane->subsampling_x);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_height_log2by4(
|
||||
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
||||
return (b_height_log2(bsize) - plane->subsampling_y);
|
||||
}
|
||||
|
||||
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int ss_txfrm_size,
|
||||
BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size,
|
||||
void *arg);
|
||||
|
||||
static INLINE void foreach_transformed_block_in_plane(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
||||
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
|
||||
|
||||
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const MB_MODE_INFO* mbmi = &xd->this_mi->mbmi;
|
||||
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// transform size varies per plane, look it up in a common way.
|
||||
const MB_MODE_INFO* mbmi = &xd->mode_info_context->mbmi;
|
||||
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi)
|
||||
: mbmi->txfm_size;
|
||||
const int block_size_b = bw + bh;
|
||||
const int txfrm_size_b = tx_size * 2;
|
||||
|
||||
// subsampled size of the block
|
||||
const int ss_sum = xd->plane[plane].subsampling_x
|
||||
+ xd->plane[plane].subsampling_y;
|
||||
const int ss_block_size = block_size_b - ss_sum;
|
||||
|
||||
const int step = 1 << txfrm_size_b;
|
||||
|
||||
: mbmi->tx_size;
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
|
||||
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
|
||||
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
|
||||
const int step = 1 << (tx_size << 1);
|
||||
int i;
|
||||
|
||||
assert(txfrm_size_b <= block_size_b);
|
||||
assert(txfrm_size_b <= ss_block_size);
|
||||
|
||||
// If mb_to_right_edge is < 0 we are in a situation in which
|
||||
// the current block size extends into the UMV and we won't
|
||||
// visit the sub blocks that are wholly within the UMV.
|
||||
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
|
||||
int r, c;
|
||||
const int sw = bw - xd->plane[plane].subsampling_x;
|
||||
const int sh = bh - xd->plane[plane].subsampling_y;
|
||||
int max_blocks_wide = 1 << sw;
|
||||
int max_blocks_high = 1 << sh;
|
||||
|
||||
int max_blocks_wide = num_4x4_w;
|
||||
int max_blocks_high = num_4x4_h;
|
||||
|
||||
// xd->mb_to_right_edge is in units of pixels * 8. This converts
|
||||
// it to 4x4 block sizes.
|
||||
if (xd->mb_to_right_edge < 0)
|
||||
max_blocks_wide +=
|
||||
(xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
|
||||
max_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
max_blocks_high +=
|
||||
(xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
|
||||
max_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
i = 0;
|
||||
// Unlike the normal case - in here we have to keep track of the
|
||||
// row and column of the blocks we use so that we know if we are in
|
||||
// the unrestricted motion border.
|
||||
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
|
||||
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
|
||||
for (r = 0; r < num_4x4_h; r += (1 << tx_size)) {
|
||||
for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
|
||||
if (r < max_blocks_high && c < max_blocks_wide)
|
||||
visit(plane, i, bsize, txfrm_size_b, arg);
|
||||
visit(plane, i, plane_bsize, tx_size, arg);
|
||||
i += step;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < (1 << ss_block_size); i += step) {
|
||||
visit(plane, i, bsize, txfrm_size_b, arg);
|
||||
}
|
||||
for (i = 0; i < num_4x4_w * num_4x4_h; i += step)
|
||||
visit(plane, i, plane_bsize, tx_size, arg);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void foreach_transformed_block(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane,
|
||||
visit, arg);
|
||||
}
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++)
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
|
||||
static INLINE void foreach_transformed_block_uv(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane,
|
||||
visit, arg);
|
||||
}
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++)
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
|
||||
// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
|
||||
// calculate the subsampled BLOCK_SIZE_TYPE, but that type isn't defined for
|
||||
// sizes smaller than 16x16 yet.
|
||||
typedef void (*foreach_predicted_block_visitor)(int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int pred_w, int pred_h,
|
||||
void *arg);
|
||||
static INLINE void foreach_predicted_block_in_plane(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int i, x, y;
|
||||
|
||||
// block sizes in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// subsampled size of the block
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
|
||||
|
||||
// size of the predictor to use.
|
||||
int pred_w, pred_h;
|
||||
|
||||
if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) {
|
||||
assert(bsize == BLOCK_8X8);
|
||||
pred_w = 0;
|
||||
pred_h = 0;
|
||||
} else {
|
||||
pred_w = bwl;
|
||||
pred_h = bhl;
|
||||
}
|
||||
assert(pred_w <= bwl);
|
||||
assert(pred_h <= bhl);
|
||||
|
||||
// visit each subblock in raster order
|
||||
i = 0;
|
||||
for (y = 0; y < 1 << bhl; y += 1 << pred_h) {
|
||||
for (x = 0; x < 1 << bwl; x += 1 << pred_w) {
|
||||
visit(plane, i, bsize, pred_w, pred_h, arg);
|
||||
i += 1 << pred_w;
|
||||
}
|
||||
i += (1 << (bwl + pred_h)) - (1 << bwl);
|
||||
}
|
||||
}
|
||||
static INLINE void foreach_predicted_block(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
}
|
||||
static INLINE void foreach_predicted_block_uv(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
}
|
||||
static int raster_block_offset(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block, int stride) {
|
||||
const int bw = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int y = 4 * (block >> bw), x = 4 * (block & ((1 << bw) - 1));
|
||||
static int raster_block_offset(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, int stride) {
|
||||
const int bw = b_width_log2(plane_bsize);
|
||||
const int y = 4 * (raster_block >> bw);
|
||||
const int x = 4 * (raster_block & ((1 << bw) - 1));
|
||||
return y * stride + x;
|
||||
}
|
||||
static int16_t* raster_block_offset_int16(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block, int16_t *base) {
|
||||
const int stride = plane_block_width(bsize, &xd->plane[plane]);
|
||||
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
||||
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, int16_t *base) {
|
||||
const int stride = 4 << b_width_log2(plane_bsize);
|
||||
return base + raster_block_offset(plane_bsize, raster_block, stride);
|
||||
}
|
||||
static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
uint8_t *base, int stride) {
|
||||
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
||||
static uint8_t* raster_block_offset_uint8(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, uint8_t *base,
|
||||
int stride) {
|
||||
return base + raster_block_offset(plane_bsize, raster_block, stride);
|
||||
}
|
||||
|
||||
static int txfrm_block_to_raster_block(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
int ss_txfrm_size) {
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int txwl = ss_txfrm_size / 2;
|
||||
const int tx_cols_log2 = bwl - txwl;
|
||||
static int txfrm_block_to_raster_block(BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size, int block) {
|
||||
const int bwl = b_width_log2(plane_bsize);
|
||||
const int tx_cols_log2 = bwl - tx_size;
|
||||
const int tx_cols = 1 << tx_cols_log2;
|
||||
const int raster_mb = block >> ss_txfrm_size;
|
||||
const int x = (raster_mb & (tx_cols - 1)) << (txwl);
|
||||
const int y = raster_mb >> tx_cols_log2 << (txwl);
|
||||
const int raster_mb = block >> (tx_size << 1);
|
||||
const int x = (raster_mb & (tx_cols - 1)) << tx_size;
|
||||
const int y = (raster_mb >> tx_cols_log2) << tx_size;
|
||||
return x + (y << bwl);
|
||||
}
|
||||
|
||||
static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
int ss_txfrm_size,
|
||||
static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size, int block,
|
||||
int *x, int *y) {
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int txwl = ss_txfrm_size / 2;
|
||||
const int tx_cols_log2 = bwl - txwl;
|
||||
const int bwl = b_width_log2(plane_bsize);
|
||||
const int tx_cols_log2 = bwl - tx_size;
|
||||
const int tx_cols = 1 << tx_cols_log2;
|
||||
const int raster_mb = block >> ss_txfrm_size;
|
||||
*x = (raster_mb & (tx_cols - 1)) << (txwl);
|
||||
*y = raster_mb >> tx_cols_log2 << (txwl);
|
||||
const int raster_mb = block >> (tx_size << 1);
|
||||
*x = (raster_mb & (tx_cols - 1)) << tx_size;
|
||||
*y = (raster_mb >> tx_cols_log2) << tx_size;
|
||||
}
|
||||
|
||||
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
|
||||
const int bw = plane_block_width(bsize, &xd->plane[plane]);
|
||||
const int bh = plane_block_height(bsize, &xd->plane[plane]);
|
||||
static void extend_for_intra(MACROBLOCKD* const xd, BLOCK_SIZE plane_bsize,
|
||||
int plane, int block, TX_SIZE tx_size) {
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
uint8_t *const buf = pd->dst.buf;
|
||||
const int stride = pd->dst.stride;
|
||||
|
||||
int x, y;
|
||||
txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
|
||||
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
|
||||
x = x * 4 - 1;
|
||||
y = y * 4 - 1;
|
||||
// Copy a pixel into the umv if we are in a situation where the block size
|
||||
@@ -664,41 +510,38 @@ static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
|
||||
// TODO(JBB): Should be able to do the full extend in place so we don't have
|
||||
// to do this multiple times.
|
||||
if (xd->mb_to_right_edge < 0) {
|
||||
int umv_border_start = bw
|
||||
+ (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x));
|
||||
const int bw = 4 << b_width_log2(plane_bsize);
|
||||
const int umv_border_start = bw + (xd->mb_to_right_edge >>
|
||||
(3 + pd->subsampling_x));
|
||||
|
||||
if (x + bw > umv_border_start)
|
||||
vpx_memset(
|
||||
xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
||||
+ umv_border_start,
|
||||
*(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
||||
+ umv_border_start - 1),
|
||||
bw);
|
||||
vpx_memset(&buf[y * stride + umv_border_start],
|
||||
buf[y * stride + umv_border_start - 1], bw);
|
||||
}
|
||||
if (xd->mb_to_bottom_edge < 0) {
|
||||
int umv_border_start = bh
|
||||
+ (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y));
|
||||
int i;
|
||||
uint8_t c = *(xd->plane[plane].dst.buf
|
||||
+ (umv_border_start - 1) * xd->plane[plane].dst.stride + x);
|
||||
|
||||
uint8_t *d = xd->plane[plane].dst.buf
|
||||
+ umv_border_start * xd->plane[plane].dst.stride + x;
|
||||
if (xd->mb_to_bottom_edge < 0) {
|
||||
const int bh = 4 << b_height_log2(plane_bsize);
|
||||
const int umv_border_start = bh + (xd->mb_to_bottom_edge >>
|
||||
(3 + pd->subsampling_y));
|
||||
int i;
|
||||
const uint8_t c = buf[(umv_border_start - 1) * stride + x];
|
||||
uint8_t *d = &buf[umv_border_start * stride + x];
|
||||
|
||||
if (y + bh > umv_border_start)
|
||||
for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride)
|
||||
for (i = 0; i < bh; ++i, d += stride)
|
||||
*d = c;
|
||||
}
|
||||
}
|
||||
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int tx_size_in_blocks,
|
||||
int eob, int aoff, int loff,
|
||||
static void set_contexts_on_border(MACROBLOCKD *xd,
|
||||
struct macroblockd_plane *pd,
|
||||
BLOCK_SIZE plane_bsize,
|
||||
int tx_size_in_blocks, int has_eob,
|
||||
int aoff, int loff,
|
||||
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
|
||||
struct macroblockd_plane *pd = &xd->plane[plane];
|
||||
int mi_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
|
||||
int mi_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
|
||||
int above_contexts = tx_size_in_blocks;
|
||||
int left_contexts = tx_size_in_blocks;
|
||||
int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd);
|
||||
int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd);
|
||||
int pt;
|
||||
|
||||
// xd->mb_to_right_edge is in units of pixels * 8. This converts
|
||||
@@ -706,26 +549,47 @@ static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
if (xd->mb_to_right_edge < 0)
|
||||
mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
// this code attempts to avoid copying into contexts that are outside
|
||||
// our border. Any blocks that do are set to 0...
|
||||
if (above_contexts + aoff > mi_blocks_wide)
|
||||
above_contexts = mi_blocks_wide - aoff;
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
if (left_contexts + loff > mi_blocks_high)
|
||||
left_contexts = mi_blocks_high - loff;
|
||||
|
||||
for (pt = 0; pt < above_contexts; pt++)
|
||||
A[pt] = eob > 0;
|
||||
A[pt] = has_eob;
|
||||
for (pt = above_contexts; pt < tx_size_in_blocks; pt++)
|
||||
A[pt] = 0;
|
||||
for (pt = 0; pt < left_contexts; pt++)
|
||||
L[pt] = eob > 0;
|
||||
L[pt] = has_eob;
|
||||
for (pt = left_contexts; pt < tx_size_in_blocks; pt++)
|
||||
L[pt] = 0;
|
||||
}
|
||||
|
||||
static void set_contexts(MACROBLOCKD *xd, struct macroblockd_plane *pd,
|
||||
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
|
||||
int has_eob, int aoff, int loff) {
|
||||
ENTROPY_CONTEXT *const A = pd->above_context + aoff;
|
||||
ENTROPY_CONTEXT *const L = pd->left_context + loff;
|
||||
const int tx_size_in_blocks = 1 << tx_size;
|
||||
|
||||
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
|
||||
set_contexts_on_border(xd, pd, plane_bsize, tx_size_in_blocks, has_eob,
|
||||
aoff, loff, A, L);
|
||||
} else {
|
||||
vpx_memset(A, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
|
||||
vpx_memset(L, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
static int get_tx_eob(struct segmentation *seg, int segment_id,
|
||||
TX_SIZE tx_size) {
|
||||
const int eob_max = 16 << (tx_size << 1);
|
||||
return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_BLOCKD_H_
|
||||
|
||||
@@ -13,33 +13,33 @@
|
||||
#include "vp9/common/vp9_common_data.h"
|
||||
|
||||
// Log 2 conversion lookup tables for block width and height
|
||||
const int b_width_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int b_width_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4};
|
||||
const int b_height_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int b_height_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4};
|
||||
const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
|
||||
const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_4x4_blocks_high_lookup[BLOCK_SIZES] =
|
||||
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
|
||||
// Log 2 conversion lookup tables for modeinfo width and height
|
||||
const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int mi_width_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
|
||||
const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
|
||||
const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int mi_height_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
|
||||
const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_8x8_blocks_high_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
|
||||
|
||||
// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
|
||||
const int size_group_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int size_group_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
|
||||
|
||||
const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_pels_log2_lookup[BLOCK_SIZES] =
|
||||
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
|
||||
|
||||
|
||||
const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
|
||||
const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = {
|
||||
{ // 4X4
|
||||
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
|
||||
PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID,
|
||||
@@ -74,51 +74,62 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
|
||||
}
|
||||
};
|
||||
|
||||
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
|
||||
const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = {
|
||||
{ // PARTITION_NONE
|
||||
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
|
||||
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
|
||||
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
|
||||
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
|
||||
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
|
||||
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
|
||||
BLOCK_64X64,
|
||||
}, { // PARTITION_HORZ
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_64X32,
|
||||
}, { // PARTITION_VERT
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X64,
|
||||
}, { // PARTITION_SPLIT
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X32,
|
||||
}
|
||||
};
|
||||
|
||||
const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
TX_16X16, TX_16X16, TX_16X16,
|
||||
TX_32X32, TX_32X32, TX_32X32, TX_32X32
|
||||
};
|
||||
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
TX_16X16, TX_16X16, TX_16X16, TX_32X32
|
||||
};
|
||||
|
||||
const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
|
||||
{ BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 },
|
||||
{ BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 },
|
||||
{ BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 },
|
||||
{ BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 },
|
||||
{ BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 }
|
||||
const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
|
||||
// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
|
||||
// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
|
||||
{{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}},
|
||||
{{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}},
|
||||
{{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}},
|
||||
{{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}},
|
||||
{{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}},
|
||||
{{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}},
|
||||
{{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}},
|
||||
{{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}},
|
||||
{{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}},
|
||||
{{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}},
|
||||
{{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}},
|
||||
{{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}},
|
||||
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
|
||||
};
|
||||
|
||||
|
||||
@@ -13,20 +13,20 @@
|
||||
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
|
||||
extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int size_group_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES];
|
||||
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
|
||||
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5];
|
||||
extern const int b_width_log2_lookup[BLOCK_SIZES];
|
||||
extern const int b_height_log2_lookup[BLOCK_SIZES];
|
||||
extern const int mi_width_log2_lookup[BLOCK_SIZES];
|
||||
extern const int mi_height_log2_lookup[BLOCK_SIZES];
|
||||
extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZES];
|
||||
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZES];
|
||||
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZES];
|
||||
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZES];
|
||||
extern const int size_group_lookup[BLOCK_SIZES];
|
||||
extern const int num_pels_log2_lookup[BLOCK_SIZES];
|
||||
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES];
|
||||
extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
|
||||
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
|
||||
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
|
||||
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
|
||||
|
||||
#endif // VP9_COMMON_VP9_COMMON_DATA_H
|
||||
|
||||
@@ -14,66 +14,45 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define VP9_FILTER_WEIGHT 128
|
||||
#define VP9_FILTER_SHIFT 7
|
||||
|
||||
/* Assume a bank of 16 filters to choose from. There are two implementations
|
||||
* for filter wrapping behavior, since we want to be able to pick which filter
|
||||
* to start with. We could either:
|
||||
*
|
||||
* 1) make filter_ a pointer to the base of the filter array, and then add an
|
||||
* additional offset parameter, to choose the starting filter.
|
||||
* 2) use a pointer to 2 periods worth of filters, so that even if the original
|
||||
* phase offset is at 15/16, we'll have valid data to read. The filter
|
||||
* tables become [32][8], and the second half is duplicated.
|
||||
* 3) fix the alignment of the filter tables, so that we know the 0/16 is
|
||||
* always 256 byte aligned.
|
||||
*
|
||||
* Implementations 2 and 3 are likely preferable, as they avoid an extra 2
|
||||
* parameters, and switching between them is trivial, with the
|
||||
* ALIGN_FILTERS_256 macro, below.
|
||||
*/
|
||||
#define ALIGN_FILTERS_256 1
|
||||
|
||||
static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x0, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
const int16_t *filter_x_base = filter_x0;
|
||||
int x, y, k;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_x_base =
|
||||
(const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source line */
|
||||
src -= taps / 2 - 1;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_x = filter_x0;
|
||||
|
||||
/* Initial phase offset */
|
||||
int x0_q4 = (filter_x - filter_x_base) / taps;
|
||||
int x_q4 = x0_q4;
|
||||
int x_q4 = (filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
int src_x = (x_q4 - x0_q4) >> 4;
|
||||
const int src_x = x_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_x = filter_x_base +
|
||||
(x_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[src_x + k] * filter_x[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
||||
|
||||
/* Move to the next source pixel */
|
||||
x_q4 += x_step_q4;
|
||||
filter_x = filter_x_base + (x_q4 & 0xf) * taps;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
@@ -85,37 +64,37 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x0, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
const int16_t *filter_x_base = filter_x0;
|
||||
int x, y, k;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_x_base =
|
||||
(const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source line */
|
||||
src -= taps / 2 - 1;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_x = filter_x0;
|
||||
|
||||
/* Initial phase offset */
|
||||
int x0_q4 = (filter_x - filter_x_base) / taps;
|
||||
int x_q4 = x0_q4;
|
||||
int x_q4 = (filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
int src_x = (x_q4 - x0_q4) >> 4;
|
||||
const int src_x = x_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_x = filter_x_base +
|
||||
(x_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[src_x + k] * filter_x[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[x] = ROUND_POWER_OF_TWO(dst[x] +
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
|
||||
|
||||
/* Move to the next source pixel */
|
||||
x_q4 += x_step_q4;
|
||||
filter_x = filter_x_base + (x_q4 & 0xf) * taps;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
@@ -127,37 +106,37 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y0, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
int x, y, k;
|
||||
|
||||
const int16_t *filter_y_base = filter_y0;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_y_base =
|
||||
(const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source column */
|
||||
src -= src_stride * (taps / 2 - 1);
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_y = filter_y0;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y0_q4 = (filter_y - filter_y_base) / taps;
|
||||
int y_q4 = y0_q4;
|
||||
int y_q4 = (filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
int src_y = (y_q4 - y0_q4) >> 4;
|
||||
const int src_y = y_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_y = filter_y_base +
|
||||
(y_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[(src_y + k) * src_stride] * filter_y[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[y * dst_stride] =
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
||||
|
||||
/* Move to the next source pixel */
|
||||
y_q4 += y_step_q4;
|
||||
filter_y = filter_y_base + (y_q4 & 0xf) * taps;
|
||||
}
|
||||
++src;
|
||||
++dst;
|
||||
@@ -169,38 +148,37 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y0, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
int x, y, k;
|
||||
|
||||
const int16_t *filter_y_base = filter_y0;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_y_base =
|
||||
(const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source column */
|
||||
src -= src_stride * (taps / 2 - 1);
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_y = filter_y0;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y0_q4 = (filter_y - filter_y_base) / taps;
|
||||
int y_q4 = y0_q4;
|
||||
int y_q4 = (filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
int src_y = (y_q4 - y0_q4) >> 4;
|
||||
const int src_y = y_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_y = filter_y_base +
|
||||
(y_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[(src_y + k) * src_stride] * filter_y[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[y * dst_stride] =
|
||||
(dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
|
||||
|
||||
/* Move to the next source pixel */
|
||||
y_q4 += y_step_q4;
|
||||
filter_y = filter_y_base + (y_q4 & 0xf) * taps;
|
||||
}
|
||||
++src;
|
||||
++dst;
|
||||
@@ -213,58 +191,27 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
/* Fixed size intermediate buffer places limits on parameters.
|
||||
* Maximum intermediate_height is 135, for y_step_q4 == 32,
|
||||
* Maximum intermediate_height is 324, for y_step_q4 == 80,
|
||||
* h == 64, taps == 8.
|
||||
* y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
|
||||
*/
|
||||
uint8_t temp[64 * 135];
|
||||
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
|
||||
uint8_t temp[64 * 324];
|
||||
int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
|
||||
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
assert(taps <= 8);
|
||||
assert(y_step_q4 <= 32);
|
||||
assert(x_step_q4 <= 32);
|
||||
assert(y_step_q4 <= 80);
|
||||
assert(x_step_q4 <= 80);
|
||||
|
||||
if (intermediate_height < h)
|
||||
intermediate_height = h;
|
||||
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height, taps);
|
||||
convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, taps);
|
||||
}
|
||||
|
||||
static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
/* Fixed size intermediate buffer places limits on parameters.
|
||||
* Maximum intermediate_height is 135, for y_step_q4 == 32,
|
||||
* h == 64, taps == 8.
|
||||
*/
|
||||
uint8_t temp[64 * 135];
|
||||
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
|
||||
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
assert(taps <= 8);
|
||||
assert(y_step_q4 <= 32);
|
||||
assert(x_step_q4 <= 32);
|
||||
|
||||
if (intermediate_height < h)
|
||||
intermediate_height = h;
|
||||
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height, taps);
|
||||
convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, taps);
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
||||
intermediate_height, taps);
|
||||
convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w, h, taps);
|
||||
}
|
||||
|
||||
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -273,8 +220,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -283,8 +229,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -293,8 +238,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -303,8 +247,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_avg_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -313,8 +256,7 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -327,16 +269,9 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
|
||||
vp9_convolve8(src, src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
vp9_convolve_avg(temp, 64,
|
||||
dst, dst_stride,
|
||||
NULL, 0, /* These unused parameter should be removed! */
|
||||
NULL, 0, /* These unused parameter should be removed! */
|
||||
w, h);
|
||||
vp9_convolve8(src, src_stride, temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h);
|
||||
vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
|
||||
}
|
||||
|
||||
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -361,9 +296,9 @@ void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
for (x = 0; x < w; ++x) {
|
||||
dst[x] = (dst[x] + src[x] + 1) >> 1;
|
||||
}
|
||||
for (x = 0; x < w; ++x)
|
||||
dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#define FILTER_BITS 7
|
||||
|
||||
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
|
||||
@@ -22,23 +22,24 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) {
|
||||
* and uses the passed in member offset to print out the value of an integer
|
||||
* for each mbmi member value in the mi structure.
|
||||
*/
|
||||
static void print_mi_data(VP9_COMMON *common, FILE *file, char *descriptor,
|
||||
static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor,
|
||||
size_t member_offset) {
|
||||
int mi_row;
|
||||
int mi_col;
|
||||
int mi_index = 0;
|
||||
MODE_INFO *mi = common->mi;
|
||||
int rows = common->mi_rows;
|
||||
int cols = common->mi_cols;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible;
|
||||
int rows = cm->mi_rows;
|
||||
int cols = cm->mi_cols;
|
||||
char prefix = descriptor[0];
|
||||
|
||||
log_frame_info(common, descriptor, file);
|
||||
log_frame_info(cm, descriptor, file);
|
||||
mi_index = 0;
|
||||
for (mi_row = 0; mi_row < rows; mi_row++) {
|
||||
fprintf(file, "%c ", prefix);
|
||||
for (mi_col = 0; mi_col < cols; mi_col++) {
|
||||
fprintf(file, "%2d ",
|
||||
*((int*) ((char *) (&mi[mi_index].mbmi) + member_offset)));
|
||||
*((int*) ((char *) (&mi_8x8[mi_index]->mbmi) +
|
||||
member_offset)));
|
||||
mi_index++;
|
||||
}
|
||||
fprintf(file, "\n");
|
||||
@@ -51,23 +52,23 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) {
|
||||
int mi_col;
|
||||
int mi_index = 0;
|
||||
FILE *mvs = fopen(file, "a");
|
||||
MODE_INFO *mi = cm->mi;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible;
|
||||
int rows = cm->mi_rows;
|
||||
int cols = cm->mi_cols;
|
||||
|
||||
print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
|
||||
print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
|
||||
print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, mb_skip_coeff));
|
||||
print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip_coeff));
|
||||
print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
|
||||
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, txfm_size));
|
||||
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
|
||||
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
|
||||
|
||||
log_frame_info(cm, "Vectors ",mvs);
|
||||
for (mi_row = 0; mi_row < rows; mi_row++) {
|
||||
fprintf(mvs,"V ");
|
||||
for (mi_col = 0; mi_col < cols; mi_col++) {
|
||||
fprintf(mvs, "%4d:%4d ", mi[mi_index].mbmi.mv[0].as_mv.row,
|
||||
mi[mi_index].mbmi.mv[0].as_mv.col);
|
||||
fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
|
||||
mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
|
||||
mi_index++;
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
@@ -377,7 +377,7 @@ static const vp9_prob modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
|
||||
|
||||
static void extend_model_to_full_distribution(vp9_prob p,
|
||||
vp9_prob *tree_probs) {
|
||||
const int l = ((p - 1) / 2);
|
||||
const int l = (p - 1) / 2;
|
||||
const vp9_prob (*model)[MODEL_NODES] = modelcoefprobs_pareto8;
|
||||
if (p & 1) {
|
||||
vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
|
||||
@@ -436,11 +436,11 @@ const vp9_extra_bit vp9_extra_bits[12] = {
|
||||
|
||||
#include "vp9/common/vp9_default_coef_probs.h"
|
||||
|
||||
void vp9_default_coef_probs(VP9_COMMON *pc) {
|
||||
vp9_copy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
|
||||
vp9_copy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
|
||||
vp9_copy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
|
||||
vp9_copy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
|
||||
void vp9_default_coef_probs(VP9_COMMON *cm) {
|
||||
vp9_copy(cm->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
|
||||
vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
|
||||
vp9_copy(cm->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
|
||||
vp9_copy(cm->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
|
||||
}
|
||||
|
||||
// Neighborhood 5-tuples for various scans and blocksizes,
|
||||
@@ -622,7 +622,6 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
|
||||
int t, i, j, k, l;
|
||||
unsigned int branch_ct[UNCONSTRAINED_NODES][2];
|
||||
vp9_prob coef_probs[UNCONSTRAINED_NODES];
|
||||
int entropy_nodes_adapt = UNCONSTRAINED_NODES;
|
||||
|
||||
for (i = 0; i < BLOCK_TYPES; ++i)
|
||||
for (j = 0; j < REF_TYPES; ++j)
|
||||
@@ -635,7 +634,7 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
|
||||
0);
|
||||
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
|
||||
coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
|
||||
for (t = 0; t < entropy_nodes_adapt; ++t)
|
||||
for (t = 0; t < UNCONSTRAINED_NODES; ++t)
|
||||
dst_coef_probs[i][j][k][l][t] = merge_probs(
|
||||
pre_coef_probs[i][j][k][l][t], coef_probs[t],
|
||||
branch_ct[t], count_sat, update_factor);
|
||||
|
||||
@@ -95,7 +95,7 @@ typedef vp9_prob vp9_coeff_probs[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
|
||||
#define MODULUS_PARAM 13 /* Modulus parameter */
|
||||
|
||||
struct VP9Common;
|
||||
void vp9_default_coef_probs(struct VP9Common *);
|
||||
void vp9_default_coef_probs(struct VP9Common *cm);
|
||||
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]);
|
||||
@@ -154,19 +154,17 @@ extern DECLARE_ALIGNED(16, int16_t,
|
||||
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
|
||||
|
||||
void vp9_coef_tree_initialize(void);
|
||||
void vp9_adapt_coef_probs(struct VP9Common *);
|
||||
void vp9_adapt_coef_probs(struct VP9Common *cm);
|
||||
|
||||
static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
/* Clear entropy contexts */
|
||||
const int bw = 1 << b_width_log2(bsize);
|
||||
const int bh = 1 << b_height_log2(bsize);
|
||||
static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
|
||||
int i;
|
||||
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||
vpx_memset(xd->plane[i].above_context, 0,
|
||||
sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[i].subsampling_x);
|
||||
vpx_memset(xd->plane[i].left_context, 0,
|
||||
sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[i].subsampling_y);
|
||||
struct macroblockd_plane *const pd = &xd->plane[i];
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
|
||||
vpx_memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) *
|
||||
num_4x4_blocks_wide_lookup[plane_bsize]);
|
||||
vpx_memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) *
|
||||
num_4x4_blocks_high_lookup[plane_bsize]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,6 +336,45 @@ static INLINE const int16_t* get_iscan_16x16(TX_TYPE tx_type) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Picks the coefficient scan order and band-translation table for a
 * transform block and returns its combined above/left entropy context.
 *
 * xd, type, block_idx: forwarded to the get_tx_type_* helpers to choose the
 *                      scan for TX_4X4/TX_8X8/TX_16X16 (block_idx is only used
 *                      for TX_4X4); TX_32X32 always uses
 *                      vp9_default_scan_32x32.
 * A, L:                above and left entropy context arrays; only read.
 * scan, band_translate: output parameters, written for every valid tx_size.
 *
 * Returns combine_entropy_contexts(above_ec, left_ec), where each flag is 1
 * if the bytes examined at A / L are not all zero and 0 otherwise.
 *
 * NOTE(review): the TX_8X8/16X16/32X32 cases load 2/4/8 bytes at once through
 * casted pointers. This presumably covers 2/4/8 one-byte ENTROPY_CONTEXT
 * entries and assumes A and L are suitably aligned with that many bytes
 * readable - confirm against the ENTROPY_CONTEXT typedef and the callers.
 */
static int get_entropy_context(const MACROBLOCKD *xd, TX_SIZE tx_size,
|
||||
PLANE_TYPE type, int block_idx,
|
||||
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
|
||||
const int16_t **scan,
|
||||
const uint8_t **band_translate) {
|
||||
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
*scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
|
||||
*band_translate = vp9_coefband_trans_4x4;
|
||||
/* Only the first context entry is examined. */
above_ec = A[0] != 0;
|
||||
left_ec = L[0] != 0;
|
||||
break;
|
||||
case TX_8X8:
|
||||
*scan = get_scan_8x8(get_tx_type_8x8(type, xd));
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
/* Single 16-bit load: non-zero if either of the two bytes is set. */
above_ec = !!*(uint16_t *)A;
|
||||
left_ec = !!*(uint16_t *)L;
|
||||
break;
|
||||
case TX_16X16:
|
||||
*scan = get_scan_16x16(get_tx_type_16x16(type, xd));
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
/* Single 32-bit load: non-zero if any of the four bytes is set. */
above_ec = !!*(uint32_t *)A;
|
||||
left_ec = !!*(uint32_t *)L;
|
||||
break;
|
||||
case TX_32X32:
|
||||
*scan = vp9_default_scan_32x32;
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
/* Single 64-bit load: non-zero if any of the eight bytes is set. */
above_ec = !!*(uint64_t *)A;
|
||||
left_ec = !!*(uint64_t *)L;
|
||||
break;
|
||||
default:
|
||||
/* With assertions compiled out, *scan and *band_translate stay unwritten
 * and both context flags keep their initial value of 0. */
assert(!"Invalid transform size.");
|
||||
}
|
||||
|
||||
return combine_entropy_contexts(above_ec, left_ec);
|
||||
}
|
||||
|
||||
enum { VP9_COEF_UPDATE_PROB = 252 };
|
||||
|
||||
#endif // VP9_COMMON_VP9_ENTROPY_H_
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
|
||||
const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */,
|
||||
{ 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */,
|
||||
{ 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */,
|
||||
@@ -23,21 +23,21 @@ const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
|
||||
{ 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */,
|
||||
{ 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */,
|
||||
{ 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */,
|
||||
{ 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */,
|
||||
{ 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d207 */,
|
||||
{ 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */,
|
||||
{ 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y = tm */
|
||||
};
|
||||
|
||||
static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */,
|
||||
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */,
|
||||
{ 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */,
|
||||
{ 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */
|
||||
};
|
||||
|
||||
static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
static const vp9_prob default_if_uv_probs[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */,
|
||||
{ 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */,
|
||||
{ 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */,
|
||||
@@ -45,7 +45,7 @@ static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
|
||||
{ 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */,
|
||||
{ 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */,
|
||||
{ 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */,
|
||||
{ 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */,
|
||||
{ 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d207 */,
|
||||
{ 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */,
|
||||
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */
|
||||
};
|
||||
@@ -98,9 +98,9 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
|
||||
}
|
||||
};
|
||||
|
||||
const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES]
|
||||
[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ /* above = dc */
|
||||
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
|
||||
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
|
||||
@@ -109,7 +109,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */,
|
||||
{ 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */,
|
||||
{ 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */,
|
||||
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */,
|
||||
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d207 */,
|
||||
{ 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */,
|
||||
{ 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */
|
||||
}, { /* above = v */
|
||||
@@ -120,7 +120,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */,
|
||||
{ 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */,
|
||||
{ 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */,
|
||||
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */,
|
||||
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d207 */,
|
||||
{ 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */,
|
||||
{ 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */
|
||||
}, { /* above = h */
|
||||
@@ -131,7 +131,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */,
|
||||
{ 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */,
|
||||
{ 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */,
|
||||
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */,
|
||||
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d207 */,
|
||||
{ 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */,
|
||||
{ 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */
|
||||
}, { /* above = d45 */
|
||||
@@ -142,7 +142,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */,
|
||||
{ 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */,
|
||||
{ 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */,
|
||||
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */,
|
||||
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d207 */,
|
||||
{ 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */,
|
||||
{ 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */
|
||||
}, { /* above = d135 */
|
||||
@@ -153,7 +153,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */,
|
||||
{ 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */,
|
||||
{ 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */,
|
||||
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */,
|
||||
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d207 */,
|
||||
{ 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */,
|
||||
{ 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */
|
||||
}, { /* above = d117 */
|
||||
@@ -164,7 +164,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */,
|
||||
{ 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */,
|
||||
{ 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */,
|
||||
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */,
|
||||
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d207 */,
|
||||
{ 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */,
|
||||
{ 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */
|
||||
}, { /* above = d153 */
|
||||
@@ -175,10 +175,10 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */,
|
||||
{ 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */,
|
||||
{ 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */,
|
||||
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */,
|
||||
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d207 */,
|
||||
{ 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */,
|
||||
{ 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */
|
||||
}, { /* above = d27 */
|
||||
}, { /* above = d207 */
|
||||
{ 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */,
|
||||
{ 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */,
|
||||
{ 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */,
|
||||
@@ -186,7 +186,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */,
|
||||
{ 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */,
|
||||
{ 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */,
|
||||
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */,
|
||||
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d207 */,
|
||||
{ 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */,
|
||||
{ 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */
|
||||
}, { /* above = d63 */
|
||||
@@ -197,7 +197,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */,
|
||||
{ 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */,
|
||||
{ 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */,
|
||||
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */,
|
||||
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d207 */,
|
||||
{ 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */,
|
||||
{ 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */
|
||||
}, { /* above = tm */
|
||||
@@ -208,14 +208,14 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */,
|
||||
{ 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */,
|
||||
{ 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */,
|
||||
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */,
|
||||
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d207 */,
|
||||
{ 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */,
|
||||
{ 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */
|
||||
}
|
||||
};
|
||||
|
||||
static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
|
||||
[VP9_INTER_MODES - 1] = {
|
||||
[INTER_MODES - 1] = {
|
||||
{2, 173, 34}, // 0 = both zero mv
|
||||
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
|
||||
{7, 166, 63}, // 2 = two predicted mvs
|
||||
@@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
|
||||
};
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
|
||||
const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
|
||||
-DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-V_PRED, 6, /* 2 = V_NODE */
|
||||
@@ -235,7 +235,7 @@ const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
|
||||
-D135_PRED, -D117_PRED, /* 5 = D135_NODE */
|
||||
-D45_PRED, 14, /* 6 = D45_NODE */
|
||||
-D63_PRED, 16, /* 7 = D63_NODE */
|
||||
-D153_PRED, -D27_PRED /* 8 = D153_NODE */
|
||||
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
|
||||
};
|
||||
|
||||
const vp9_tree_index vp9_inter_mode_tree[6] = {
|
||||
@@ -250,8 +250,8 @@ const vp9_tree_index vp9_partition_tree[6] = {
|
||||
-PARTITION_VERT, -PARTITION_SPLIT
|
||||
};
|
||||
|
||||
struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
|
||||
struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
|
||||
struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
|
||||
struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
|
||||
|
||||
struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
|
||||
|
||||
@@ -317,8 +317,8 @@ static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
|
||||
192, 128, 64
|
||||
};
|
||||
|
||||
static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
|
||||
[VP9_SWITCHABLE_FILTERS-1] = {
|
||||
static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1]
|
||||
[SWITCHABLE_FILTERS-1] = {
|
||||
{ 235, 162, },
|
||||
{ 36, 255, },
|
||||
{ 34, 3, },
|
||||
@@ -338,11 +338,11 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
|
||||
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
|
||||
}
|
||||
|
||||
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
|
||||
const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = {
|
||||
-EIGHTTAP, 2,
|
||||
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
|
||||
};
|
||||
struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
|
||||
struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
|
||||
|
||||
void vp9_entropy_mode_init() {
|
||||
vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree);
|
||||
@@ -400,17 +400,17 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
counts->single_ref[i][j]);
|
||||
|
||||
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
|
||||
update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
|
||||
update_mode_probs(INTER_MODES, vp9_inter_mode_tree,
|
||||
counts->inter_mode[i], pre_fc->inter_mode_probs[i],
|
||||
fc->inter_mode_probs[i], NEARESTMV);
|
||||
|
||||
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
|
||||
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
|
||||
update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
|
||||
counts->y_mode[i], pre_fc->y_mode_prob[i],
|
||||
fc->y_mode_prob[i], 0);
|
||||
|
||||
for (i = 0; i < VP9_INTRA_MODES; ++i)
|
||||
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
|
||||
for (i = 0; i < INTRA_MODES; ++i)
|
||||
update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
|
||||
counts->uv_mode[i], pre_fc->uv_mode_prob[i],
|
||||
fc->uv_mode_prob[i], 0);
|
||||
|
||||
@@ -421,8 +421,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
fc->partition_prob[INTER_FRAME][i], 0);
|
||||
|
||||
if (cm->mcomp_filter_type == SWITCHABLE) {
|
||||
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
|
||||
update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
|
||||
for (i = 0; i <= SWITCHABLE_FILTERS; i++)
|
||||
update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
|
||||
counts->switchable_interp[i],
|
||||
pre_fc->switchable_interp_prob[i],
|
||||
fc->switchable_interp_prob[i], 0);
|
||||
@@ -440,14 +440,12 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
fc->tx_probs.p8x8[i][j] = update_ct2(pre_fc->tx_probs.p8x8[i][j],
|
||||
branch_ct_8x8p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i],
|
||||
branch_ct_16x16p);
|
||||
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
|
||||
for (j = 0; j < TX_SIZES - 2; ++j)
|
||||
fc->tx_probs.p16x16[i][j] = update_ct2(pre_fc->tx_probs.p16x16[i][j],
|
||||
branch_ct_16x16p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i],
|
||||
branch_ct_32x32p);
|
||||
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
|
||||
for (j = 0; j < TX_SIZES - 1; ++j)
|
||||
fc->tx_probs.p32x32[i][j] = update_ct2(pre_fc->tx_probs.p32x32[i][j],
|
||||
branch_ct_32x32p[j]);
|
||||
@@ -472,14 +470,14 @@ static void set_default_lf_deltas(struct loopfilter *lf) {
|
||||
lf->mode_deltas[1] = 0;
|
||||
}
|
||||
|
||||
void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
|
||||
void vp9_setup_past_independence(VP9_COMMON *cm) {
|
||||
// Reset the segment feature data to the default stats:
|
||||
// Features disabled, 0, with delta coding (Default state).
|
||||
struct loopfilter *const lf = &xd->lf;
|
||||
struct loopfilter *const lf = &cm->lf;
|
||||
|
||||
int i;
|
||||
vp9_clearall_segfeatures(&xd->seg);
|
||||
xd->seg.abs_delta = SEGMENT_DELTADATA;
|
||||
vp9_clearall_segfeatures(&cm->seg);
|
||||
cm->seg.abs_delta = SEGMENT_DELTADATA;
|
||||
if (cm->last_frame_seg_map)
|
||||
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
|
||||
|
||||
@@ -512,10 +510,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->mi);
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->prev_mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->prev_mi);
|
||||
|
||||
vp9_zero(cm->ref_frame_sign_bias);
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
|
||||
#define SUBMVREF_COUNT 5
|
||||
#define TX_SIZE_CONTEXTS 2
|
||||
#define VP9_MODE_UPDATE_PROB 252
|
||||
#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters
|
||||
#define MODE_UPDATE_PROB 252
|
||||
#define SWITCHABLE_FILTERS 3 // number of switchable filters
|
||||
|
||||
// #define MODE_STATS
|
||||
|
||||
@@ -35,32 +35,32 @@ struct tx_counts {
|
||||
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
|
||||
};
|
||||
|
||||
extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
|
||||
[INTRA_MODES - 1];
|
||||
|
||||
extern const vp9_tree_index vp9_intra_mode_tree[];
|
||||
extern const vp9_tree_index vp9_inter_mode_tree[];
|
||||
|
||||
extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
|
||||
extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
|
||||
extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
|
||||
extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
|
||||
|
||||
// probability models for partition information
|
||||
extern const vp9_tree_index vp9_partition_tree[];
|
||||
extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
|
||||
|
||||
extern const vp9_tree_index vp9_switchable_interp_tree
|
||||
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
|
||||
[2 * (SWITCHABLE_FILTERS - 1)];
|
||||
|
||||
extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
|
||||
extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
|
||||
|
||||
void vp9_entropy_mode_init();
|
||||
|
||||
void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd);
|
||||
void vp9_setup_past_independence(struct VP9Common *cm);
|
||||
|
||||
void vp9_init_mbmode_probs(struct VP9Common *x);
|
||||
void vp9_init_mbmode_probs(struct VP9Common *cm);
|
||||
|
||||
void vp9_adapt_mode_probs(struct VP9Common *);
|
||||
void vp9_adapt_mode_probs(struct VP9Common *cm);
|
||||
|
||||
void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
|
||||
unsigned int (*ct_32x32p)[2]);
|
||||
|
||||
@@ -79,20 +79,59 @@ static const nmv_context default_nmv_context = {
|
||||
|
||||
#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
|
||||
|
||||
/* Integer base-2 logarithm lookup table with 1025 entries (indices 0..1024).
 * Entry i holds floor(log2(i)) for i >= 1; entry 0 holds 0.
 * vp9_get_mv_class() indexes it with (z >> 3) to map a motion-vector
 * magnitude to its class without an if/else chain.
 */
static const uint8_t log_in_base_2[] = {
|
||||
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
|
||||
};
|
||||
|
||||
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
|
||||
MV_CLASS_TYPE c = MV_CLASS_0;
|
||||
if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
|
||||
else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
|
||||
else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
|
||||
else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3;
|
||||
else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4;
|
||||
else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
|
||||
else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
|
||||
else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
|
||||
else if (z < CLASS0_SIZE * 2048) c = MV_CLASS_8;
|
||||
else if (z < CLASS0_SIZE * 4096) c = MV_CLASS_9;
|
||||
else if (z < CLASS0_SIZE * 8192) c = MV_CLASS_10;
|
||||
else assert(0);
|
||||
if (z >= CLASS0_SIZE * 4096)
|
||||
c = MV_CLASS_10;
|
||||
else
|
||||
c = log_in_base_2[z >> 3];
|
||||
|
||||
if (offset)
|
||||
*offset = z - mv_class_base(c);
|
||||
return c;
|
||||
@@ -110,8 +149,6 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
|
||||
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
|
||||
int incr, int usehp) {
|
||||
int s, z, c, o, d, e, f;
|
||||
if (!incr)
|
||||
return;
|
||||
assert (v != 0); /* should not be zero */
|
||||
s = v < 0;
|
||||
comp_counts->sign[s] += incr;
|
||||
@@ -123,61 +160,39 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts,
|
||||
d = (o >> 3); /* int mv data */
|
||||
f = (o >> 1) & 3; /* fractional pel mv data */
|
||||
e = (o & 1); /* high precision mv data */
|
||||
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0[d] += incr;
|
||||
comp_counts->class0_fp[d][f] += incr;
|
||||
comp_counts->class0_hp[e] += usehp * incr;
|
||||
} else {
|
||||
int i;
|
||||
int b = c + CLASS0_BITS - 1; // number of bits
|
||||
for (i = 0; i < b; ++i)
|
||||
comp_counts->bits[i][((d >> i) & 1)] += incr;
|
||||
}
|
||||
|
||||
/* Code the fractional pel bits */
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0_fp[d][f] += incr;
|
||||
} else {
|
||||
comp_counts->fp[f] += incr;
|
||||
}
|
||||
|
||||
/* Code the high precision bit */
|
||||
if (usehp) {
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0_hp[e] += incr;
|
||||
} else {
|
||||
comp_counts->hp[e] += incr;
|
||||
}
|
||||
comp_counts->hp[e] += usehp * incr;
|
||||
}
|
||||
}
|
||||
|
||||
static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
|
||||
int v;
|
||||
vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount));
|
||||
for (v = 1; v <= MV_MAX; v++) {
|
||||
inc_mv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp);
|
||||
inc_mv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
|
||||
const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
|
||||
++counts->joints[j];
|
||||
|
||||
if (mv_joint_vertical(j))
|
||||
++counts->comps[0].mvcount[MV_MAX + mv->row];
|
||||
if (mv_joint_vertical(j)) {
|
||||
inc_mv_component(mv->row, &counts->comps[0], 1, 1);
|
||||
}
|
||||
|
||||
if (mv_joint_horizontal(j))
|
||||
++counts->comps[1].mvcount[MV_MAX + mv->col];
|
||||
if (mv_joint_horizontal(j)) {
|
||||
inc_mv_component(mv->col, &counts->comps[1], 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Adapts one binary motion-vector probability.
 * prep: the previous probability; ct: the two branch counts.
 * Forwards both to merge_probs2() with the MV-specific saturation count
 * (MV_COUNT_SAT) and update factor (MV_MAX_UPDATE_FACTOR) and returns its
 * result unchanged.
 */
static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
|
||||
return merge_probs2(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
|
||||
}
|
||||
|
||||
void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
|
||||
counts_to_context(&nmv_count->comps[0], usehp);
|
||||
counts_to_context(&nmv_count->comps[1], usehp);
|
||||
}
|
||||
|
||||
static unsigned int adapt_probs(unsigned int i,
|
||||
vp9_tree tree,
|
||||
vp9_prob this_probs[],
|
||||
@@ -207,8 +222,6 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
|
||||
nmv_context *pre_ctx = &pre_fc->nmvc;
|
||||
nmv_context_counts *cts = &cm->counts.mv;
|
||||
|
||||
vp9_counts_process(cts, allow_hp);
|
||||
|
||||
adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
|
||||
@@ -24,7 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm);
|
||||
void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
|
||||
int vp9_use_mv_hp(const MV *ref);
|
||||
|
||||
#define VP9_NMV_UPDATE_PROB 252
|
||||
#define NMV_UPDATE_PROB 252
|
||||
|
||||
/* Symbols for coding which components are zero jointly */
|
||||
#define MV_JOINTS 4
|
||||
@@ -126,6 +126,4 @@ typedef struct {
|
||||
|
||||
void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
|
||||
|
||||
void vp9_counts_process(nmv_context_counts *NMVcount, int usehp);
|
||||
|
||||
#endif // VP9_COMMON_VP9_ENTROPYMV_H_
|
||||
|
||||
@@ -13,15 +13,16 @@
|
||||
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#define LOG2_MI_SIZE 3
|
||||
#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE) // 64 = 2^6
|
||||
#define MI_SIZE_LOG2 3
|
||||
#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6
|
||||
|
||||
#define MI_SIZE (1 << LOG2_MI_SIZE) // pixels per mi-unit
|
||||
#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE) // mi-units per max block
|
||||
#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit
|
||||
#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block
|
||||
|
||||
#define MI_MASK (MI_BLOCK_SIZE - 1)
|
||||
|
||||
typedef enum BLOCK_SIZE_TYPE {
|
||||
|
||||
typedef enum BLOCK_SIZE {
|
||||
BLOCK_4X4,
|
||||
BLOCK_4X8,
|
||||
BLOCK_8X4,
|
||||
@@ -35,15 +36,17 @@ typedef enum BLOCK_SIZE_TYPE {
|
||||
BLOCK_32X64,
|
||||
BLOCK_64X32,
|
||||
BLOCK_64X64,
|
||||
BLOCK_SIZE_TYPES
|
||||
} BLOCK_SIZE_TYPE;
|
||||
BLOCK_SIZES,
|
||||
BLOCK_INVALID = BLOCK_SIZES
|
||||
} BLOCK_SIZE;
|
||||
|
||||
typedef enum PARTITION_TYPE {
|
||||
PARTITION_NONE,
|
||||
PARTITION_HORZ,
|
||||
PARTITION_VERT,
|
||||
PARTITION_SPLIT,
|
||||
PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES
|
||||
PARTITION_TYPES,
|
||||
PARTITION_INVALID = PARTITION_TYPES
|
||||
} PARTITION_TYPE;
|
||||
|
||||
#define PARTITION_PLOFFSET 4 // number of probability models per block size
|
||||
|
||||
@@ -57,15 +57,23 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
|
||||
|
||||
void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst) {
|
||||
const int et_y = dst->border;
|
||||
const int el_y = dst->border;
|
||||
const int eb_y = dst->border + dst->y_height - src->y_height;
|
||||
const int er_y = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
const int et_uv = dst->border >> (dst->uv_height != dst->y_height);
|
||||
const int el_uv = dst->border >> (dst->uv_width != dst->y_width);
|
||||
const int eb_uv = et_uv + dst->uv_height - src->uv_height;
|
||||
const int er_uv = el_uv + dst->uv_width - src->uv_width;
|
||||
// Extend src frame in buffer
|
||||
// Altref filtering assumes 16 pixel extension
|
||||
const int et_y = 16;
|
||||
const int el_y = 16;
|
||||
// Motion estimation may use src block variance with the block size up
|
||||
// to 64x64, so the right and bottom need to be extended to a multiple of 64
|
||||
// or up to 16, whichever is greater.
|
||||
const int eb_y = MAX(ALIGN_POWER_OF_TWO(src->y_width, 6) - src->y_width,
|
||||
16);
|
||||
const int er_y = MAX(ALIGN_POWER_OF_TWO(src->y_height, 6) - src->y_height,
|
||||
16);
|
||||
const int uv_width_subsampling = (src->uv_width != src->y_width);
|
||||
const int uv_height_subsampling = (src->uv_height != src->y_height);
|
||||
const int et_uv = et_y >> uv_height_subsampling;
|
||||
const int el_uv = el_y >> uv_width_subsampling;
|
||||
const int eb_uv = eb_y >> uv_height_subsampling;
|
||||
const int er_uv = er_y >> uv_width_subsampling;
|
||||
|
||||
#if CONFIG_ALPHA
|
||||
const int et_a = dst->border >> (dst->alpha_height != dst->y_height);
|
||||
|
||||
@@ -8,14 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_bilinear_filters[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 },
|
||||
{ 0, 0, 0, 120, 8, 0, 0, 0 },
|
||||
{ 0, 0, 0, 112, 16, 0, 0, 0 },
|
||||
@@ -34,8 +32,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
|
||||
{ 0, 0, 0, 8, 120, 0, 0, 0 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
|
||||
/* Lagrangian interpolation filter */
|
||||
// Lagrangian interpolation filter
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{ 0, 1, -5, 126, 8, -3, 1, 0},
|
||||
{ -1, 3, -10, 122, 18, -6, 2, 0},
|
||||
@@ -54,9 +53,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
|
||||
{ 0, 1, -3, 8, 126, -5, 1, 0}
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
|
||||
= {
|
||||
/* dct based filter */
|
||||
// DCT based filter
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{-1, 3, -7, 127, 8, -3, 1, 0},
|
||||
{-2, 5, -13, 125, 17, -6, 3, -1},
|
||||
@@ -75,9 +74,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
|
||||
{0, 1, -3, 8, 127, -7, 3, -1}
|
||||
};
|
||||
|
||||
// freqmultiplier = 0.5
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
|
||||
/* freqmultiplier = 0.5 */
|
||||
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{-3, -1, 32, 64, 38, 1, -3, 0},
|
||||
{-2, -2, 29, 63, 41, 2, -3, 0},
|
||||
|
||||
@@ -12,26 +12,22 @@
|
||||
#define VP9_COMMON_VP9_FILTER_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP9_FILTER_WEIGHT 128
|
||||
#define VP9_FILTER_SHIFT 7
|
||||
#define SUBPEL_BITS 4
|
||||
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
|
||||
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
|
||||
#define SUBPEL_TAPS 8
|
||||
|
||||
#define SUBPEL_SHIFTS 16
|
||||
|
||||
extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][8];
|
||||
extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8];
|
||||
extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
|
||||
extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
|
||||
extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8];
|
||||
extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][SUBPEL_TAPS];
|
||||
extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][SUBPEL_TAPS];
|
||||
extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][SUBPEL_TAPS];
|
||||
extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][SUBPEL_TAPS];
|
||||
extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS];
|
||||
|
||||
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
|
||||
// filter kernel as a 2 tap filter.
|
||||
#define BF_LENGTH (sizeof(vp9_bilinear_filters[0]) / \
|
||||
sizeof(vp9_bilinear_filters[0][0]))
|
||||
#define BF_OFFSET (BF_LENGTH / 2 - 1)
|
||||
#define VP9_BILINEAR_FILTERS_2TAP(x) (vp9_bilinear_filters[x] + BF_OFFSET)
|
||||
#define BILINEAR_FILTERS_2TAP(x) \
|
||||
(vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
|
||||
|
||||
#endif // VP9_COMMON_VP9_FILTER_H_
|
||||
|
||||
@@ -8,11 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "vp9/common/vp9_findnearmv.h"
|
||||
#include "vp9/common/vp9_mvref_common.h"
|
||||
#include "vp9/common/vp9_sadmxn.h"
|
||||
|
||||
static void lower_mv_precision(MV *mv, int allow_hp) {
|
||||
const int use_hp = allow_hp && vp9_use_mv_hp(mv);
|
||||
@@ -46,17 +43,14 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col) {
|
||||
int_mv dst_list[MAX_MV_REF_CANDIDATES];
|
||||
int_mv mv_list[MAX_MV_REF_CANDIDATES];
|
||||
MODE_INFO *mi = xd->mode_info_context;
|
||||
MB_MODE_INFO *const mbmi = &mi->mbmi;
|
||||
MODE_INFO *const mi = xd->this_mi;
|
||||
|
||||
assert(ref_idx == 0 || ref_idx == 1);
|
||||
assert(MAX_MV_REF_CANDIDATES == 2); // makes code here slightly easier
|
||||
|
||||
vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context,
|
||||
xd->prev_mode_info_context,
|
||||
mbmi->ref_frame[ref_idx],
|
||||
mv_list, cm->ref_frame_sign_bias, block_idx,
|
||||
mi_row, mi_col);
|
||||
vp9_find_mv_refs_idx(cm, xd, mi, xd->last_mi,
|
||||
mi->mbmi.ref_frame[ref_idx],
|
||||
mv_list, block_idx, mi_row, mi_col);
|
||||
|
||||
dst_list[1].as_int = 0;
|
||||
if (block_idx == 0) {
|
||||
|
||||
@@ -36,48 +36,57 @@ static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
|
||||
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
|
||||
}
|
||||
|
||||
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc,
|
||||
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm,
|
||||
MACROBLOCKD *xd,
|
||||
int_mv *dst_nearest,
|
||||
int_mv *dst_near,
|
||||
int block_idx, int ref_idx,
|
||||
int mi_row, int mi_col);
|
||||
|
||||
static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
|
||||
static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb,
|
||||
const MODE_INFO *left_mb, int b) {
|
||||
// FIXME(rbultje, jingning): temporary hack because jenkins doesn't
|
||||
// understand this condition. This will go away soon.
|
||||
const MODE_INFO *mi = cur_mb;
|
||||
|
||||
if (b == 0 || b == 2) {
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
if (is_inter_block(&cur_mb->mbmi)) {
|
||||
mi = left_mb;
|
||||
if (!mi)
|
||||
return DC_PRED;
|
||||
} else if (cur_mb->mbmi.sb_type < BLOCK_8X8) {
|
||||
return (cur_mb->bmi + 1 + b)->as_mode;
|
||||
|
||||
if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
|
||||
return DC_PRED;
|
||||
} else if (mi->mbmi.sb_type < BLOCK_8X8) {
|
||||
return ((mi->bmi + 1 + b)->as_mode);
|
||||
} else {
|
||||
return cur_mb->mbmi.mode;
|
||||
return mi->mbmi.mode;
|
||||
}
|
||||
}
|
||||
assert(b == 1 || b == 3);
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
return (mi->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
|
||||
int b, int mi_stride) {
|
||||
const MODE_INFO *above_mb, int b) {
|
||||
const MODE_INFO *mi = cur_mb;
|
||||
|
||||
if (!(b >> 1)) {
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if (is_inter_block(&cur_mb->mbmi)) {
|
||||
mi = above_mb;
|
||||
if (!mi)
|
||||
return DC_PRED;
|
||||
} else if (cur_mb->mbmi.sb_type < BLOCK_8X8) {
|
||||
return (cur_mb->bmi + 2 + b)->as_mode;
|
||||
|
||||
if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
|
||||
return DC_PRED;
|
||||
} else if (mi->mbmi.sb_type < BLOCK_8X8) {
|
||||
return ((mi->bmi + 2 + b)->as_mode);
|
||||
} else {
|
||||
return cur_mb->mbmi.mode;
|
||||
return mi->mbmi.mode;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 2)->as_mode;
|
||||
return (mi->bmi + b - 2)->as_mode;
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_FINDNEARMV_H_
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
#define pair_set_epi16(a, b) \
|
||||
_mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
|
||||
|
||||
#define pair_set_epi32(a, b) \
|
||||
_mm_set_epi32(b, a, b, a)
|
||||
|
||||
// Constants:
|
||||
// for (int i = 1; i< 32; ++i)
|
||||
// printf("static const int cospi_%d_64 = %.0f;\n", i,
|
||||
|
||||
@@ -22,13 +22,217 @@ struct loop_filter_info {
|
||||
const uint8_t *hev_thr;
|
||||
};
|
||||
|
||||
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
|
||||
// Each 1 bit represents a position in which we want to apply the loop filter.
|
||||
// Left_ entries refer to whether we apply a filter on the border to the
|
||||
// left of the block. Above_ entries refer to whether or not to apply a
|
||||
// filter on the above border. Int_ entries refer to whether or not to
|
||||
// apply borders on the 4x4 edges within the 8x8 block that each bit
|
||||
// represents.
|
||||
// Since each transform is accompanied by a potentially different type of
|
||||
// loop filter there is a different entry in the array for each transform size.
|
||||
typedef struct {
|
||||
uint64_t left_y[TX_SIZES];
|
||||
uint64_t above_y[TX_SIZES];
|
||||
uint64_t int_4x4_y;
|
||||
uint16_t left_uv[TX_SIZES];
|
||||
uint16_t above_uv[TX_SIZES];
|
||||
uint16_t int_4x4_uv;
|
||||
} LOOP_FILTER_MASK;
|
||||
|
||||
// 64 bit masks for left transform size. Each 1 represents a position where
|
||||
// we should apply a loop filter across the left border of an 8x8 block
|
||||
// boundary.
|
||||
//
|
||||
// In the case of TX_16X16 -> (in low order byte first) we end up with
|
||||
// a mask that looks like this
|
||||
//
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
// 10101010
|
||||
//
|
||||
// A loopfilter should be applied to every other 8x8 horizontally.
|
||||
static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
|
||||
0xffffffffffffffff, // TX_4X4
|
||||
0xffffffffffffffff, // TX_8x8
|
||||
0x5555555555555555, // TX_16x16
|
||||
0x1111111111111111, // TX_32x32
|
||||
};
|
||||
|
||||
// 64 bit masks for above transform size. Each 1 represents a position where
|
||||
// we should apply a loop filter across the top border of an 8x8 block
|
||||
// boundary.
|
||||
//
|
||||
// In the case of TX_32x32 -> (in low order byte first) we end up with
|
||||
// a mask that looks like this
|
||||
//
|
||||
// 11111111
|
||||
// 00000000
|
||||
// 00000000
|
||||
// 00000000
|
||||
// 11111111
|
||||
// 00000000
|
||||
// 00000000
|
||||
// 00000000
|
||||
//
|
||||
// A loopfilter should be applied to every fourth row vertically.
|
||||
static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
|
||||
0xffffffffffffffff, // TX_4X4
|
||||
0xffffffffffffffff, // TX_8x8
|
||||
0x00ff00ff00ff00ff, // TX_16x16
|
||||
0x000000ff000000ff, // TX_32x32
|
||||
};
|
||||
|
||||
// 64 bit masks for prediction sizes (left). Each 1 represents a position
|
||||
// on the left border of an 8x8 block. These are aligned to the right most
|
||||
// appropriate bit, and then shifted into place.
|
||||
//
|
||||
// In the case of BLOCK_16X32 -> ( low order byte first ) we end up with
|
||||
// a mask that looks like this :
|
||||
//
|
||||
// 10000000
|
||||
// 10000000
|
||||
// 10000000
|
||||
// 10000000
|
||||
// 00000000
|
||||
// 00000000
|
||||
// 00000000
|
||||
// 00000000
|
||||
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
|
||||
0x0000000000000001, // BLOCK_4X4,
|
||||
0x0000000000000001, // BLOCK_4X8,
|
||||
0x0000000000000001, // BLOCK_8X4,
|
||||
0x0000000000000001, // BLOCK_8X8,
|
||||
0x0000000000000101, // BLOCK_8X16,
|
||||
0x0000000000000001, // BLOCK_16X8,
|
||||
0x0000000000000101, // BLOCK_16X16,
|
||||
0x0000000001010101, // BLOCK_16X32,
|
||||
0x0000000000000101, // BLOCK_32X16,
|
||||
0x0000000001010101, // BLOCK_32X32,
|
||||
0x0101010101010101, // BLOCK_32X64,
|
||||
0x0000000001010101, // BLOCK_64X32,
|
||||
0x0101010101010101, // BLOCK_64X64
|
||||
};
|
||||
|
||||
// 64 bit mask to shift and set for each prediction size.
|
||||
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
|
||||
0x0000000000000001, // BLOCK_4X4
|
||||
0x0000000000000001, // BLOCK_4X8
|
||||
0x0000000000000001, // BLOCK_8X4
|
||||
0x0000000000000001, // BLOCK_8X8
|
||||
0x0000000000000001, // BLOCK_8X16,
|
||||
0x0000000000000003, // BLOCK_16X8
|
||||
0x0000000000000003, // BLOCK_16X16
|
||||
0x0000000000000003, // BLOCK_16X32,
|
||||
0x000000000000000f, // BLOCK_32X16,
|
||||
0x000000000000000f, // BLOCK_32X32,
|
||||
0x000000000000000f, // BLOCK_32X64,
|
||||
0x00000000000000ff, // BLOCK_64X32,
|
||||
0x00000000000000ff, // BLOCK_64X64
|
||||
};
|
||||
// 64 bit mask to shift and set for each prediction size. A bit is set for
|
||||
// each 8x8 block that would be in the left most block of the given block
|
||||
// size in the 64x64 block.
|
||||
static const uint64_t size_mask[BLOCK_SIZES] = {
|
||||
0x0000000000000001, // BLOCK_4X4
|
||||
0x0000000000000001, // BLOCK_4X8
|
||||
0x0000000000000001, // BLOCK_8X4
|
||||
0x0000000000000001, // BLOCK_8X8
|
||||
0x0000000000000101, // BLOCK_8X16,
|
||||
0x0000000000000003, // BLOCK_16X8
|
||||
0x0000000000000303, // BLOCK_16X16
|
||||
0x0000000003030303, // BLOCK_16X32,
|
||||
0x0000000000000f0f, // BLOCK_32X16,
|
||||
0x000000000f0f0f0f, // BLOCK_32X32,
|
||||
0x0f0f0f0f0f0f0f0f, // BLOCK_32X64,
|
||||
0x00000000ffffffff, // BLOCK_64X32,
|
||||
0xffffffffffffffff, // BLOCK_64X64
|
||||
};
|
||||
|
||||
// These are used for masking the left and above borders.
|
||||
static const uint64_t left_border = 0x1111111111111111;
|
||||
static const uint64_t above_border = 0x000000ff000000ff;
|
||||
|
||||
// 16 bit masks for uv transform sizes.
|
||||
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= {
|
||||
0xffff, // TX_4X4
|
||||
0xffff, // TX_8x8
|
||||
0x5555, // TX_16x16
|
||||
0x1111, // TX_32x32
|
||||
};
|
||||
|
||||
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
|
||||
0xffff, // TX_4X4
|
||||
0xffff, // TX_8x8
|
||||
0x0f0f, // TX_16x16
|
||||
0x000f, // TX_32x32
|
||||
};
|
||||
|
||||
// 16 bit left mask to shift and set for each uv prediction size.
|
||||
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
|
||||
0x0001, // BLOCK_4X4,
|
||||
0x0001, // BLOCK_4X8,
|
||||
0x0001, // BLOCK_8X4,
|
||||
0x0001, // BLOCK_8X8,
|
||||
0x0001, // BLOCK_8X16,
|
||||
0x0001, // BLOCK_16X8,
|
||||
0x0001, // BLOCK_16X16,
|
||||
0x0011, // BLOCK_16X32,
|
||||
0x0001, // BLOCK_32X16,
|
||||
0x0011, // BLOCK_32X32,
|
||||
0x1111, // BLOCK_32X64
|
||||
0x0011, // BLOCK_64X32,
|
||||
0x1111, // BLOCK_64X64
|
||||
};
|
||||
// 16 bit above mask to shift and set for each uv prediction size.
|
||||
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
|
||||
0x0001, // BLOCK_4X4
|
||||
0x0001, // BLOCK_4X8
|
||||
0x0001, // BLOCK_8X4
|
||||
0x0001, // BLOCK_8X8
|
||||
0x0001, // BLOCK_8X16,
|
||||
0x0001, // BLOCK_16X8
|
||||
0x0001, // BLOCK_16X16
|
||||
0x0001, // BLOCK_16X32,
|
||||
0x0003, // BLOCK_32X16,
|
||||
0x0003, // BLOCK_32X32,
|
||||
0x0003, // BLOCK_32X64,
|
||||
0x000f, // BLOCK_64X32,
|
||||
0x000f, // BLOCK_64X64
|
||||
};
|
||||
|
||||
// 16 bit mask to shift and set for each uv prediction size
|
||||
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
|
||||
0x0001, // BLOCK_4X4
|
||||
0x0001, // BLOCK_4X8
|
||||
0x0001, // BLOCK_8X4
|
||||
0x0001, // BLOCK_8X8
|
||||
0x0001, // BLOCK_8X16,
|
||||
0x0001, // BLOCK_16X8
|
||||
0x0001, // BLOCK_16X16
|
||||
0x0011, // BLOCK_16X32,
|
||||
0x0003, // BLOCK_32X16,
|
||||
0x0033, // BLOCK_32X32,
|
||||
0x3333, // BLOCK_32X64,
|
||||
0x00ff, // BLOCK_64X32,
|
||||
0xffff, // BLOCK_64X64
|
||||
};
|
||||
static const uint16_t left_border_uv = 0x1111;
|
||||
static const uint16_t above_border_uv = 0x000f;
|
||||
|
||||
|
||||
static void lf_init_lut(loop_filter_info_n *lfi) {
|
||||
lfi->mode_lf_lut[DC_PRED] = 0;
|
||||
lfi->mode_lf_lut[D45_PRED] = 0;
|
||||
lfi->mode_lf_lut[D135_PRED] = 0;
|
||||
lfi->mode_lf_lut[D117_PRED] = 0;
|
||||
lfi->mode_lf_lut[D153_PRED] = 0;
|
||||
lfi->mode_lf_lut[D27_PRED] = 0;
|
||||
lfi->mode_lf_lut[D207_PRED] = 0;
|
||||
lfi->mode_lf_lut[D63_PRED] = 0;
|
||||
lfi->mode_lf_lut[V_PRED] = 0;
|
||||
lfi->mode_lf_lut[H_PRED] = 0;
|
||||
@@ -39,7 +243,7 @@ static void lf_init_lut(loop_filter_info_n *lfi) {
|
||||
lfi->mode_lf_lut[NEWMV] = 1;
|
||||
}
|
||||
|
||||
static void update_sharpness(loop_filter_info_n *const lfi, int sharpness_lvl) {
|
||||
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
|
||||
int lvl;
|
||||
|
||||
// For each possible value for the loop filter fill out limits
|
||||
@@ -61,8 +265,9 @@ static void update_sharpness(loop_filter_info_n *const lfi, int sharpness_lvl) {
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) {
|
||||
void vp9_loop_filter_init(VP9_COMMON *cm) {
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
struct loopfilter *lf = &cm->lf;
|
||||
int i;
|
||||
|
||||
// init limits for given sharpness
|
||||
@@ -77,16 +282,15 @@ void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) {
|
||||
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
}
|
||||
|
||||
void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
int default_filt_lvl) {
|
||||
void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
|
||||
int seg_id;
|
||||
// n_shift is the multiplier for lf_deltas
|
||||
// the multiplier is 1 for when filter_lvl is between 0 and 31;
|
||||
// 2 when filter_lvl is between 32 and 63
|
||||
const int n_shift = default_filt_lvl >> 5;
|
||||
loop_filter_info_n *const lfi = &cm->lf_info;
|
||||
struct loopfilter *const lf = &xd->lf;
|
||||
struct segmentation *const seg = &xd->seg;
|
||||
struct loopfilter *const lf = &cm->lf;
|
||||
struct segmentation *const seg = &cm->seg;
|
||||
|
||||
// update limits if sharpness has changed
|
||||
if (lf->last_sharpness_level != lf->sharpness_level) {
|
||||
@@ -98,7 +302,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;
|
||||
|
||||
// Set the baseline filter values for each segment
|
||||
if (vp9_segfeature_active(&xd->seg, seg_id, SEG_LVL_ALT_LF)) {
|
||||
if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
|
||||
const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
|
||||
lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
|
||||
? data
|
||||
@@ -108,7 +312,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
if (!lf->mode_ref_delta_enabled) {
|
||||
// we could get rid of this if we assume that deltas are set to
|
||||
// zero when not in use; encoder always uses deltas
|
||||
vpx_memset(lfi->lvl[seg_id][0], lvl_seg, 4 * 4);
|
||||
vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -124,9 +328,9 @@ void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||
}
|
||||
}
|
||||
|
||||
static int build_lfi(const loop_filter_info_n *const lfi_n,
|
||||
const MB_MODE_INFO *const mbmi,
|
||||
struct loop_filter_info *const lfi) {
|
||||
static int build_lfi(const loop_filter_info_n *lfi_n,
|
||||
const MB_MODE_INFO *mbmi,
|
||||
struct loop_filter_info *lfi) {
|
||||
const int seg = mbmi->segment_id;
|
||||
const int ref = mbmi->ref_frame[0];
|
||||
const int mode = lfi_n->mode_lf_lut[mbmi->mode];
|
||||
@@ -236,10 +440,360 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block_plane(VP9_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
const MODE_INFO *mi,
|
||||
int mi_row, int mi_col) {
|
||||
// This function ors into the current lfm structure, where to do loop
|
||||
// filters for the specific mi we are looking at. It uses information
|
||||
// including the block_size_type (32x16, 32x32, etc), the transform size,
|
||||
// whether there were any coefficients encoded, and the loop filter strength
|
||||
// block we are currently looking at. Shift is used to position the
|
||||
// 1's we produce.
|
||||
// TODO(JBB) Need another function for different resolution color..
|
||||
static void build_masks(const loop_filter_info_n *const lfi_n,
|
||||
const MODE_INFO *mi, const int shift_y,
|
||||
const int shift_uv,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
const BLOCK_SIZE block_size = mi->mbmi.sb_type;
|
||||
const TX_SIZE tx_size_y = mi->mbmi.tx_size;
|
||||
const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
|
||||
const int skip = mi->mbmi.skip_coeff;
|
||||
const int seg = mi->mbmi.segment_id;
|
||||
const int ref = mi->mbmi.ref_frame[0];
|
||||
const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
|
||||
const int filter_level = lfi_n->lvl[seg][ref][mode];
|
||||
uint64_t *left_y = &lfm->left_y[tx_size_y];
|
||||
uint64_t *above_y = &lfm->above_y[tx_size_y];
|
||||
uint64_t *int_4x4_y = &lfm->int_4x4_y;
|
||||
uint16_t *left_uv = &lfm->left_uv[tx_size_uv];
|
||||
uint16_t *above_uv = &lfm->above_uv[tx_size_uv];
|
||||
uint16_t *int_4x4_uv = &lfm->int_4x4_uv;
|
||||
|
||||
// If filter level is 0 we don't loop filter.
|
||||
if (!filter_level)
|
||||
return;
|
||||
|
||||
// These set 1 in the current block size for the block size edges.
|
||||
// For instance if the block size is 32x16, we'll set :
|
||||
// above = 1111
|
||||
// 0000
|
||||
// and
|
||||
// left = 1000
|
||||
// = 1000
|
||||
// NOTE : In this example the low bit is left most ( 1000 ) is stored as
|
||||
// 1, not 8...
|
||||
//
|
||||
// U and v set things on a 16 bit scale.
|
||||
//
|
||||
*above_y |= above_prediction_mask[block_size] << shift_y;
|
||||
*above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
|
||||
*left_y |= left_prediction_mask[block_size] << shift_y;
|
||||
*left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
|
||||
|
||||
// If the block has no coefficients and is not intra we skip applying
|
||||
// the loop filter on block edges.
|
||||
if (skip && ref > INTRA_FRAME)
|
||||
return;
|
||||
|
||||
// Here we are adding a mask for the transform size. The transform
|
||||
// size mask is set to be correct for a 64x64 prediction block size. We
|
||||
// mask to match the size of the block we are working on and then shift it
|
||||
// into place..
|
||||
*above_y |= (size_mask[block_size] &
|
||||
above_64x64_txform_mask[tx_size_y]) << shift_y;
|
||||
*above_uv |= (size_mask_uv[block_size] &
|
||||
above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
|
||||
|
||||
*left_y |= (size_mask[block_size] &
|
||||
left_64x64_txform_mask[tx_size_y]) << shift_y;
|
||||
*left_uv |= (size_mask_uv[block_size] &
|
||||
left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
|
||||
|
||||
// Here we are trying to determine what to do with the internal 4x4 block
|
||||
// boundaries. These differ from the 4x4 boundaries on the outside edge of
|
||||
// an 8x8 in that the internal ones can be skipped and don't depend on
|
||||
// the prediction block size.
|
||||
if (tx_size_y == TX_4X4) {
|
||||
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
|
||||
}
|
||||
if (tx_size_uv == TX_4X4) {
|
||||
*int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
|
||||
}
|
||||
}
|
||||
|
||||
// This function does the same thing as the one above with the exception that
|
||||
// it only affects the y masks. It exists because for blocks < 16x16 in size,
|
||||
// we only update u and v masks on the first block.
|
||||
static void build_y_mask(const loop_filter_info_n *const lfi_n,
|
||||
const MODE_INFO *mi, const int shift_y,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
const BLOCK_SIZE block_size = mi->mbmi.sb_type;
|
||||
const TX_SIZE tx_size_y = mi->mbmi.tx_size;
|
||||
const int skip = mi->mbmi.skip_coeff;
|
||||
const int seg = mi->mbmi.segment_id;
|
||||
const int ref = mi->mbmi.ref_frame[0];
|
||||
const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
|
||||
const int filter_level = lfi_n->lvl[seg][ref][mode];
|
||||
uint64_t *left_y = &lfm->left_y[tx_size_y];
|
||||
uint64_t *above_y = &lfm->above_y[tx_size_y];
|
||||
uint64_t *int_4x4_y = &lfm->int_4x4_y;
|
||||
|
||||
if (!filter_level)
|
||||
return;
|
||||
|
||||
*above_y |= above_prediction_mask[block_size] << shift_y;
|
||||
*left_y |= left_prediction_mask[block_size] << shift_y;
|
||||
|
||||
if (skip && ref > INTRA_FRAME)
|
||||
return;
|
||||
|
||||
*above_y |= (size_mask[block_size] &
|
||||
above_64x64_txform_mask[tx_size_y]) << shift_y;
|
||||
|
||||
*left_y |= (size_mask[block_size] &
|
||||
left_64x64_txform_mask[tx_size_y]) << shift_y;
|
||||
|
||||
if (tx_size_y == TX_4X4) {
|
||||
*int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
|
||||
}
|
||||
}
|
||||
|
||||
// This function sets up the bit masks for the entire 64x64 region represented
|
||||
// by mi_row, mi_col.
|
||||
// TODO(JBB): This function only works for yv12.
|
||||
static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
|
||||
MODE_INFO **mi_8x8, const int mode_info_stride,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
int idx_32, idx_16, idx_8;
|
||||
const loop_filter_info_n *const lfi_n = &cm->lf_info;
|
||||
MODE_INFO **mip = mi_8x8;
|
||||
MODE_INFO **mip2 = mi_8x8;
|
||||
|
||||
// These are offsets to the next mi in the 64x64 block. It is what gets
|
||||
// added to the mi ptr as we go through each loop. It helps us avoid
|
||||
// setting up special row and column counters for each index. The last step
|
||||
// brings us out back to the starting position.
|
||||
const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
|
||||
-(mode_info_stride << 2) - 4};
|
||||
const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
|
||||
-(mode_info_stride << 1) - 2};
|
||||
const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};
|
||||
|
||||
// Following variables represent shifts to position the current block
|
||||
// mask over the appropriate block. A shift of 36 to the left will move
|
||||
// the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
|
||||
// 4 columns to the appropriate spot.
|
||||
const int shift_32_y[] = {0, 4, 32, 36};
|
||||
const int shift_16_y[] = {0, 2, 16, 18};
|
||||
const int shift_8_y[] = {0, 1, 8, 9};
|
||||
const int shift_32_uv[] = {0, 2, 8, 10};
|
||||
const int shift_16_uv[] = {0, 1, 4, 5};
|
||||
int i;
|
||||
const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
|
||||
cm->mi_rows - mi_row : MI_BLOCK_SIZE);
|
||||
const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
|
||||
cm->mi_cols - mi_col : MI_BLOCK_SIZE);
|
||||
|
||||
vp9_zero(*lfm);
|
||||
|
||||
// TODO(jimbankoski): Try moving most of the following code into decode
|
||||
// loop and storing lfm in the mbmi structure so that we don't have to go
|
||||
// through the recursive loop structure multiple times.
|
||||
switch (mip[0]->mbmi.sb_type) {
|
||||
case BLOCK_64X64:
|
||||
build_masks(lfi_n, mip[0] , 0, 0, lfm);
|
||||
break;
|
||||
case BLOCK_64X32:
|
||||
build_masks(lfi_n, mip[0], 0, 0, lfm);
|
||||
mip2 = mip + mode_info_stride * 4;
|
||||
if (4 >= max_rows)
|
||||
break;
|
||||
build_masks(lfi_n, mip2[0], 32, 8, lfm);
|
||||
break;
|
||||
case BLOCK_32X64:
|
||||
build_masks(lfi_n, mip[0], 0, 0, lfm);
|
||||
mip2 = mip + 4;
|
||||
if (4 >= max_cols)
|
||||
break;
|
||||
build_masks(lfi_n, mip2[0], 4, 2, lfm);
|
||||
break;
|
||||
default:
|
||||
for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
|
||||
const int shift_y = shift_32_y[idx_32];
|
||||
const int shift_uv = shift_32_uv[idx_32];
|
||||
const int mi_32_col_offset = ((idx_32 & 1) << 2);
|
||||
const int mi_32_row_offset = ((idx_32 >> 1) << 2);
|
||||
if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
|
||||
continue;
|
||||
switch (mip[0]->mbmi.sb_type) {
|
||||
case BLOCK_32X32:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
break;
|
||||
case BLOCK_32X16:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
if (mi_32_row_offset + 2 >= max_rows)
|
||||
continue;
|
||||
mip2 = mip + mode_info_stride * 2;
|
||||
build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
|
||||
break;
|
||||
case BLOCK_16X32:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
if (mi_32_col_offset + 2 >= max_cols)
|
||||
continue;
|
||||
mip2 = mip + 2;
|
||||
build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
|
||||
break;
|
||||
default:
|
||||
for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
|
||||
const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
|
||||
const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
|
||||
const int mi_16_col_offset = mi_32_col_offset +
|
||||
((idx_16 & 1) << 1);
|
||||
const int mi_16_row_offset = mi_32_row_offset +
|
||||
((idx_16 >> 1) << 1);
|
||||
|
||||
if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
|
||||
continue;
|
||||
|
||||
switch (mip[0]->mbmi.sb_type) {
|
||||
case BLOCK_16X16:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
break;
|
||||
case BLOCK_16X8:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
if (mi_16_row_offset + 1 >= max_rows)
|
||||
continue;
|
||||
mip2 = mip + mode_info_stride;
|
||||
build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
|
||||
break;
|
||||
case BLOCK_8X16:
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
if (mi_16_col_offset +1 >= max_cols)
|
||||
continue;
|
||||
mip2 = mip + 1;
|
||||
build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);
|
||||
break;
|
||||
default: {
|
||||
const int shift_y = shift_32_y[idx_32] +
|
||||
shift_16_y[idx_16] +
|
||||
shift_8_y[0];
|
||||
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
|
||||
mip += offset[0];
|
||||
for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
|
||||
const int shift_y = shift_32_y[idx_32] +
|
||||
shift_16_y[idx_16] +
|
||||
shift_8_y[idx_8];
|
||||
const int mi_8_col_offset = mi_16_col_offset +
|
||||
((idx_8 & 1));
|
||||
const int mi_8_row_offset = mi_16_row_offset +
|
||||
((idx_8 >> 1));
|
||||
|
||||
if (mi_8_col_offset >= max_cols ||
|
||||
mi_8_row_offset >= max_rows)
|
||||
continue;
|
||||
build_y_mask(lfi_n, mip[0], shift_y, lfm);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
// The largest loopfilter we have is 16x16 so we use the 16x16 mask
|
||||
// for 32x32 transforms also.
|
||||
lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
|
||||
lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
|
||||
lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
|
||||
lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
|
||||
|
||||
// We do at least 8 tap filter on every 32x32 even if the transform size
|
||||
// is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
|
||||
// remove it from the 4x4.
|
||||
lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
|
||||
lfm->left_y[TX_4X4] &= ~left_border;
|
||||
lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
|
||||
lfm->above_y[TX_4X4] &= ~above_border;
|
||||
lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
|
||||
lfm->left_uv[TX_4X4] &= ~left_border_uv;
|
||||
lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
|
||||
lfm->above_uv[TX_4X4] &= ~above_border_uv;
|
||||
|
||||
// We do some special edge handling.
|
||||
if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
|
||||
const uint64_t rows = cm->mi_rows - mi_row;
|
||||
|
||||
// Each pixel inside the border gets a 1,
|
||||
const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
|
||||
const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);
|
||||
|
||||
// Remove values completely outside our border.
|
||||
for (i = 0; i < TX_32X32; i++) {
|
||||
lfm->left_y[i] &= mask_y;
|
||||
lfm->above_y[i] &= mask_y;
|
||||
lfm->left_uv[i] &= mask_uv;
|
||||
lfm->above_uv[i] &= mask_uv;
|
||||
}
|
||||
lfm->int_4x4_y &= mask_y;
|
||||
lfm->int_4x4_uv &= mask_uv;
|
||||
|
||||
// We don't apply a wide loop filter on the last uv block row. If set
|
||||
// apply the shorter one instead.
|
||||
if (rows == 1) {
|
||||
lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
|
||||
lfm->above_uv[TX_16X16] = 0;
|
||||
}
|
||||
if (rows == 5) {
|
||||
lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
|
||||
lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
|
||||
}
|
||||
}
|
||||
|
||||
if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
|
||||
const uint64_t columns = cm->mi_cols - mi_col;
|
||||
|
||||
// Each pixel inside the border gets a 1, the multiply copies the border
|
||||
// to where we need it.
|
||||
const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101;
|
||||
const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
|
||||
|
||||
// Internal edges are not applied on the last column of the image so
|
||||
// we mask 1 more for the internal edges
|
||||
const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
|
||||
|
||||
// Remove the bits outside the image edge.
|
||||
for (i = 0; i < TX_32X32; i++) {
|
||||
lfm->left_y[i] &= mask_y;
|
||||
lfm->above_y[i] &= mask_y;
|
||||
lfm->left_uv[i] &= mask_uv;
|
||||
lfm->above_uv[i] &= mask_uv;
|
||||
}
|
||||
lfm->int_4x4_y &= mask_y;
|
||||
lfm->int_4x4_uv &= mask_uv_int;
|
||||
|
||||
// We don't apply a wide loop filter on the last uv column. If set
|
||||
// apply the shorter one instead.
|
||||
if (columns == 1) {
|
||||
lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
|
||||
lfm->left_uv[TX_16X16] = 0;
|
||||
}
|
||||
if (columns == 5) {
|
||||
lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
|
||||
lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
|
||||
}
|
||||
}
|
||||
// We don't apply a loop filter on the first column in the image. Mask that out.
|
||||
if (mi_col == 0) {
|
||||
for (i = 0; i < TX_32X32; i++) {
|
||||
lfm->left_y[i] &= 0xfefefefefefefefe;
|
||||
lfm->left_uv[i] &= 0xeeee;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if CONFIG_NON420
|
||||
static void filter_block_plane_non420(VP9_COMMON *cm,
|
||||
struct macroblockd_plane *plane,
|
||||
MODE_INFO **mi_8x8,
|
||||
int mi_row, int mi_col) {
|
||||
const int ss_x = plane->subsampling_x;
|
||||
const int ss_y = plane->subsampling_y;
|
||||
const int row_step = 1 << ss_x;
|
||||
@@ -262,24 +816,25 @@ static void filter_block_plane(VP9_COMMON *const cm,
|
||||
|
||||
// Determine the vertical edges that need filtering
|
||||
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
|
||||
const int skip_this = mi[c].mbmi.mb_skip_coeff
|
||||
&& is_inter_block(&mi[c].mbmi);
|
||||
const MODE_INFO *mi = mi_8x8[c];
|
||||
const int skip_this = mi[0].mbmi.skip_coeff
|
||||
&& is_inter_block(&mi[0].mbmi);
|
||||
// left edge of current unit is block/partition edge -> no skip
|
||||
const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ?
|
||||
!(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
|
||||
const int block_edge_left = b_width_log2(mi[0].mbmi.sb_type) ?
|
||||
!(c & ((1 << (b_width_log2(mi[0].mbmi.sb_type)-1)) - 1)) : 1;
|
||||
const int skip_this_c = skip_this && !block_edge_left;
|
||||
// top edge of current unit is block/partition edge -> no skip
|
||||
const int block_edge_above = b_height_log2(mi[c].mbmi.sb_type) ?
|
||||
!(r & ((1 << (b_height_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
|
||||
const int block_edge_above = b_height_log2(mi[0].mbmi.sb_type) ?
|
||||
!(r & ((1 << (b_height_log2(mi[0].mbmi.sb_type)-1)) - 1)) : 1;
|
||||
const int skip_this_r = skip_this && !block_edge_above;
|
||||
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
|
||||
? get_uv_tx_size(&mi[c].mbmi)
|
||||
: mi[c].mbmi.txfm_size;
|
||||
? get_uv_tx_size(&mi[0].mbmi)
|
||||
: mi[0].mbmi.tx_size;
|
||||
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
|
||||
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
|
||||
|
||||
// Filter level can vary per MI
|
||||
if (!build_lfi(&cm->lf_info, &mi[c].mbmi, lfi[r] + (c >> ss_x)))
|
||||
if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
|
||||
continue;
|
||||
|
||||
// Build masks based on the transform size of each block
|
||||
@@ -338,7 +893,7 @@ static void filter_block_plane(VP9_COMMON *const cm,
|
||||
mask_4x4_c & border_mask,
|
||||
mask_4x4_int[r], lfi[r]);
|
||||
dst->buf += 8 * dst->stride;
|
||||
mi += row_step_stride;
|
||||
mi_8x8 += row_step_stride;
|
||||
}
|
||||
|
||||
// Now do horizontal pass
|
||||
@@ -355,33 +910,146 @@ static void filter_block_plane(VP9_COMMON *const cm,
|
||||
dst->buf += 8 * dst->stride;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Loop-filters one plane of the block region whose top-left mode-info position
// is (mi_row, mi_col), driven by the precomputed bit masks in lfm.
//
//   cm      - common state; supplies mi_rows/mi_cols, mode_info_stride, lf_info.
//   plane   - plane to filter; its subsampling, plane_type and dst buffer are
//             read, and dst->buf is advanced while filtering.
//   mi_8x8  - grid of MODE_INFO pointers, indexed by column for the current row.
//   lfm     - per-transform-size left/above masks plus interior 4x4 masks.
//
// Work is done in two passes over the rows: the first fills lfi[] through
// build_lfi() and calls filter_selectively_vert(), the second rewinds dst->buf
// and calls filter_selectively_horiz(). A plane_type of 0 selects the *_y
// masks, anything else selects the *_uv masks.
// Note: dst->buf is not rewound after the second pass.
static void filter_block_plane(VP9_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
MODE_INFO **mi_8x8,
|
||||
int mi_row, int mi_col,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
const int ss_x = plane->subsampling_x;
|
||||
const int ss_y = plane->subsampling_y;
// NOTE(review): row_step is derived from ss_x and col_step from ss_y, which
// looks swapped; it is harmless only when ss_x == ss_y - confirm.
const int row_step = 1 << ss_x;
|
||||
const int col_step = 1 << ss_y;
|
||||
const int row_step_stride = cm->mode_info_stride * row_step;
|
||||
struct buf_2d *const dst = &plane->dst;
// Remember the starting pointer so the horizontal pass can rewind to it.
|
||||
uint8_t* const dst0 = dst->buf;
|
||||
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
|
||||
struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
|
||||
int r, c;
// Bits per mask row: shift of 3 (8 bits) when ss_x is 0, 2 (4 bits) when 1.
|
||||
int row_shift = 3 - ss_x;
// 0xff when ss_x is 0, 0x0f when ss_x is 1.
|
||||
int row_mask = 0xff >> (ss_x << 2);
|
||||
|
||||
// Extracts the bits belonging to row r_sampled from a packed mask. Relies on
// a local r_sampled being in scope at the point of use.
#define MASK_ROW(value) ((value >> (r_sampled << row_shift)) & row_mask)
|
||||
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
|
||||
int r_sampled = r >> ss_x;
|
||||
|
||||
// Determine the vertical edges that need filtering
|
||||
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
|
||||
const MODE_INFO *mi = mi_8x8[c];
// Fill this column's filter info. The continue is the last statement of the
// loop body, so the return value of build_lfi() changes nothing here.
|
||||
if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
|
||||
continue;
|
||||
}
|
||||
if (!plane->plane_type) {
// Saved per row; only the non-luma branch of the horizontal pass reads it.
|
||||
mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_y);
|
||||
// Disable filtering on the leftmost column
|
||||
filter_selectively_vert(dst->buf, dst->stride,
|
||||
MASK_ROW(lfm->left_y[TX_16X16]),
|
||||
MASK_ROW(lfm->left_y[TX_8X8]),
|
||||
MASK_ROW(lfm->left_y[TX_4X4]),
|
||||
MASK_ROW(lfm->int_4x4_y),
|
||||
lfi[r]);
|
||||
} else {
|
||||
mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_uv);
|
||||
// Disable filtering on the leftmost column
|
||||
filter_selectively_vert(dst->buf, dst->stride,
|
||||
MASK_ROW(lfm->left_uv[TX_16X16]),
|
||||
MASK_ROW(lfm->left_uv[TX_8X8]),
|
||||
MASK_ROW(lfm->left_uv[TX_4X4]),
|
||||
MASK_ROW(lfm->int_4x4_uv),
|
||||
lfi[r]);
|
||||
}
// Step the destination down by 8 lines and the mode-info grid by one
// (sub-sampled) row.
|
||||
dst->buf += 8 * dst->stride;
|
||||
mi_8x8 += row_step_stride;
|
||||
}
|
||||
|
||||
// Now do horizontal pass
|
||||
dst->buf = dst0;
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
// With vertical subsampling, the interior 4x4 mask is dropped on the last
// mode-info row of the frame.
|
||||
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
|
||||
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
|
||||
int r_sampled = r >> ss_x;
|
||||
|
||||
// The "mi_row + r == 0" argument is non-zero only for the first row of the
// frame; presumably it lets the callee skip the frame's top edge - confirm
// against filter_selectively_horiz().
if (!plane->plane_type) {
|
||||
filter_selectively_horiz(dst->buf, dst->stride,
|
||||
MASK_ROW(lfm->above_y[TX_16X16]),
|
||||
MASK_ROW(lfm->above_y[TX_8X8]),
|
||||
MASK_ROW(lfm->above_y[TX_4X4]),
|
||||
MASK_ROW(lfm->int_4x4_y),
|
||||
mi_row + r == 0, lfi[r]);
|
||||
} else {
|
||||
filter_selectively_horiz(dst->buf, dst->stride,
|
||||
MASK_ROW(lfm->above_uv[TX_16X16]),
|
||||
MASK_ROW(lfm->above_uv[TX_8X8]),
|
||||
MASK_ROW(lfm->above_uv[TX_4X4]),
|
||||
mask_4x4_int_r,
|
||||
mi_row + r == 0, lfi[r]);
|
||||
}
|
||||
dst->buf += 8 * dst->stride;
|
||||
}
|
||||
#undef MASK_ROW
|
||||
}
|
||||
|
||||
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
|
||||
VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int start, int stop, int y_only) {
|
||||
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
|
||||
int mi_row, mi_col;
|
||||
LOOP_FILTER_MASK lfm;
|
||||
#if CONFIG_NON420
|
||||
int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
|
||||
xd->plane[1].subsampling_x == 1);
|
||||
#endif
|
||||
|
||||
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
|
||||
MODE_INFO* const mi = cm->mi + mi_row * cm->mode_info_stride;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
int plane;
|
||||
|
||||
setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
|
||||
|
||||
// TODO(JBB): Make setup_mask work for non 420.
|
||||
#if CONFIG_NON420
|
||||
if (use_420)
|
||||
#endif
|
||||
setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
|
||||
&lfm);
|
||||
|
||||
for (plane = 0; plane < num_planes; ++plane) {
|
||||
filter_block_plane(cm, &xd->plane[plane], mi + mi_col, mi_row, mi_col);
|
||||
#if CONFIG_NON420
|
||||
if (use_420)
|
||||
#endif
|
||||
filter_block_plane(cm, &xd->plane[plane], mi_8x8 + mi_col, mi_row,
|
||||
mi_col, &lfm);
|
||||
#if CONFIG_NON420
|
||||
else
|
||||
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
|
||||
mi_row, mi_col);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int frame_filter_level, int y_only) {
|
||||
int frame_filter_level,
|
||||
int y_only, int partial) {
|
||||
int start_mi_row, end_mi_row, mi_rows_to_filter;
|
||||
if (!frame_filter_level) return;
|
||||
vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
|
||||
start_mi_row = 0;
|
||||
mi_rows_to_filter = cm->mi_rows;
|
||||
if (partial && cm->mi_rows > 8) {
|
||||
start_mi_row = cm->mi_rows >> 1;
|
||||
start_mi_row &= 0xfffffff8;
|
||||
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
|
||||
}
|
||||
end_mi_row = start_mi_row + mi_rows_to_filter;
|
||||
vp9_loop_filter_frame_init(cm, frame_filter_level);
|
||||
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
|
||||
0, cm->mi_rows, y_only);
|
||||
start_mi_row, end_mi_row,
|
||||
y_only);
|
||||
}
|
||||
|
||||
int vp9_loop_filter_worker(void *arg1, void *arg2) {
|
||||
|
||||
@@ -22,6 +22,27 @@
|
||||
|
||||
#define SIMD_WIDTH 16
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 2
|
||||
|
||||
// Loop filter parameters: the filter and sharpness levels plus per-reference
// and per-mode level deltas. Each delta array has a matching last_* array of
// the same size.
// NOTE(review): the last_* members presumably hold the previously used values
// so that changes can be detected - confirm against the code that writes them.
struct loopfilter {
|
||||
int filter_level;
|
||||
|
||||
int sharpness_level;
|
||||
int last_sharpness_level;
|
||||
|
||||
// Flags for the delta arrays below; presumably "deltas in use" and "deltas
// changed" respectively - confirm.
uint8_t mode_ref_delta_enabled;
|
||||
uint8_t mode_ref_delta_update;
|
||||
|
||||
// 0 = Intra, Last, GF, ARF
|
||||
signed char ref_deltas[MAX_REF_LF_DELTAS];
|
||||
signed char last_ref_deltas[MAX_REF_LF_DELTAS];
|
||||
|
||||
// 0 = ZERO_MV, MV
|
||||
signed char mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
};
|
||||
|
||||
// Need to align this structure so when it is declared and
|
||||
// passed it can be loaded into vector registers.
|
||||
typedef struct {
|
||||
@@ -39,19 +60,17 @@ typedef struct {
|
||||
struct VP9Common;
|
||||
struct macroblockd;
|
||||
|
||||
void vp9_loop_filter_init(struct VP9Common *cm, struct loopfilter *lf);
|
||||
void vp9_loop_filter_init(struct VP9Common *cm);
|
||||
|
||||
// Update the loop filter for the current frame.
|
||||
// This should be called before vp9_loop_filter_rows(), vp9_loop_filter_frame()
|
||||
// calls this function directly.
|
||||
void vp9_loop_filter_frame_init(struct VP9Common *const cm,
|
||||
struct macroblockd *const xd,
|
||||
int default_filt_lvl);
|
||||
void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl);
|
||||
|
||||
void vp9_loop_filter_frame(struct VP9Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int filter_level,
|
||||
int y_only);
|
||||
int y_only, int partial);
|
||||
|
||||
// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
|
||||
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
@@ -36,7 +37,7 @@ static const int mode_2_counter[MB_MODE_COUNT] = {
|
||||
9, // D135_PRED
|
||||
9, // D117_PRED
|
||||
9, // D153_PRED
|
||||
9, // D27_PRED
|
||||
9, // D207_PRED
|
||||
9, // D63_PRED
|
||||
9, // TM_PRED
|
||||
0, // NEARESTMV
|
||||
@@ -70,33 +71,33 @@ static const int counter_to_context[19] = {
|
||||
BOTH_INTRA // 18
|
||||
};
|
||||
|
||||
static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = {
|
||||
static const MV mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
|
||||
// 4X4
|
||||
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
|
||||
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
|
||||
// 4X8
|
||||
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
|
||||
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
|
||||
// 8X4
|
||||
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
|
||||
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
|
||||
// 8X8
|
||||
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
|
||||
{{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
|
||||
// 8X16
|
||||
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
|
||||
// 16X8
|
||||
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
|
||||
// 16X8
|
||||
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
|
||||
// 16X16
|
||||
{{0, -1}, {-1, 0}, {1, -1}, {-1, 1}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}},
|
||||
{{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
|
||||
// 16X32
|
||||
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
|
||||
// 32X16
|
||||
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
|
||||
// 32X16
|
||||
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
|
||||
// 32X32
|
||||
{{1, -1}, {-1, 1}, {2, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}},
|
||||
{{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
|
||||
// 32X64
|
||||
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
|
||||
// 64X32
|
||||
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
|
||||
// 64X32
|
||||
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
|
||||
// 64X64
|
||||
{{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}}
|
||||
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
|
||||
};
|
||||
|
||||
static const int idx_n_column_to_subblock[4][2] = {
|
||||
@@ -121,78 +122,75 @@ static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
|
||||
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate,
|
||||
int check_sub_blocks, int which_mv,
|
||||
int search_col, int block_idx) {
|
||||
return (check_sub_blocks && candidate->mbmi.sb_type < BLOCK_8X8
|
||||
return check_sub_blocks && candidate->mbmi.sb_type < BLOCK_8X8
|
||||
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
|
||||
.as_mv[which_mv]
|
||||
: candidate->mbmi.mv[which_mv]);
|
||||
: candidate->mbmi.mv[which_mv];
|
||||
}
|
||||
|
||||
|
||||
// Performs mv sign inversion if indicated by the reference frame combination.
|
||||
static INLINE int_mv scale_mv(const MODE_INFO *candidate, const int which_mv,
|
||||
static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
|
||||
const MV_REFERENCE_FRAME this_ref_frame,
|
||||
const int *ref_sign_bias) {
|
||||
int_mv return_mv = candidate->mbmi.mv[which_mv];
|
||||
|
||||
// Sign inversion where appropriate.
|
||||
if (ref_sign_bias[candidate->mbmi.ref_frame[which_mv]] !=
|
||||
ref_sign_bias[this_ref_frame]) {
|
||||
return_mv.as_mv.row *= -1;
|
||||
return_mv.as_mv.col *= -1;
|
||||
int_mv mv = mbmi->mv[ref];
|
||||
if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
|
||||
mv.as_mv.row *= -1;
|
||||
mv.as_mv.col *= -1;
|
||||
}
|
||||
return return_mv;
|
||||
return mv;
|
||||
}
|
||||
|
||||
// This macro is used to add a motion vector to the mv_ref list if it isn't
|
||||
// already in the list. If it's the second motion vector it will also
|
||||
// skip all additional processing and jump to done!
|
||||
#define ADD_MV_REF_LIST(MV) \
|
||||
if (refmv_count) { \
|
||||
if ((MV).as_int != mv_ref_list[0].as_int) { \
|
||||
mv_ref_list[refmv_count] = (MV); \
|
||||
goto Done; \
|
||||
do { \
|
||||
if (refmv_count) { \
|
||||
if ((MV).as_int != mv_ref_list[0].as_int) { \
|
||||
mv_ref_list[refmv_count] = (MV); \
|
||||
goto Done; \
|
||||
} \
|
||||
} else { \
|
||||
mv_ref_list[refmv_count++] = (MV); \
|
||||
} \
|
||||
} else { \
|
||||
mv_ref_list[refmv_count++] = (MV); \
|
||||
}
|
||||
} while (0)
|
||||
|
||||
// If either reference frame is different, not INTRA, and they
|
||||
// are different from each other, scale and add the mv to our list.
|
||||
#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \
|
||||
if ((CANDIDATE)->mbmi.ref_frame[0] != ref_frame) { \
|
||||
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \
|
||||
} \
|
||||
if ((CANDIDATE)->mbmi.ref_frame[1] != ref_frame && \
|
||||
(CANDIDATE)->mbmi.ref_frame[1] > INTRA_FRAME && \
|
||||
(CANDIDATE)->mbmi.mv[1].as_int != (CANDIDATE)->mbmi.mv[0].as_int) { \
|
||||
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \
|
||||
}
|
||||
do { \
|
||||
if ((CANDIDATE)->ref_frame[0] != ref_frame) \
|
||||
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \
|
||||
if ((CANDIDATE)->ref_frame[1] != ref_frame && \
|
||||
has_second_ref(CANDIDATE) && \
|
||||
(CANDIDATE)->mv[1].as_int != (CANDIDATE)->mv[0].as_int) \
|
||||
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
// Checks that the given mi_row, mi_col and search point
|
||||
// are inside the borders of the tile.
|
||||
static INLINE int is_inside(int mi_col, int mi_row, int cur_tile_mi_col_start,
|
||||
const int mv_ref[2]) {
|
||||
// Check that the candidate is within the border. We only need to check
|
||||
// the left side because all the positive right side ones are for blocks that
|
||||
// are large enough to support the + value they have within their border.
|
||||
return !(mi_row + mv_ref[1] < 0 ||
|
||||
mi_col + mv_ref[0] < cur_tile_mi_col_start);
|
||||
static INLINE int is_inside(const VP9_COMMON *cm, int mi_col, int mi_row,
|
||||
const MV *mv) {
|
||||
return !(mi_row + mv->row < 0 ||
|
||||
mi_col + mv->col < cm->cur_tile_mi_col_start ||
|
||||
mi_row + mv->row >= cm->mi_rows ||
|
||||
mi_col + mv->col >= cm->cur_tile_mi_col_end);
|
||||
}
|
||||
|
||||
// This function searches the neighbourhood of a given MB/SB
|
||||
// to try and find candidate reference vectors.
|
||||
void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
|
||||
const MODE_INFO *lf_here,
|
||||
const MV_REFERENCE_FRAME ref_frame,
|
||||
int_mv *mv_ref_list, const int *ref_sign_bias,
|
||||
const int block_idx,
|
||||
const int mi_row, const int mi_col) {
|
||||
int idx;
|
||||
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
|
||||
int refmv_count = 0;
|
||||
const int (*mv_ref_search)[2] = mv_ref_blocks[mbmi->sb_type];
|
||||
const MODE_INFO *candidate;
|
||||
const int check_sub_blocks = block_idx >= 0;
|
||||
void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
MODE_INFO *mi, const MODE_INFO *prev_mi,
|
||||
MV_REFERENCE_FRAME ref_frame,
|
||||
int_mv *mv_ref_list,
|
||||
int block_idx,
|
||||
int mi_row, int mi_col) {
|
||||
const int *ref_sign_bias = cm->ref_frame_sign_bias;
|
||||
int i, refmv_count = 0;
|
||||
const MV *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
|
||||
const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
|
||||
int different_ref_found = 0;
|
||||
int context_counter = 0;
|
||||
|
||||
@@ -202,28 +200,27 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
|
||||
// The nearest 2 blocks are treated differently
|
||||
// if the size < 8x8 we get the mv from the bmi substructure,
|
||||
// and we also need to keep a mode count.
|
||||
for (idx = 0; idx < 2; ++idx) {
|
||||
const int *mv_ref = mv_ref_search[idx];
|
||||
for (i = 0; i < 2; ++i) {
|
||||
const MV *const mv_ref = &mv_ref_search[i];
|
||||
if (is_inside(cm, mi_col, mi_row, mv_ref)) {
|
||||
const int check_sub_blocks = block_idx >= 0;
|
||||
const MODE_INFO *const candidate_mi = xd->mi_8x8[mv_ref->col + mv_ref->row
|
||||
* xd->mode_info_stride];
|
||||
const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
|
||||
// Keep counts for entropy encoding.
|
||||
context_counter += mode_2_counter[candidate->mode];
|
||||
|
||||
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
|
||||
continue;
|
||||
|
||||
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
|
||||
|
||||
// Keep counts for entropy encoding.
|
||||
context_counter += mode_2_counter[candidate->mbmi.mode];
|
||||
|
||||
// Check if the candidate comes from the same reference frame.
|
||||
if (candidate->mbmi.ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 0,
|
||||
mv_ref[0], block_idx));
|
||||
different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
|
||||
} else {
|
||||
different_ref_found = 1;
|
||||
if (candidate->mbmi.ref_frame[1] == ref_frame) {
|
||||
// Add second motion vector if it has the same ref_frame.
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 1,
|
||||
mv_ref[0], block_idx));
|
||||
// Check if the candidate comes from the same reference frame.
|
||||
if (candidate->ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, check_sub_blocks, 0,
|
||||
mv_ref->col, block_idx));
|
||||
different_ref_found = candidate->ref_frame[1] != ref_frame;
|
||||
} else {
|
||||
if (candidate->ref_frame[1] == ref_frame)
|
||||
// Add second motion vector if it has the same ref_frame.
|
||||
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, check_sub_blocks, 1,
|
||||
mv_ref->col, block_idx));
|
||||
different_ref_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -231,65 +228,59 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
|
||||
// Check the rest of the neighbors in much the same way
|
||||
// as before except we don't need to keep track of sub blocks or
|
||||
// mode counts.
|
||||
for (; idx < MVREF_NEIGHBOURS; ++idx) {
|
||||
const int *mv_ref = mv_ref_search[idx];
|
||||
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
|
||||
continue;
|
||||
for (; i < MVREF_NEIGHBOURS; ++i) {
|
||||
const MV *const mv_ref = &mv_ref_search[i];
|
||||
if (is_inside(cm, mi_col, mi_row, mv_ref)) {
|
||||
const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col +
|
||||
mv_ref->row
|
||||
* xd->mode_info_stride]->mbmi;
|
||||
|
||||
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
|
||||
|
||||
if (candidate->mbmi.ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(candidate->mbmi.mv[0]);
|
||||
different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
|
||||
} else {
|
||||
different_ref_found = 1;
|
||||
if (candidate->mbmi.ref_frame[1] == ref_frame) {
|
||||
ADD_MV_REF_LIST(candidate->mbmi.mv[1]);
|
||||
if (candidate->ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(candidate->mv[0]);
|
||||
different_ref_found = candidate->ref_frame[1] != ref_frame;
|
||||
} else {
|
||||
if (candidate->ref_frame[1] == ref_frame)
|
||||
ADD_MV_REF_LIST(candidate->mv[1]);
|
||||
different_ref_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check the last frame's mode and mv info.
|
||||
if (lf_here != NULL) {
|
||||
if (lf_here->mbmi.ref_frame[0] == ref_frame) {
|
||||
ADD_MV_REF_LIST(lf_here->mbmi.mv[0]);
|
||||
} else if (lf_here->mbmi.ref_frame[1] == ref_frame) {
|
||||
ADD_MV_REF_LIST(lf_here->mbmi.mv[1]);
|
||||
}
|
||||
if (prev_mbmi) {
|
||||
if (prev_mbmi->ref_frame[0] == ref_frame)
|
||||
ADD_MV_REF_LIST(prev_mbmi->mv[0]);
|
||||
else if (prev_mbmi->ref_frame[1] == ref_frame)
|
||||
ADD_MV_REF_LIST(prev_mbmi->mv[1]);
|
||||
}
|
||||
|
||||
// Since we couldn't find 2 mvs from the same reference frame
|
||||
// go back through the neighbors and find motion vectors from
|
||||
// different reference frames.
|
||||
if (different_ref_found) {
|
||||
for (idx = 0; idx < MVREF_NEIGHBOURS; ++idx) {
|
||||
const int *mv_ref = mv_ref_search[idx];
|
||||
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
|
||||
continue;
|
||||
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
|
||||
const MV *mv_ref = &mv_ref_search[i];
|
||||
if (is_inside(cm, mi_col, mi_row, mv_ref)) {
|
||||
const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col +
|
||||
mv_ref->row
|
||||
* xd->mode_info_stride]->mbmi;
|
||||
|
||||
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
|
||||
|
||||
// If the candidate is INTRA we don't want to consider its mv.
|
||||
if (!is_inter_block(&candidate->mbmi))
|
||||
continue;
|
||||
|
||||
IF_DIFF_REF_FRAME_ADD_MV(candidate);
|
||||
// If the candidate is INTRA we don't want to consider its mv.
|
||||
if (is_inter_block(candidate))
|
||||
IF_DIFF_REF_FRAME_ADD_MV(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Since we still don't have a candidate we'll try the last frame.
|
||||
if (lf_here != NULL && is_inter_block(&lf_here->mbmi)) {
|
||||
IF_DIFF_REF_FRAME_ADD_MV(lf_here);
|
||||
}
|
||||
if (prev_mbmi && is_inter_block(prev_mbmi))
|
||||
IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi);
|
||||
|
||||
Done:
|
||||
|
||||
mbmi->mb_mode_context[ref_frame] = counter_to_context[context_counter];
|
||||
mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
|
||||
|
||||
// Clamp vectors
|
||||
for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
|
||||
clamp_mv_ref(&mv_ref_list[idx].as_mv, xd);
|
||||
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
|
||||
clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
|
||||
}
|
||||
|
||||
#undef ADD_MV_REF_LIST
|
||||
#undef IF_DIFF_REF_FRAME_ADD_MV
|
||||
|
||||
@@ -14,27 +14,20 @@
|
||||
#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
|
||||
#define VP9_COMMON_VP9_MVREF_COMMON_H_
|
||||
|
||||
void vp9_find_mv_refs_idx(VP9_COMMON *cm,
|
||||
MACROBLOCKD *xd,
|
||||
MODE_INFO *here,
|
||||
const MODE_INFO *lf_here,
|
||||
const MV_REFERENCE_FRAME ref_frame,
|
||||
void vp9_find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
MODE_INFO *mi, const MODE_INFO *prev_mi,
|
||||
MV_REFERENCE_FRAME ref_frame,
|
||||
int_mv *mv_ref_list,
|
||||
const int *ref_sign_bias,
|
||||
const int block_idx,
|
||||
const int mi_row,
|
||||
const int mi_col);
|
||||
int block_idx,
|
||||
int mi_row, int mi_col);
|
||||
|
||||
static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
|
||||
MACROBLOCKD *xd,
|
||||
MODE_INFO *here,
|
||||
MODE_INFO *lf_here,
|
||||
static INLINE void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||
MODE_INFO *mi, const MODE_INFO *prev_mi,
|
||||
MV_REFERENCE_FRAME ref_frame,
|
||||
int_mv *mv_ref_list,
|
||||
int *ref_sign_bias,
|
||||
int mi_row, int mi_col) {
|
||||
vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame,
|
||||
mv_ref_list, ref_sign_bias, -1, mi_row, mi_col);
|
||||
vp9_find_mv_refs_idx(cm, xd, mi, prev_mi, ref_frame,
|
||||
mv_ref_list, -1, mi_row, mi_col);
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_MVREF_COMMON_H_
|
||||
|
||||
@@ -46,7 +46,8 @@ extern "C"
|
||||
typedef enum {
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3,
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
@@ -130,6 +131,8 @@ extern "C"
|
||||
// END DATARATE CONTROL OPTIONS
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// Spatial scalability
|
||||
int ss_number_layers;
|
||||
|
||||
// These parameters aren't to be used in the final build; don't use!!!
|
||||
int play_alternate;
|
||||
@@ -210,6 +213,13 @@ extern "C"
|
||||
int vp9_set_internal_size(VP9_PTR comp,
|
||||
VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
|
||||
|
||||
int vp9_set_size_literal(VP9_PTR comp, unsigned int width,
|
||||
unsigned int height);
|
||||
|
||||
int vp9_switch_layer(VP9_PTR comp, int layer);
|
||||
|
||||
void vp9_set_svc(VP9_PTR comp, int use_svc);
|
||||
|
||||
int vp9_get_quantizer(VP9_PTR c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#include "vp9/common/vp9_entropymode.h"
|
||||
#include "vp9/common/vp9_quant_common.h"
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
#if CONFIG_VP9_POSTPROC
|
||||
#include "vp9/common/vp9_postproc.h"
|
||||
#endif
|
||||
|
||||
@@ -38,14 +38,14 @@
|
||||
#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2)
|
||||
|
||||
typedef struct frame_contexts {
|
||||
vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES - 1];
|
||||
vp9_prob uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
|
||||
vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
|
||||
vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
|
||||
vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
|
||||
[PARTITION_TYPES - 1];
|
||||
vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
|
||||
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
|
||||
[VP9_SWITCHABLE_FILTERS - 1];
|
||||
vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][VP9_INTER_MODES - 1];
|
||||
vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
|
||||
[SWITCHABLE_FILTERS - 1];
|
||||
vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
|
||||
vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
|
||||
vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
|
||||
vp9_prob single_ref_prob[REF_CONTEXTS][2];
|
||||
@@ -56,15 +56,15 @@ typedef struct frame_contexts {
|
||||
} FRAME_CONTEXT;
|
||||
|
||||
typedef struct {
|
||||
unsigned int y_mode[BLOCK_SIZE_GROUPS][VP9_INTRA_MODES];
|
||||
unsigned int uv_mode[VP9_INTRA_MODES][VP9_INTRA_MODES];
|
||||
unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
|
||||
unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
|
||||
unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
|
||||
vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
|
||||
unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
|
||||
[COEF_BANDS][PREV_COEF_CONTEXTS];
|
||||
unsigned int switchable_interp[VP9_SWITCHABLE_FILTERS + 1]
|
||||
[VP9_SWITCHABLE_FILTERS];
|
||||
unsigned int inter_mode[INTER_MODE_CONTEXTS][VP9_INTER_MODES];
|
||||
unsigned int switchable_interp[SWITCHABLE_FILTERS + 1]
|
||||
[SWITCHABLE_FILTERS];
|
||||
unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
|
||||
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
|
||||
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
|
||||
unsigned int single_ref[REF_CONTEXTS][2][2];
|
||||
@@ -164,6 +164,10 @@ typedef struct VP9Common {
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
|
||||
MODE_INFO **mi_grid_base;
|
||||
MODE_INFO **mi_grid_visible;
|
||||
MODE_INFO **prev_mi_grid_base;
|
||||
MODE_INFO **prev_mi_grid_visible;
|
||||
|
||||
// Persistent mb segment id map used in prediction.
|
||||
unsigned char *last_frame_seg_map;
|
||||
@@ -176,6 +180,9 @@ typedef struct VP9Common {
|
||||
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
|
||||
struct loopfilter lf;
|
||||
struct segmentation seg;
|
||||
|
||||
/* Y,U,V */
|
||||
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
|
||||
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16];
|
||||
@@ -198,7 +205,7 @@ typedef struct VP9Common {
|
||||
unsigned int current_video_frame;
|
||||
int version;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
#if CONFIG_VP9_POSTPROC
|
||||
struct postproc_state postproc_state;
|
||||
#endif
|
||||
|
||||
@@ -231,7 +238,19 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
|
||||
}
|
||||
|
||||
static int mi_cols_aligned_to_sb(int n_mis) {
|
||||
return ALIGN_POWER_OF_TWO(n_mis, LOG2_MI_BLOCK_SIZE);
|
||||
return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
|
||||
}
|
||||
|
||||
// Points every plane's above_context / left_context in xd at the entries of
// cm->above_context / cm->left_context that correspond to (mi_row, mi_col).
static INLINE void set_skip_context(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col) {
// Two context entries per mode-info unit (presumably one per 4x4 column -
// confirm).
|
||||
const int above_idx = mi_col * 2;
// "& 15" wraps the row index so it stays inside the 16-entry left_context
// array.
|
||||
const int left_idx = (mi_row * 2) & 15;
|
||||
int i;
|
||||
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||
struct macroblockd_plane *const pd = &xd->plane[i];
// Scale the indices down by the plane's subsampling before offsetting.
|
||||
pd->above_context = cm->above_context[i] + (above_idx >> pd->subsampling_x);
|
||||
pd->left_context = cm->left_context[i] + (left_idx >> pd->subsampling_y);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
@@ -240,25 +259,20 @@ static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
|
||||
}
|
||||
|
||||
static int check_bsize_coverage(VP9_COMMON *cm, int mi_row, int mi_col,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
int bsl = mi_width_log2(bsize), bs = 1 << bsl;
|
||||
int ms = bs / 2;
|
||||
// return the node index in the prob tree for binary coding
|
||||
static int check_bsize_coverage(int bs, int mi_rows, int mi_cols,
|
||||
int mi_row, int mi_col) {
|
||||
const int r = (mi_row + bs < mi_rows);
|
||||
const int c = (mi_col + bs < mi_cols);
|
||||
|
||||
if ((mi_row + ms < cm->mi_rows) && (mi_col + ms < cm->mi_cols))
|
||||
if (r && c)
|
||||
return 0;
|
||||
|
||||
// frame width/height are multiples of 8, hence 8x8 block should always
|
||||
// pass the above check
|
||||
assert(bsize > BLOCK_8X8);
|
||||
if (c && !r)
|
||||
return 1; // only allow horizontal/split partition types
|
||||
|
||||
// return the node index in the prob tree for binary coding
|
||||
// only allow horizontal/split partition types
|
||||
if ((mi_col + ms < cm->mi_cols) && (mi_row + ms >= cm->mi_rows))
|
||||
return 1;
|
||||
// only allow vertical/split partition types
|
||||
if ((mi_row + ms < cm->mi_rows) && (mi_col + ms >= cm->mi_cols))
|
||||
return 2;
|
||||
if (r && !c)
|
||||
return 2; // only allow vertical/split partition types
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
|
||||
{ RGB_TO_YUV(0xCC33FF) }, /* Magenta */
|
||||
};
|
||||
|
||||
static const unsigned char B_PREDICTION_MODE_colors[VP9_INTRA_MODES][3] = {
|
||||
static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = {
|
||||
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
|
||||
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
|
||||
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
|
||||
@@ -630,23 +630,21 @@ static void constrain_line(int x0, int *x1, int y0, int *y1,
|
||||
}
|
||||
}
|
||||
|
||||
int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
struct loopfilter *lf,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
vp9_ppflags_t *ppflags) {
|
||||
int q = lf->filter_level * 10 / 6;
|
||||
int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) {
|
||||
int q = cm->lf.filter_level * 10 / 6;
|
||||
int flags = ppflags->post_proc_flag;
|
||||
int deblock_level = ppflags->deblocking_level;
|
||||
int noise_level = ppflags->noise_level;
|
||||
|
||||
if (!oci->frame_to_show)
|
||||
if (!cm->frame_to_show)
|
||||
return -1;
|
||||
|
||||
if (q > 63)
|
||||
q = 63;
|
||||
|
||||
if (!flags) {
|
||||
*dest = *oci->frame_to_show;
|
||||
*dest = *cm->frame_to_show;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -655,52 +653,52 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
#endif
|
||||
|
||||
if (flags & VP9D_DEMACROBLOCK) {
|
||||
deblock_and_de_macro_block(oci->frame_to_show, &oci->post_proc_buffer,
|
||||
deblock_and_de_macro_block(cm->frame_to_show, &cm->post_proc_buffer,
|
||||
q + (deblock_level - 5) * 10, 1, 0);
|
||||
} else if (flags & VP9D_DEBLOCK) {
|
||||
vp9_deblock(oci->frame_to_show, &oci->post_proc_buffer, q);
|
||||
vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer, q);
|
||||
} else {
|
||||
vp8_yv12_copy_frame(oci->frame_to_show, &oci->post_proc_buffer);
|
||||
vp8_yv12_copy_frame(cm->frame_to_show, &cm->post_proc_buffer);
|
||||
}
|
||||
|
||||
if (flags & VP9D_ADDNOISE) {
|
||||
if (oci->postproc_state.last_q != q
|
||||
|| oci->postproc_state.last_noise != noise_level) {
|
||||
fillrd(&oci->postproc_state, 63 - q, noise_level);
|
||||
if (cm->postproc_state.last_q != q
|
||||
|| cm->postproc_state.last_noise != noise_level) {
|
||||
fillrd(&cm->postproc_state, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp9_plane_add_noise(oci->post_proc_buffer.y_buffer,
|
||||
oci->postproc_state.noise,
|
||||
oci->postproc_state.blackclamp,
|
||||
oci->postproc_state.whiteclamp,
|
||||
oci->postproc_state.bothclamp,
|
||||
oci->post_proc_buffer.y_width,
|
||||
oci->post_proc_buffer.y_height,
|
||||
oci->post_proc_buffer.y_stride);
|
||||
vp9_plane_add_noise(cm->post_proc_buffer.y_buffer,
|
||||
cm->postproc_state.noise,
|
||||
cm->postproc_state.blackclamp,
|
||||
cm->postproc_state.whiteclamp,
|
||||
cm->postproc_state.bothclamp,
|
||||
cm->post_proc_buffer.y_width,
|
||||
cm->post_proc_buffer.y_height,
|
||||
cm->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
#if 0 && CONFIG_POSTPROC_VISUALIZER
|
||||
if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
|
||||
char message[512];
|
||||
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(oci->frame_type == KEY_FRAME),
|
||||
oci->refresh_golden_frame,
|
||||
oci->base_qindex,
|
||||
oci->filter_level,
|
||||
(cm->frame_type == KEY_FRAME),
|
||||
cm->refresh_golden_frame,
|
||||
cm->base_qindex,
|
||||
cm->filter_level,
|
||||
flags,
|
||||
oci->mb_cols, oci->mb_rows);
|
||||
vp9_blit_text(message, oci->post_proc_buffer.y_buffer,
|
||||
oci->post_proc_buffer.y_stride);
|
||||
cm->mb_cols, cm->mb_rows);
|
||||
vp9_blit_text(message, cm->post_proc_buffer.y_buffer,
|
||||
cm->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP9D_DEBUG_TXT_MBLK_MODES) {
|
||||
int i, j;
|
||||
uint8_t *y_ptr;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG *post = &cm->post_proc_buffer;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_index = 0;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
MODE_INFO *mi = cm->mi;
|
||||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
@@ -725,11 +723,11 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
if (flags & VP9D_DEBUG_TXT_DC_DIFF) {
|
||||
int i, j;
|
||||
uint8_t *y_ptr;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG *post = &cm->post_proc_buffer;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_index = 0;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
MODE_INFO *mi = cm->mi;
|
||||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
@@ -739,9 +737,9 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
char zz[4];
|
||||
int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED &&
|
||||
mi[mb_index].mbmi.mode != SPLITMV &&
|
||||
mi[mb_index].mbmi.mb_skip_coeff);
|
||||
mi[mb_index].mbmi.skip_coeff);
|
||||
|
||||
if (oci->frame_type == KEY_FRAME)
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
sprintf(zz, "a");
|
||||
else
|
||||
sprintf(zz, "%c", dc_diff + '0');
|
||||
@@ -761,19 +759,19 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
char message[512];
|
||||
snprintf(message, sizeof(message),
|
||||
"Bitrate: %10.2f framerate: %10.2f ",
|
||||
oci->bitrate, oci->framerate);
|
||||
vp9_blit_text(message, oci->post_proc_buffer.y_buffer,
|
||||
oci->post_proc_buffer.y_stride);
|
||||
cm->bitrate, cm->framerate);
|
||||
vp9_blit_text(message, cm->post_proc_buffer.y_buffer,
|
||||
cm->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
/* Draw motion vectors */
|
||||
if ((flags & VP9D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) {
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG *post = &cm->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
uint8_t *y_buffer = oci->post_proc_buffer.y_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
uint8_t *y_buffer = cm->post_proc_buffer.y_buffer;
|
||||
int y_stride = cm->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = cm->mi;
|
||||
int x0, y0;
|
||||
|
||||
for (y0 = 0; y0 < height; y0 += 16) {
|
||||
@@ -882,7 +880,7 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (mi->mbmi.mode >= NEARESTMV) {
|
||||
} else if (is_inter_mode(mi->mbmi.mode)) {
|
||||
MV *mv = &mi->mbmi.mv.as_mv;
|
||||
const int lx0 = x0 + 8;
|
||||
const int ly0 = y0 + 8;
|
||||
@@ -910,14 +908,14 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
if ((flags & VP9D_DEBUG_CLR_BLK_MODES)
|
||||
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) {
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG *post = &cm->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
uint8_t *y_ptr = oci->post_proc_buffer.y_buffer;
|
||||
uint8_t *u_ptr = oci->post_proc_buffer.u_buffer;
|
||||
uint8_t *v_ptr = oci->post_proc_buffer.v_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
uint8_t *y_ptr = cm->post_proc_buffer.y_buffer;
|
||||
uint8_t *u_ptr = cm->post_proc_buffer.u_buffer;
|
||||
uint8_t *v_ptr = cm->post_proc_buffer.v_buffer;
|
||||
int y_stride = cm->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = cm->mi;
|
||||
|
||||
for (y = 0; y < height; y += 16) {
|
||||
for (x = 0; x < width; x += 16) {
|
||||
@@ -975,14 +973,14 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
if ((flags & VP9D_DEBUG_CLR_FRM_REF_BLKS) &&
|
||||
ppflags->display_ref_frame_flag) {
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG *post = &cm->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
uint8_t *y_ptr = oci->post_proc_buffer.y_buffer;
|
||||
uint8_t *u_ptr = oci->post_proc_buffer.u_buffer;
|
||||
uint8_t *v_ptr = oci->post_proc_buffer.v_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
uint8_t *y_ptr = cm->post_proc_buffer.y_buffer;
|
||||
uint8_t *u_ptr = cm->post_proc_buffer.u_buffer;
|
||||
uint8_t *v_ptr = cm->post_proc_buffer.v_buffer;
|
||||
int y_stride = cm->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = cm->mi;
|
||||
|
||||
for (y = 0; y < height; y += 16) {
|
||||
for (x = 0; x < width; x += 16) {
|
||||
@@ -1008,12 +1006,13 @@ int vp9_post_proc_frame(struct VP9Common *oci,
|
||||
}
|
||||
#endif
|
||||
|
||||
*dest = oci->post_proc_buffer;
|
||||
*dest = cm->post_proc_buffer;
|
||||
|
||||
/* handle problem with extending borders */
|
||||
dest->y_width = oci->width;
|
||||
dest->y_height = oci->height;
|
||||
dest->uv_height = dest->y_height / 2;
|
||||
dest->y_width = cm->width;
|
||||
dest->y_height = cm->height;
|
||||
dest->uv_width = dest->y_width >> cm->subsampling_x;
|
||||
dest->uv_height = dest->y_height >> cm->subsampling_y;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ struct postproc_state {
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
#include "vp9/common/vp9_ppflags.h"
|
||||
|
||||
int vp9_post_proc_frame(struct VP9Common *oci, struct loopfilter *lf,
|
||||
int vp9_post_proc_frame(struct VP9Common *cm,
|
||||
YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags);
|
||||
|
||||
void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
|
||||
|
||||
@@ -18,48 +18,49 @@
|
||||
|
||||
// Returns a context number for the given MB prediction signal
|
||||
unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
// Note:
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
// The prediction flags in these dummy entries are initialised to 0.
|
||||
// left
|
||||
const int left_mv_pred = is_inter_mode(left_mbmi->mode);
|
||||
const int left_mv_pred = left_in_image ? is_inter_mode(left_mi->mbmi.mode)
|
||||
: 0;
|
||||
const int left_interp = left_in_image && left_mv_pred
|
||||
? left_mbmi->interp_filter
|
||||
: VP9_SWITCHABLE_FILTERS;
|
||||
? left_mi->mbmi.interp_filter
|
||||
: SWITCHABLE_FILTERS;
|
||||
|
||||
// above
|
||||
const int above_mv_pred = is_inter_mode(above_mbmi->mode);
|
||||
const int above_mv_pred = above_in_image ? is_inter_mode(above_mi->mbmi.mode)
|
||||
: 0;
|
||||
const int above_interp = above_in_image && above_mv_pred
|
||||
? above_mbmi->interp_filter
|
||||
: VP9_SWITCHABLE_FILTERS;
|
||||
|
||||
? above_mi->mbmi.interp_filter
|
||||
: SWITCHABLE_FILTERS;
|
||||
|
||||
if (left_interp == above_interp)
|
||||
return left_interp;
|
||||
else if (left_interp == VP9_SWITCHABLE_FILTERS &&
|
||||
above_interp != VP9_SWITCHABLE_FILTERS)
|
||||
else if (left_interp == SWITCHABLE_FILTERS &&
|
||||
above_interp != SWITCHABLE_FILTERS)
|
||||
return above_interp;
|
||||
else if (left_interp != VP9_SWITCHABLE_FILTERS &&
|
||||
above_interp == VP9_SWITCHABLE_FILTERS)
|
||||
else if (left_interp != SWITCHABLE_FILTERS &&
|
||||
above_interp == SWITCHABLE_FILTERS)
|
||||
return left_interp;
|
||||
else
|
||||
return VP9_SWITCHABLE_FILTERS;
|
||||
return SWITCHABLE_FILTERS;
|
||||
}
|
||||
// Returns a context number for the given MB prediction signal
|
||||
unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const int left_intra = !is_inter_block(left_mbmi);
|
||||
const int above_intra = !is_inter_block(above_mbmi);
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
|
||||
const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
|
||||
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
@@ -80,35 +81,35 @@ unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
|
||||
unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
|
||||
const MACROBLOCKD *xd) {
|
||||
int pred_context;
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-cm->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
// Note:
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
// The prediction flags in these dummy entries are initialised to 0.
|
||||
if (above_in_image && left_in_image) { // both edges available
|
||||
if (above_mbmi->ref_frame[1] <= INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
|
||||
// neither edge uses comp pred (0/1)
|
||||
pred_context = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
|
||||
(left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
|
||||
else if (above_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
else if (!has_second_ref(above_mbmi))
|
||||
// one of two edges uses comp pred (2/3)
|
||||
pred_context = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
|
||||
above_mbmi->ref_frame[0] == INTRA_FRAME);
|
||||
else if (left_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
!is_inter_block(above_mbmi));
|
||||
else if (!has_second_ref(left_mbmi))
|
||||
// one of two edges uses comp pred (2/3)
|
||||
pred_context = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME);
|
||||
!is_inter_block(left_mbmi));
|
||||
else // both edges use comp pred (4)
|
||||
pred_context = 4;
|
||||
} else if (above_in_image || left_in_image) { // one edge available
|
||||
const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
if (!has_second_ref(edge_mbmi))
|
||||
// edge does not use comp pred (0/1)
|
||||
pred_context = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
|
||||
else
|
||||
@@ -125,11 +126,14 @@ unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
|
||||
unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
|
||||
const MACROBLOCKD *xd) {
|
||||
int pred_context;
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-cm->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-cm->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
|
||||
const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
|
||||
// Note:
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
@@ -138,22 +142,19 @@ unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
|
||||
const int var_ref_idx = !fix_ref_idx;
|
||||
|
||||
if (above_in_image && left_in_image) { // both edges available
|
||||
if (above_mbmi->ref_frame[0] == INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) { // intra/intra (2)
|
||||
if (above_intra && left_intra) { // intra/intra (2)
|
||||
pred_context = 2;
|
||||
} else if (above_mbmi->ref_frame[0] == INTRA_FRAME ||
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) { // intra/inter
|
||||
const MB_MODE_INFO *edge_mbmi = above_mbmi->ref_frame[0] == INTRA_FRAME ?
|
||||
left_mbmi : above_mbmi;
|
||||
} else if (above_intra || left_intra) { // intra/inter
|
||||
const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[1] <= INTRA_FRAME) // single pred (1/3)
|
||||
if (!has_second_ref(edge_mbmi)) // single pred (1/3)
|
||||
pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
|
||||
else // comp pred (1/3)
|
||||
pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx]
|
||||
!= cm->comp_var_ref[1]);
|
||||
} else { // inter/inter
|
||||
int l_sg = left_mbmi->ref_frame[1] <= INTRA_FRAME;
|
||||
int a_sg = above_mbmi->ref_frame[1] <= INTRA_FRAME;
|
||||
const int l_sg = !has_second_ref(left_mbmi);
|
||||
const int a_sg = !has_second_ref(above_mbmi);
|
||||
MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
|
||||
: above_mbmi->ref_frame[var_ref_idx];
|
||||
MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
|
||||
@@ -187,13 +188,15 @@ unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
|
||||
} else if (above_in_image || left_in_image) { // one edge available
|
||||
const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[0] == INTRA_FRAME)
|
||||
if (!is_inter_block(edge_mbmi)) {
|
||||
pred_context = 2;
|
||||
else if (edge_mbmi->ref_frame[1] > INTRA_FRAME)
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx]
|
||||
} else {
|
||||
if (has_second_ref(edge_mbmi))
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx]
|
||||
!= cm->comp_var_ref[1]);
|
||||
else
|
||||
pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
|
||||
else
|
||||
pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
|
||||
}
|
||||
} else { // no edges available (2)
|
||||
pred_context = 2;
|
||||
}
|
||||
@@ -203,91 +206,91 @@ unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
|
||||
}
|
||||
unsigned char vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
|
||||
int pred_context;
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
|
||||
const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
|
||||
// Note:
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
// The prediction flags in these dummy entries are initialised to 0.
|
||||
if (above_in_image && left_in_image) { // both edges available
|
||||
if (above_mbmi->ref_frame[0] == INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) {
|
||||
if (above_intra && left_intra) { // intra/intra
|
||||
pred_context = 2;
|
||||
} else if (above_mbmi->ref_frame[0] == INTRA_FRAME ||
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) {
|
||||
const MB_MODE_INFO *edge_mbmi = above_mbmi->ref_frame[0] == INTRA_FRAME ?
|
||||
left_mbmi : above_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
} else if (above_intra || left_intra) { // intra/inter or inter/intra
|
||||
const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
|
||||
if (!has_second_ref(edge_mbmi))
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
|
||||
else
|
||||
pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
edge_mbmi->ref_frame[1] == LAST_FRAME);
|
||||
} else if (above_mbmi->ref_frame[1] <= INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[1] <= INTRA_FRAME) {
|
||||
pred_context = 2 * (above_mbmi->ref_frame[0] == LAST_FRAME) +
|
||||
2 * (left_mbmi->ref_frame[0] == LAST_FRAME);
|
||||
} else if (above_mbmi->ref_frame[1] > INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[1] > INTRA_FRAME) {
|
||||
pred_context = 1 + (above_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
above_mbmi->ref_frame[1] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[1] == LAST_FRAME);
|
||||
} else {
|
||||
MV_REFERENCE_FRAME rfs = above_mbmi->ref_frame[1] <= INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
MV_REFERENCE_FRAME crf1 = above_mbmi->ref_frame[1] > INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
MV_REFERENCE_FRAME crf2 = above_mbmi->ref_frame[1] > INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
|
||||
} else { // inter/inter
|
||||
if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi)) {
|
||||
pred_context = 2 * (above_mbmi->ref_frame[0] == LAST_FRAME) +
|
||||
2 * (left_mbmi->ref_frame[0] == LAST_FRAME);
|
||||
} else if (has_second_ref(above_mbmi) && has_second_ref(left_mbmi)) {
|
||||
pred_context = 1 + (above_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
above_mbmi->ref_frame[1] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[1] == LAST_FRAME);
|
||||
} else {
|
||||
const MV_REFERENCE_FRAME rfs = !has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
const MV_REFERENCE_FRAME crf1 = has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
const MV_REFERENCE_FRAME crf2 = has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
|
||||
|
||||
if (rfs == LAST_FRAME)
|
||||
pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
|
||||
else
|
||||
pred_context = crf1 == LAST_FRAME || crf2 == LAST_FRAME;
|
||||
if (rfs == LAST_FRAME)
|
||||
pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
|
||||
else
|
||||
pred_context = crf1 == LAST_FRAME || crf2 == LAST_FRAME;
|
||||
}
|
||||
}
|
||||
} else if (above_in_image || left_in_image) { // one edge available
|
||||
const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[0] == INTRA_FRAME)
|
||||
if (!is_inter_block(edge_mbmi)) { // intra
|
||||
pred_context = 2;
|
||||
else if (edge_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
|
||||
else
|
||||
pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
edge_mbmi->ref_frame[1] == LAST_FRAME);
|
||||
} else { // no edges available (2)
|
||||
} else { // inter
|
||||
if (!has_second_ref(edge_mbmi))
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
|
||||
else
|
||||
pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
edge_mbmi->ref_frame[1] == LAST_FRAME);
|
||||
}
|
||||
} else { // no edges available
|
||||
pred_context = 2;
|
||||
}
|
||||
|
||||
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
|
||||
return pred_context;
|
||||
}
|
||||
|
||||
unsigned char vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
|
||||
int pred_context;
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
|
||||
const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
|
||||
|
||||
// Note:
|
||||
// The mode info data structure has a one element border above and to the
|
||||
// left of the entries corresponding to real macroblocks.
|
||||
// The prediction flags in these dummy entries are initialised to 0.
|
||||
if (above_in_image && left_in_image) { // both edges available
|
||||
if (above_mbmi->ref_frame[0] == INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) {
|
||||
if (above_intra && left_intra) { // intra/intra
|
||||
pred_context = 2;
|
||||
} else if (above_mbmi->ref_frame[0] == INTRA_FRAME ||
|
||||
left_mbmi->ref_frame[0] == INTRA_FRAME) {
|
||||
const MB_MODE_INFO *edge_mbmi = above_mbmi->ref_frame[0] == INTRA_FRAME ?
|
||||
left_mbmi : above_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[1] <= INTRA_FRAME) {
|
||||
} else if (above_intra || left_intra) { // intra/inter or inter/intra
|
||||
const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
|
||||
if (!has_second_ref(edge_mbmi)) {
|
||||
if (edge_mbmi->ref_frame[0] == LAST_FRAME)
|
||||
pred_context = 3;
|
||||
else
|
||||
@@ -296,54 +299,53 @@ unsigned char vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
|
||||
pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
|
||||
}
|
||||
} else if (above_mbmi->ref_frame[1] <= INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[1] <= INTRA_FRAME) {
|
||||
if (above_mbmi->ref_frame[0] == LAST_FRAME &&
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME) {
|
||||
pred_context = 3;
|
||||
} else if (above_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME) {
|
||||
const MB_MODE_INFO *edge_mbmi = above_mbmi->ref_frame[0] == LAST_FRAME ?
|
||||
left_mbmi : above_mbmi;
|
||||
} else { // inter/inter
|
||||
if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi)) {
|
||||
if (above_mbmi->ref_frame[0] == LAST_FRAME &&
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME) {
|
||||
pred_context = 3;
|
||||
} else if (above_mbmi->ref_frame[0] == LAST_FRAME ||
|
||||
left_mbmi->ref_frame[0] == LAST_FRAME) {
|
||||
const MB_MODE_INFO *edge_mbmi =
|
||||
above_mbmi->ref_frame[0] == LAST_FRAME ? left_mbmi : above_mbmi;
|
||||
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
|
||||
} else {
|
||||
pred_context = 2 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME) +
|
||||
2 * (left_mbmi->ref_frame[0] == GOLDEN_FRAME);
|
||||
}
|
||||
} else if (has_second_ref(above_mbmi) && has_second_ref(left_mbmi)) {
|
||||
if (above_mbmi->ref_frame[0] == left_mbmi->ref_frame[0] &&
|
||||
above_mbmi->ref_frame[1] == left_mbmi->ref_frame[1])
|
||||
pred_context = 3 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
above_mbmi->ref_frame[1] == GOLDEN_FRAME ||
|
||||
left_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
left_mbmi->ref_frame[1] == GOLDEN_FRAME);
|
||||
else
|
||||
pred_context = 2;
|
||||
} else {
|
||||
pred_context = 2 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME) +
|
||||
2 * (left_mbmi->ref_frame[0] == GOLDEN_FRAME);
|
||||
}
|
||||
} else if (above_mbmi->ref_frame[1] > INTRA_FRAME &&
|
||||
left_mbmi->ref_frame[1] > INTRA_FRAME) {
|
||||
if (above_mbmi->ref_frame[0] == left_mbmi->ref_frame[0] &&
|
||||
above_mbmi->ref_frame[1] == left_mbmi->ref_frame[1])
|
||||
pred_context = 3 * (above_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
above_mbmi->ref_frame[1] == GOLDEN_FRAME ||
|
||||
left_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
left_mbmi->ref_frame[1] == GOLDEN_FRAME);
|
||||
else
|
||||
pred_context = 2;
|
||||
} else {
|
||||
MV_REFERENCE_FRAME rfs = above_mbmi->ref_frame[1] <= INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
MV_REFERENCE_FRAME crf1 = above_mbmi->ref_frame[1] > INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
MV_REFERENCE_FRAME crf2 = above_mbmi->ref_frame[1] > INTRA_FRAME ?
|
||||
above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
|
||||
const MV_REFERENCE_FRAME rfs = !has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
const MV_REFERENCE_FRAME crf1 = has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
|
||||
const MV_REFERENCE_FRAME crf2 = has_second_ref(above_mbmi) ?
|
||||
above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
|
||||
|
||||
if (rfs == GOLDEN_FRAME)
|
||||
pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
|
||||
else if (rfs == ALTREF_FRAME)
|
||||
pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
|
||||
else
|
||||
pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
|
||||
if (rfs == GOLDEN_FRAME)
|
||||
pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
|
||||
else if (rfs == ALTREF_FRAME)
|
||||
pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
|
||||
else
|
||||
pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
|
||||
}
|
||||
}
|
||||
} else if (above_in_image || left_in_image) { // one edge available
|
||||
const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
|
||||
|
||||
if (edge_mbmi->ref_frame[0] == INTRA_FRAME ||
|
||||
(edge_mbmi->ref_frame[0] == LAST_FRAME &&
|
||||
edge_mbmi->ref_frame[1] <= INTRA_FRAME))
|
||||
if (!is_inter_block(edge_mbmi) ||
|
||||
(edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
|
||||
pred_context = 2;
|
||||
else if (edge_mbmi->ref_frame[1] <= INTRA_FRAME)
|
||||
else if (!has_second_ref(edge_mbmi))
|
||||
pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
|
||||
else
|
||||
pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
|
||||
@@ -359,22 +361,23 @@ unsigned char vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
|
||||
// left of the entries corresponding to real blocks.
|
||||
// The prediction flags in these dummy entries are initialized to 0.
|
||||
unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const int left_in_image = xd->left_available && left_mbmi->mb_in_image;
|
||||
const int above_in_image = xd->up_available && above_mbmi->mb_in_image;
|
||||
const int max_tx_size = max_txsize_lookup[mi->mbmi.sb_type];
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const MB_MODE_INFO *const above_mbmi = above_mi ? &above_mi->mbmi : 0;
|
||||
const MB_MODE_INFO *const left_mbmi = left_mi ? &left_mi->mbmi : 0;
|
||||
const int left_in_image = xd->left_available && left_mi;
|
||||
const int above_in_image = xd->up_available && above_mi;
|
||||
const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type];
|
||||
int above_context = max_tx_size;
|
||||
int left_context = max_tx_size;
|
||||
|
||||
if (above_in_image)
|
||||
above_context = above_mbmi->mb_skip_coeff ? max_tx_size
|
||||
: above_mbmi->txfm_size;
|
||||
above_context = above_mbmi->skip_coeff ? max_tx_size
|
||||
: above_mbmi->tx_size;
|
||||
|
||||
if (left_in_image)
|
||||
left_context = left_mbmi->mb_skip_coeff ? max_tx_size
|
||||
: left_mbmi->txfm_size;
|
||||
left_context = left_mbmi->skip_coeff ? max_tx_size
|
||||
: left_mbmi->tx_size;
|
||||
|
||||
if (!left_in_image)
|
||||
left_context = above_context;
|
||||
@@ -385,36 +388,17 @@ unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) {
|
||||
return above_context + left_context > max_tx_size;
|
||||
}
|
||||
|
||||
void vp9_set_pred_flag_seg_id(VP9_COMMON *cm, BLOCK_SIZE_TYPE bsize,
|
||||
int mi_row, int mi_col, uint8_t pred_flag) {
|
||||
MODE_INFO *mi = &cm->mi[mi_row * cm->mode_info_stride + mi_col];
|
||||
const int bw = 1 << mi_width_log2(bsize);
|
||||
const int bh = 1 << mi_height_log2(bsize);
|
||||
const int xmis = MIN(cm->mi_cols - mi_col, bw);
|
||||
const int ymis = MIN(cm->mi_rows - mi_row, bh);
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < ymis; y++)
|
||||
for (x = 0; x < xmis; x++)
|
||||
mi[y * cm->mode_info_stride + x].mbmi.seg_id_predicted = pred_flag;
|
||||
void vp9_set_pred_flag_seg_id(MACROBLOCKD *xd, uint8_t pred_flag) {
|
||||
xd->this_mi->mbmi.seg_id_predicted = pred_flag;
|
||||
}
|
||||
|
||||
void vp9_set_pred_flag_mbskip(VP9_COMMON *cm, BLOCK_SIZE_TYPE bsize,
|
||||
int mi_row, int mi_col, uint8_t pred_flag) {
|
||||
MODE_INFO *mi = &cm->mi[mi_row * cm->mode_info_stride + mi_col];
|
||||
const int bw = 1 << mi_width_log2(bsize);
|
||||
const int bh = 1 << mi_height_log2(bsize);
|
||||
const int xmis = MIN(cm->mi_cols - mi_col, bw);
|
||||
const int ymis = MIN(cm->mi_rows - mi_row, bh);
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < ymis; y++)
|
||||
for (x = 0; x < xmis; x++)
|
||||
mi[y * cm->mode_info_stride + x].mbmi.mb_skip_coeff = pred_flag;
|
||||
void vp9_set_pred_flag_mbskip(MACROBLOCKD *xd, BLOCK_SIZE bsize,
|
||||
uint8_t pred_flag) {
|
||||
xd->this_mi->mbmi.skip_coeff = pred_flag;
|
||||
}
|
||||
|
||||
int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
|
||||
BLOCK_SIZE_TYPE bsize, int mi_row, int mi_col) {
|
||||
BLOCK_SIZE bsize, int mi_row, int mi_col) {
|
||||
const int mi_offset = mi_row * cm->mi_cols + mi_col;
|
||||
const int bw = 1 << mi_width_log2(bsize);
|
||||
const int bh = 1 << mi_height_log2(bsize);
|
||||
|
||||
@@ -15,32 +15,32 @@
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
|
||||
BLOCK_SIZE_TYPE bsize, int mi_row, int mi_col);
|
||||
BLOCK_SIZE bsize, int mi_row, int mi_col);
|
||||
|
||||
|
||||
static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const int above_sip = above_mi ? above_mi->mbmi.seg_id_predicted : 0;
|
||||
const int left_sip = left_mi ? left_mi->mbmi.seg_id_predicted : 0;
|
||||
|
||||
return above_mbmi->seg_id_predicted +
|
||||
(xd->left_available ? left_mbmi->seg_id_predicted : 0);
|
||||
return above_sip + (xd->left_available ? left_sip : 0);
|
||||
}
|
||||
|
||||
static INLINE vp9_prob vp9_get_pred_prob_seg_id(const MACROBLOCKD *xd) {
|
||||
return xd->seg.pred_probs[vp9_get_pred_context_seg_id(xd)];
|
||||
static INLINE vp9_prob vp9_get_pred_prob_seg_id(struct segmentation *seg,
|
||||
const MACROBLOCKD *xd) {
|
||||
return seg->pred_probs[vp9_get_pred_context_seg_id(xd)];
|
||||
}
|
||||
|
||||
void vp9_set_pred_flag_seg_id(VP9_COMMON *cm, BLOCK_SIZE_TYPE bsize,
|
||||
int mi_row, int mi_col, uint8_t pred_flag);
|
||||
void vp9_set_pred_flag_seg_id(MACROBLOCKD *xd, uint8_t pred_flag);
|
||||
|
||||
static INLINE int vp9_get_pred_context_mbskip(const MACROBLOCKD *xd) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MB_MODE_INFO *const above_mbmi = &mi[-xd->mode_info_stride].mbmi;
|
||||
const MB_MODE_INFO *const left_mbmi = &mi[-1].mbmi;
|
||||
const MODE_INFO * const above_mi = xd->mi_8x8[-xd->mode_info_stride];
|
||||
const MODE_INFO * const left_mi = xd->mi_8x8[-1];
|
||||
const int above_skip_coeff = above_mi ? above_mi->mbmi.skip_coeff : 0;
|
||||
const int left_skip_coeff = left_mi ? left_mi->mbmi.skip_coeff : 0;
|
||||
|
||||
return above_mbmi->mb_skip_coeff +
|
||||
(xd->left_available ? left_mbmi->mb_skip_coeff : 0);
|
||||
return above_skip_coeff + (xd->left_available ? left_skip_coeff : 0);
|
||||
}
|
||||
|
||||
static INLINE vp9_prob vp9_get_pred_prob_mbskip(const VP9_COMMON *cm,
|
||||
@@ -49,11 +49,11 @@ static INLINE vp9_prob vp9_get_pred_prob_mbskip(const VP9_COMMON *cm,
|
||||
}
|
||||
|
||||
static INLINE unsigned char vp9_get_pred_flag_mbskip(const MACROBLOCKD *xd) {
|
||||
return xd->mode_info_context->mbmi.mb_skip_coeff;
|
||||
return xd->this_mi->mbmi.skip_coeff;
|
||||
}
|
||||
|
||||
void vp9_set_pred_flag_mbskip(VP9_COMMON *cm, BLOCK_SIZE_TYPE bsize,
|
||||
int mi_row, int mi_col, uint8_t pred_flag);
|
||||
void vp9_set_pred_flag_mbskip(MACROBLOCKD *xd, BLOCK_SIZE bsize,
|
||||
uint8_t pred_flag);
|
||||
|
||||
unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
|
||||
|
||||
@@ -102,7 +102,7 @@ static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
|
||||
|
||||
unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd);
|
||||
|
||||
static const vp9_prob *get_tx_probs(BLOCK_SIZE_TYPE bsize, uint8_t context,
|
||||
static const vp9_prob *get_tx_probs(BLOCK_SIZE bsize, uint8_t context,
|
||||
const struct tx_probs *tx_probs) {
|
||||
if (bsize < BLOCK_16X16)
|
||||
return tx_probs->p8x8[context];
|
||||
@@ -113,13 +113,14 @@ static const vp9_prob *get_tx_probs(BLOCK_SIZE_TYPE bsize, uint8_t context,
|
||||
}
|
||||
|
||||
static const vp9_prob *get_tx_probs2(const MACROBLOCKD *xd,
|
||||
const struct tx_probs *tx_probs) {
|
||||
const BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
|
||||
const struct tx_probs *tx_probs,
|
||||
const MODE_INFO *m) {
|
||||
const BLOCK_SIZE bsize = m->mbmi.sb_type;
|
||||
const int context = vp9_get_pred_context_tx_size(xd);
|
||||
return get_tx_probs(bsize, context, tx_probs);
|
||||
}
|
||||
|
||||
static void update_tx_counts(BLOCK_SIZE_TYPE bsize, uint8_t context,
|
||||
static void update_tx_counts(BLOCK_SIZE bsize, uint8_t context,
|
||||
TX_SIZE tx_size, struct tx_counts *tx_counts) {
|
||||
if (bsize >= BLOCK_32X32)
|
||||
tx_counts->p32x32[context][tx_size]++;
|
||||
|
||||
@@ -130,12 +130,12 @@ int16_t vp9_ac_quant(int qindex, int delta) {
|
||||
}
|
||||
|
||||
|
||||
int vp9_get_qindex(MACROBLOCKD *xd, int segment_id, int base_qindex) {
|
||||
if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_ALT_Q)) {
|
||||
const int data = vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_ALT_Q);
|
||||
return xd->seg.abs_delta == SEGMENT_ABSDATA ?
|
||||
data : // Abs value
|
||||
clamp(base_qindex + data, 0, MAXQ); // Delta value
|
||||
int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex) {
|
||||
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
|
||||
const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
|
||||
return seg->abs_delta == SEGMENT_ABSDATA ?
|
||||
data : // Abs value
|
||||
clamp(base_qindex + data, 0, MAXQ); // Delta value
|
||||
} else {
|
||||
return base_qindex;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,6 @@ void vp9_init_quant_tables();
|
||||
int16_t vp9_dc_quant(int qindex, int delta);
|
||||
int16_t vp9_ac_quant(int qindex, int delta);
|
||||
|
||||
int vp9_get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex);
|
||||
int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
|
||||
|
||||
#endif // VP9_COMMON_VP9_QUANT_COMMON_H_
|
||||
|
||||
@@ -10,171 +10,27 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
#include "vp9/common/vp9_reconinter.h"
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
|
||||
static int scale_value_x_with_scaling(int val,
|
||||
const struct scale_factors *scale) {
|
||||
return (val * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT);
|
||||
}
|
||||
|
||||
static int scale_value_y_with_scaling(int val,
|
||||
const struct scale_factors *scale) {
|
||||
return (val * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT);
|
||||
}
|
||||
|
||||
static int unscaled_value(int val, const struct scale_factors *scale) {
|
||||
(void) scale;
|
||||
return val;
|
||||
}
|
||||
|
||||
static MV32 mv_q3_to_q4_with_scaling(const MV *mv,
|
||||
const struct scale_factors *scale) {
|
||||
const MV32 res = {
|
||||
((mv->row << 1) * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT)
|
||||
+ scale->y_offset_q4,
|
||||
((mv->col << 1) * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT)
|
||||
+ scale->x_offset_q4
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
static MV32 mv_q3_to_q4_without_scaling(const MV *mv,
|
||||
const struct scale_factors *scale) {
|
||||
const MV32 res = {
|
||||
mv->row << 1,
|
||||
mv->col << 1
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
static MV32 mv_q4_with_scaling(const MV *mv,
|
||||
const struct scale_factors *scale) {
|
||||
const MV32 res = {
|
||||
(mv->row * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) + scale->y_offset_q4,
|
||||
(mv->col * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) + scale->x_offset_q4
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
static MV32 mv_q4_without_scaling(const MV *mv,
|
||||
const struct scale_factors *scale) {
|
||||
const MV32 res = {
|
||||
mv->row,
|
||||
mv->col
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
static void set_offsets_with_scaling(struct scale_factors *scale,
|
||||
int row, int col) {
|
||||
const int x_q4 = 16 * col;
|
||||
const int y_q4 = 16 * row;
|
||||
|
||||
scale->x_offset_q4 = (x_q4 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) & 0xf;
|
||||
scale->y_offset_q4 = (y_q4 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) & 0xf;
|
||||
}
|
||||
|
||||
static void set_offsets_without_scaling(struct scale_factors *scale,
|
||||
int row, int col) {
|
||||
scale->x_offset_q4 = 0;
|
||||
scale->y_offset_q4 = 0;
|
||||
}
|
||||
|
||||
static int get_fixed_point_scale_factor(int other_size, int this_size) {
|
||||
// Calculate scaling factor once for each reference frame
|
||||
// and use fixed point scaling factors in decoding and encoding routines.
|
||||
// Hardware implementations can calculate scale factor in device driver
|
||||
// and use multiplication and shifting on hardware instead of division.
|
||||
return (other_size << VP9_REF_SCALE_SHIFT) / this_size;
|
||||
}
|
||||
|
||||
void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
|
||||
int other_w, int other_h,
|
||||
int this_w, int this_h) {
|
||||
scale->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
|
||||
scale->x_offset_q4 = 0; // calculated per-mb
|
||||
scale->x_step_q4 = (16 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT);
|
||||
|
||||
scale->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
|
||||
scale->y_offset_q4 = 0; // calculated per-mb
|
||||
scale->y_step_q4 = (16 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT);
|
||||
|
||||
if ((other_w == this_w) && (other_h == this_h)) {
|
||||
scale->scale_value_x = unscaled_value;
|
||||
scale->scale_value_y = unscaled_value;
|
||||
scale->set_scaled_offsets = set_offsets_without_scaling;
|
||||
scale->scale_mv_q3_to_q4 = mv_q3_to_q4_without_scaling;
|
||||
scale->scale_mv_q4 = mv_q4_without_scaling;
|
||||
} else {
|
||||
scale->scale_value_x = scale_value_x_with_scaling;
|
||||
scale->scale_value_y = scale_value_y_with_scaling;
|
||||
scale->set_scaled_offsets = set_offsets_with_scaling;
|
||||
scale->scale_mv_q3_to_q4 = mv_q3_to_q4_with_scaling;
|
||||
scale->scale_mv_q4 = mv_q4_with_scaling;
|
||||
}
|
||||
|
||||
// TODO(agrange): Investigate the best choice of functions to use here
|
||||
// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
|
||||
// to do at full-pel offsets. The current selection, where the filter is
|
||||
// applied in one direction only, and not at all for 0,0, seems to give the
|
||||
// best quality, but it may be worth trying an additional mode that does
|
||||
// do the filtering on full-pel.
|
||||
if (scale->x_step_q4 == 16) {
|
||||
if (scale->y_step_q4 == 16) {
|
||||
// No scaling in either direction.
|
||||
scale->predict[0][0][0] = vp9_convolve_copy;
|
||||
scale->predict[0][0][1] = vp9_convolve_avg;
|
||||
scale->predict[0][1][0] = vp9_convolve8_vert;
|
||||
scale->predict[0][1][1] = vp9_convolve8_avg_vert;
|
||||
scale->predict[1][0][0] = vp9_convolve8_horiz;
|
||||
scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
|
||||
} else {
|
||||
// No scaling in x direction. Must always scale in the y direction.
|
||||
scale->predict[0][0][0] = vp9_convolve8_vert;
|
||||
scale->predict[0][0][1] = vp9_convolve8_avg_vert;
|
||||
scale->predict[0][1][0] = vp9_convolve8_vert;
|
||||
scale->predict[0][1][1] = vp9_convolve8_avg_vert;
|
||||
scale->predict[1][0][0] = vp9_convolve8;
|
||||
scale->predict[1][0][1] = vp9_convolve8_avg;
|
||||
}
|
||||
} else {
|
||||
if (scale->y_step_q4 == 16) {
|
||||
// No scaling in the y direction. Must always scale in the x direction.
|
||||
scale->predict[0][0][0] = vp9_convolve8_horiz;
|
||||
scale->predict[0][0][1] = vp9_convolve8_avg_horiz;
|
||||
scale->predict[0][1][0] = vp9_convolve8;
|
||||
scale->predict[0][1][1] = vp9_convolve8_avg;
|
||||
scale->predict[1][0][0] = vp9_convolve8_horiz;
|
||||
scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
|
||||
} else {
|
||||
// Must always scale in both directions.
|
||||
scale->predict[0][0][0] = vp9_convolve8;
|
||||
scale->predict[0][0][1] = vp9_convolve8_avg;
|
||||
scale->predict[0][1][0] = vp9_convolve8;
|
||||
scale->predict[0][1][1] = vp9_convolve8_avg;
|
||||
scale->predict[1][0][0] = vp9_convolve8;
|
||||
scale->predict[1][0][1] = vp9_convolve8_avg;
|
||||
}
|
||||
}
|
||||
// 2D subpel motion always gets filtered in both directions
|
||||
scale->predict[1][1][0] = vp9_convolve8;
|
||||
scale->predict[1][1][1] = vp9_convolve8_avg;
|
||||
}
|
||||
|
||||
void vp9_setup_interp_filters(MACROBLOCKD *xd,
|
||||
INTERPOLATIONFILTERTYPE mcomp_filter_type,
|
||||
VP9_COMMON *cm) {
|
||||
if (xd->mode_info_context) {
|
||||
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
|
||||
if (xd->mi_8x8 && xd->this_mi) {
|
||||
MB_MODE_INFO * mbmi = &xd->this_mi->mbmi;
|
||||
|
||||
set_scale_factors(xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1,
|
||||
cm->active_ref_scale);
|
||||
} else {
|
||||
set_scale_factors(xd, -1, -1, cm->active_ref_scale);
|
||||
}
|
||||
|
||||
switch (mcomp_filter_type) {
|
||||
@@ -199,17 +55,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const MV *src_mv,
|
||||
const struct scale_factors *scale,
|
||||
int w, int h, int weight,
|
||||
int w, int h, int ref,
|
||||
const struct subpix_fn_table *subpix,
|
||||
enum mv_precision precision) {
|
||||
const MV32 mv = precision == MV_PRECISION_Q4
|
||||
? scale->scale_mv_q4(src_mv, scale)
|
||||
: scale->scale_mv_q3_to_q4(src_mv, scale);
|
||||
const int subpel_x = mv.col & 15;
|
||||
const int subpel_y = mv.row & 15;
|
||||
const int is_q4 = precision == MV_PRECISION_Q4;
|
||||
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row << 1,
|
||||
is_q4 ? src_mv->col : src_mv->col << 1 };
|
||||
const MV32 mv = scale->scale_mv(&mv_q4, scale);
|
||||
const int subpel_x = mv.col & SUBPEL_MASK;
|
||||
const int subpel_y = mv.row & SUBPEL_MASK;
|
||||
|
||||
src += (mv.row >> 4) * src_stride + (mv.col >> 4);
|
||||
scale->predict[!!subpel_x][!!subpel_y][weight](
|
||||
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
|
||||
scale->predict[subpel_x != 0][subpel_y != 0][ref](
|
||||
src, src_stride, dst, dst_stride,
|
||||
subpix->filter_x[subpel_x], scale->x_step_q4,
|
||||
subpix->filter_y[subpel_y], scale->y_step_q4,
|
||||
@@ -232,20 +89,16 @@ static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// TODO(jkoleszar): yet another mv clamping function :-(
|
||||
MV clamp_mv_to_umv_border_sb(const MV *src_mv,
|
||||
int bwl, int bhl, int ss_x, int ss_y,
|
||||
int mb_to_left_edge, int mb_to_top_edge,
|
||||
int mb_to_right_edge, int mb_to_bottom_edge) {
|
||||
MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
|
||||
int bw, int bh, int ss_x, int ss_y) {
|
||||
// If the MV points so far into the UMV border that no visible pixels
|
||||
// are used for reconstruction, the subpel part of the MV can be
|
||||
// discarded and the MV limited to 16 pixels with equivalent results.
|
||||
const int spel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 4;
|
||||
const int spel_right = spel_left - (1 << 4);
|
||||
const int spel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 4;
|
||||
const int spel_bottom = spel_top - (1 << 4);
|
||||
const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS;
|
||||
const int spel_right = spel_left - SUBPEL_SHIFTS;
|
||||
const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
|
||||
const int spel_bottom = spel_top - SUBPEL_SHIFTS;
|
||||
MV clamped_mv = {
|
||||
src_mv->row << (1 - ss_y),
|
||||
src_mv->col << (1 - ss_x)
|
||||
@@ -253,130 +106,143 @@ MV clamp_mv_to_umv_border_sb(const MV *src_mv,
|
||||
assert(ss_x <= 1);
|
||||
assert(ss_y <= 1);
|
||||
|
||||
clamp_mv(&clamped_mv, (mb_to_left_edge << (1 - ss_x)) - spel_left,
|
||||
(mb_to_right_edge << (1 - ss_x)) + spel_right,
|
||||
(mb_to_top_edge << (1 - ss_y)) - spel_top,
|
||||
(mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
|
||||
clamp_mv(&clamped_mv, (xd->mb_to_left_edge << (1 - ss_x)) - spel_left,
|
||||
(xd->mb_to_right_edge << (1 - ss_x)) + spel_right,
|
||||
(xd->mb_to_top_edge << (1 - ss_y)) - spel_top,
|
||||
(xd->mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
|
||||
|
||||
return clamped_mv;
|
||||
}
|
||||
|
||||
struct build_inter_predictors_args {
|
||||
MACROBLOCKD *xd;
|
||||
int x;
|
||||
int y;
|
||||
uint8_t* dst[MAX_MB_PLANE];
|
||||
int dst_stride[MAX_MB_PLANE];
|
||||
uint8_t* pre[2][MAX_MB_PLANE];
|
||||
int pre_stride[2][MAX_MB_PLANE];
|
||||
int x, y;
|
||||
};
|
||||
static void build_inter_predictors(int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
|
||||
static void build_inter_predictors(int plane, int block, BLOCK_SIZE bsize,
|
||||
int pred_w, int pred_h,
|
||||
void *argv) {
|
||||
const struct build_inter_predictors_args* const arg = argv;
|
||||
MACROBLOCKD * const xd = arg->xd;
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
|
||||
const int x = 4 * (block & ((1 << bwl) - 1)), y = 4 * (block >> bwl);
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
MACROBLOCKD *const xd = arg->xd;
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const int bwl = b_width_log2(bsize) - pd->subsampling_x;
|
||||
const int bw = 4 << bwl;
|
||||
const int bh = plane_block_height(bsize, pd);
|
||||
const int x = 4 * (block & ((1 << bwl) - 1));
|
||||
const int y = 4 * (block >> bwl);
|
||||
const MODE_INFO *mi = xd->this_mi;
|
||||
const int use_second_ref = mi->mbmi.ref_frame[1] > 0;
|
||||
int which_mv;
|
||||
int ref;
|
||||
|
||||
assert(x < (4 << bwl));
|
||||
assert(y < (4 << bhl));
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == (4 << bwl));
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == (4 << bhl));
|
||||
assert(x < bw);
|
||||
assert(y < bh);
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
|
||||
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
|
||||
|
||||
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
|
||||
// source
|
||||
const uint8_t * const base_pre = arg->pre[which_mv][plane];
|
||||
const int pre_stride = arg->pre_stride[which_mv][plane];
|
||||
const uint8_t *const pre = base_pre +
|
||||
scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]);
|
||||
struct scale_factors * const scale = &xd->scale_factor[which_mv];
|
||||
for (ref = 0; ref < 1 + use_second_ref; ++ref) {
|
||||
struct scale_factors *const scale = &xd->scale_factor[ref];
|
||||
struct buf_2d *const pre_buf = &pd->pre[ref];
|
||||
struct buf_2d *const dst_buf = &pd->dst;
|
||||
|
||||
// dest
|
||||
uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x;
|
||||
const uint8_t *const pre = pre_buf->buf + scaled_buffer_offset(x, y,
|
||||
pre_buf->stride, scale);
|
||||
|
||||
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
|
||||
|
||||
// TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
|
||||
// same MV (the average of the 4 luma MVs) but we could do something
|
||||
// smarter for non-4:2:0. Just punt for now, pending the changes to get
|
||||
// rid of SPLITMV mode entirely.
|
||||
const MV mv = mi->mbmi.sb_type < BLOCK_8X8
|
||||
? (plane == 0 ? mi->bmi[block].as_mv[which_mv].as_mv
|
||||
: mi_mv_pred_q4(mi, which_mv))
|
||||
: mi->mbmi.mv[which_mv].as_mv;
|
||||
? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
|
||||
: mi_mv_pred_q4(mi, ref))
|
||||
: mi->mbmi.mv[ref].as_mv;
|
||||
|
||||
// TODO(jkoleszar): This clamping is done in the incorrect place for the
|
||||
// scaling case. It needs to be done on the scaled MV, not the pre-scaling
|
||||
// MV. Note however that it performs the subsampling aware scaling so
|
||||
// that the result is always q4.
|
||||
const MV res_mv = clamp_mv_to_umv_border_sb(&mv, bwl, bhl,
|
||||
xd->plane[plane].subsampling_x,
|
||||
xd->plane[plane].subsampling_y,
|
||||
xd->mb_to_left_edge,
|
||||
xd->mb_to_top_edge,
|
||||
xd->mb_to_right_edge,
|
||||
xd->mb_to_bottom_edge);
|
||||
const MV res_mv = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
|
||||
pd->subsampling_x,
|
||||
pd->subsampling_y);
|
||||
|
||||
scale->set_scaled_offsets(scale, arg->y + y, arg->x + x);
|
||||
vp9_build_inter_predictor(pre, pre_stride,
|
||||
dst, arg->dst_stride[plane],
|
||||
&res_mv, &xd->scale_factor[which_mv],
|
||||
4 << pred_w, 4 << pred_h, which_mv,
|
||||
vp9_build_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
|
||||
&res_mv, scale,
|
||||
4 << pred_w, 4 << pred_h, ref,
|
||||
&xd->subpix, MV_PRECISION_Q4);
|
||||
}
|
||||
}
|
||||
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
|
||||
int mi_row,
|
||||
int mi_col,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
struct build_inter_predictors_args args = {
|
||||
xd, mi_col * MI_SIZE, mi_row * MI_SIZE,
|
||||
{xd->plane[0].dst.buf, NULL, NULL}, {xd->plane[0].dst.stride, 0, 0},
|
||||
{{xd->plane[0].pre[0].buf, NULL, NULL},
|
||||
{xd->plane[0].pre[1].buf, NULL, NULL}},
|
||||
{{xd->plane[0].pre[0].stride, 0, 0}, {xd->plane[0].pre[1].stride, 0, 0}},
|
||||
};
|
||||
|
||||
foreach_predicted_block_in_plane(xd, bsize, 0, build_inter_predictors, &args);
|
||||
}
|
||||
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
|
||||
int mi_row,
|
||||
int mi_col,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
struct build_inter_predictors_args args = {
|
||||
xd, mi_col * MI_SIZE, mi_row * MI_SIZE,
|
||||
#if CONFIG_ALPHA
|
||||
{NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
|
||||
xd->plane[3].dst.buf},
|
||||
{0, xd->plane[1].dst.stride, xd->plane[1].dst.stride,
|
||||
xd->plane[3].dst.stride},
|
||||
{{NULL, xd->plane[1].pre[0].buf, xd->plane[2].pre[0].buf,
|
||||
xd->plane[3].pre[0].buf},
|
||||
{NULL, xd->plane[1].pre[1].buf, xd->plane[2].pre[1].buf,
|
||||
xd->plane[3].pre[1].buf}},
|
||||
{{0, xd->plane[1].pre[0].stride, xd->plane[1].pre[0].stride,
|
||||
xd->plane[3].pre[0].stride},
|
||||
{0, xd->plane[1].pre[1].stride, xd->plane[1].pre[1].stride,
|
||||
xd->plane[3].pre[1].stride}},
|
||||
#else
|
||||
{NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf},
|
||||
{0, xd->plane[1].dst.stride, xd->plane[1].dst.stride},
|
||||
{{NULL, xd->plane[1].pre[0].buf, xd->plane[2].pre[0].buf},
|
||||
{NULL, xd->plane[1].pre[1].buf, xd->plane[2].pre[1].buf}},
|
||||
{{0, xd->plane[1].pre[0].stride, xd->plane[1].pre[0].stride},
|
||||
{0, xd->plane[1].pre[1].stride, xd->plane[1].pre[1].stride}},
|
||||
#endif
|
||||
};
|
||||
foreach_predicted_block_uv(xd, bsize, build_inter_predictors, &args);
|
||||
}
|
||||
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd,
|
||||
int mi_row, int mi_col,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
|
||||
// calculate the subsampled BLOCK_SIZE, but that type isn't defined for
|
||||
// sizes smaller than 16x16 yet.
|
||||
typedef void (*foreach_predicted_block_visitor)(int plane, int block,
|
||||
BLOCK_SIZE bsize,
|
||||
int pred_w, int pred_h,
|
||||
void *arg);
|
||||
static INLINE void foreach_predicted_block_in_plane(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE bsize, int plane,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int i, x, y;
|
||||
|
||||
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
|
||||
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
|
||||
// block sizes in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// subsampled size of the block
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
|
||||
|
||||
// size of the predictor to use.
|
||||
int pred_w, pred_h;
|
||||
|
||||
if (xd->this_mi->mbmi.sb_type < BLOCK_8X8) {
|
||||
assert(bsize == BLOCK_8X8);
|
||||
pred_w = 0;
|
||||
pred_h = 0;
|
||||
} else {
|
||||
pred_w = bwl;
|
||||
pred_h = bhl;
|
||||
}
|
||||
assert(pred_w <= bwl);
|
||||
assert(pred_h <= bhl);
|
||||
|
||||
// visit each subblock in raster order
|
||||
i = 0;
|
||||
for (y = 0; y < 1 << bhl; y += 1 << pred_h) {
|
||||
for (x = 0; x < 1 << bwl; x += 1 << pred_w) {
|
||||
visit(plane, i, bsize, pred_w, pred_h, arg);
|
||||
i += 1 << pred_w;
|
||||
}
|
||||
i += (1 << (bwl + pred_h)) - (1 << bwl);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
|
||||
int mi_row, int mi_col,
|
||||
int plane_from, int plane_to) {
|
||||
int plane;
|
||||
for (plane = plane_from; plane <= plane_to; ++plane) {
|
||||
struct build_inter_predictors_args args = {
|
||||
xd, mi_col * MI_SIZE, mi_row * MI_SIZE,
|
||||
};
|
||||
foreach_predicted_block_in_plane(xd, bsize, plane, build_inter_predictors,
|
||||
&args);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize) {
|
||||
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0);
|
||||
}
|
||||
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize) {
|
||||
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1,
|
||||
MAX_MB_PLANE - 1);
|
||||
}
|
||||
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize) {
|
||||
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
|
||||
MAX_MB_PLANE - 1);
|
||||
}
|
||||
|
||||
// TODO(dkovalev: find better place for this function)
|
||||
@@ -391,8 +257,7 @@ void vp9_setup_scale_factors(VP9_COMMON *cm, int i) {
|
||||
fb->y_crop_width, fb->y_crop_height,
|
||||
cm->width, cm->height);
|
||||
|
||||
if (sf->x_scale_fp != VP9_REF_NO_SCALE ||
|
||||
sf->y_scale_fp != VP9_REF_NO_SCALE)
|
||||
if (vp9_is_scaled(sf))
|
||||
vp9_extend_frame_borders(fb, cm->subsampling_x, cm->subsampling_y);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,28 +15,19 @@
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
struct subpix_fn_table;
|
||||
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd,
|
||||
int mb_row,
|
||||
int mb_col,
|
||||
BLOCK_SIZE_TYPE bsize);
|
||||
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd,
|
||||
int mb_row,
|
||||
int mb_col,
|
||||
BLOCK_SIZE_TYPE bsize);
|
||||
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void vp9_build_inter_predictors_sb(MACROBLOCKD *mb,
|
||||
int mb_row, int mb_col,
|
||||
BLOCK_SIZE_TYPE bsize);
|
||||
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
|
||||
BLOCK_SIZE bsize);
|
||||
|
||||
void vp9_setup_interp_filters(MACROBLOCKD *xd,
|
||||
INTERPOLATIONFILTERTYPE filter,
|
||||
VP9_COMMON *cm);
|
||||
|
||||
void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
|
||||
int other_w, int other_h,
|
||||
int this_w, int this_h);
|
||||
|
||||
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const MV *mv_q3,
|
||||
|
||||
@@ -8,15 +8,16 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
|
||||
#include "vp9_rtcd.h"
|
||||
|
||||
#include "vp9/common/vp9_reconintra.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = {
|
||||
DCT_DCT, // DC
|
||||
ADST_DCT, // V
|
||||
@@ -25,7 +26,7 @@ const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = {
|
||||
ADST_ADST, // D135
|
||||
ADST_DCT, // D117
|
||||
DCT_ADST, // D153
|
||||
DCT_ADST, // D27
|
||||
DCT_ADST, // D207
|
||||
ADST_DCT, // D63
|
||||
ADST_ADST, // TM
|
||||
DCT_DCT, // NEARESTMV
|
||||
@@ -35,294 +36,256 @@ const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = {
|
||||
};
|
||||
|
||||
#define intra_pred_sized(type, size) \
|
||||
void vp9_##type##_predictor_##size##x##size##_c(uint8_t *pred_ptr, \
|
||||
ptrdiff_t stride, \
|
||||
uint8_t *above_row, \
|
||||
uint8_t *left_col) { \
|
||||
type##_predictor(pred_ptr, stride, size, above_row, left_col); \
|
||||
}
|
||||
void vp9_##type##_predictor_##size##x##size##_c(uint8_t *dst, \
|
||||
ptrdiff_t stride, \
|
||||
const uint8_t *above, \
|
||||
const uint8_t *left) { \
|
||||
type##_predictor(dst, stride, size, above, left); \
|
||||
}
|
||||
|
||||
#define intra_pred_allsizes(type) \
|
||||
intra_pred_sized(type, 4) \
|
||||
intra_pred_sized(type, 8) \
|
||||
intra_pred_sized(type, 16) \
|
||||
intra_pred_sized(type, 32)
|
||||
|
||||
static INLINE void d27_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
|
||||
// first column
|
||||
for (r = 0; r < bs - 1; ++r) {
|
||||
pred_ptr[r * stride] = ROUND_POWER_OF_TWO(left_col[r] +
|
||||
left_col[r + 1], 1);
|
||||
}
|
||||
pred_ptr[(bs - 1) * stride] = left_col[bs - 1];
|
||||
pred_ptr++;
|
||||
for (r = 0; r < bs - 1; ++r)
|
||||
dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1);
|
||||
dst[(bs - 1) * stride] = left[bs - 1];
|
||||
dst++;
|
||||
|
||||
// second column
|
||||
for (r = 0; r < bs - 2; ++r) {
|
||||
pred_ptr[r * stride] = ROUND_POWER_OF_TWO(left_col[r] +
|
||||
left_col[r + 1] * 2 +
|
||||
left_col[r + 2], 2);
|
||||
}
|
||||
pred_ptr[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left_col[bs - 2] +
|
||||
left_col[bs - 1] * 3,
|
||||
2);
|
||||
pred_ptr[(bs - 1) * stride] = left_col[bs - 1];
|
||||
pred_ptr++;
|
||||
for (r = 0; r < bs - 2; ++r)
|
||||
dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 +
|
||||
left[r + 2], 2);
|
||||
dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] +
|
||||
left[bs - 1] * 3, 2);
|
||||
dst[(bs - 1) * stride] = left[bs - 1];
|
||||
dst++;
|
||||
|
||||
// rest of last row
|
||||
for (c = 0; c < bs - 2; ++c) {
|
||||
pred_ptr[(bs - 1) * stride + c] = left_col[bs - 1];
|
||||
}
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
dst[(bs - 1) * stride + c] = left[bs - 1];
|
||||
|
||||
for (r = bs - 2; r >= 0; --r) {
|
||||
for (c = 0; c < bs - 2; ++c) {
|
||||
pred_ptr[r * stride + c] = pred_ptr[(r + 1) * stride + c - 2];
|
||||
}
|
||||
}
|
||||
for (r = bs - 2; r >= 0; --r)
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
|
||||
}
|
||||
intra_pred_allsizes(d27)
|
||||
intra_pred_allsizes(d207)
|
||||
|
||||
static INLINE void d63_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
for (r = 0; r < bs; ++r) {
|
||||
for (c = 0; c < bs; ++c) {
|
||||
if (r & 1) {
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[r/2 + c] +
|
||||
above_row[r/2 + c + 1] * 2 +
|
||||
above_row[r/2 + c + 2], 2);
|
||||
} else {
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[r/2 + c] +
|
||||
above_row[r/2+ c + 1], 1);
|
||||
}
|
||||
}
|
||||
pred_ptr += stride;
|
||||
for (c = 0; c < bs; ++c)
|
||||
dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] +
|
||||
above[r/2 + c + 1] * 2 +
|
||||
above[r/2 + c + 2], 2)
|
||||
: ROUND_POWER_OF_TWO(above[r/2 + c] +
|
||||
above[r/2 + c + 1], 1);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(d63)
|
||||
|
||||
static INLINE void d45_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
for (r = 0; r < bs; ++r) {
|
||||
for (c = 0; c < bs; ++c) {
|
||||
if (r + c + 2 < bs * 2)
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[r + c] +
|
||||
above_row[r + c + 1] * 2 +
|
||||
above_row[r + c + 2], 2);
|
||||
else
|
||||
pred_ptr[c] = above_row[bs * 2 - 1];
|
||||
}
|
||||
pred_ptr += stride;
|
||||
for (c = 0; c < bs; ++c)
|
||||
dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] +
|
||||
above[r + c + 1] * 2 +
|
||||
above[r + c + 2], 2)
|
||||
: above[bs * 2 - 1];
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(d45)
|
||||
|
||||
static INLINE void d117_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
|
||||
// first row
|
||||
for (c = 0; c < bs; c++)
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[c - 1] + above_row[c], 1);
|
||||
pred_ptr += stride;
|
||||
dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1);
|
||||
dst += stride;
|
||||
|
||||
// second row
|
||||
pred_ptr[0] = ROUND_POWER_OF_TWO(left_col[0] +
|
||||
above_row[-1] * 2 +
|
||||
above_row[0], 2);
|
||||
dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
|
||||
for (c = 1; c < bs; c++)
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[c - 2] +
|
||||
above_row[c - 1] * 2 +
|
||||
above_row[c], 2);
|
||||
pred_ptr += stride;
|
||||
dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
|
||||
dst += stride;
|
||||
|
||||
// the rest of first col
|
||||
pred_ptr[0] = ROUND_POWER_OF_TWO(above_row[-1] +
|
||||
left_col[0] * 2 +
|
||||
left_col[1], 2);
|
||||
dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
|
||||
for (r = 3; r < bs; ++r)
|
||||
pred_ptr[(r-2) * stride] = ROUND_POWER_OF_TWO(left_col[r - 3] +
|
||||
left_col[r - 2] * 2 +
|
||||
left_col[r - 1], 2);
|
||||
dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 +
|
||||
left[r - 1], 2);
|
||||
|
||||
// the rest of the block
|
||||
for (r = 2; r < bs; ++r) {
|
||||
for (c = 1; c < bs; c++)
|
||||
pred_ptr[c] = pred_ptr[-2 * stride + c - 1];
|
||||
pred_ptr += stride;
|
||||
dst[c] = dst[-2 * stride + c - 1];
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(d117)
|
||||
|
||||
static INLINE void d135_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
pred_ptr[0] = ROUND_POWER_OF_TWO(left_col[0] +
|
||||
above_row[-1] * 2 +
|
||||
above_row[0], 2);
|
||||
dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
|
||||
for (c = 1; c < bs; c++)
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[c - 2] +
|
||||
above_row[c - 1] * 2 +
|
||||
above_row[c], 2);
|
||||
dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
|
||||
|
||||
pred_ptr[stride] = ROUND_POWER_OF_TWO(above_row[-1] +
|
||||
left_col[0] * 2 +
|
||||
left_col[1], 2);
|
||||
dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
|
||||
for (r = 2; r < bs; ++r)
|
||||
pred_ptr[r * stride] = ROUND_POWER_OF_TWO(left_col[r - 2] +
|
||||
left_col[r - 1] * 2 +
|
||||
left_col[r], 2);
|
||||
dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
|
||||
left[r], 2);
|
||||
|
||||
pred_ptr += stride;
|
||||
dst += stride;
|
||||
for (r = 1; r < bs; ++r) {
|
||||
for (c = 1; c < bs; c++)
|
||||
pred_ptr[c] = pred_ptr[-stride + c - 1];
|
||||
pred_ptr += stride;
|
||||
dst[c] = dst[-stride + c - 1];
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(d135)
|
||||
|
||||
static INLINE void d153_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
pred_ptr[0] = ROUND_POWER_OF_TWO(above_row[-1] + left_col[0], 1);
|
||||
dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1);
|
||||
for (r = 1; r < bs; r++)
|
||||
pred_ptr[r * stride] =
|
||||
ROUND_POWER_OF_TWO(left_col[r - 1] + left_col[r], 1);
|
||||
pred_ptr++;
|
||||
dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1);
|
||||
dst++;
|
||||
|
||||
pred_ptr[0] = ROUND_POWER_OF_TWO(left_col[0] +
|
||||
above_row[-1] * 2 +
|
||||
above_row[0], 2);
|
||||
pred_ptr[stride] = ROUND_POWER_OF_TWO(above_row[-1] +
|
||||
left_col[0] * 2 +
|
||||
left_col[1], 2);
|
||||
dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
|
||||
dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
|
||||
for (r = 2; r < bs; r++)
|
||||
pred_ptr[r * stride] = ROUND_POWER_OF_TWO(left_col[r - 2] +
|
||||
left_col[r - 1] * 2 +
|
||||
left_col[r], 2);
|
||||
pred_ptr++;
|
||||
dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
|
||||
left[r], 2);
|
||||
dst++;
|
||||
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
pred_ptr[c] = ROUND_POWER_OF_TWO(above_row[c - 1] +
|
||||
above_row[c] * 2 +
|
||||
above_row[c + 1], 2);
|
||||
pred_ptr += stride;
|
||||
dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2);
|
||||
dst += stride;
|
||||
|
||||
for (r = 1; r < bs; ++r) {
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
pred_ptr[c] = pred_ptr[-stride + c - 2];
|
||||
pred_ptr += stride;
|
||||
dst[c] = dst[-stride + c - 2];
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(d153)
|
||||
|
||||
static INLINE void v_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memcpy(pred_ptr, above_row, bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memcpy(dst, above, bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(v)
|
||||
|
||||
static INLINE void h_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memset(pred_ptr, left_col[r], bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memset(dst, left[r], bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(h)
|
||||
|
||||
static INLINE void tm_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r, c;
|
||||
int ytop_left = above_row[-1];
|
||||
int ytop_left = above[-1];
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
for (c = 0; c < bs; c++)
|
||||
pred_ptr[c] = clip_pixel(left_col[r] + above_row[c] - ytop_left);
|
||||
pred_ptr += stride;
|
||||
dst[c] = clip_pixel(left[r] + above[c] - ytop_left);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(tm)
|
||||
|
||||
static INLINE void dc_128_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memset(pred_ptr, 128, bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memset(dst, 128, bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(dc_128)
|
||||
|
||||
static INLINE void dc_left_predictor(uint8_t *pred_ptr, ptrdiff_t stride,
|
||||
int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
int i, r;
|
||||
int expected_dc = 128;
|
||||
int average = 0;
|
||||
const int count = bs;
|
||||
static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above,
|
||||
const uint8_t *left) {
|
||||
int i, r, expected_dc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
average += left_col[i];
|
||||
expected_dc = (average + (count >> 1)) / count;
|
||||
sum += left[i];
|
||||
expected_dc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memset(pred_ptr, expected_dc, bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memset(dst, expected_dc, bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(dc_left)
|
||||
|
||||
static INLINE void dc_top_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
int i, r;
|
||||
int expected_dc = 128;
|
||||
int average = 0;
|
||||
const int count = bs;
|
||||
static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int i, r, expected_dc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
average += above_row[i];
|
||||
expected_dc = (average + (count >> 1)) / count;
|
||||
sum += above[i];
|
||||
expected_dc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memset(pred_ptr, expected_dc, bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memset(dst, expected_dc, bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(dc_top)
|
||||
|
||||
static INLINE void dc_predictor(uint8_t *pred_ptr, ptrdiff_t stride, int bs,
|
||||
uint8_t *above_row, uint8_t *left_col) {
|
||||
int i, r;
|
||||
int expected_dc = 128;
|
||||
int average = 0;
|
||||
static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
|
||||
const uint8_t *above, const uint8_t *left) {
|
||||
int i, r, expected_dc, sum = 0;
|
||||
const int count = 2 * bs;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
average += above_row[i];
|
||||
for (i = 0; i < bs; i++)
|
||||
average += left_col[i];
|
||||
expected_dc = (average + (count >> 1)) / count;
|
||||
for (i = 0; i < bs; i++) {
|
||||
sum += above[i];
|
||||
sum += left[i];
|
||||
}
|
||||
|
||||
expected_dc = (sum + (count >> 1)) / count;
|
||||
|
||||
for (r = 0; r < bs; r++) {
|
||||
vpx_memset(pred_ptr, expected_dc, bs);
|
||||
pred_ptr += stride;
|
||||
vpx_memset(dst, expected_dc, bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
intra_pred_allsizes(dc)
|
||||
#undef intra_pred_allsizes
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *pred_ptr, ptrdiff_t stride,
|
||||
uint8_t *above_row, uint8_t *left_col);
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[VP9_INTRA_MODES][4];
|
||||
static intra_pred_fn pred[INTRA_MODES][4];
|
||||
static intra_pred_fn dc_pred[2][2][4];
|
||||
|
||||
static void init_intra_pred_fn_ptrs(void) {
|
||||
@@ -334,7 +297,7 @@ static void init_intra_pred_fn_ptrs(void) {
|
||||
|
||||
intra_pred_allsizes(pred[V_PRED], v);
|
||||
intra_pred_allsizes(pred[H_PRED], h);
|
||||
intra_pred_allsizes(pred[D27_PRED], d27);
|
||||
intra_pred_allsizes(pred[D207_PRED], d207);
|
||||
intra_pred_allsizes(pred[D45_PRED], d45);
|
||||
intra_pred_allsizes(pred[D63_PRED], d63);
|
||||
intra_pred_allsizes(pred[D117_PRED], d117);
|
||||
@@ -350,16 +313,17 @@ static void init_intra_pred_fn_ptrs(void) {
|
||||
#undef intra_pred_allsizes
|
||||
}
|
||||
|
||||
static void build_intra_predictors(uint8_t *src, int src_stride,
|
||||
uint8_t *pred_ptr, int stride,
|
||||
MB_PREDICTION_MODE mode, TX_SIZE txsz,
|
||||
static void build_intra_predictors(const uint8_t *ref, int ref_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
MB_PREDICTION_MODE mode, TX_SIZE tx_size,
|
||||
int up_available, int left_available,
|
||||
int right_available) {
|
||||
int i;
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, yabove_data, 128 + 16);
|
||||
uint8_t *above_row = yabove_data + 16;
|
||||
const int bs = 4 << txsz;
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
|
||||
uint8_t *above_row = above_data + 16;
|
||||
const uint8_t *const_above_row = above_row;
|
||||
const int bs = 4 << tx_size;
|
||||
|
||||
// 127 127 127 .. 127 127 127 127 127 127
|
||||
// 129 A B .. Y Z
|
||||
@@ -369,45 +333,46 @@ static void build_intra_predictors(uint8_t *src, int src_stride,
|
||||
// ..
|
||||
|
||||
once(init_intra_pred_fn_ptrs);
|
||||
|
||||
// left
|
||||
if (left_available) {
|
||||
for (i = 0; i < bs; i++)
|
||||
left_col[i] = src[i * src_stride - 1];
|
||||
left_col[i] = ref[i * ref_stride - 1];
|
||||
} else {
|
||||
vpx_memset(left_col, 129, bs);
|
||||
}
|
||||
|
||||
// above
|
||||
if (up_available) {
|
||||
uint8_t *above_ptr = src - src_stride;
|
||||
const uint8_t *above_ref = ref - ref_stride;
|
||||
if (bs == 4 && right_available && left_available) {
|
||||
above_row = above_ptr;
|
||||
const_above_row = above_ref;
|
||||
} else {
|
||||
vpx_memcpy(above_row, above_ptr, bs);
|
||||
vpx_memcpy(above_row, above_ref, bs);
|
||||
if (bs == 4 && right_available)
|
||||
vpx_memcpy(above_row + bs, above_ptr + bs, bs);
|
||||
vpx_memcpy(above_row + bs, above_ref + bs, bs);
|
||||
else
|
||||
vpx_memset(above_row + bs, above_row[bs - 1], bs);
|
||||
above_row[-1] = left_available ? above_ptr[-1] : 129;
|
||||
above_row[-1] = left_available ? above_ref[-1] : 129;
|
||||
}
|
||||
} else {
|
||||
vpx_memset(above_row, 127, bs * 2);
|
||||
above_row[-1] = 127;
|
||||
}
|
||||
|
||||
// predict
|
||||
if (mode == DC_PRED) {
|
||||
dc_pred[left_available][up_available][txsz](pred_ptr, stride,
|
||||
above_row, left_col);
|
||||
dc_pred[left_available][up_available][tx_size](dst, dst_stride,
|
||||
const_above_row, left_col);
|
||||
} else {
|
||||
pred[mode][txsz](pred_ptr, stride, above_row, left_col);
|
||||
pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_predict_intra_block(MACROBLOCKD *xd,
|
||||
int block_idx,
|
||||
int bwl_in,
|
||||
TX_SIZE tx_size,
|
||||
int mode,
|
||||
uint8_t *reference, int ref_stride,
|
||||
uint8_t *predictor, int pre_stride) {
|
||||
void vp9_predict_intra_block(MACROBLOCKD *xd, int block_idx, int bwl_in,
|
||||
TX_SIZE tx_size, int mode,
|
||||
const uint8_t *ref, int ref_stride,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
const int bwl = bwl_in - tx_size;
|
||||
const int wmask = (1 << bwl) - 1;
|
||||
const int have_top = (block_idx >> bwl) || xd->up_available;
|
||||
@@ -415,10 +380,6 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
|
||||
const int have_right = ((block_idx & wmask) != wmask);
|
||||
|
||||
assert(bwl >= 0);
|
||||
build_intra_predictors(reference, ref_stride,
|
||||
predictor, pre_stride,
|
||||
mode,
|
||||
tx_size,
|
||||
have_top, have_left,
|
||||
have_right);
|
||||
build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, tx_size,
|
||||
have_top, have_left, have_right);
|
||||
}
|
||||
|
||||
@@ -14,17 +14,8 @@
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
|
||||
MB_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
|
||||
int stride, int n,
|
||||
int tx, int ty);
|
||||
|
||||
MB_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, int block,
|
||||
uint8_t *ptr, int stride);
|
||||
|
||||
void vp9_predict_intra_block(MACROBLOCKD *xd,
|
||||
int block_idx,
|
||||
int bwl_in,
|
||||
TX_SIZE tx_size,
|
||||
int mode, uint8_t *ref, int ref_stride,
|
||||
uint8_t *predictor, int pre_stride);
|
||||
void vp9_predict_intra_block(MACROBLOCKD *xd, int block_idx, int bwl_in,
|
||||
TX_SIZE tx_size, int mode,
|
||||
const uint8_t *ref, int ref_stride,
|
||||
uint8_t *dst, int dst_stride);
|
||||
#endif // VP9_COMMON_VP9_RECONINTRA_H_
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user