Compare commits
605 Commits
experiment
...
pcs-2013
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d3bd96607 | ||
|
|
6c2082db71 | ||
|
|
3c4e9e341f | ||
|
|
771f3ef5ad | ||
|
|
e83ebc8992 | ||
|
|
825b7c301d | ||
|
|
691177842c | ||
|
|
d0308b7daa | ||
|
|
c52d85442c | ||
|
|
5491a1f33e | ||
|
|
c4627a9ff1 | ||
|
|
b6e2f9b752 | ||
|
|
6b78f11a03 | ||
|
|
dcab9896e8 | ||
|
|
03698aa6d8 | ||
|
|
df8e156432 | ||
|
|
5c66f6f5eb | ||
|
|
40047bef5d | ||
|
|
cd945c7bd9 | ||
|
|
195061feda | ||
|
|
c151bdd412 | ||
|
|
1a9d4fedf3 | ||
|
|
548671dd20 | ||
|
|
4906fe45e2 | ||
|
|
fd09be0984 | ||
|
|
e3c1f0880f | ||
|
|
509ba98938 | ||
|
|
7ddd9f7f27 | ||
|
|
c424c5e808 | ||
|
|
282704145d | ||
|
|
58a09c32c2 | ||
|
|
9e056fa094 | ||
|
|
d2a4ddf982 | ||
|
|
cbdcc215b3 | ||
|
|
d35e9a0c53 | ||
|
|
14916b0ca6 | ||
|
|
4e5d99ca72 | ||
|
|
bc1b089372 | ||
|
|
0f8805e086 | ||
|
|
d12a502ef9 | ||
|
|
7f13b33a78 | ||
|
|
1a2f4fd2f5 | ||
|
|
88251c86dc | ||
|
|
68b8d1ea0a | ||
|
|
821b987486 | ||
|
|
fad3d07df3 | ||
|
|
a76caa7ff4 | ||
|
|
777460329b | ||
|
|
7019e34c34 | ||
|
|
f6d7e3679c | ||
|
|
c66bfc70d1 | ||
|
|
a57912f893 | ||
|
|
8f229caf87 | ||
|
|
623e163f84 | ||
|
|
c288b94ab9 | ||
|
|
03df17070b | ||
|
|
6249a5b17e | ||
|
|
855d078f95 | ||
|
|
2b5bf7b8d8 | ||
|
|
716d37f8bf | ||
|
|
2ecd0dae1e | ||
|
|
7a59efe7f8 | ||
|
|
152fd59964 | ||
|
|
ec421b7810 | ||
|
|
31ceb6b13c | ||
|
|
11cf0c39c9 | ||
|
|
01d43aaa24 | ||
|
|
ab03c00504 | ||
|
|
eb506a6590 | ||
|
|
fb6e6cd24d | ||
|
|
d052117319 | ||
|
|
efc8638890 | ||
|
|
4ecdf26d9c | ||
|
|
0f9efe9e7a | ||
|
|
8e45778eaf | ||
|
|
8486741e15 | ||
|
|
8d0b712af6 | ||
|
|
8d50d766d4 | ||
|
|
a88f3110f8 | ||
|
|
b927620231 | ||
|
|
29815ca729 | ||
|
|
4ab01fb5f7 | ||
|
|
b3d3578ee4 | ||
|
|
7343681675 | ||
|
|
efbacc9f89 | ||
|
|
5df8b1d05b | ||
|
|
3bb773d03e | ||
|
|
cf688474ea | ||
|
|
33c7ed4478 | ||
|
|
11fe8ecf57 | ||
|
|
67a0a89272 | ||
|
|
ef6d82358d | ||
|
|
fff4caeac1 | ||
|
|
2ce70a15d2 | ||
|
|
da17ffa937 | ||
|
|
e81a3ede4c | ||
|
|
681fb22820 | ||
|
|
cfbc246d57 | ||
|
|
6903efa93d | ||
|
|
b6c5dbe9ef | ||
|
|
b10e6b2943 | ||
|
|
bd9c057433 | ||
|
|
ceaa3c37a9 | ||
|
|
3fab2125ff | ||
|
|
b1b4ba1bdd | ||
|
|
209c6cbf8f | ||
|
|
80d582239e | ||
|
|
db60c02c9e | ||
|
|
36d2794369 | ||
|
|
35830879db | ||
|
|
398ddafb62 | ||
|
|
931c34e955 | ||
|
|
3c465af2ab | ||
|
|
15a36a0a0d | ||
|
|
2b426969c3 | ||
|
|
b55170ce95 | ||
|
|
437f63144a | ||
|
|
253fd256bf | ||
|
|
794a7ccd78 | ||
|
|
da0ce28fe3 | ||
|
|
831d72ac5f | ||
|
|
e45f4a4a4c | ||
|
|
5b1dc1515f | ||
|
|
eda4e24c0d | ||
|
|
7755b9dada | ||
|
|
6b5490cf68 | ||
|
|
0c02bfcc2a | ||
|
|
b5242368f3 | ||
|
|
8266da1cd1 | ||
|
|
f9e2140cab | ||
|
|
64eff7f360 | ||
|
|
2b5670238b | ||
|
|
e2c92d1510 | ||
|
|
87a214c277 | ||
|
|
9cd14ea6ed | ||
|
|
49f5efa8d8 | ||
|
|
208658490c | ||
|
|
d445945a84 | ||
|
|
d0365c4a2c | ||
|
|
c7b7b1da86 | ||
|
|
682c27239f | ||
|
|
450cbfe53a | ||
|
|
12d57a9409 | ||
|
|
d571e4e785 | ||
|
|
57272e41dd | ||
|
|
35c5d79e6b | ||
|
|
b87696ac37 | ||
|
|
b1c58f57a7 | ||
|
|
30888742f4 | ||
|
|
71cfaaa689 | ||
|
|
9be0bb19df | ||
|
|
6037f17942 | ||
|
|
ff1ae7f713 | ||
|
|
fe533c9741 | ||
|
|
f24b9b4f87 | ||
|
|
f1a627e8a2 | ||
|
|
b6aa783d80 | ||
|
|
9ba08208d3 | ||
|
|
9bcd750565 | ||
|
|
24ad692572 | ||
|
|
b7a93578e5 | ||
|
|
bacb5925ff | ||
|
|
92a29c157f | ||
|
|
13c7715a75 | ||
|
|
e85eaf6acd | ||
|
|
db92356577 | ||
|
|
fbb62c6d2b | ||
|
|
8c2e5e4964 | ||
|
|
c701eeb59f | ||
|
|
838eae3961 | ||
|
|
071395de6a | ||
|
|
a517343ca3 | ||
|
|
54c87058bf | ||
|
|
d11221f433 | ||
|
|
14330abdc6 | ||
|
|
e023e0d93b | ||
|
|
a6a00fc6a3 | ||
|
|
78fbb10642 | ||
|
|
bb5e2bf86a | ||
|
|
e51e7a0e8d | ||
|
|
39c7b01d3c | ||
|
|
24df77e951 | ||
|
|
44b708b4c4 | ||
|
|
f363aa3a15 | ||
|
|
b0211e7edf | ||
|
|
79af591368 | ||
|
|
014acfa2af | ||
|
|
a23c2a9e7b | ||
|
|
2a233dd31d | ||
|
|
1600707d35 | ||
|
|
cda802ac86 | ||
|
|
0fcb0e17bc | ||
|
|
a7b7f94ae8 | ||
|
|
9d901217c6 | ||
|
|
bb30fff978 | ||
|
|
98cf0145b1 | ||
|
|
72fd127f8c | ||
|
|
245ca04bab | ||
|
|
a0fcbcfa5f | ||
|
|
85fd8bdb01 | ||
|
|
c437bbcde0 | ||
|
|
88c8ff2508 | ||
|
|
a783da80e7 | ||
|
|
2b3bfaa9ce | ||
|
|
84758960db | ||
|
|
90a52694f3 | ||
|
|
4bd171152d | ||
|
|
f582aa6eda | ||
|
|
5b23666e67 | ||
|
|
cbf394574d | ||
|
|
23e1a29fc7 | ||
|
|
eeae6f946d | ||
|
|
cb50dc7f33 | ||
|
|
3b01778450 | ||
|
|
c73e4412b3 | ||
|
|
9ae985b23a | ||
|
|
2d58761993 | ||
|
|
e8a967d960 | ||
|
|
c4826c5941 | ||
|
|
0c3038234d | ||
|
|
040ffb6326 | ||
|
|
5d8642354e | ||
|
|
8fc95a1b11 | ||
|
|
1407cf8588 | ||
|
|
9c9a3b2775 | ||
|
|
324ebb704a | ||
|
|
86fb12b600 | ||
|
|
25655e5794 | ||
|
|
182366c736 | ||
|
|
9ee9918dad | ||
|
|
e7f2aa0fb8 | ||
|
|
cddde51ec5 | ||
|
|
4d018be950 | ||
|
|
66755abff4 | ||
|
|
7fb42d909e | ||
|
|
6a501462f8 | ||
|
|
b964646756 | ||
|
|
23845947c4 | ||
|
|
d09abfa9f7 | ||
|
|
d22a504d11 | ||
|
|
69fe840ec4 | ||
|
|
ac6093d179 | ||
|
|
079183c1a8 | ||
|
|
65fe7d7605 | ||
|
|
db487188b1 | ||
|
|
321c2fd178 | ||
|
|
cb24406da5 | ||
|
|
5d93feb6ad | ||
|
|
93ffd371eb | ||
|
|
3d22d3ae0c | ||
|
|
09830aa0ea | ||
|
|
0607abc3dd | ||
|
|
f6bc783d63 | ||
|
|
939791a129 | ||
|
|
2873d5608b | ||
|
|
87bc705fb5 | ||
|
|
f4a6f936b5 | ||
|
|
4f660cc018 | ||
|
|
901c495482 | ||
|
|
563c273738 | ||
|
|
fc5ec206a7 | ||
|
|
37705a3bc5 | ||
|
|
20abe595ec | ||
|
|
8f92a7efdb | ||
|
|
01b35c3c16 | ||
|
|
18c780a0ff | ||
|
|
c1913c9cf4 | ||
|
|
54a03e20dd | ||
|
|
132ef4295a | ||
|
|
b19126b291 | ||
|
|
740acd6891 | ||
|
|
65c2444e15 | ||
|
|
9faa7e8186 | ||
|
|
e378566060 | ||
|
|
09bc942b47 | ||
|
|
fb550ee620 | ||
|
|
d1268c5921 | ||
|
|
f15cdc7451 | ||
|
|
e378a89bd6 | ||
|
|
afffa3d9b0 | ||
|
|
dae17734ec | ||
|
|
e4e864586c | ||
|
|
3476404912 | ||
|
|
736114f44b | ||
|
|
170be56a74 | ||
|
|
4ad52a8f18 | ||
|
|
1c263d6918 | ||
|
|
2156ccaa4a | ||
|
|
458c2833c0 | ||
|
|
9fc3d32a50 | ||
|
|
9158b8956f | ||
|
|
7bc775d93d | ||
|
|
2e4ca9d1a5 | ||
|
|
e8feb2932f | ||
|
|
e5deed06c0 | ||
|
|
1ee66933c1 | ||
|
|
01c4e04424 | ||
|
|
e494df1a37 | ||
|
|
72872d3d8c | ||
|
|
3c05bda058 | ||
|
|
3b8614a8f6 | ||
|
|
890eee3b47 | ||
|
|
76a437a31b | ||
|
|
872c6d85c0 | ||
|
|
bb2313db28 | ||
|
|
9fd2767200 | ||
|
|
c3c21e3c14 | ||
|
|
79401542f7 | ||
|
|
532179e845 | ||
|
|
d6606d1ea7 | ||
|
|
5dda1d2394 | ||
|
|
1cf2272347 | ||
|
|
49317cddad | ||
|
|
010c0ad0eb | ||
|
|
948aaab4ca | ||
|
|
3cf46fa591 | ||
|
|
0ca7855f67 | ||
|
|
2b9baca4f0 | ||
|
|
e22bb0dc8e | ||
|
|
e326cecf18 | ||
|
|
1d44fc0c49 | ||
|
|
bc50961a74 | ||
|
|
ec4b2742e7 | ||
|
|
c86c5443eb | ||
|
|
1f4bf79d65 | ||
|
|
b6dbf11ed5 | ||
|
|
e83e8f0426 | ||
|
|
ee961599e1 | ||
|
|
d765df2796 | ||
|
|
aa05321262 | ||
|
|
924d74516a | ||
|
|
e80bf802a9 | ||
|
|
abff678866 | ||
|
|
aaa7b44460 | ||
|
|
22dc946a7e | ||
|
|
b71807082c | ||
|
|
db20806710 | ||
|
|
b62ddd5f8b | ||
|
|
e02dc84c1a | ||
|
|
851a2fd72c | ||
|
|
eb7acb5524 | ||
|
|
1d3f94efe2 | ||
|
|
7d058ef86c | ||
|
|
f1560ce035 | ||
|
|
a93992e725 | ||
|
|
3a679e56b2 | ||
|
|
ce04b1aa62 | ||
|
|
7b95f9bf39 | ||
|
|
ba10aed86d | ||
|
|
12e5931a9a | ||
|
|
f77c6973a1 | ||
|
|
f389ca2acc | ||
|
|
bfebe7e927 | ||
|
|
78e670fcf8 | ||
|
|
2d6aadd7e2 | ||
|
|
45125ee573 | ||
|
|
9482c07953 | ||
|
|
3e43e49ffd | ||
|
|
44b7854c84 | ||
|
|
36e9b82080 | ||
|
|
ba8fc71979 | ||
|
|
657ee2d719 | ||
|
|
69384f4fad | ||
|
|
bbb490f6a3 | ||
|
|
a5cb05c45d | ||
|
|
242460cb66 | ||
|
|
af13fbb70f | ||
|
|
b25589c6bb | ||
|
|
4505e8accb | ||
|
|
aa823f8667 | ||
|
|
6c5433c836 | ||
|
|
642696b678 | ||
|
|
45870619f3 | ||
|
|
4681197a58 | ||
|
|
5eed6e2224 | ||
|
|
166dc85bed | ||
|
|
66ccf5ddcf | ||
|
|
8b970da40d | ||
|
|
b19babe5e6 | ||
|
|
55b5a68d72 | ||
|
|
c8ba8c513c | ||
|
|
2c6ba737f8 | ||
|
|
5724b7e292 | ||
|
|
50ee61db4c | ||
|
|
480dd8ffbe | ||
|
|
e6c435b506 | ||
|
|
7194da2167 | ||
|
|
13930cf569 | ||
|
|
cd2cc27af1 | ||
|
|
8e04257bc5 | ||
|
|
78debf246b | ||
|
|
fb481913f0 | ||
|
|
11e3ac62a5 | ||
|
|
21d8e8590b | ||
|
|
656632b776 | ||
|
|
3f10831308 | ||
|
|
1c159c470a | ||
|
|
bef320aa07 | ||
|
|
b85367a608 | ||
|
|
aa5b67add0 | ||
|
|
f76f52df61 | ||
|
|
735b3a710a | ||
|
|
9655c2c7a6 | ||
|
|
33104cdd42 | ||
|
|
711aff9d9d | ||
|
|
d843ac5132 | ||
|
|
84f3b76e1c | ||
|
|
53f6f8ac93 | ||
|
|
4205d79273 | ||
|
|
4082bf9d7c | ||
|
|
604022d40b | ||
|
|
335b1d360b | ||
|
|
3c42657207 | ||
|
|
40ae02c247 | ||
|
|
13eed79c77 | ||
|
|
09858c239b | ||
|
|
a5726ac453 | ||
|
|
640dea4d9d | ||
|
|
8adc20ce35 | ||
|
|
da9a6ac9e7 | ||
|
|
01a37177d1 | ||
|
|
610642c130 | ||
|
|
8b810c7a78 | ||
|
|
f39bf458e5 | ||
|
|
94bfbaa84e | ||
|
|
96a1a59d21 | ||
|
|
a33f178491 | ||
|
|
359b571448 | ||
|
|
596c51087b | ||
|
|
cb05a451c6 | ||
|
|
64c0f5c592 | ||
|
|
fcb890d751 | ||
|
|
ccb6bdca75 | ||
|
|
42ab401fd3 | ||
|
|
85640f1c9d | ||
|
|
4172d7c584 | ||
|
|
6167355309 | ||
|
|
be60924f29 | ||
|
|
c43da352ab | ||
|
|
048ccb2849 | ||
|
|
3286abd82e | ||
|
|
687891238c | ||
|
|
a2f7619860 | ||
|
|
ac12f3926b | ||
|
|
2f1a0a0e2c | ||
|
|
0d8723f8d5 | ||
|
|
27a984fbd3 | ||
|
|
a3ae4c87fd | ||
|
|
ce28d0ca89 | ||
|
|
5b63963573 | ||
|
|
ae455fabd8 | ||
|
|
90027be251 | ||
|
|
7f814c6bf8 | ||
|
|
27de4fe922 | ||
|
|
62a2cd9ed2 | ||
|
|
381d3b8b7d | ||
|
|
d19ac4b66d | ||
|
|
37cda6dc4c | ||
|
|
1bf1428654 | ||
|
|
246381faf2 | ||
|
|
5826407f2a | ||
|
|
5baf510f74 | ||
|
|
039b0c4c9e | ||
|
|
2ffe64ad5c | ||
|
|
bb64c9a355 | ||
|
|
be5dc2321b | ||
|
|
f167433d9c | ||
|
|
e8923fe492 | ||
|
|
2612b99cc7 | ||
|
|
d8286dd56d | ||
|
|
c4048dbdd3 | ||
|
|
f70330a906 | ||
|
|
569ca37d09 | ||
|
|
3275ad701a | ||
|
|
82d4d9a008 | ||
|
|
31c97c2bdf | ||
|
|
5dc0b309ab | ||
|
|
2e3478a593 | ||
|
|
5a1a269f67 | ||
|
|
b34ce04378 | ||
|
|
f67919ae86 | ||
|
|
26e5b5e25d | ||
|
|
367cb10fcf | ||
|
|
1462433370 | ||
|
|
d514b778c4 | ||
|
|
65aa89af1a | ||
|
|
bdc785e976 | ||
|
|
df0715204c | ||
|
|
60ecd60c9a | ||
|
|
bba68342ce | ||
|
|
79f4c1b9a4 | ||
|
|
3e340880a8 | ||
|
|
4fa93bcef4 | ||
|
|
afd9bd3e3c | ||
|
|
5e80a49307 | ||
|
|
d5bec522da | ||
|
|
9451e8d37e | ||
|
|
939b1e4a8c | ||
|
|
a9aa7d07d0 | ||
|
|
63e140eaa7 | ||
|
|
68369ca897 | ||
|
|
c2ff1882ff | ||
|
|
ca983f34f7 | ||
|
|
bb3b817c1e | ||
|
|
6f4fa44c42 | ||
|
|
81d7bd50f5 | ||
|
|
67e53716e0 | ||
|
|
89a1fcf884 | ||
|
|
cefaaa86c7 | ||
|
|
b7616e387e | ||
|
|
b0646f9e98 | ||
|
|
4d73416099 | ||
|
|
24856b6abc | ||
|
|
ec01f52ffa | ||
|
|
1a3641d91b | ||
|
|
adfc54a464 | ||
|
|
eb2fbea621 | ||
|
|
ab21378a2e | ||
|
|
20395189cd | ||
|
|
8cb09719a3 | ||
|
|
39f42c8713 | ||
|
|
cf6beea661 | ||
|
|
bb072000e8 | ||
|
|
f2c073efaa | ||
|
|
0f1deccf86 | ||
|
|
635ba269be | ||
|
|
26fead7ecf | ||
|
|
54979b4350 | ||
|
|
3526f1cd5e | ||
|
|
fc50477082 | ||
|
|
bcc8e9d9c6 | ||
|
|
ecb78b3e0c | ||
|
|
7e0f88b6be | ||
|
|
8105ce6dce | ||
|
|
dc70fbe42d | ||
|
|
5459f68d71 | ||
|
|
8e35263bed | ||
|
|
902f9c7cbd | ||
|
|
39fe235032 | ||
|
|
2c7ae8c29a | ||
|
|
4417c04531 | ||
|
|
4cabbca4ce | ||
|
|
32006aadd8 | ||
|
|
78136edcdc | ||
|
|
14cc7b319f | ||
|
|
b89eef8f82 | ||
|
|
b214cd0dab | ||
|
|
98e3d73e16 | ||
|
|
1a5e6ffb02 | ||
|
|
9d5885b0ab | ||
|
|
c66320b3e4 | ||
|
|
bd1bc1d303 | ||
|
|
9a31d05e24 | ||
|
|
1aedfc992a | ||
|
|
76d166e413 | ||
|
|
a72e269318 | ||
|
|
c3b5ef7600 | ||
|
|
3984b41c87 | ||
|
|
8b0e6035a2 | ||
|
|
ad7021dd6c | ||
|
|
097046ae28 | ||
|
|
b84dc949c8 | ||
|
|
3c43ec206c | ||
|
|
58b07a6f9d | ||
|
|
67fe9d17cb | ||
|
|
e7c5ca8983 | ||
|
|
ef101af8ae | ||
|
|
f1559bdeaf | ||
|
|
f295774d43 | ||
|
|
125146034e | ||
|
|
cd0629fe68 | ||
|
|
ff7df102d9 | ||
|
|
816d6c989c | ||
|
|
8ffe85ad00 | ||
|
|
ace93a175d | ||
|
|
fa0cd61087 | ||
|
|
41251ae558 | ||
|
|
0eef1acbef | ||
|
|
12eb2d0267 | ||
|
|
6ec2b85bad | ||
|
|
6a7a4ba753 | ||
|
|
c7c9901845 | ||
|
|
e3c92bd21e | ||
|
|
6fd2407035 | ||
|
|
6a8ec3eac2 | ||
|
|
bc484ebf06 | ||
|
|
ee40e1a637 | ||
|
|
2158909fc3 | ||
|
|
9e3bcdd135 | ||
|
|
47fad4c2d7 | ||
|
|
ac008f0030 | ||
|
|
1ba91a84ad | ||
|
|
83ee80c045 | ||
|
|
aae6a4c895 | ||
|
|
6bfcce8c7a | ||
|
|
61c33d0ad5 | ||
|
|
a766d8918e | ||
|
|
82d7c6fb3c | ||
|
|
1492698ed3 | ||
|
|
debb9c68c8 | ||
|
|
8db2675b97 | ||
|
|
ea2348ca29 | ||
|
|
78182538d6 | ||
|
|
1c552e79bd | ||
|
|
5edc65d00d |
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
@@ -13,20 +13,20 @@
|
||||
verbose=0
|
||||
set -- $*
|
||||
for i; do
|
||||
if [ "$i" == "-o" ]; then
|
||||
if [ "$i" = "-o" ]; then
|
||||
on_of=1
|
||||
elif [ "$i" == "-v" ]; then
|
||||
elif [ "$i" = "-v" ]; then
|
||||
verbose=1
|
||||
elif [ "$i" == "-g" ]; then
|
||||
elif [ "$i" = "-g" ]; then
|
||||
args="${args} --debug"
|
||||
elif [ "$on_of" == "1" ]; then
|
||||
elif [ "$on_of" = "1" ]; then
|
||||
outfile=$i
|
||||
on_of=0
|
||||
elif [ -f "$i" ]; then
|
||||
infiles="$infiles $i"
|
||||
elif [ "${i:0:2}" == "-l" ]; then
|
||||
elif [ "${i#-l}" != "$i" ]; then
|
||||
libs="$libs ${i#-l}"
|
||||
elif [ "${i:0:2}" == "-L" ]; then
|
||||
elif [ "${i#-L}" != "$i" ]; then
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
else
|
||||
args="${args} ${i}"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## configure.sh
|
||||
##
|
||||
@@ -198,11 +198,11 @@ add_extralibs() {
|
||||
#
|
||||
# Boolean Manipulation Functions
|
||||
#
|
||||
enable(){
|
||||
enable_feature(){
|
||||
set_all yes $*
|
||||
}
|
||||
|
||||
disable(){
|
||||
disable_feature(){
|
||||
set_all no $*
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ soft_enable() {
|
||||
for var in $*; do
|
||||
if ! disabled $var; then
|
||||
log_echo " enabling $var"
|
||||
enable $var
|
||||
enable_feature $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -228,7 +228,7 @@ soft_disable() {
|
||||
for var in $*; do
|
||||
if ! enabled $var; then
|
||||
log_echo " disabling $var"
|
||||
disable $var
|
||||
disable_feature $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -251,10 +251,10 @@ tolower(){
|
||||
# Temporary File Functions
|
||||
#
|
||||
source_path=${0%/*}
|
||||
enable source_path_used
|
||||
enable_feature source_path_used
|
||||
if test -z "$source_path" -o "$source_path" = "." ; then
|
||||
source_path="`pwd`"
|
||||
disable source_path_used
|
||||
disable_feature source_path_used
|
||||
fi
|
||||
|
||||
if test ! -z "$TMPDIR" ; then
|
||||
@@ -264,12 +264,13 @@ elif test ! -z "$TEMPDIR" ; then
|
||||
else
|
||||
TMPDIRx="/tmp"
|
||||
fi
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
|
||||
RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RAND}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RAND}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RAND}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RAND}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RAND}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm"
|
||||
|
||||
clean_temp_files() {
|
||||
rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
|
||||
@@ -316,8 +317,8 @@ check_header(){
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable $var
|
||||
check_cpp "$@" <<EOF && enable $var
|
||||
disable_feature $var
|
||||
check_cpp "$@" <<EOF && enable_feature $var
|
||||
#include "$header"
|
||||
int x;
|
||||
EOF
|
||||
@@ -479,7 +480,7 @@ process_common_cmdline() {
|
||||
for opt in "$@"; do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--child) enable child
|
||||
--child) enable_feature child
|
||||
;;
|
||||
--log*)
|
||||
logging="$optval"
|
||||
@@ -491,7 +492,7 @@ process_common_cmdline() {
|
||||
;;
|
||||
--target=*) toolchain="${toolchain:-${optval}}"
|
||||
;;
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable force_toolchain
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable_feature force_toolchain
|
||||
;;
|
||||
--cpu)
|
||||
;;
|
||||
@@ -511,7 +512,7 @@ process_common_cmdline() {
|
||||
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
|
||||
die_unknown $opt
|
||||
fi
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
;;
|
||||
--require-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
@@ -523,11 +524,11 @@ process_common_cmdline() {
|
||||
;;
|
||||
--force-enable-?*|--force-disable-?*)
|
||||
eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'`
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
;;
|
||||
--libc=*)
|
||||
[ -d "${optval}" ] || die "Not a directory: ${optval}"
|
||||
disable builtin_libc
|
||||
disable_feature builtin_libc
|
||||
alt_libc="${optval}"
|
||||
;;
|
||||
--as=*)
|
||||
@@ -696,13 +697,13 @@ process_common_toolchain() {
|
||||
|
||||
# Mark the specific ISA requested as enabled
|
||||
soft_enable ${tgt_isa}
|
||||
enable ${tgt_os}
|
||||
enable ${tgt_cc}
|
||||
enable_feature ${tgt_os}
|
||||
enable_feature ${tgt_cc}
|
||||
|
||||
# Enable the architecture family
|
||||
case ${tgt_isa} in
|
||||
arm*) enable arm;;
|
||||
mips*) enable mips;;
|
||||
arm*) enable_feature arm;;
|
||||
mips*) enable_feature mips;;
|
||||
esac
|
||||
|
||||
# PIC is probably what we want when building shared libs
|
||||
@@ -765,7 +766,7 @@ process_common_toolchain() {
|
||||
case ${toolchain} in
|
||||
sparc-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
*-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
@@ -790,7 +791,7 @@ process_common_toolchain() {
|
||||
;;
|
||||
armv5te)
|
||||
soft_enable edsp
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -805,7 +806,7 @@ process_common_toolchain() {
|
||||
arch_int=${arch_int%%te}
|
||||
check_add_asflags --defsym ARCHITECTURE=${arch_int}
|
||||
tune_cflags="-mtune="
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if [ -z "${float_abi}" ]; then
|
||||
check_cpp <<EOF && float_abi=hard || float_abi=softfp
|
||||
#ifndef __ARM_PCS_VFP
|
||||
@@ -842,8 +843,8 @@ EOF
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
|
||||
AS_SFX=.s
|
||||
msvs_arch_dir=arm-msvs
|
||||
disable multithread
|
||||
disable unit_tests
|
||||
disable_feature multithread
|
||||
disable_feature unit_tests
|
||||
;;
|
||||
rvct)
|
||||
CC=armcc
|
||||
@@ -855,7 +856,7 @@ EOF
|
||||
tune_cflags="--cpu="
|
||||
tune_asflags="--cpu="
|
||||
if [ -z "${tune_cpu}" ]; then
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if enabled neon
|
||||
then
|
||||
check_add_cflags --fpu=softvfp+vfpv3
|
||||
@@ -880,8 +881,8 @@ EOF
|
||||
|
||||
case ${tgt_os} in
|
||||
none*)
|
||||
disable multithread
|
||||
disable os_support
|
||||
disable_feature multithread
|
||||
disable_feature os_support
|
||||
;;
|
||||
|
||||
android*)
|
||||
@@ -913,9 +914,9 @@ EOF
|
||||
# Cortex-A8 implementations (NDK Dev Guide)
|
||||
add_ldflags "-Wl,--fix-cortex-a8"
|
||||
|
||||
enable pic
|
||||
enable_feature pic
|
||||
soft_enable realtime_only
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
soft_enable runtime_cpu_detect
|
||||
fi
|
||||
if enabled runtime_cpu_detect; then
|
||||
@@ -969,7 +970,7 @@ EOF
|
||||
;;
|
||||
|
||||
linux*)
|
||||
enable linux
|
||||
enable_feature linux
|
||||
if enabled rvct; then
|
||||
# Check if we have CodeSourcery GCC in PATH. Needed for
|
||||
# libraries
|
||||
@@ -1000,14 +1001,14 @@ EOF
|
||||
tune_cflags="-mtune="
|
||||
if enabled dspr2; then
|
||||
check_add_cflags -mips32r2 -mdspr2
|
||||
disable fast_unaligned
|
||||
disable_feature fast_unaligned
|
||||
fi
|
||||
check_add_cflags -march=${tgt_isa}
|
||||
check_add_asflags -march=${tgt_isa}
|
||||
check_add_asflags -KPIC
|
||||
;;
|
||||
ppc*)
|
||||
enable ppc
|
||||
enable_feature ppc
|
||||
bits=${tgt_isa##ppc}
|
||||
link_with_cc=gcc
|
||||
setup_gnu_toolchain
|
||||
@@ -1061,7 +1062,7 @@ EOF
|
||||
setup_gnu_toolchain
|
||||
add_cflags -use-msasm -use-asm
|
||||
add_ldflags -i-static
|
||||
enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2
|
||||
enabled x86_64 && add_cflags -ipo -static -O3
|
||||
enabled x86_64 && AR=xiar
|
||||
case ${tune_cpu} in
|
||||
atom*)
|
||||
@@ -1155,7 +1156,7 @@ EOF
|
||||
;;
|
||||
universal*|*-gcc|generic-gnu)
|
||||
link_with_cc=gcc
|
||||
enable gcc
|
||||
enable_feature gcc
|
||||
setup_gnu_toolchain
|
||||
;;
|
||||
esac
|
||||
@@ -1191,7 +1192,7 @@ EOF
|
||||
|
||||
# default use_x86inc to yes if pic is no or 64bit or we are not on darwin
|
||||
echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
|
||||
if [ ${tgt_isa} = x86_64 -o ! "$pic" == "yes" -o ! ${tgt_os:0:6} = darwin ]; then
|
||||
if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o "${tgt_os#darwin}" = "${tgt_os}" ]; then
|
||||
soft_enable use_x86inc
|
||||
fi
|
||||
|
||||
@@ -1204,14 +1205,14 @@ EOF
|
||||
enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
|
||||
|
||||
# Check for strip utility variant
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip
|
||||
|
||||
# Try to determine target endianness
|
||||
check_cc <<EOF
|
||||
unsigned int e = 'O'<<24 | '2'<<16 | 'B'<<8 | 'E';
|
||||
EOF
|
||||
[ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable_feature big_endian
|
||||
|
||||
# Try to find which inline keywords are supported
|
||||
check_cc <<EOF && INLINE="inline"
|
||||
@@ -1236,7 +1237,7 @@ EOF
|
||||
if enabled dspr2; then
|
||||
if enabled big_endian; then
|
||||
echo "dspr2 optimizations are available only for little endian platforms"
|
||||
disable dspr2
|
||||
disable_feature dspr2
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
@@ -1287,8 +1288,8 @@ print_config_h() {
|
||||
|
||||
print_webm_license() {
|
||||
local destination=$1
|
||||
local prefix=$2
|
||||
local suffix=$3
|
||||
local prefix="$2"
|
||||
local suffix="$3"
|
||||
shift 3
|
||||
cat <<EOF > ${destination}
|
||||
${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix}
|
||||
@@ -1309,7 +1310,7 @@ process_detect() {
|
||||
true;
|
||||
}
|
||||
|
||||
enable logging
|
||||
enable_feature logging
|
||||
logfile="config.log"
|
||||
self=$0
|
||||
process() {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
||||
@@ -290,9 +290,11 @@ static void setup_rtcd_internal(void)
|
||||
{
|
||||
$(set_function_pointers c $ALL_ARCHS)
|
||||
#if HAVE_DSPR2
|
||||
#if CONFIG_VP8
|
||||
void dsputil_static_init();
|
||||
dsputil_static_init();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
$(common_bottom)
|
||||
|
||||
@@ -47,7 +47,7 @@ sub FixThumbInstructions($$)
|
||||
# this is used, it's used for two subsequent load instructions,
|
||||
# where a hand-written version of it could merge two subsequent
|
||||
# add and sub instructions.
|
||||
s/^(\s*)((ldr|str)(ne)?)(\s+)(r\d+),\s*\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6, [$7]\n$1add$4$5$7, $7, $8/g;
|
||||
s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g;
|
||||
|
||||
# Convert register post indexing to a separate add instruction.
|
||||
# This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
||||
85
configure
vendored
85
configure
vendored
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
##
|
||||
## configure
|
||||
##
|
||||
@@ -38,6 +38,7 @@ Advanced options:
|
||||
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
|
||||
${toggle_mem_tracker} track memory usage
|
||||
${toggle_postproc} postprocessing
|
||||
${toggle_vp9_postproc} vp9 specific postprocessing
|
||||
${toggle_multithread} multithreaded encoding and decoding
|
||||
${toggle_spatial_resampling} spatial sampling (scaling) support
|
||||
${toggle_realtime_only} enable this option while building for real-time encoding
|
||||
@@ -153,7 +154,7 @@ all_targets="libs examples docs"
|
||||
|
||||
# all targets available are enabled, by default.
|
||||
for t in ${all_targets}; do
|
||||
[ -f ${source_path}/${t}.mk ] && enable ${t}
|
||||
[ -f ${source_path}/${t}.mk ] && enable_feature ${t}
|
||||
done
|
||||
|
||||
# check installed doxygen version
|
||||
@@ -164,30 +165,30 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
||||
doxy_minor=${doxy_version%%.*}
|
||||
doxy_patch=${doxy_version##*.}
|
||||
|
||||
[ $doxy_major -gt 1 ] && enable doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
|
||||
[ $doxy_major -gt 1 ] && enable_feature doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable_feature doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
|
||||
fi
|
||||
|
||||
# install everything except the sources, by default. sources will have
|
||||
# to be enabled when doing dist builds, since that's no longer a common
|
||||
# case.
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
|
||||
enable install_bins
|
||||
enable install_libs
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
|
||||
enable_feature install_bins
|
||||
enable_feature install_libs
|
||||
|
||||
enable static
|
||||
enable optimizations
|
||||
enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
enable temporal_denoising
|
||||
enable_feature static
|
||||
enable_feature optimizations
|
||||
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable_feature md5
|
||||
enable_feature spatial_resampling
|
||||
enable_feature multithread
|
||||
enable_feature os_support
|
||||
enable_feature temporal_denoising
|
||||
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
[ -d ${source_path}/../include ] && enable_feature alt_tree_layout
|
||||
for d in vp8 vp9; do
|
||||
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
|
||||
[ -d ${source_path}/${d} ] && disable_feature alt_tree_layout;
|
||||
done
|
||||
|
||||
if ! enabled alt_tree_layout; then
|
||||
@@ -200,10 +201,10 @@ else
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable_feature vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable_feature vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable_feature vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable_feature vp9_decoder
|
||||
|
||||
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
|
||||
fi
|
||||
@@ -279,6 +280,7 @@ CONFIG_LIST="
|
||||
dc_recon
|
||||
runtime_cpu_detect
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -333,6 +335,7 @@ CMDLINE_SELECT="
|
||||
dequant_tokens
|
||||
dc_recon
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -358,12 +361,12 @@ process_cmdline() {
|
||||
for opt do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
|
||||
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
|
||||
if enabled experimental; then
|
||||
$action $option
|
||||
${action}_feature $option
|
||||
else
|
||||
log_echo "Ignoring $opt -- not in experimental mode."
|
||||
fi
|
||||
@@ -384,8 +387,8 @@ post_process_cmdline() {
|
||||
# If the codec family is enabled, enable all components of that family.
|
||||
log_echo "Configuring selected codecs"
|
||||
for c in ${CODECS}; do
|
||||
disabled ${c%%_*} && disable ${c}
|
||||
enabled ${c%%_*} && enable ${c}
|
||||
disabled ${c%%_*} && disable_feature ${c}
|
||||
enabled ${c%%_*} && enable_feature ${c}
|
||||
done
|
||||
|
||||
# Enable all detected codecs, if they haven't been disabled
|
||||
@@ -393,12 +396,12 @@ post_process_cmdline() {
|
||||
|
||||
# Enable the codec family if any component of that family is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled $c && enable ${c%_*}
|
||||
enabled $c && enable_feature ${c%_*}
|
||||
done
|
||||
|
||||
# Set the {en,de}coders variable if any algorithm in that class is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
enabled ${c} && enable_feature ${c##*_}s
|
||||
done
|
||||
}
|
||||
|
||||
@@ -438,7 +441,7 @@ process_targets() {
|
||||
done
|
||||
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
|
||||
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
|
||||
! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
|
||||
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
|
||||
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
|
||||
@@ -508,13 +511,13 @@ process_detect() {
|
||||
fi
|
||||
if [ -z "$CC" ] || enabled external_build; then
|
||||
echo "Bypassing toolchain for environment detection."
|
||||
enable external_build
|
||||
enable_feature external_build
|
||||
check_header() {
|
||||
log fake_check_header "$@"
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable $var
|
||||
disable_feature $var
|
||||
# Headers common to all environments
|
||||
case $header in
|
||||
stdio.h)
|
||||
@@ -526,7 +529,7 @@ process_detect() {
|
||||
[ -f "${d##-I}/$header" ] && result=true && break
|
||||
done
|
||||
${result:-true}
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
|
||||
# Specialize windows and POSIX environments.
|
||||
case $toolchain in
|
||||
@@ -534,7 +537,7 @@ process_detect() {
|
||||
case $header-$toolchain in
|
||||
stdint*-gcc) true;;
|
||||
*) false;;
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
;;
|
||||
*)
|
||||
case $header in
|
||||
@@ -543,7 +546,7 @@ process_detect() {
|
||||
sys/mman.h) true;;
|
||||
unistd.h) true;;
|
||||
*) false;;
|
||||
esac && enable $var
|
||||
esac && enable_feature $var
|
||||
esac
|
||||
enabled $var
|
||||
}
|
||||
@@ -561,7 +564,7 @@ EOF
|
||||
check_header sys/mman.h
|
||||
check_header unistd.h # for sysconf(3) and friends.
|
||||
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
|
||||
}
|
||||
|
||||
process_toolchain() {
|
||||
@@ -643,14 +646,18 @@ process_toolchain() {
|
||||
# ccache only really works on gcc toolchains
|
||||
enabled gcc || soft_disable ccache
|
||||
if enabled mips; then
|
||||
enable dequant_tokens
|
||||
enable dc_recon
|
||||
enable_feature dequant_tokens
|
||||
enable_feature dc_recon
|
||||
fi
|
||||
|
||||
if enabled internal_stats; then
|
||||
enable_feature vp9_postproc
|
||||
fi
|
||||
|
||||
# Enable the postbuild target if building for visual studio.
|
||||
case "$tgt_cc" in
|
||||
vs*) enable msvs
|
||||
enable solution
|
||||
vs*) enable_feature msvs
|
||||
enable_feature solution
|
||||
vs_version=${tgt_cc##vs}
|
||||
case $vs_version in
|
||||
[789])
|
||||
|
||||
@@ -49,6 +49,9 @@ vpxenc.DESCRIPTION = Full featured encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
|
||||
UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c
|
||||
vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
|
||||
|
||||
# Clean up old ivfenc, ivfdec binaries.
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
||||
@@ -105,7 +105,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
|
||||
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
|
||||
int size;
|
||||
for (size = 4; size > 1; size--) {
|
||||
if (bin & 0x000000ff << ((size - 1) * 8))
|
||||
if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
|
||||
break;
|
||||
}
|
||||
Ebml_WriteID(glob, class_id);
|
||||
|
||||
2
libs.mk
2
libs.mk
@@ -395,7 +395,7 @@ libvpx_test_srcs.txt:
|
||||
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
|
||||
CLEAN-OBJS += libvpx_test_srcs.txt
|
||||
|
||||
$(LIBVPX_TEST_DATA):
|
||||
$(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
|
||||
@echo " [DOWNLOAD] $@"
|
||||
$(qexec)trap 'rm -f $@' INT TERM &&\
|
||||
curl -L -o $@ $(call libvpx_test_data_url,$(@F))
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#define LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#ifndef TEST_ACM_RANDOM_H_
|
||||
#define TEST_ACM_RANDOM_H_
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
@@ -59,4 +59,4 @@ class ACMRandom {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#endif // TEST_ACM_RANDOM_H_
|
||||
|
||||
@@ -29,8 +29,8 @@ class BordersTest : public ::libvpx_test::EncoderTest,
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if ( video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 0);
|
||||
if (video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 1);
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
|
||||
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef TEST_CLEAR_SYSTEM_STATE_H_
|
||||
#define TEST_CLEAR_SYSTEM_STATE_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
# include "vpx_ports/x86.h"
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "test/acm_random.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
@@ -187,7 +188,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
|
||||
protected:
|
||||
static const int kDataAlignment = 16;
|
||||
static const int kOuterBlockSize = 128;
|
||||
static const int kOuterBlockSize = 256;
|
||||
static const int kInputStride = kOuterBlockSize;
|
||||
static const int kOutputStride = kOuterBlockSize;
|
||||
static const int kMaxDimension = 64;
|
||||
@@ -224,6 +225,10 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
input_[i] = prng.Rand8Extremes();
|
||||
}
|
||||
|
||||
void SetConstantInput(int value) {
|
||||
memset(input_, value, kInputBufferSize);
|
||||
}
|
||||
|
||||
void CheckGuardBlocks() {
|
||||
for (int i = 0; i < kOutputBufferSize; ++i) {
|
||||
if (IsIndexInBorder(i))
|
||||
@@ -456,45 +461,86 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
|
||||
{ 128}
|
||||
};
|
||||
|
||||
/* This test exercises the horizontal and vertical filter functions. */
|
||||
TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
/* Assume that the first input sample is at the 8/16th position. */
|
||||
const int kInitialSubPelOffset = 8;
|
||||
|
||||
/* Filters are 8-tap, so the first filter tap will be applied to the pixel
|
||||
* at position -3 with respect to the current filtering position. Since
|
||||
* kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
|
||||
* which is non-zero only in the last tap. So, applying the filter at the
|
||||
* current input position will result in an output equal to the pixel at
|
||||
* offset +4 (-3 + 7) with respect to the current filtering position.
|
||||
*/
|
||||
const int kPixelSelected = 4;
|
||||
|
||||
/* Assume that each output pixel requires us to step on by 17/16th pixels in
|
||||
* the input.
|
||||
*/
|
||||
const int kInputPixelStep = 17;
|
||||
|
||||
/* The filters are setup in such a way that the expected output produces
|
||||
* sets of 8 identical output samples. As the filter position moves to the
|
||||
* next 1/16th pixel position the only active (=128) filter tap moves one
|
||||
* position to the left, resulting in the same input pixel being replicated
|
||||
* in to the output for 8 consecutive samples. After each set of 8 positions
|
||||
* the filters select a different input pixel. kFilterPeriodAdjust below
|
||||
* computes which input pixel is written to the output for a specified
|
||||
* x or y position.
|
||||
*/
|
||||
|
||||
/* Test the horizontal filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[8], 17, kChangeFilters[4], 16,
|
||||
Width(), Height()));
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, NULL, 0, Width(), Height()));
|
||||
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjust = x >> 4;
|
||||
const int kFilterPeriodAdjust = (x >> 3) << 3;
|
||||
const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();
|
||||
}
|
||||
|
||||
/* Test the vertical filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[4], 16, kChangeFilters[8], 17,
|
||||
Width(), Height()));
|
||||
NULL, 0, kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjust = y >> 4;
|
||||
const int kFilterPeriodAdjust = (y >> 3) << 3;
|
||||
const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
|
||||
}
|
||||
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[8], 17, kChangeFilters[8], 17,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjustY = y >> 4;
|
||||
const int kFilterPeriodAdjustY = (y >> 3) << 3;
|
||||
const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustY * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjustX = x >> 4;
|
||||
const int kFilterPeriodAdjustX = (x >> 3) << 3;
|
||||
const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustX * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
|
||||
ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y;
|
||||
@@ -502,6 +548,34 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
/* This test exercises that enough rows and columns are filtered with every
|
||||
possible initial fractional positions and scaling steps. */
|
||||
TEST_P(ConvolveTest, CheckScalingFiltering) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
SetConstantInput(127);
|
||||
|
||||
for (int frac = 0; frac < 16; ++frac) {
|
||||
for (int step = 1; step <= 32; ++step) {
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
Width(), Height()));
|
||||
|
||||
CheckGuardBlocks();
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y
|
||||
<< ", frac == " << frac << ", step == " << step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
|
||||
@@ -108,5 +108,5 @@ using std::tr1::make_tuple;
|
||||
VP9_INSTANTIATE_TEST_CASE(
|
||||
CpuSpeedTest,
|
||||
::testing::Values(::libvpx_test::kTwoPassGood),
|
||||
::testing::Range(0, 3));
|
||||
::testing::Range(0, 5));
|
||||
} // namespace
|
||||
|
||||
@@ -75,7 +75,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
|
||||
// Update the running total of bits for end of test datarate checks.
|
||||
bits_total_ += frame_size_in_bits ;
|
||||
bits_total_ += frame_size_in_bits;
|
||||
|
||||
// If first drop not set and we have a drop set it to this time.
|
||||
if (!first_drop_ && duration > 1)
|
||||
|
||||
@@ -13,15 +13,16 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -31,12 +32,13 @@ namespace {
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
const int kNumCoeffs = 256;
|
||||
const double PI = 3.1415926535898;
|
||||
void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
@@ -45,7 +47,9 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
|
||||
x = cos(PI * j * (l + 0.5) / 16.0) *
|
||||
cos(PI * i * (k + 0.5) / 16.0) *
|
||||
input[i * 16 + j] / 256;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
@@ -59,23 +63,23 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
}
|
||||
|
||||
|
||||
static const double C1 = 0.995184726672197;
|
||||
static const double C2 = 0.98078528040323;
|
||||
static const double C3 = 0.956940335732209;
|
||||
static const double C4 = 0.923879532511287;
|
||||
static const double C5 = 0.881921264348355;
|
||||
static const double C6 = 0.831469612302545;
|
||||
static const double C7 = 0.773010453362737;
|
||||
static const double C8 = 0.707106781186548;
|
||||
static const double C9 = 0.634393284163646;
|
||||
static const double C10 = 0.555570233019602;
|
||||
static const double C11 = 0.471396736825998;
|
||||
static const double C12 = 0.38268343236509;
|
||||
static const double C13 = 0.290284677254462;
|
||||
static const double C14 = 0.195090322016128;
|
||||
static const double C15 = 0.098017140329561;
|
||||
const double C1 = 0.995184726672197;
|
||||
const double C2 = 0.98078528040323;
|
||||
const double C3 = 0.956940335732209;
|
||||
const double C4 = 0.923879532511287;
|
||||
const double C5 = 0.881921264348355;
|
||||
const double C6 = 0.831469612302545;
|
||||
const double C7 = 0.773010453362737;
|
||||
const double C8 = 0.707106781186548;
|
||||
const double C9 = 0.634393284163646;
|
||||
const double C10 = 0.555570233019602;
|
||||
const double C11 = 0.471396736825998;
|
||||
const double C12 = 0.38268343236509;
|
||||
const double C13 = 0.290284677254462;
|
||||
const double C14 = 0.195090322016128;
|
||||
const double C15 = 0.098017140329561;
|
||||
|
||||
static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
double step[16];
|
||||
double intermediate[16];
|
||||
double temp1, temp2;
|
||||
@@ -108,36 +112,36 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[6] = step[1] - step[6];
|
||||
output[7] = step[0] - step[7];
|
||||
|
||||
temp1 = step[ 8]*C7;
|
||||
temp2 = step[15]*C9;
|
||||
temp1 = step[ 8] * C7;
|
||||
temp2 = step[15] * C9;
|
||||
output[ 8] = temp1 + temp2;
|
||||
|
||||
temp1 = step[ 9]*C11;
|
||||
temp2 = step[14]*C5;
|
||||
temp1 = step[ 9] * C11;
|
||||
temp2 = step[14] * C5;
|
||||
output[ 9] = temp1 - temp2;
|
||||
|
||||
temp1 = step[10]*C3;
|
||||
temp2 = step[13]*C13;
|
||||
temp1 = step[10] * C3;
|
||||
temp2 = step[13] * C13;
|
||||
output[10] = temp1 + temp2;
|
||||
|
||||
temp1 = step[11]*C15;
|
||||
temp2 = step[12]*C1;
|
||||
temp1 = step[11] * C15;
|
||||
temp2 = step[12] * C1;
|
||||
output[11] = temp1 - temp2;
|
||||
|
||||
temp1 = step[11]*C1;
|
||||
temp2 = step[12]*C15;
|
||||
temp1 = step[11] * C1;
|
||||
temp2 = step[12] * C15;
|
||||
output[12] = temp2 + temp1;
|
||||
|
||||
temp1 = step[10]*C13;
|
||||
temp2 = step[13]*C3;
|
||||
temp1 = step[10] * C13;
|
||||
temp2 = step[13] * C3;
|
||||
output[13] = temp2 - temp1;
|
||||
|
||||
temp1 = step[ 9]*C5;
|
||||
temp2 = step[14]*C11;
|
||||
temp1 = step[ 9] * C5;
|
||||
temp2 = step[14] * C11;
|
||||
output[14] = temp2 + temp1;
|
||||
|
||||
temp1 = step[ 8]*C9;
|
||||
temp2 = step[15]*C7;
|
||||
temp1 = step[ 8] * C9;
|
||||
temp2 = step[15] * C7;
|
||||
output[15] = temp2 - temp1;
|
||||
|
||||
// step 3
|
||||
@@ -146,20 +150,20 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
step[ 2] = output[1] - output[2];
|
||||
step[ 3] = output[0] - output[3];
|
||||
|
||||
temp1 = output[4]*C14;
|
||||
temp2 = output[7]*C2;
|
||||
temp1 = output[4] * C14;
|
||||
temp2 = output[7] * C2;
|
||||
step[ 4] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5]*C10;
|
||||
temp2 = output[6]*C6;
|
||||
temp1 = output[5] * C10;
|
||||
temp2 = output[6] * C6;
|
||||
step[ 5] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5]*C6;
|
||||
temp2 = output[6]*C10;
|
||||
temp1 = output[5] * C6;
|
||||
temp2 = output[6] * C10;
|
||||
step[ 6] = temp2 - temp1;
|
||||
|
||||
temp1 = output[4]*C2;
|
||||
temp2 = output[7]*C14;
|
||||
temp1 = output[4] * C2;
|
||||
temp2 = output[7] * C14;
|
||||
step[ 7] = temp2 - temp1;
|
||||
|
||||
step[ 8] = output[ 8] + output[11];
|
||||
@@ -176,18 +180,18 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[ 0] = (step[ 0] + step[ 1]);
|
||||
output[ 8] = (step[ 0] - step[ 1]);
|
||||
|
||||
temp1 = step[2]*C12;
|
||||
temp2 = step[3]*C4;
|
||||
temp1 = step[2] * C12;
|
||||
temp2 = step[3] * C4;
|
||||
temp1 = temp1 + temp2;
|
||||
output[ 4] = 2*(temp1*C8);
|
||||
output[ 4] = 2*(temp1 * C8);
|
||||
|
||||
temp1 = step[2]*C4;
|
||||
temp2 = step[3]*C12;
|
||||
temp1 = step[2] * C4;
|
||||
temp2 = step[3] * C12;
|
||||
temp1 = temp2 - temp1;
|
||||
output[12] = 2*(temp1*C8);
|
||||
output[12] = 2 * (temp1 * C8);
|
||||
|
||||
output[ 2] = 2*((step[4] + step[ 5])*C8);
|
||||
output[14] = 2*((step[7] - step[ 6])*C8);
|
||||
output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
|
||||
output[14] = 2 * ((step[7] - step[ 6]) * C8);
|
||||
|
||||
temp1 = step[4] - step[5];
|
||||
temp2 = step[6] + step[7];
|
||||
@@ -197,17 +201,17 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
intermediate[8] = step[8] + step[14];
|
||||
intermediate[9] = step[9] + step[15];
|
||||
|
||||
temp1 = intermediate[8]*C12;
|
||||
temp2 = intermediate[9]*C4;
|
||||
temp1 = intermediate[8] * C12;
|
||||
temp2 = intermediate[9] * C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[3] = 2*(temp1*C8);
|
||||
output[3] = 2 * (temp1 * C8);
|
||||
|
||||
temp1 = intermediate[8]*C4;
|
||||
temp2 = intermediate[9]*C12;
|
||||
temp1 = intermediate[8] * C4;
|
||||
temp2 = intermediate[9] * C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[13] = 2*(temp1*C8);
|
||||
output[13] = 2 * (temp1 * C8);
|
||||
|
||||
output[ 9] = 2*((step[10] + step[11])*C8);
|
||||
output[ 9] = 2 * ((step[10] + step[11]) * C8);
|
||||
|
||||
intermediate[11] = step[10] - step[11];
|
||||
intermediate[12] = step[12] + step[13];
|
||||
@@ -218,207 +222,301 @@ static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[15] = (intermediate[11] + intermediate[12]);
|
||||
output[ 1] = -(intermediate[11] - intermediate[12]);
|
||||
|
||||
output[ 7] = 2*(intermediate[13]*C8);
|
||||
output[ 7] = 2 * (intermediate[13] * C8);
|
||||
|
||||
temp1 = intermediate[14]*C12;
|
||||
temp2 = intermediate[15]*C4;
|
||||
temp1 = intermediate[14] * C12;
|
||||
temp2 = intermediate[15] * C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[11] = -2*(temp1*C8);
|
||||
output[11] = -2 * (temp1 * C8);
|
||||
|
||||
temp1 = intermediate[14]*C4;
|
||||
temp2 = intermediate[15]*C12;
|
||||
temp1 = intermediate[14] * C4;
|
||||
temp2 = intermediate[15] * C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[ 5] = 2*(temp1*C8);
|
||||
output[ 5] = 2 * (temp1 * C8);
|
||||
}
|
||||
|
||||
static void reference_16x16_dct_1d(double in[16], double out[16]) {
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 16; k++) {
|
||||
out[k] = 0.0;
|
||||
for (int n = 0; n < 16; n++)
|
||||
out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
|
||||
if (k == 0)
|
||||
out[k] = out[k]*kInvSqrt2;
|
||||
}
|
||||
}
|
||||
|
||||
void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
|
||||
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j*16 + i];
|
||||
temp_in[j] = input[j * 16 + i];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j*16 + i] = temp_out[j];
|
||||
output[j * 16 + i] = temp_out[j];
|
||||
}
|
||||
// Then transform rows
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = output[j + i*16];
|
||||
temp_in[j] = output[j + i * 16];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
// Scale by some magic number
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j + i*16] = temp_out[j]/2;
|
||||
output[j + i * 16] = temp_out[j]/2;
|
||||
}
|
||||
}
|
||||
|
||||
void fdct16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fdct16x16_c(in, out, stride);
|
||||
}
|
||||
void idct16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_idct16x16_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
// FIXME(jingning): need to test both SSE2 and c
|
||||
#if HAVE_SSE2
|
||||
vp9_short_fht16x16_sse2(in, out, stride >> 1, tx_type);
|
||||
#else
|
||||
vp9_short_fht16x16_c(in, out, stride >> 1, tx_type);
|
||||
#endif
|
||||
}
|
||||
void iht16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht16x16_add_c(out, dst, stride >> 1, tx_type);
|
||||
|
||||
void fht16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht16x16_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans16x16Test : public ::testing::TestWithParam<int> {
|
||||
class Trans16x16TestBase {
|
||||
public:
|
||||
virtual ~FwdTrans16x16Test() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm = fdct16x16;
|
||||
inv_txfm = idct16x16_add;
|
||||
} else {
|
||||
fwd_txfm = fht16x16;
|
||||
inv_txfm = iht16x16_add;
|
||||
}
|
||||
}
|
||||
virtual ~Trans16x16TestBase() {}
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
|
||||
|
||||
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
|
||||
|
||||
void RunAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
|
||||
test_temp_block, pitch_));
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
|
||||
}
|
||||
|
||||
void RunCoeffCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
}
|
||||
}
|
||||
|
||||
void RunMemCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
if (i == 1)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
|
||||
fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
|
||||
output_block, pitch_));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RunInvAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
coeff[j] = round(out_r[j]);
|
||||
|
||||
const int pitch = 32;
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_GE(1u, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
}
|
||||
}
|
||||
int pitch_;
|
||||
int tx_type_;
|
||||
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
fht_t fwd_txfm_ref;
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 256);
|
||||
class Trans16x16DCT : public Trans16x16TestBase,
|
||||
public PARAMS(fdct_t, idct_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16DCT() {}
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 32;
|
||||
fwd_txfm_ref = fdct16x16_ref;
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
const int pitch = 32;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride >> 1);
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
|
||||
fdct_t fwd_txfm_;
|
||||
idct_t inv_txfm_;
|
||||
};
|
||||
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
|
||||
TEST_P(Trans16x16DCT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans16x16Test, CoeffSizeCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_extreme_block, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
|
||||
TEST_P(Trans16x16DCT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < 256; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
TEST_P(Trans16x16DCT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
const int pitch = 32;
|
||||
RunFwdTxfm(input_block, output_block, dst, pitch, tx_type_);
|
||||
RunFwdTxfm(input_extreme_block, output_extreme_block, dst, pitch, tx_type_);
|
||||
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
|
||||
RunInvAccuracyCheck();
|
||||
}
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_extreme_block[j]))
|
||||
<< "Error: 16x16 FDCT extreme has coefficient larger "
|
||||
<< "than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
class Trans16x16HT : public Trans16x16TestBase,
|
||||
public PARAMS(fht_t, iht_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16HT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 16;
|
||||
fwd_txfm_ref = fht16x16_ref;
|
||||
}
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans16x16Test, ::testing::Range(0, 4));
|
||||
|
||||
TEST(VP9Idct16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t in[256], coeff[256];
|
||||
uint8_t dst[256], src[256];
|
||||
double out_r[256];
|
||||
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j)
|
||||
in[j] = src[j] - dst[j];
|
||||
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < 256; j++)
|
||||
coeff[j] = round(out_r[j]);
|
||||
vp9_short_idct16x16_add_c(coeff, dst, 16);
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride, tx_type_);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride, tx_type_);
|
||||
}
|
||||
|
||||
fht_t fwd_txfm_;
|
||||
iht_t inv_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(Trans16x16HT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
TEST_P(Trans16x16HT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
TEST_P(Trans16x16HT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 0),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 1),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 2),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_sse2,
|
||||
&vp9_short_idct16x16_add_sse2, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
@@ -13,15 +13,17 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
|
||||
void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -30,35 +32,15 @@ namespace {
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
static const double kPi = 3.141592653589793238462643383279502884;
|
||||
static void reference2_32x32_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
for (int l = 0; l < 32; ++l) {
|
||||
for (int k = 0; k < 32; ++k) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
for (int j = 0; j < 32; ++j) {
|
||||
x = cos(kPi * j * (l + 0.5) / 32.0) *
|
||||
cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
x *= sqrt(2.0);
|
||||
s += x;
|
||||
}
|
||||
}
|
||||
output[k * 32 + l] = s / 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
|
||||
const int kNumCoeffs = 1024;
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 32; k++) {
|
||||
out[k] = 0.0;
|
||||
@@ -69,7 +51,8 @@ static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
|
||||
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
|
||||
double output[kNumCoeffs]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
double temp_in[32], temp_out[32];
|
||||
@@ -91,27 +74,165 @@ static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(VP9Idct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t in[1024], coeff[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
double out_r[1024];
|
||||
typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
|
||||
public:
|
||||
virtual ~Trans32x32Test() {}
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
version_ = GET_PARAM(2); // 0: high precision forward transform
|
||||
// 1: low precision version for rd loop
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
int version_;
|
||||
fwd_txfm_t fwd_txfm_;
|
||||
inv_txfm_t inv_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(Trans32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
const int pitch = 64;
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
|
||||
REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
if (version_ == 1) {
|
||||
max_error /= 2;
|
||||
total_error /= 45;
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
|
||||
}
|
||||
|
||||
TEST_P(Trans32x32Test, CoeffCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));
|
||||
|
||||
if (version_ == 0) {
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j])
|
||||
<< "Error: 32x32 FDCT versions have mismatched coefficients";
|
||||
} else {
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
|
||||
<< "Error: 32x32 FDCT rd has mismatched coefficients";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Trans32x32Test, MemCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 2000;
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
if (i == 1)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
if (version_ == 0) {
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j])
|
||||
<< "Error: 32x32 FDCT versions have mismatched coefficients";
|
||||
} else {
|
||||
EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
|
||||
<< "Error: 32x32 FDCT rd has mismatched coefficients";
|
||||
}
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
|
||||
<< "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 32x32 FDCT has coefficient larger than "
|
||||
<< "4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(Trans32x32Test, InverseAccuracy) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255]
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_32x32_dct_2d(in, out_r);
|
||||
for (int j = 0; j < 1024; j++)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
coeff[j] = round(out_r[j]);
|
||||
vp9_short_idct32x32_add_c(coeff, dst, 32);
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
@@ -121,72 +242,21 @@ TEST(VP9Idct32x32Test, AccuracyCheck) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(VP9Fdct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
unsigned int max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t test_input_block[1024];
|
||||
int16_t test_temp_block[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
|
||||
vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
const unsigned diff = dst[j] - src[j];
|
||||
const unsigned error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
|
||||
}
|
||||
|
||||
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t input_block[1024], input_extreme_block[1024];
|
||||
int16_t output_block[1024], output_extreme_block[1024];
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(input_block, output_block, pitch);
|
||||
vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
|
||||
<< "Error: 32x32 FDCT extreme has coefficient larger than "
|
||||
"4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
}
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 1)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#define TEST_DECODE_TEST_DRIVER_H_
|
||||
#include <cstring>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_decoder.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
@@ -36,9 +36,8 @@ class DxDataIterator {
|
||||
};
|
||||
|
||||
// Provides a simplified interface to manage one video decoding.
|
||||
//
|
||||
// TODO: similar to Encoder class, the exact services should be
|
||||
// added as more tests are added.
|
||||
// Similar to Encoder class, the exact services should be added
|
||||
// as more tests are added.
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
@@ -114,19 +114,19 @@ static bool compare_img(const vpx_image_t *img1,
|
||||
const unsigned int height_y = img1->d_h;
|
||||
unsigned int i;
|
||||
for (i = 0; i < height_y; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
const unsigned int width_uv = (img1->d_w + 1) >> 1;
|
||||
const unsigned int height_uv = (img1->d_h + 1) >> 1;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
return match;
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
|
||||
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
|
||||
bool again;
|
||||
for (again = true, video->Begin(); again; video->Next()) {
|
||||
again = video->img() != NULL;
|
||||
again = (video->img() != NULL);
|
||||
|
||||
PreEncodeFrameHook(video);
|
||||
PreEncodeFrameHook(video, encoder);
|
||||
|
||||
@@ -62,7 +62,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
|
||||
if (droppable_nframes_ > 0 &&
|
||||
(cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
|
||||
for (unsigned int i = 0; i < droppable_nframes_; ++i) {
|
||||
if (droppable_frames_[i] == nframes_) {
|
||||
if (droppable_frames_[i] == video->frame()) {
|
||||
std::cout << " Encoding droppable frame: "
|
||||
<< droppable_frames_[i] << "\n";
|
||||
frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
|
||||
@@ -148,7 +148,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 2000;
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.g_lag_in_frames = 10;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
@@ -179,6 +179,9 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 500;
|
||||
// FIXME(debargha): Fix this to work for any lag.
|
||||
// Currently this test only works for lag = 0
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
@@ -136,7 +136,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 1000000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
||||
@@ -156,7 +156,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
if(test_temp_block[j] > 0) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
|
||||
@@ -13,229 +13,309 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fdct8x8_c(in, out, stride);
|
||||
}
|
||||
void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
|
||||
// when we have all inverse dct functions done sse2.
|
||||
#if HAVE_SSE2
|
||||
vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
|
||||
#else
|
||||
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
|
||||
#endif
|
||||
}
|
||||
void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
|
||||
|
||||
void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht8x8_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
|
||||
class FwdTrans8x8TestBase {
|
||||
public:
|
||||
virtual ~FwdTrans8x8Test() {}
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm = fdct8x8;
|
||||
inv_txfm = idct8x8_add;
|
||||
} else {
|
||||
fwd_txfm = fht8x8;
|
||||
inv_txfm = iht8x8_add;
|
||||
}
|
||||
}
|
||||
virtual ~FwdTrans8x8TestBase() {}
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
|
||||
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
|
||||
|
||||
int tx_type_;
|
||||
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
};
|
||||
void RunSignBiasCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
|
||||
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
|
||||
const int pitch = 16;
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block, pitch_));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 1125;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 8x8 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 10000;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-15, 15] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 1125;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 8x8 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
++count_sign_block[j][0];
|
||||
else if (test_output_block[j] > 0)
|
||||
++count_sign_block[j][1];
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 10000;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-15, 15] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
void RunRoundTripErrorCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
const int pitch = 16;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
for (int j = 0; j < 64; ++j){
|
||||
if(test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
} else {
|
||||
test_temp_block[j] -= 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block, pitch_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
} else {
|
||||
test_temp_block[j] -= 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
|
||||
"error > 1/5 per block";
|
||||
}
|
||||
|
||||
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8() % 2 ? 255 : 0;
|
||||
dst[j] = src[j] > 0 ? 0 : 255;
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
|
||||
<< " individual roundtrip error > 1";
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
|
||||
<< " roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
|
||||
<< " roundtrip error > 1/5 per block";
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
|
||||
<< "error > 1/5 per block";
|
||||
}
|
||||
|
||||
void RunExtremalCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8() % 2 ? 255 : 0;
|
||||
dst[j] = src[j] > 0 ? 0 : 255;
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block, pitch_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
|
||||
<< "an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
|
||||
<< " roundtrip error > 1/5 per block";
|
||||
}
|
||||
}
|
||||
|
||||
int pitch_;
|
||||
int tx_type_;
|
||||
fht_t fwd_txfm_ref;
|
||||
};
|
||||
|
||||
class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
|
||||
public PARAMS(fdct_t, idct_t, int) {
|
||||
public:
|
||||
virtual ~FwdTrans8x8DCT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 16;
|
||||
fwd_txfm_ref = fdct8x8_ref;
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride >> 1);
|
||||
}
|
||||
|
||||
fdct_t fwd_txfm_;
|
||||
idct_t inv_txfm_;
|
||||
};
|
||||
|
||||
// Sign-bias check, run once per instantiated DCT parameter tuple.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
|
||||
// Round-trip error check, run once per instantiated DCT parameter tuple.
TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
|
||||
|
||||
// Extremal-input round-trip check, run once per instantiated DCT tuple.
TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }
|
||||
|
||||
class FwdTrans8x8HT : public FwdTrans8x8TestBase,
|
||||
public PARAMS(fht_t, iht_t, int) {
|
||||
public:
|
||||
virtual ~FwdTrans8x8HT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 8;
|
||||
fwd_txfm_ref = fht8x8_ref;
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride, tx_type_);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride, tx_type_);
|
||||
}
|
||||
|
||||
fht_t fwd_txfm_;
|
||||
iht_t inv_txfm_;
|
||||
};
|
||||
|
||||
// Sign-bias check, run once per instantiated HT parameter tuple.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }
|
||||
|
||||
// Round-trip error check, run once per instantiated HT parameter tuple.
TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
|
||||
|
||||
// Extremal-input round-trip check, run once per instantiated HT tuple.
TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
// C implementations.  Each tuple is (forward fn, inverse fn, tx_type).
// The DCT fixture gets a single tuple with tx_type 0.
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));

// The hybrid-transform fixture is run for tx_type 0 through 3.
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 0),
        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 1),
        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 2),
        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 3)));
|
||||
|
||||
#if HAVE_SSE2
// SSE2 implementations, compiled in only when HAVE_SSE2 is set.  The tuple
// layout and tx_type coverage mirror the C instantiations.
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 0),
        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 1),
        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 2),
        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 3)));
#endif
|
||||
} // namespace
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#define TEST_I420_VIDEO_SOURCE_H_
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "test/video_source.h"
|
||||
|
||||
@@ -34,7 +35,6 @@ class I420VideoSource : public VideoSource {
|
||||
height_(0),
|
||||
framerate_numerator_(rate_numerator),
|
||||
framerate_denominator_(rate_denominator) {
|
||||
|
||||
// This initializes raw_sz_, width_, height_ and allocates an img.
|
||||
SetSize(width, height);
|
||||
}
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -27,10 +27,10 @@ namespace {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if(x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
if (x < 0)
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
else
|
||||
return (int)floor(x + 0.5);
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -16,7 +16,9 @@ extern "C" {
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
typedef void (*idct_fn_t)(int16_t *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
namespace {
|
||||
@@ -34,7 +36,7 @@ class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
idct_fn_t UUT;
|
||||
short input[16];
|
||||
int16_t input[16];
|
||||
unsigned char output[256];
|
||||
unsigned char predict[256];
|
||||
};
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -34,13 +34,17 @@ class IntraPredBase {
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetupMacroblock(uint8_t *data, int block_size, int stride,
|
||||
void SetupMacroblock(MACROBLOCKD *mbptr,
|
||||
MODE_INFO *miptr,
|
||||
uint8_t *data,
|
||||
int block_size,
|
||||
int stride,
|
||||
int num_planes) {
|
||||
memset(&mb_, 0, sizeof(mb_));
|
||||
memset(&mi_, 0, sizeof(mi_));
|
||||
mb_.up_available = 1;
|
||||
mb_.left_available = 1;
|
||||
mb_.mode_info_context = &mi_;
|
||||
mbptr_ = mbptr;
|
||||
miptr_ = miptr;
|
||||
mbptr_->up_available = 1;
|
||||
mbptr_->left_available = 1;
|
||||
mbptr_->mode_info_context = miptr_;
|
||||
stride_ = stride;
|
||||
block_size_ = block_size;
|
||||
num_planes_ = num_planes;
|
||||
@@ -63,14 +67,14 @@ class IntraPredBase {
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
|
||||
|
||||
void SetLeftUnavailable() {
|
||||
mb_.left_available = 0;
|
||||
mbptr_->left_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int i = -1; i < block_size_; ++i)
|
||||
data_ptr_[p][stride_ * i - 1] = 129;
|
||||
}
|
||||
|
||||
void SetTopUnavailable() {
|
||||
mb_.up_available = 0;
|
||||
mbptr_->up_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
|
||||
}
|
||||
@@ -96,19 +100,19 @@ class IntraPredBase {
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
// calculate expected DC
|
||||
int expected;
|
||||
if (mb_.up_available || mb_.left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
|
||||
mb_.left_available;
|
||||
if (mb_.up_available)
|
||||
if (mbptr_->up_available || mbptr_->left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
|
||||
mbptr_->left_available;
|
||||
if (mbptr_->up_available)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
sum += data_ptr_[p][x - stride_];
|
||||
if (mb_.left_available)
|
||||
if (mbptr_->left_available)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
} else
|
||||
} else {
|
||||
expected = 0x80;
|
||||
|
||||
}
|
||||
// check that all subsequent lines are equal to the first
|
||||
for (int y = 1; y < block_size_; ++y)
|
||||
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
|
||||
@@ -209,8 +213,8 @@ class IntraPredBase {
|
||||
}
|
||||
}
|
||||
|
||||
MACROBLOCKD mb_;
|
||||
MODE_INFO mi_;
|
||||
MACROBLOCKD *mbptr_;
|
||||
MODE_INFO *miptr_;
|
||||
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
|
||||
int stride_;
|
||||
int block_size_;
|
||||
@@ -228,12 +232,18 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
protected IntraPredBase {
|
||||
public:
|
||||
// Allocates the three buffers shared by every test in this test case: the
// pixel data array, the MODE_INFO and the MACROBLOCKD.  They are released
// again in TearDownTestCase().
static void SetUpTestCase() {
  void *const mb_mem = vpx_memalign(32, sizeof(MACROBLOCKD));
  void *const mi_mem = vpx_memalign(32, sizeof(MODE_INFO));
  void *const data_mem = vpx_memalign(kDataAlignment, kDataBufferSize);

  mb_ = reinterpret_cast<MACROBLOCKD*>(mb_mem);
  mi_ = reinterpret_cast<MODE_INFO*>(mi_mem);
  data_array_ = reinterpret_cast<uint8_t*>(data_mem);
}
|
||||
|
||||
// Releases the buffers allocated in SetUpTestCase().  Every static pointer
// is reset to NULL afterwards, so none of them is left dangling (previously
// only data_array_ was cleared).
static void TearDownTestCase() {
  vpx_free(data_array_);
  data_array_ = NULL;
  vpx_free(mi_);
  mi_ = NULL;
  vpx_free(mb_);
  mb_ = NULL;
}
|
||||
|
||||
@@ -250,12 +260,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
|
||||
virtual void SetUp() {
|
||||
pred_fn_ = GetParam();
|
||||
SetupMacroblock(data_array_, kBlockSize, kStride, 1);
|
||||
SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) {
|
||||
mb_.mode_info_context->mbmi.mode = mode;
|
||||
REGISTER_STATE_CHECK(pred_fn_(&mb_,
|
||||
mbptr_->mode_info_context->mbmi.mode = mode;
|
||||
REGISTER_STATE_CHECK(pred_fn_(mbptr_,
|
||||
data_ptr_[0] - kStride,
|
||||
data_ptr_[0] - 1, kStride,
|
||||
data_ptr_[0], kStride));
|
||||
@@ -263,8 +273,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
|
||||
|
||||
intra_pred_y_fn_t pred_fn_;
|
||||
static uint8_t* data_array_;
|
||||
static MACROBLOCKD * mb_;
|
||||
static MODE_INFO *mi_;
|
||||
};
|
||||
|
||||
MACROBLOCKD* IntraPredYTest::mb_ = NULL;
|
||||
MODE_INFO* IntraPredYTest::mi_ = NULL;
|
||||
uint8_t* IntraPredYTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredYTest, IntraPredTests) {
|
||||
@@ -299,12 +313,18 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
protected IntraPredBase {
|
||||
public:
|
||||
// Allocates the three buffers shared by every test in this test case: the
// pixel data array, the MODE_INFO and the MACROBLOCKD.  They are released
// again in TearDownTestCase().
static void SetUpTestCase() {
  void *const mb_mem = vpx_memalign(32, sizeof(MACROBLOCKD));
  void *const mi_mem = vpx_memalign(32, sizeof(MODE_INFO));
  void *const data_mem = vpx_memalign(kDataAlignment, kDataBufferSize);

  mb_ = reinterpret_cast<MACROBLOCKD*>(mb_mem);
  mi_ = reinterpret_cast<MODE_INFO*>(mi_mem);
  data_array_ = reinterpret_cast<uint8_t*>(data_mem);
}
|
||||
|
||||
// Releases the buffers allocated in SetUpTestCase().  Every static pointer
// is reset to NULL afterwards, so none of them is left dangling (previously
// only data_array_ was cleared).
static void TearDownTestCase() {
  vpx_free(data_array_);
  data_array_ = NULL;
  vpx_free(mi_);
  mi_ = NULL;
  vpx_free(mb_);
  mb_ = NULL;
}
|
||||
|
||||
@@ -322,12 +342,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
|
||||
virtual void SetUp() {
|
||||
pred_fn_ = GetParam();
|
||||
SetupMacroblock(data_array_, kBlockSize, kStride, 2);
|
||||
SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) {
|
||||
mb_.mode_info_context->mbmi.uv_mode = mode;
|
||||
pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
|
||||
mbptr_->mode_info_context->mbmi.uv_mode = mode;
|
||||
pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
|
||||
data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
|
||||
data_ptr_[0], data_ptr_[1], kStride);
|
||||
}
|
||||
@@ -340,8 +360,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
|
||||
// We use 9 lines so we have one line above us for top-prediction.
|
||||
// [0] = U, [1] = V
|
||||
static uint8_t* data_array_;
|
||||
static MACROBLOCKD* mb_;
|
||||
static MODE_INFO* mi_;
|
||||
};
|
||||
|
||||
MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
|
||||
MODE_INFO* IntraPredUVTest::mi_ = NULL;
|
||||
uint8_t* IntraPredUVTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredUVTest, IntraPredTests) {
|
||||
|
||||
@@ -28,7 +28,7 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
|
||||
// so that we can do actual file decodes.
|
||||
class IVFVideoSource : public CompressedVideoSource {
|
||||
public:
|
||||
IVFVideoSource(const std::string &file_name)
|
||||
explicit IVFVideoSource(const std::string &file_name)
|
||||
: file_name_(file_name),
|
||||
input_file_(NULL),
|
||||
compressed_frame_buf_(NULL),
|
||||
|
||||
@@ -132,7 +132,6 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
|
||||
// Verify that keyframes match the file keyframes in the file.
|
||||
for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
|
||||
iter != kf_pts_list_.end(); ++iter) {
|
||||
|
||||
if (deadline_ == VPX_DL_REALTIME && *iter > 0)
|
||||
EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
|
||||
<< *iter;
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_MD5_HELPER_H_
|
||||
#define LIBVPX_TEST_MD5_HELPER_H_
|
||||
#ifndef TEST_MD5_HELPER_H_
|
||||
#define TEST_MD5_HELPER_H_
|
||||
|
||||
extern "C" {
|
||||
#include "./md5_utils.h"
|
||||
@@ -25,9 +25,15 @@ class MD5 {
|
||||
|
||||
void Add(const vpx_image_t *img) {
|
||||
for (int plane = 0; plane < 3; ++plane) {
|
||||
uint8_t *buf = img->planes[plane];
|
||||
const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
|
||||
const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
|
||||
const uint8_t *buf = img->planes[plane];
|
||||
// Calculate the width and height to do the md5 check. For the chroma
|
||||
// plane, we never want to round down and thus skip a pixel so if
|
||||
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
|
||||
// This works only for chroma_shift of 0 and 1.
|
||||
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
|
||||
img->y_chroma_shift : img->d_h;
|
||||
const int w = plane ? (img->d_w + img->x_chroma_shift) >>
|
||||
img->x_chroma_shift : img->d_w;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
MD5Update(&md5_, buf, w);
|
||||
@@ -61,4 +67,4 @@ class MD5 {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // LIBVPX_TEST_MD5_HELPER_H_
|
||||
#endif // TEST_MD5_HELPER_H_
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -63,7 +63,8 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
|
||||
// Pointers to top-left pixel of block in the input and output images.
|
||||
uint8_t *const src_image_ptr = src_image + (input_stride << 1);
|
||||
uint8_t *const dst_image_ptr = dst_image + 8;
|
||||
uint8_t *const flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
uint8_t *const flimits =
|
||||
reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
(void)vpx_memset(flimits, 255, block_width);
|
||||
|
||||
// Initialize pixels in the input:
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#ifndef TEST_REGISTER_STATE_CHECK_H_
|
||||
#define TEST_REGISTER_STATE_CHECK_H_
|
||||
|
||||
#ifdef _WIN64
|
||||
|
||||
@@ -92,4 +92,4 @@ class RegisterStateCheck {};
|
||||
|
||||
#endif // _WIN64
|
||||
|
||||
#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#endif // TEST_REGISTER_STATE_CHECK_H_
|
||||
|
||||
@@ -16,8 +16,68 @@
|
||||
#include "test/video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
// Enable(1) or Disable(0) writing of the compressed bitstream.
|
||||
#define WRITE_COMPRESSED_STREAM 0
|
||||
|
||||
namespace {
|
||||
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
// Stores the low 16 bits of val at mem[0..1], least-significant byte first.
static void mem_put_le16(char *const mem, const unsigned int val) {
  for (int byte = 0; byte < 2; ++byte)
    mem[byte] = val >> (8 * byte);
}
|
||||
|
||||
// Stores val at mem[0..3], least-significant byte first.
static void mem_put_le32(char *const mem, const unsigned int val) {
  for (int byte = 0; byte < 4; ++byte)
    mem[byte] = val >> (8 * byte);
}
|
||||
|
||||
// Builds the 32-byte file header and writes it to outfile at the current
// file position.  Width, height and timebase come from cfg; frame_cnt fills
// the length field.  The fwrite() result is deliberately ignored.
static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
                                  int frame_cnt, FILE *const outfile) {
  static const char kSignature[4] = { 'D', 'K', 'I', 'F' };
  char hdr[32];

  for (int i = 0; i < 4; ++i)
    hdr[i] = kSignature[i];

  mem_put_le16(hdr + 4, 0);                     /* version */
  mem_put_le16(hdr + 6, 32);                    /* headersize */
  mem_put_le32(hdr + 8, 0x30395056);            /* fourcc: bytes "VP90" */
  mem_put_le16(hdr + 12, cfg->g_w);             /* width */
  mem_put_le16(hdr + 14, cfg->g_h);             /* height */
  mem_put_le32(hdr + 16, cfg->g_timebase.den);  /* rate */
  mem_put_le32(hdr + 20, cfg->g_timebase.num);  /* scale */
  mem_put_le32(hdr + 24, frame_cnt);            /* length */
  mem_put_le32(hdr + 28, 0);                    /* unused */

  (void)fwrite(hdr, 1, sizeof(hdr), outfile);
}
|
||||
|
||||
// Writes size to outfile as a 4-byte little-endian value (size is narrowed
// to unsigned int first).  The fwrite() result is deliberately ignored.
static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
  char le_size[4];
  const unsigned int size32 = static_cast<unsigned int>(size);
  mem_put_le32(le_size, size32);
  (void)fwrite(le_size, 1, sizeof(le_size), outfile);
}
|
||||
|
||||
static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
|
||||
FILE *const outfile) {
|
||||
char header[12];
|
||||
vpx_codec_pts_t pts;
|
||||
|
||||
if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
|
||||
return;
|
||||
|
||||
pts = pkt->data.frame.pts;
|
||||
mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
|
||||
mem_put_le32(header + 4, pts & 0xFFFFFFFF);
|
||||
mem_put_le32(header + 8, pts >> 32);
|
||||
|
||||
(void)fwrite(header, 1, 12, outfile);
|
||||
}
|
||||
#endif // WRITE_COMPRESSED_STREAM
|
||||
|
||||
const unsigned int kInitialWidth = 320;
|
||||
const unsigned int kInitialHeight = 240;
|
||||
|
||||
@@ -42,6 +102,8 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
|
||||
limit_ = 60;
|
||||
}
|
||||
|
||||
virtual ~ResizingVideoSource() {}
|
||||
|
||||
protected:
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
@@ -56,13 +118,15 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
|
||||
protected:
|
||||
ResizeTest() : EncoderTest(GET_PARAM(0)) {}
|
||||
|
||||
virtual ~ResizeTest() {}
|
||||
|
||||
struct FrameInfo {
|
||||
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
|
||||
: pts(_pts), w(_w), h(_h) {}
|
||||
|
||||
vpx_codec_pts_t pts;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
};
|
||||
|
||||
virtual void SetUp() {
|
||||
@@ -95,17 +159,47 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned int kStepDownFrame = 3;
|
||||
const unsigned int kStepUpFrame = 6;
|
||||
|
||||
class ResizeInternalTest : public ResizeTest {
|
||||
protected:
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
ResizeInternalTest()
|
||||
: ResizeTest(),
|
||||
frame0_psnr_(0.0),
|
||||
outfile_(NULL),
|
||||
out_frames_(0) {}
|
||||
#else
|
||||
ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
|
||||
#endif
|
||||
|
||||
virtual ~ResizeInternalTest() {}
|
||||
|
||||
// Called at the start of a pass.  Only does work when
// WRITE_COMPRESSED_STREAM is enabled: it opens the output .ivf file for
// binary writing.  The fopen() result is not checked here; outfile_ may be
// NULL afterwards.
virtual void BeginPassHook(unsigned int /*pass*/) {
#if WRITE_COMPRESSED_STREAM
  static const char kOutputName[] = "vp90-2-05-resize.ivf";
  outfile_ = fopen(kOutputName, "wb");
#endif
}
|
||||
|
||||
// Called at the end of a pass.  When WRITE_COMPRESSED_STREAM is enabled and
// a file is open, seeks back to the start, rewrites the file header with the
// number of frames counted so far, then closes the file.
virtual void EndPassHook() {
#if WRITE_COMPRESSED_STREAM
  if (outfile_ == NULL)
    return;

  // The header is only rewritten if the seek succeeded (fseek returns 0).
  if (fseek(outfile_, 0, SEEK_SET) == 0)
    write_ivf_file_header(&cfg_, out_frames_, outfile_);

  fclose(outfile_);
  outfile_ = NULL;
#endif
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 3) {
|
||||
if (video->frame() == kStepDownFrame) {
|
||||
struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
if (video->frame() == 6) {
|
||||
if (video->frame() == kStepUpFrame) {
|
||||
struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
@@ -117,21 +211,46 @@ class ResizeInternalTest : public ResizeTest {
|
||||
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
|
||||
}
|
||||
|
||||
// Called for each compressed frame packet.  When WRITE_COMPRESSED_STREAM is
// enabled, counts the frame and appends it to the output file: a
// placeholder file header (frame count 0) before the packet with pts 0,
// then the frame header and the frame payload.
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
#if WRITE_COMPRESSED_STREAM
  ++out_frames_;

  // outfile_ comes from an unchecked fopen() in BeginPassHook and may be
  // NULL; skip writing in that case, matching the guard in EndPassHook.
  if (outfile_ == NULL)
    return;

  // Write initial file header if first frame.
  if (pkt->data.frame.pts == 0)
    write_ivf_file_header(&cfg_, 0, outfile_);

  // Write frame header and data.
  write_ivf_frame_header(pkt, outfile_);
  (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
#endif
}
|
||||
|
||||
double frame0_psnr_;
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
FILE *outfile_;
|
||||
unsigned int out_frames_;
|
||||
#endif
|
||||
};
|
||||
|
||||
TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 10);
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
// q picked such that initial keyframe on this clip is ~30dB PSNR
|
||||
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
|
||||
|
||||
// If the number of frames being encoded is smaller than g_lag_in_frames
|
||||
// the encoded frame is unavailable using the current API. Comparing
|
||||
// frames to detect mismatch would then not be possible. Set
|
||||
// g_lag_in_frames = 0 to get around this.
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
|
||||
info != frame_info_list_.end(); ++info) {
|
||||
const vpx_codec_pts_t pts = info->pts;
|
||||
if (pts >= 3 && pts < 6) {
|
||||
if (pts >= kStepDownFrame && pts < kStepUpFrame) {
|
||||
ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
|
||||
ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
|
||||
} else {
|
||||
|
||||
@@ -17,7 +17,6 @@ extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "./vp8_rtcd.h"
|
||||
//#include "vp8/common/blockd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#include "./vp9_rtcd.h"
|
||||
|
||||
@@ -17,15 +17,19 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
extern "C" {
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
|
||||
@@ -121,10 +125,10 @@ TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
int rand_deltas[4];
|
||||
int deltas_valid;
|
||||
rand_deltas[0] = (rand() % 160) - 80;
|
||||
rand_deltas[1] = (rand() % 160) - 80;
|
||||
rand_deltas[2] = (rand() % 160) - 80;
|
||||
rand_deltas[3] = (rand() % 160) - 80;
|
||||
rand_deltas[0] = rnd(160) - 80;
|
||||
rand_deltas[1] = rnd(160) - 80;
|
||||
rand_deltas[2] = rnd(160) - 80;
|
||||
rand_deltas[3] = rnd(160) - 80;
|
||||
|
||||
deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
|
||||
(abs(rand_deltas[1]) <= 63) &&
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
@@ -51,7 +51,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
|
||||
bd.predictor = reinterpret_cast<unsigned char*>(
|
||||
vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
|
||||
|
||||
for(int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
for (int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
// start at block0
|
||||
be.src = 0;
|
||||
be.base_src = &source;
|
||||
|
||||
@@ -520,3 +520,10 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f vp90-2-03-size-226x202.webm
|
||||
83c6d8f2969b759e10e5c6542baca1265c874c29 vp90-2-03-size-226x224.webm.md5
|
||||
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce vp90-2-03-size-226x226.webm
|
||||
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 vp90-2-03-size-226x226.webm.md5
|
||||
b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
|
||||
65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
|
||||
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba vp90-2-05-resize.ivf
|
||||
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
|
||||
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
|
||||
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
|
||||
|
||||
@@ -629,3 +629,9 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <string>
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
@@ -48,7 +48,9 @@ int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
/* Shared library builds don't support whitebox tests that exercise internal symbols. */
|
||||
// Shared library builds don't support whitebox tests
|
||||
// that exercise internal symbols.
|
||||
|
||||
#if CONFIG_VP8
|
||||
vp8_rtcd();
|
||||
#endif
|
||||
|
||||
@@ -159,7 +159,11 @@ const char *kVP9TestVectors[] = {
|
||||
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
|
||||
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
|
||||
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
|
||||
"vp90-2-03-size-226x226.webm"
|
||||
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
|
||||
"vp90-2-05-resize.ivf",
|
||||
#if CONFIG_NON420
|
||||
"vp91-2-04-yv444.webm"
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
@@ -16,16 +16,16 @@
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
extern "C" {
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
# include "vp8/common/variance.h"
|
||||
# include "vp8_rtcd.h"
|
||||
# include "./vp8_rtcd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
# include "vp9/encoder/vp9_variance.h"
|
||||
# include "vp9_rtcd.h"
|
||||
# include "./vp9_rtcd.h"
|
||||
#endif
|
||||
}
|
||||
#include "test/acm_random.h"
|
||||
@@ -78,37 +78,9 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
const uint8_t *src,
|
||||
const uint8_t *second_pred,
|
||||
int l2w, int l2h,
|
||||
int xoff, int yoff,
|
||||
unsigned int *sse_ptr) {
|
||||
int se = 0;
|
||||
unsigned int sse = 0;
|
||||
const int w = 1 << l2w, h = 1 << l2h;
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// bilinear interpolation at a 16th pel step
|
||||
const int a1 = ref[(w + 1) * (y + 0) + x + 0];
|
||||
const int a2 = ref[(w + 1) * (y + 0) + x + 1];
|
||||
const int b1 = ref[(w + 1) * (y + 1) + x + 0];
|
||||
const int b2 = ref[(w + 1) * (y + 1) + x + 1];
|
||||
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
|
||||
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
|
||||
const int r = a + (((b - a) * yoff + 8) >> 4);
|
||||
int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
|
||||
se += diff;
|
||||
sse += diff * diff;
|
||||
}
|
||||
}
|
||||
*sse_ptr = sse;
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
template<typename VarianceFunctionType>
|
||||
class VarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
class VarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
|
||||
@@ -190,10 +162,40 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
|
||||
EXPECT_EQ(expected, var);
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
|
||||
unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
const uint8_t *src,
|
||||
const uint8_t *second_pred,
|
||||
int l2w, int l2h,
|
||||
int xoff, int yoff,
|
||||
unsigned int *sse_ptr) {
|
||||
int se = 0;
|
||||
unsigned int sse = 0;
|
||||
const int w = 1 << l2w, h = 1 << l2h;
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
// bilinear interpolation at a 16th pel step
|
||||
const int a1 = ref[(w + 1) * (y + 0) + x + 0];
|
||||
const int a2 = ref[(w + 1) * (y + 0) + x + 1];
|
||||
const int b1 = ref[(w + 1) * (y + 1) + x + 0];
|
||||
const int b2 = ref[(w + 1) * (y + 1) + x + 1];
|
||||
const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
|
||||
const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
|
||||
const int r = a + (((b - a) * yoff + 8) >> 4);
|
||||
int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
|
||||
se += diff;
|
||||
sse += diff * diff;
|
||||
}
|
||||
}
|
||||
*sse_ptr = sse;
|
||||
return sse - (((int64_t) se * se) >> (l2w + l2h));
|
||||
}
|
||||
|
||||
template<typename SubpelVarianceFunctionType>
|
||||
class SubpelVarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
class SubpelVarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, SubpelVarianceFunctionType>& params =
|
||||
@@ -280,6 +282,8 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// VP8 test cases.
|
||||
|
||||
|
||||
@@ -8,10 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
@@ -24,6 +20,11 @@ extern "C" {
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
const int num_tests = 10;
|
||||
|
||||
@@ -44,7 +45,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
int offset = input - (uint8_t *)decrypt_state;
|
||||
int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
|
||||
for (int i = 0; i < count; i++) {
|
||||
output[i] = input[i] ^ secret_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -58,10 +59,10 @@ TEST(VP8, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -76,14 +77,14 @@ TEST(VP8, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int buffer_size = 10000;
|
||||
const int kBufferSize = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
BOOL_CODER bw;
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -98,19 +99,20 @@ TEST(VP8, TestBitIO) {
|
||||
#if CONFIG_DECRYPT
|
||||
encrypt_buffer(bw_buffer, buffer_size);
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size,
|
||||
test_decrypt_cb, (void *)bw_buffer);
|
||||
test_decrypt_cb,
|
||||
reinterpret_cast<void *>(bw_buffer));
|
||||
#else
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size, NULL, NULL);
|
||||
vp8dx_start_decode(&br, bw_buffer, kBufferSize, NULL, NULL);
|
||||
#endif
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
|
||||
<< "pos: "<< i << " / " << bits_to_test
|
||||
<< "pos: "<< i << " / " << kBitsToTest
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,8 @@ const uint8_t test_key[16] = {
|
||||
0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
|
||||
};
|
||||
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0) {
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst,
|
||||
int size, int offset = 0) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
dst[i] = src[i] ^ test_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -34,10 +35,11 @@ void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0)
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
encrypt_buffer(input, output, count, input - (uint8_t *)decrypt_state);
|
||||
encrypt_buffer(input, output, count,
|
||||
input - reinterpret_cast<uint8_t *>(decrypt_state));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
|
||||
@@ -19,7 +19,7 @@ extern "C" {
|
||||
#include "vp9/decoder/vp9_dboolhuff.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -32,10 +32,10 @@ TEST(VP9, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -50,14 +50,14 @@ TEST(VP9, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int buffer_size = 10000;
|
||||
const int kBufferSize = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
vp9_writer bw;
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
vp9_start_encode(&bw, bw_buffer);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -72,16 +72,16 @@ TEST(VP9, TestBitIO) {
|
||||
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
|
||||
|
||||
vp9_reader br;
|
||||
vp9_reader_init(&br, bw_buffer, buffer_size);
|
||||
vp9_reader_init(&br, bw_buffer, kBufferSize);
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
|
||||
<< "pos: " << i << " / " << bits_to_test
|
||||
<< "pos: " << i << " / " << kBitsToTest
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
||||
@@ -39,8 +39,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
// FIXME(rbultje) split in its own file
|
||||
for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES;
|
||||
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
|
||||
for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
|
||||
bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
|
||||
const int block_width = 4 << b_width_log2(bsize);
|
||||
const int block_height = 4 << b_height_log2(bsize);
|
||||
int16_t *diff = reinterpret_cast<int16_t *>(
|
||||
|
||||
96
third_party/x86inc/x86inc.asm
vendored
96
third_party/x86inc/x86inc.asm
vendored
@@ -97,21 +97,91 @@
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%if WIN64
|
||||
%define PIC
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
%define PIC
|
||||
%elif ARCH_X86_64 == 0
|
||||
; x86_32 doesn't require PIC.
|
||||
; Some distros prefer shared objects to be PIC, but nothing breaks if
|
||||
; the code contains a few textrels, so we'll skip that complexity.
|
||||
%undef PIC
|
||||
%elif CONFIG_PIC
|
||||
%define PIC
|
||||
; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
|
||||
; from original code is added in for 64bit.
|
||||
%ifidn __OUTPUT_FORMAT__,elf32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,win32
|
||||
%define ABI_IS_32BIT 1
|
||||
%elifidn __OUTPUT_FORMAT__,aout
|
||||
%define ABI_IS_32BIT 1
|
||||
%else
|
||||
%define ABI_IS_32BIT 0
|
||||
%endif
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
%if CONFIG_PIC=1
|
||||
%ifidn __OUTPUT_FORMAT__,elf32
|
||||
%define GET_GOT_SAVE_ARG 1
|
||||
%define WRT_PLT wrt ..plt
|
||||
%macro GET_GOT 1
|
||||
extern _GLOBAL_OFFSET_TABLE_
|
||||
push %1
|
||||
call %%get_got
|
||||
%%sub_offset:
|
||||
jmp %%exitGG
|
||||
%%get_got:
|
||||
mov %1, [esp]
|
||||
add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
|
||||
ret
|
||||
%%exitGG:
|
||||
%undef GLOBAL
|
||||
%define GLOBAL(x) x + %1 wrt ..gotoff
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
%define GET_GOT_SAVE_ARG 1
|
||||
%macro GET_GOT 1
|
||||
push %1
|
||||
call %%get_got
|
||||
%%get_got:
|
||||
pop %1
|
||||
%undef GLOBAL
|
||||
%define GLOBAL(x) x + %1 - %%get_got
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if ARCH_X86_64 == 0
|
||||
%undef PIC
|
||||
%endif
|
||||
|
||||
%else
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL(x) rel x
|
||||
%define WRT_PLT wrt ..plt
|
||||
|
||||
%if WIN64
|
||||
%define PIC
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
%define PIC
|
||||
%elif CONFIG_PIC
|
||||
%define PIC
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifnmacro GET_GOT
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL(x) x
|
||||
%endif
|
||||
%ifndef RESTORE_GOT
|
||||
%define RESTORE_GOT
|
||||
%endif
|
||||
%ifndef WRT_PLT
|
||||
%define WRT_PLT
|
||||
%endif
|
||||
|
||||
%ifdef PIC
|
||||
default rel
|
||||
%endif
|
||||
; Done with PIC macros
|
||||
|
||||
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
|
||||
%ifndef __NASM_VER__
|
||||
@@ -528,6 +598,10 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
global %1:function hidden
|
||||
%elifidn __OUTPUT_FORMAT__,elf64
|
||||
global %1:function hidden
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
global %1:private_extern
|
||||
%elifidn __OUTPUT_FORMAT__,macho64
|
||||
global %1:private_extern
|
||||
%else
|
||||
global %1
|
||||
%endif
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
|
||||
@@ -12,11 +12,13 @@
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
extern const short vp8_bilinear_filters[8][2];
|
||||
extern const short vp8_sub_pel_filters[8][6];
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -124,7 +124,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->mv.as_int;
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
|
||||
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
|
||||
@@ -41,7 +41,8 @@ extern "C"
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
|
||||
@@ -138,14 +138,10 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre,
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
#else
|
||||
*(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
|
||||
#endif
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
@@ -196,16 +192,12 @@ static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stri
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
#else
|
||||
*(uint32_t *)dst = *(uint32_t *)ptr ;
|
||||
#endif
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -270,7 +262,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -279,7 +271,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -558,7 +550,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
@@ -567,7 +559,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp8/common/x86/filter_x86.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
|
||||
{
|
||||
|
||||
@@ -11,9 +11,15 @@
|
||||
#ifndef FILTER_X86_H
|
||||
#define FILTER_X86_H
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
|
||||
* duplicated values */
|
||||
extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */
|
||||
extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */
|
||||
|
||||
/* duplicated 4x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
|
||||
|
||||
/* duplicated 8x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
|
||||
|
||||
#endif /* FILTER_X86_H */
|
||||
|
||||
@@ -611,16 +611,12 @@ void vp8_sixtap_predict4x4_ssse3
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst_ptr[0] = src_ptr[0];
|
||||
dst_ptr[1] = src_ptr[1];
|
||||
dst_ptr[2] = src_ptr[2];
|
||||
dst_ptr[3] = src_ptr[3];
|
||||
#else
|
||||
*(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
|
||||
#endif
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,8 +110,8 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
|
||||
|
||||
static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
|
||||
{
|
||||
mv->row = (short)(read_mvcomponent(r, mvc) << 1);
|
||||
mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
|
||||
mv->row = (short)(read_mvcomponent(r, mvc) * 2);
|
||||
mv->col = (short)(read_mvcomponent(r, ++mvc) * 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -292,9 +292,9 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
|
||||
blockmv.as_int = 0;
|
||||
if( vp8_read(bc, prob[2]) )
|
||||
{
|
||||
blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) << 1;
|
||||
blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2;
|
||||
blockmv.as_mv.row += best_mv.as_mv.row;
|
||||
blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1;
|
||||
blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2;
|
||||
blockmv.as_mv.col += best_mv.as_mv.col;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -576,7 +576,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
|
||||
|
||||
xd->left_available = 0;
|
||||
|
||||
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
|
||||
@@ -1026,7 +1026,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
const unsigned char *clear = data;
|
||||
if (pbi->decrypt_cb)
|
||||
{
|
||||
int n = data_end - data;
|
||||
int n = (int)(data_end - data);
|
||||
if (n > 10) n = 10;
|
||||
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
|
||||
clear = clear_buffer;
|
||||
|
||||
@@ -432,7 +432,7 @@ static void write_mv_ref
|
||||
assert(NEARESTMV <= m && m <= SPLITMV);
|
||||
#endif
|
||||
vp8_write_token(w, vp8_mv_ref_tree, p,
|
||||
vp8_mv_ref_encoding_array - NEARESTMV + m);
|
||||
vp8_mv_ref_encoding_array + (m - NEARESTMV));
|
||||
}
|
||||
|
||||
static void write_sub_mv_ref
|
||||
@@ -444,7 +444,7 @@ static void write_sub_mv_ref
|
||||
assert(LEFT4X4 <= m && m <= NEW4X4);
|
||||
#endif
|
||||
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
|
||||
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
|
||||
vp8_sub_mv_ref_encoding_array + (m - LEFT4X4));
|
||||
}
|
||||
|
||||
static void write_mv
|
||||
@@ -577,7 +577,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
|
||||
*/
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
#ifdef VP8_ENTROPY_STATS
|
||||
|
||||
@@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ((ip[0] + ip[3])<<3);
|
||||
b1 = ((ip[1] + ip[2])<<3);
|
||||
c1 = ((ip[1] - ip[2])<<3);
|
||||
d1 = ((ip[0] - ip[3])<<3);
|
||||
a1 = ((ip[0] + ip[3]) * 8);
|
||||
b1 = ((ip[1] + ip[2]) * 8);
|
||||
c1 = ((ip[1] - ip[2]) * 8);
|
||||
d1 = ((ip[0] - ip[3]) * 8);
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[2] = a1 - b1;
|
||||
@@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ((ip[0] + ip[2])<<2);
|
||||
d1 = ((ip[1] + ip[3])<<2);
|
||||
c1 = ((ip[1] - ip[3])<<2);
|
||||
b1 = ((ip[0] - ip[2])<<2);
|
||||
a1 = ((ip[0] + ip[2]) * 4);
|
||||
d1 = ((ip[1] + ip[3]) * 4);
|
||||
c1 = ((ip[1] - ip[3]) * 4);
|
||||
b1 = ((ip[0] - ip[2]) * 4);
|
||||
|
||||
op[0] = a1 + d1 + (a1!=0);
|
||||
op[1] = b1 + c1;
|
||||
|
||||
@@ -711,8 +711,8 @@ skip_motion_search:
|
||||
neutral_count++;
|
||||
}
|
||||
|
||||
d->bmi.mv.as_mv.row <<= 3;
|
||||
d->bmi.mv.as_mv.col <<= 3;
|
||||
d->bmi.mv.as_mv.row *= 8;
|
||||
d->bmi.mv.as_mv.col *= 8;
|
||||
this_error = motion_error;
|
||||
vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
|
||||
vp8_encode_inter16x16y(x);
|
||||
@@ -909,13 +909,16 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
|
||||
|
||||
static double bitcost( double prob )
|
||||
{
|
||||
return -(log( prob ) / log( 2.0 ));
|
||||
if (prob > 0.000122)
|
||||
return -log(prob) / log(2.0);
|
||||
else
|
||||
return 13.0;
|
||||
}
|
||||
static int64_t estimate_modemvcost(VP8_COMP *cpi,
|
||||
FIRSTPASS_STATS * fpstats)
|
||||
{
|
||||
int mv_cost;
|
||||
int mode_cost;
|
||||
int64_t mode_cost;
|
||||
|
||||
double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
|
||||
double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
|
||||
@@ -937,10 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
|
||||
/* Crude estimate of overhead cost from modes
|
||||
* << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
|
||||
*/
|
||||
mode_cost =
|
||||
(int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
|
||||
(av_pct_motion * motion_cost) +
|
||||
(av_intra * intra_cost) ) * cpi->common.MBs ) << 9;
|
||||
mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) +
|
||||
(av_pct_motion * motion_cost) +
|
||||
(av_intra * intra_cost)) * cpi->common.MBs) * 512;
|
||||
|
||||
return mv_cost + mode_cost;
|
||||
}
|
||||
|
||||
@@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
unsigned char *z = (*(b->base_src) + b->src);
|
||||
|
||||
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
|
||||
int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
|
||||
int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
|
||||
int tr = br, tc = bc;
|
||||
unsigned int besterr;
|
||||
unsigned int left, right, up, down, diag;
|
||||
@@ -220,10 +220,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
unsigned int quarteriters = 4;
|
||||
int thismse;
|
||||
|
||||
int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minc = MAX(x->mv_col_min * 4,
|
||||
(ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxc = MIN(x->mv_col_max * 4,
|
||||
(ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
|
||||
int minr = MAX(x->mv_row_min * 4,
|
||||
(ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
|
||||
int maxr = MIN(x->mv_row_max * 4,
|
||||
(ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
|
||||
|
||||
int y_stride;
|
||||
int offset;
|
||||
@@ -254,8 +258,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
|
||||
|
||||
/* central mv */
|
||||
bestmv->as_mv.row <<= 3;
|
||||
bestmv->as_mv.col <<= 3;
|
||||
bestmv->as_mv.row *= 8;
|
||||
bestmv->as_mv.col *= 8;
|
||||
|
||||
/* calculate central point error */
|
||||
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
|
||||
@@ -337,8 +341,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
tc = bc;
|
||||
}
|
||||
|
||||
bestmv->as_mv.row = br << 1;
|
||||
bestmv->as_mv.col = bc << 1;
|
||||
bestmv->as_mv.row = br * 2;
|
||||
bestmv->as_mv.col = bc * 2;
|
||||
|
||||
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
|
||||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
|
||||
@@ -699,8 +703,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
#endif
|
||||
|
||||
/* central mv */
|
||||
bestmv->as_mv.row <<= 3;
|
||||
bestmv->as_mv.col <<= 3;
|
||||
bestmv->as_mv.row *= 8;
|
||||
bestmv->as_mv.col *= 8;
|
||||
startmv = *bestmv;
|
||||
|
||||
/* calculate central point error */
|
||||
@@ -1315,8 +1319,8 @@ int vp8_diamond_search_sadx4
|
||||
(*num00)++;
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = best_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = best_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
@@ -1709,8 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = best_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = best_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = best_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
@@ -1905,8 +1909,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.as_mv.row = ref_mv->as_mv.row << 3;
|
||||
this_mv.as_mv.col = ref_mv->as_mv.col << 3;
|
||||
this_mv.as_mv.row = ref_mv->as_mv.row * 8;
|
||||
this_mv.as_mv.col = ref_mv->as_mv.col * 8;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
|
||||
|
||||
@@ -313,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
/* Get baseline error score */
|
||||
|
||||
/* Copy the unfiltered / processed recon buffer to the new buffer */
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
|
||||
vp8cx_set_alt_lf_level(cpi, filt_mid);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
|
||||
@@ -339,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
if(ss_err[filt_low] == 0)
|
||||
{
|
||||
/* Get Low filter error score */
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_low);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
|
||||
|
||||
@@ -367,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
{
|
||||
if(ss_err[filt_high] == 0)
|
||||
{
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_high);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
|
||||
|
||||
|
||||
@@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
|
||||
assert(NEARESTMV <= m && m <= SPLITMV);
|
||||
vp8_mv_ref_probs(p, near_mv_ref_ct);
|
||||
return vp8_cost_token(vp8_mv_ref_tree, p,
|
||||
vp8_mv_ref_encoding_array - NEARESTMV + m);
|
||||
vp8_mv_ref_encoding_array + (m - NEARESTMV));
|
||||
}
|
||||
|
||||
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
|
||||
|
||||
@@ -153,7 +153,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
#else
|
||||
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
|
||||
#endif
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
|
||||
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
|
||||
@@ -204,7 +204,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
|
||||
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
|
||||
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
|
||||
if(finalize && cfg->rc_end_usage == VPX_CQ)
|
||||
if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
|
||||
RANGE_CHECK(vp8_cfg, cq_level,
|
||||
cfg->rc_min_quantizer, cfg->rc_max_quantizer);
|
||||
|
||||
@@ -327,17 +327,14 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
oxcf->resample_up_water_mark = cfg.rc_resize_up_thresh;
|
||||
oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh;
|
||||
|
||||
if (cfg.rc_end_usage == VPX_VBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CQ)
|
||||
{
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
if (cfg.rc_end_usage == VPX_VBR) {
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
} else if (cfg.rc_end_usage == VPX_CBR) {
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
} else if (cfg.rc_end_usage == VPX_CQ) {
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
} else if (cfg.rc_end_usage == VPX_Q) {
|
||||
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
|
||||
}
|
||||
|
||||
oxcf->target_bandwidth = cfg.rc_target_bitrate;
|
||||
@@ -1272,7 +1269,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
||||
1, /* g_delete_first_pass_file */
|
||||
"vp8.fpf" /* first pass filename */
|
||||
#endif
|
||||
|
||||
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
|
||||
1, /* ts_number_layers */
|
||||
{0}, /* ts_target_bitrate */
|
||||
{0}, /* ts_rate_decimator */
|
||||
|
||||
116
vp9/common/arm/neon/vp9_avg_neon.asm
Normal file
116
vp9/common/arm/neon/vp9_avg_neon.asm
Normal file
@@ -0,0 +1,116 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_avg_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_convolve_avg_neon| PROC
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #32]
|
||||
mov r6, r2
|
||||
|
||||
cmp r4, #32
|
||||
bgt avg64
|
||||
beq avg32
|
||||
cmp r4, #8
|
||||
bgt avg16
|
||||
beq avg8
|
||||
b avg4
|
||||
|
||||
avg64
|
||||
sub lr, r1, #32
|
||||
sub r4, r3, #32
|
||||
avg64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
pld [r2, r3]
|
||||
vld1.8 {q8-q9}, [r6@128]!
|
||||
vld1.8 {q10-q11}, [r6@128], r4
|
||||
vrhadd.u8 q0, q0, q8
|
||||
vrhadd.u8 q1, q1, q9
|
||||
vrhadd.u8 q2, q2, q10
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r4
|
||||
subs r5, r5, #1
|
||||
bgt avg64_h
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg32
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vld1.8 {q8-q9}, [r6@128], r3
|
||||
vld1.8 {q10-q11}, [r6@128], r3
|
||||
pld [r0]
|
||||
vrhadd.u8 q0, q0, q8
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q1, q1, q9
|
||||
pld [r6]
|
||||
vrhadd.u8 q2, q2, q10
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg32
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg16
|
||||
vld1.8 {q0}, [r0], r1
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vld1.8 {q2}, [r6@128], r3
|
||||
vld1.8 {q3}, [r6@128], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q2
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q1, q1, q3
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg16
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg8
|
||||
vld1.8 {d0}, [r0], r1
|
||||
vld1.8 {d1}, [r0], r1
|
||||
vld1.8 {d2}, [r6@64], r3
|
||||
vld1.8 {d3}, [r6@64], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q1
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d1}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg8
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg4
|
||||
vld1.32 {d0[0]}, [r0], r1
|
||||
vld1.32 {d0[1]}, [r0], r1
|
||||
vld1.32 {d2[0]}, [r6@32], r3
|
||||
vld1.32 {d2[1]}, [r6@32], r3
|
||||
vrhadd.u8 d0, d0, d2
|
||||
vst1.32 {d0[0]}, [r2@32], r3
|
||||
vst1.32 {d0[1]}, [r2@32], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg4
|
||||
pop {r4-r6, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -66,46 +66,64 @@
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
add r8, r1, r1, lsl #1 ; src_stride * 3
|
||||
add r8, r8, #4 ; src_stride * 3 + 4
|
||||
rsb r8, r8, #0 ; reset for src
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
add r4, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r4, r4, #4 ; dst_stride * 3 - 4
|
||||
rsb r4, r4, #0 ; reset for dst
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
sub r9, r1, #8 ; post increment for src load
|
||||
|
||||
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz
|
||||
vld1.8 {d24}, [r0]!
|
||||
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
|
||||
|
||||
vld1.8 {d25}, [r0]!
|
||||
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
|
||||
|
||||
vld1.8 {d26}, [r0]!
|
||||
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
|
||||
|
||||
vld1.8 {d27}, [r0]!
|
||||
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
; extract to s16
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
vtrn.32 d28, d29 ; only the first half is populated
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d30
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; slightly out of order load to match the existing data
|
||||
vld1.u32 {d6[0]}, [r2], r3
|
||||
@@ -116,10 +134,12 @@ loop_horiz
|
||||
sub r2, r2, r3, lsl #2 ; reset for store
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
|
||||
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -139,20 +159,25 @@ loop_horiz
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r4
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r1 ; src += src_stride * 4 - w
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
@@ -163,66 +188,77 @@ loop_horiz
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_avg_vert_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r7, [sp, #40] ; filter_y
|
||||
ldr r8, [sp, #48] ; w
|
||||
ldr r9, [sp, #52] ; h
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r7] ; filter_y
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
mov r5, r1, lsl #1 ; src_stride * 2
|
||||
add r5, r5, r1, lsl #3 ; src_stride * 10
|
||||
sub r5, r5, #4 ; src_stride * 10 + 4
|
||||
rsb r5, r5, #0 ; reset for src
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
add r6, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r6, r6, #4 ; dst_stride * 3 - 4
|
||||
rsb r6, r6, #0 ; reset for dst
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
mov r10, r8 ; w loop counter
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d16[0]}, [r0], r1
|
||||
vld1.u32 {d16[1]}, [r0], r1
|
||||
vld1.u32 {d18[0]}, [r0], r1
|
||||
vld1.u32 {d18[1]}, [r0], r1
|
||||
vld1.u32 {d20[0]}, [r0], r1
|
||||
vld1.u32 {d20[1]}, [r0], r1
|
||||
vld1.u32 {d22[0]}, [r0], r1
|
||||
vld1.u32 {d22[1]}, [r0], r1
|
||||
vld1.u32 {d24[0]}, [r0], r1
|
||||
vld1.u32 {d24[1]}, [r0], r1
|
||||
vld1.u32 {d26[0]}, [r0], r5
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
vld1.u32 {d6[0]}, [r2], r3
|
||||
vld1.u32 {d6[1]}, [r2], r3
|
||||
vld1.u32 {d7[0]}, [r2], r3
|
||||
vld1.u32 {d7[1]}, [r2], r3
|
||||
vld1.u32 {d6[0]}, [r5@32], r3
|
||||
vld1.u32 {d6[1]}, [r8@32], r3
|
||||
vld1.u32 {d7[0]}, [r5@32], r3
|
||||
vld1.u32 {d7[1]}, [r8@32], r3
|
||||
|
||||
sub r2, r2, r3, lsl #2 ; reset for store
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -237,22 +273,30 @@ loop_vert
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r6
|
||||
sub r5, r5, r3, lsl #1 ; reset for store
|
||||
sub r8, r8, r3, lsl #1
|
||||
|
||||
subs r8, r8, #4 ; w -= 4
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
mov r8, r10 ; restore w counter
|
||||
add r0, r0, r7 ; src += 4 * src_stride - w
|
||||
add r2, r2, r12 ; dst += 4 * dst_stride - w
|
||||
subs r9, r9, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r10, pc}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
||||
|
||||
@@ -66,52 +66,72 @@
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
add r8, r1, r1, lsl #1 ; src_stride * 3
|
||||
add r8, r8, #4 ; src_stride * 3 + 4
|
||||
rsb r8, r8, #0 ; reset for src
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
add r4, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r4, r4, #4 ; dst_stride * 3 - 4
|
||||
rsb r4, r4, #0 ; reset for dst
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
sub r9, r1, #8 ; post increment for src load
|
||||
|
||||
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz
|
||||
vld1.8 {d24}, [r0]!
|
||||
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
|
||||
|
||||
vld1.8 {d25}, [r0]!
|
||||
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
|
||||
|
||||
vld1.8 {d26}, [r0]!
|
||||
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
|
||||
|
||||
vld1.8 {d27}, [r0]!
|
||||
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
; extract to s16
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
vtrn.32 d28, d29 ; only the first half is populated
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d30
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
|
||||
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -128,20 +148,25 @@ loop_horiz
|
||||
vtrn.32 d2, d3
|
||||
vtrn.8 d2, d3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r4
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r1 ; src += src_stride * 4 - w
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
@@ -152,59 +177,72 @@ loop_horiz
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_vert_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r7, [sp, #40] ; filter_y
|
||||
ldr r8, [sp, #48] ; w
|
||||
ldr r9, [sp, #52] ; h
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r7] ; filter_y
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
mov r5, r1, lsl #1 ; src_stride * 2
|
||||
add r5, r5, r1, lsl #3 ; src_stride * 10
|
||||
sub r5, r5, #4 ; src_stride * 10 + 4
|
||||
rsb r5, r5, #0 ; reset for src
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
add r6, r3, r3, lsl #1 ; dst_stride * 3
|
||||
sub r6, r6, #4 ; dst_stride * 3 - 4
|
||||
rsb r6, r6, #0 ; reset for dst
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
|
||||
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
mov r10, r8 ; w loop counter
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d16[0]}, [r0], r1
|
||||
vld1.u32 {d16[1]}, [r0], r1
|
||||
vld1.u32 {d18[0]}, [r0], r1
|
||||
vld1.u32 {d18[1]}, [r0], r1
|
||||
vld1.u32 {d20[0]}, [r0], r1
|
||||
vld1.u32 {d20[1]}, [r0], r1
|
||||
vld1.u32 {d22[0]}, [r0], r1
|
||||
vld1.u32 {d22[1]}, [r0], r1
|
||||
vld1.u32 {d24[0]}, [r0], r1
|
||||
vld1.u32 {d24[1]}, [r0], r1
|
||||
vld1.u32 {d26[0]}, [r0], r5
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
@@ -216,22 +254,27 @@ loop_vert
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
vst1.u32 {d2[0]}, [r2], r3
|
||||
vst1.u32 {d2[1]}, [r2], r3
|
||||
vst1.u32 {d3[0]}, [r2], r3
|
||||
vst1.u32 {d3[1]}, [r2], r6
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
subs r8, r8, #4 ; w -= 4
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
mov r8, r10 ; restore w counter
|
||||
add r0, r0, r7 ; src += 4 * src_stride - w
|
||||
add r2, r2, r12 ; dst += 4 * dst_stride - w
|
||||
subs r9, r9, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r10, pc}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
@@ -19,7 +20,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
||||
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
|
||||
*/
|
||||
uint8_t temp[64 * 72];
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
|
||||
// Account for the vertical phase needing 3 lines prior and 4 lines post
|
||||
int intermediate_height = h + 7;
|
||||
@@ -53,7 +54,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
uint8_t temp[64 * 72];
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16)
|
||||
|
||||
84
vp9/common/arm/neon/vp9_copy_neon.asm
Normal file
84
vp9/common/arm/neon/vp9_copy_neon.asm
Normal file
@@ -0,0 +1,84 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_copy_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_convolve_copy_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
ldrd r4, r5, [sp, #28]
|
||||
|
||||
cmp r4, #32
|
||||
bgt copy64
|
||||
beq copy32
|
||||
cmp r4, #8
|
||||
bgt copy16
|
||||
beq copy8
|
||||
b copy4
|
||||
|
||||
copy64
|
||||
sub lr, r1, #32
|
||||
sub r3, r3, #32
|
||||
copy64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy64_h
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy32
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy32
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy16
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy16
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy8
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d2}, [r0], r1
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d2}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy8
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy4
|
||||
ldr r12, [r0], r1
|
||||
str r12, [r2], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy4
|
||||
pop {r4-r5, pc}
|
||||
ENDP
|
||||
|
||||
END
|
||||
172
vp9/common/arm/neon/vp9_idct16x16_neon.c
Normal file
172
vp9/common/arm/neon/vp9_idct16x16_neon.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
|
||||
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
|
||||
extern void vp9_push_neon(int64_t *store);
|
||||
extern void vp9_pop_neon(int64_t *store);
|
||||
|
||||
void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the lower 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
int16_t row_idct_output[16*16] = {0};
|
||||
|
||||
// save d8-d15 register values.
|
||||
vp9_push_neon(store_reg);
|
||||
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_10_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Skip Parallel idct on the lower 8 rows as they are all 0s */
|
||||
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
dest,
|
||||
dest_stride);
|
||||
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
dest+8,
|
||||
dest_stride);
|
||||
|
||||
// restore d8-d15 register values.
|
||||
vp9_pop_neon(store_reg);
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -361,8 +361,6 @@ v_end
|
||||
|
||||
vand d16, d20, d19 ; flat && mask
|
||||
vmov r5, r6, d16
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #1 ; Only do filter branch
|
||||
|
||||
; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
|
||||
vabd.u8 d22, d3, d7 ; abs(p4 - p0)
|
||||
@@ -388,10 +386,11 @@ v_end
|
||||
|
||||
vmov.u8 d22, #0x80
|
||||
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #1 ; Only do filter branch
|
||||
|
||||
vand d17, d18, d16 ; flat2 && flat && mask
|
||||
vmov r5, r6, d17
|
||||
orrs r5, r5, r6 ; Check for 0
|
||||
orreq r7, r7, #2 ; Only do mbfilter branch
|
||||
|
||||
; mbfilter() function
|
||||
|
||||
@@ -405,15 +404,10 @@ v_end
|
||||
vmov.u8 d27, #3
|
||||
|
||||
vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
|
||||
|
||||
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
|
||||
|
||||
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
|
||||
|
||||
vand d29, d29, d21 ; filter &= hev
|
||||
|
||||
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
|
||||
|
||||
vmov.u8 d29, #4
|
||||
|
||||
; filter = clamp(filter + 3 * ( qs0 - ps0))
|
||||
@@ -452,37 +446,37 @@ v_end
|
||||
vaddl.u8 q15, d7, d8 ; op2 = p0 + q0
|
||||
vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3
|
||||
vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2
|
||||
vaddl.u8 q10, d4, d5
|
||||
vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
|
||||
vaddl.u8 q14, d6, d9
|
||||
vqrshrn.u16 d18, q15, #3 ; r_op2
|
||||
|
||||
vsubw.u8 q15, d4 ; op1 = op2 - p3
|
||||
vsubw.u8 q15, d5 ; op1 -= p2
|
||||
vaddw.u8 q15, d6 ; op1 += p1
|
||||
vaddw.u8 q15, d9 ; op1 += q1
|
||||
vsub.i16 q15, q10
|
||||
vaddl.u8 q10, d4, d6
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d7, d10
|
||||
vqrshrn.u16 d19, q15, #3 ; r_op1
|
||||
|
||||
vsubw.u8 q15, d4 ; op0 = op1 - p3
|
||||
vsubw.u8 q15, d6 ; op0 -= p1
|
||||
vaddw.u8 q15, d7 ; op0 += p0
|
||||
vaddw.u8 q15, d10 ; op0 += q2
|
||||
vsub.i16 q15, q10
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d8, d11
|
||||
vqrshrn.u16 d20, q15, #3 ; r_op0
|
||||
|
||||
vsubw.u8 q15, d4 ; oq0 = op0 - p3
|
||||
vsubw.u8 q15, d7 ; oq0 -= p0
|
||||
vaddw.u8 q15, d8 ; oq0 += q0
|
||||
vaddw.u8 q15, d11 ; oq0 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d9, d11
|
||||
vqrshrn.u16 d21, q15, #3 ; r_oq0
|
||||
|
||||
vsubw.u8 q15, d5 ; oq1 = oq0 - p2
|
||||
vsubw.u8 q15, d8 ; oq1 -= q0
|
||||
vaddw.u8 q15, d9 ; oq1 += q1
|
||||
vaddw.u8 q15, d11 ; oq1 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d10, d11
|
||||
vqrshrn.u16 d22, q15, #3 ; r_oq1
|
||||
|
||||
vsubw.u8 q15, d6 ; oq2 = oq0 - p1
|
||||
vsubw.u8 q15, d9 ; oq2 -= q1
|
||||
vaddw.u8 q15, d10 ; oq2 += q2
|
||||
vaddw.u8 q15, d11 ; oq2 += q3
|
||||
vadd.i16 q15, q14
|
||||
vqrshrn.u16 d27, q15, #3 ; r_oq2
|
||||
|
||||
; Filter does not set op2 or oq2, so use p2 and q2.
|
||||
@@ -501,113 +495,104 @@ v_end
|
||||
; wide_mbfilter flat2 && flat && mask branch
|
||||
vmov.u8 d16, #7
|
||||
vaddl.u8 q15, d7, d8 ; op6 = p0 + q0
|
||||
vaddl.u8 q12, d2, d3
|
||||
vaddl.u8 q13, d4, d5
|
||||
vaddl.u8 q14, d1, d6
|
||||
vmlal.u8 q15, d0, d16 ; op6 += p7 * 3
|
||||
vmlal.u8 q15, d1, d29 ; op6 += p6 * 2
|
||||
vaddw.u8 q15, d2 ; op6 += p5
|
||||
vaddw.u8 q15, d3 ; op6 += p4
|
||||
vaddw.u8 q15, d4 ; op6 += p3
|
||||
vaddw.u8 q15, d5 ; op6 += p2
|
||||
vaddw.u8 q15, d6 ; op6 += p1
|
||||
vadd.i16 q12, q13
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d2, d9
|
||||
vadd.i16 q15, q12
|
||||
vaddl.u8 q12, d0, d1
|
||||
vaddw.u8 q15, d1
|
||||
vaddl.u8 q13, d0, d2
|
||||
vadd.i16 q14, q15, q14
|
||||
vqrshrn.u16 d16, q15, #4 ; w_op6
|
||||
|
||||
vsubw.u8 q15, d0 ; op5 = op6 - p7
|
||||
vsubw.u8 q15, d1 ; op5 -= p6
|
||||
vaddw.u8 q15, d2 ; op5 += p5
|
||||
vaddw.u8 q15, d9 ; op5 += q1
|
||||
vsub.i16 q15, q14, q12
|
||||
vaddl.u8 q14, d3, d10
|
||||
vqrshrn.u16 d24, q15, #4 ; w_op5
|
||||
|
||||
vsubw.u8 q15, d0 ; op4 = op5 - p7
|
||||
vsubw.u8 q15, d2 ; op4 -= p5
|
||||
vaddw.u8 q15, d3 ; op4 += p4
|
||||
vaddw.u8 q15, d10 ; op4 += q2
|
||||
vsub.i16 q15, q13
|
||||
vaddl.u8 q13, d0, d3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d4, d11
|
||||
vqrshrn.u16 d25, q15, #4 ; w_op4
|
||||
|
||||
vsubw.u8 q15, d0 ; op3 = op4 - p7
|
||||
vsubw.u8 q15, d3 ; op3 -= p4
|
||||
vaddw.u8 q15, d4 ; op3 += p3
|
||||
vaddw.u8 q15, d11 ; op3 += q3
|
||||
vadd.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d4
|
||||
vsub.i16 q15, q13
|
||||
vsub.i16 q14, q15, q14
|
||||
vqrshrn.u16 d26, q15, #4 ; w_op3
|
||||
|
||||
vsubw.u8 q15, d0 ; op2 = op3 - p7
|
||||
vsubw.u8 q15, d4 ; op2 -= p3
|
||||
vaddw.u8 q15, d5 ; op2 += p2
|
||||
vaddw.u8 q15, q14, d5 ; op2 += p2
|
||||
vaddl.u8 q14, d0, d5
|
||||
vaddw.u8 q15, d12 ; op2 += q4
|
||||
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d27, q15, #4 ; w_op2
|
||||
|
||||
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; op1 = op2 - p7
|
||||
vsubw.u8 q15, d5 ; op1 -= p2
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d6
|
||||
vaddw.u8 q15, d6 ; op1 += p1
|
||||
vaddw.u8 q15, d13 ; op1 += q5
|
||||
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d18, q15, #4 ; w_op1
|
||||
|
||||
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; op0 = op1 - p7
|
||||
vsubw.u8 q15, d6 ; op0 -= p1
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d0, d7
|
||||
vaddw.u8 q15, d7 ; op0 += p0
|
||||
vaddw.u8 q15, d14 ; op0 += q6
|
||||
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d19, q15, #4 ; w_op0
|
||||
|
||||
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d0 ; oq0 = op0 - p7
|
||||
vsubw.u8 q15, d7 ; oq0 -= p0
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d1, d8
|
||||
vaddw.u8 q15, d8 ; oq0 += q0
|
||||
vaddw.u8 q15, d15 ; oq0 += q7
|
||||
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d20, q15, #4 ; w_oq0
|
||||
|
||||
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d1 ; oq1 = oq0 - p6
|
||||
vsubw.u8 q15, d8 ; oq1 -= q0
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d2, d9
|
||||
vaddw.u8 q15, d9 ; oq1 += q1
|
||||
vaddl.u8 q4, d10, d15
|
||||
vaddw.u8 q15, d15 ; oq1 += q7
|
||||
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d21, q15, #4 ; w_oq1
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d3, d10
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d11, d15
|
||||
vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d2 ; oq2 = oq1 - p5
|
||||
vsubw.u8 q15, d9 ; oq2 -= q1
|
||||
vaddw.u8 q15, d10 ; oq2 += q2
|
||||
vaddw.u8 q15, d15 ; oq2 += q7
|
||||
vqrshrn.u16 d22, q15, #4 ; w_oq2
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d4, d11
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d12, d15
|
||||
vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d3 ; oq3 = oq2 - p4
|
||||
vsubw.u8 q15, d10 ; oq3 -= q2
|
||||
vaddw.u8 q15, d11 ; oq3 += q3
|
||||
vaddw.u8 q15, d15 ; oq3 += q7
|
||||
vqrshrn.u16 d23, q15, #4 ; w_oq3
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d5, d12
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d13, d15
|
||||
vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d4 ; oq4 = oq3 - p3
|
||||
vsubw.u8 q15, d11 ; oq4 -= q3
|
||||
vaddw.u8 q15, d12 ; oq4 += q4
|
||||
vaddw.u8 q15, d15 ; oq4 += q7
|
||||
vqrshrn.u16 d1, q15, #4 ; w_oq4
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vaddl.u8 q14, d6, d13
|
||||
vadd.i16 q15, q4
|
||||
vaddl.u8 q4, d14, d15
|
||||
vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d5 ; oq5 = oq4 - p2
|
||||
vsubw.u8 q15, d12 ; oq5 -= q4
|
||||
vaddw.u8 q15, d13 ; oq5 += q5
|
||||
vaddw.u8 q15, d15 ; oq5 += q7
|
||||
vqrshrn.u16 d2, q15, #4 ; w_oq5
|
||||
|
||||
vsub.i16 q15, q14
|
||||
vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m)
|
||||
|
||||
vsubw.u8 q15, d6 ; oq6 = oq5 - p1
|
||||
vsubw.u8 q15, d13 ; oq6 -= q5
|
||||
vaddw.u8 q15, d14 ; oq6 += q6
|
||||
vaddw.u8 q15, d15 ; oq6 += q7
|
||||
vqrshrn.u16 d3, q15, #4 ; w_oq6
|
||||
|
||||
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
|
||||
vadd.i16 q15, q4
|
||||
vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m)
|
||||
vqrshrn.u16 d3, q15, #4 ; w_oq6
|
||||
vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m)
|
||||
vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m)
|
||||
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
|
||||
|
||||
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
36
vp9/common/arm/neon/vp9_save_reg_neon.asm
Normal file
@@ -0,0 +1,36 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_push_neon|
|
||||
EXPORT |vp9_pop_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; vp9_push_neon: save NEON registers d8-d15 to a caller-supplied buffer.
;
; r0 pointer to the save area; 64 bytes are written (8 registers x 8 bytes)
;
; r0 is advanced by 64 in total by the two post-incrementing stores.
; The register order written here is the order vp9_pop_neon reads back.
|vp9_push_neon| PROC
|
||||
vst1.i64 {d8, d9, d10, d11}, [r0]! ; store d8-d11, r0 += 32
|
||||
vst1.i64 {d12, d13, d14, d15}, [r0]! ; store d12-d15, r0 += 32
|
||||
bx lr ; return to caller
|
||||
|
||||
ENDP ; |vp9_push_neon|
|
||||
|
||||
; vp9_pop_neon: reload NEON registers d8-d15 from a buffer previously
; filled by vp9_push_neon.
;
; r0 pointer to the save area; 64 bytes are read (8 registers x 8 bytes)
;
; r0 is advanced by 64 in total by the two post-incrementing loads.
|vp9_pop_neon| PROC
|
||||
vld1.i64 {d8, d9, d10, d11}, [r0]! ; reload d8-d11, r0 += 32
|
||||
vld1.i64 {d12, d13, d14, d15}, [r0]! ; reload d12-d15, r0 += 32
|
||||
bx lr ; return to caller
|
||||
|
||||
ENDP ; |vp9_pop_neon|
|
||||
|
||||
END
|
||||
|
||||
198
vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
Normal file
198
vp9/common/arm/neon/vp9_short_idct16x16_1_add_neon.asm
Normal file
@@ -0,0 +1,198 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct16x16_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; DC-only 16x16 inverse transform and add: only input[0] is read. A single
; value a1 is derived from it and added, with saturation to 0..255, to every
; pixel of the 16x16 destination block.
;
;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct16x16_1_add_neon| PROC
|
||||
ldrsh r0, [r0] ; r0 = input[0] (sign-extended DC coefficient)
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41 ; r12 = 0x2d41 = 11585
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest (the constant in r12 is no longer needed)
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 6)
|
||||
add r0, r0, #32 ; + (1 <<((6) - 1))
|
||||
asr r0, r0, #6 ; >> 6
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
mov r0, #8 ; r0 = 8: step from the left to the right half of a row
|
||||
sub r2, #8 ; r2 = dest_stride - 8: step from the right half to the next row
|
||||
|
||||
; load destination data row0 - row3
|
||||
vld1.64 {d2}, [r1], r0 ; row 0, left 8 pixels
|
||||
vld1.64 {d3}, [r1], r2 ; row 0, right 8 pixels
|
||||
vld1.64 {d4}, [r1], r0 ; row 1, left 8 pixels
|
||||
vld1.64 {d5}, [r1], r2 ; row 1, right 8 pixels
|
||||
vld1.64 {d6}, [r1], r0 ; row 2, left 8 pixels
|
||||
vld1.64 {d7}, [r1], r2 ; row 2, right 8 pixels
|
||||
vld1.64 {d16}, [r1], r0 ; row 3, left 8 pixels
|
||||
vld1.64 {d17}, [r1], r2 ; row 3, right 8 pixels
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0 ; r12 walks dest with the same 8 / stride-8 steps as r1
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row4 - row7 (same register layout as row0 - row3)
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row8 - row11 (same register layout as row0 - row3)
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row12 - row15 (same register layout as row0 - row3)
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_1_add_neon|
|
||||
|
||||
END
|
||||
1179
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
Normal file
1179
vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
Normal file
File diff suppressed because it is too large
Load Diff
1299
vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
Normal file
1299
vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
Normal file
File diff suppressed because it is too large
Load Diff
68
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
Normal file
68
vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm
Normal file
@@ -0,0 +1,68 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct4x4_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; DC-only 4x4 inverse transform and add: only input[0] is read. A single
; value a1 is derived from it and added, with saturation to 0..255, to every
; pixel of the 4x4 destination block.
;
;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct4x4_1_add_neon| PROC
|
||||
ldrsh r0, [r0] ; r0 = input[0] (sign-extended DC coefficient)
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41 ; r12 = 0x2d41 = 11585
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest (the constant in r12 is no longer needed)
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 4)
|
||||
add r0, r0, #8 ; + (1 <<((4) - 1))
|
||||
asr r0, r0, #4 ; >> 4
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
|
||||
vld1.32 {d2[0]}, [r1], r2 ; row 0 (4 pixels)
|
||||
vld1.32 {d2[1]}, [r1], r2 ; row 1
|
||||
vld1.32 {d4[0]}, [r1], r2 ; row 2
|
||||
vld1.32 {d4[1]}, [r1] ; row 3, no post-increment
|
||||
|
||||
vaddw.u8 q8, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q9, q0, d4 ; dest[x] + a1
|
||||
|
||||
vqmovun.s16 d6, q8 ; clip_pixel
|
||||
vqmovun.s16 d7, q9 ; clip_pixel
|
||||
|
||||
vst1.32 {d6[0]}, [r12], r2 ; row 0, written through the saved dest pointer
|
||||
vst1.32 {d6[1]}, [r12], r2 ; row 1
|
||||
vst1.32 {d7[0]}, [r12], r2 ; row 2
|
||||
vst1.32 {d7[1]}, [r12] ; row 3, no post-increment
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_1_add_neon|
|
||||
|
||||
END
|
||||
190
vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
Normal file
190
vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm
Normal file
@@ -0,0 +1,190 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
; Full 4x4 inverse transform and add: all 16 coefficients at input are
; transformed (rows, then columns), rounded, added to the 4x4 block at dest
; and stored back with saturation to 0..255.
;
;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct4x4_add_neon| PROC
|
||||
|
||||
; The 2D transform is done with two passes which are actually pretty
|
||||
; similar. We first transform the rows. This is done by transposing
|
||||
; the inputs, doing an SIMD column transform (the columns are the
|
||||
; transposed rows) and then transpose the results (so that it goes back
|
||||
; in normal/row positions). Then, we transform the columns by doing
|
||||
; another SIMD column transform.
|
||||
; So, two passes of a transpose followed by a column transform.
|
||||
|
||||
; load the inputs into q8-q9, d16-d19
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
|
||||
; generate scalar constants (r0 is reused: input has been fully loaded)
|
||||
; cospi_8_64 = 15137 = 0x3b21
|
||||
mov r0, #0x3b00
|
||||
add r0, #0x21
|
||||
; cospi_16_64 = 11585 = 0x2d41
|
||||
mov r3, #0x2d00
|
||||
add r3, #0x41
|
||||
; cospi_24_64 = 6270 = 0x187e
|
||||
mov r12, #0x1800
|
||||
add r12, #0x7e
|
||||
|
||||
; transpose the input data
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
|
||||
; generate constant vectors
|
||||
vdup.16 d20, r0 ; replicate cospi_8_64
|
||||
vdup.16 d21, r3 ; replicate cospi_16_64
|
||||
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
vdup.16 d22, r12 ; replicate cospi_24_64
|
||||
|
||||
; do the transform on transposed rows
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2], placed in the high half of q14
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3], placed in the low half of q14
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14 ; d16 = output[0], d17 = output[1]
|
||||
vsub.s16 q9, q13, q14 ; d18 = output[3], d19 = output[2]
|
||||
vswp d18, d19 ; restore order: d18 = output[2], d19 = output[3]
|
||||
|
||||
; transpose the results
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
; do the transform on columns
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2], placed in the high half of q14
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3], placed in the low half of q14
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14 ; d16 = output[0], d17 = output[1]
|
||||
vsub.s16 q9, q13, q14 ; d18 = output[3], d19 = output[2] (no vswp this time)
|
||||
|
||||
; The results are in two registers, one of them being swapped. This will
|
||||
; be taken care of by loading the 'dest' value in a swapped fashion and
|
||||
; also storing them in the same swapped fashion.
|
||||
; temp_out[0, 1] = d16, d17 = q8
|
||||
; temp_out[2, 3] = d19, d18 = q9 swapped
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4)
|
||||
vrshr.s16 q8, q8, #4
|
||||
vrshr.s16 q9, q9, #4
|
||||
|
||||
vld1.32 {d26[0]}, [r1], r2 ; dest row 0, pairs with d16
|
||||
vld1.32 {d26[1]}, [r1], r2 ; dest row 1, pairs with d17
|
||||
vld1.32 {d27[1]}, [r1], r2 ; dest row 2, pairs with d19
|
||||
vld1.32 {d27[0]}, [r1] ; no post-increment; dest row 3, pairs with d18
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d26
|
||||
vaddw.u8 q9, q9, d27
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d26, q8
|
||||
vqmovun.s16 d27, q9
|
||||
|
||||
; do the stores in reverse order with negative post-increment, by changing
|
||||
; the sign of the stride
|
||||
rsb r2, r2, #0 ; r2 = -dest_stride; r1 still points at row 3
|
||||
vst1.32 {d27[0]}, [r1], r2 ; row 3
|
||||
vst1.32 {d27[1]}, [r1], r2 ; row 2
|
||||
vst1.32 {d26[1]}, [r1], r2 ; row 1
|
||||
vst1.32 {d26[0]}, [r1] ; no post-increment; row 0
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_add_neon|
|
||||
|
||||
END
|
||||
88
vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
Normal file
88
vp9/common/arm/neon/vp9_short_idct8x8_1_add_neon.asm
Normal file
@@ -0,0 +1,88 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct8x8_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; DC-only 8x8 inverse transform and add: only input[0] is read. A single
; value a1 is derived from it and added, with saturation to 0..255, to every
; pixel of the 8x8 destination block.
;
;void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
|
||||
|vp9_short_idct8x8_1_add_neon| PROC
|
||||
ldrsh r0, [r0] ; r0 = input[0] (sign-extended DC coefficient)
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41 ; r12 = 0x2d41 = 11585
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest (the constant in r12 is no longer needed)
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 5)
|
||||
add r0, r0, #16 ; + (1 <<((5) - 1))
|
||||
asr r0, r0, #5 ; >> 5
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
|
||||
; load destination data (8 rows of 8 pixels, one d register per row)
|
||||
vld1.64 {d2}, [r1], r2 ; row 0
|
||||
vld1.64 {d3}, [r1], r2 ; row 1
|
||||
vld1.64 {d4}, [r1], r2 ; row 2
|
||||
vld1.64 {d5}, [r1], r2 ; row 3
|
||||
vld1.64 {d6}, [r1], r2 ; row 4
|
||||
vld1.64 {d7}, [r1], r2 ; row 5
|
||||
vld1.64 {d16}, [r1], r2 ; row 6
|
||||
vld1.64 {d17}, [r1] ; row 7, no post-increment
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r2 ; row 0, written through the saved dest pointer
|
||||
vst1.64 {d3}, [r12], r2 ; row 1
|
||||
vst1.64 {d30}, [r12], r2 ; row 2
|
||||
vst1.64 {d31}, [r12], r2 ; row 3
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r2 ; row 4
|
||||
vst1.64 {d3}, [r12], r2 ; row 5
|
||||
vst1.64 {d30}, [r12], r2 ; row 6
|
||||
vst1.64 {d31}, [r12], r2 ; row 7 (r12 is not used after this post-increment)
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_1_add_neon|
|
||||
|
||||
END
|
||||
@@ -9,6 +9,7 @@
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct8x8_add_neon|
|
||||
EXPORT |vp9_short_idct8x8_10_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -24,191 +25,149 @@
|
||||
; stage 1
|
||||
vdup.16 d0, r3 ; duplicate cospi_28_64
|
||||
vdup.16 d1, r4 ; duplicate cospi_4_64
|
||||
vdup.16 d2, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d3, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[1] * cospi_28_64
|
||||
vmull.s16 q2, d18, d0
|
||||
vmull.s16 q3, d19, d0
|
||||
|
||||
; input[7] * cospi_4_64
|
||||
vmull.s16 q4, d30, d1
|
||||
vmull.s16 q5, d31, d1
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q5, d26, d2
|
||||
vmull.s16 q6, d27, d2
|
||||
|
||||
; input[1]*cospi_28_64-input[7]*cospi_4_64
|
||||
vsub.s32 q6, q2, q4
|
||||
vsub.s32 q7, q3, q5
|
||||
vmlsl.s16 q2, d30, d1
|
||||
vmlsl.s16 q3, d31, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vmlsl.s16 q5, d22, d3
|
||||
vmlsl.s16 q6, d23, d3
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d8, q6, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q7, #14 ; >> 14
|
||||
vqrshrn.s32 d8, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_4_64
|
||||
vmull.s16 q2, d18, d1
|
||||
vmull.s16 q3, d19, d1
|
||||
|
||||
; input[7] * cospi_28_64
|
||||
vmull.s16 q1, d30, d0
|
||||
vmull.s16 q5, d31, d0
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q9, d26, d3
|
||||
vmull.s16 q13, d27, d3
|
||||
|
||||
; input[1]*cospi_4_64+input[7]*cospi_28_64
|
||||
vadd.s32 q2, q2, q1
|
||||
vadd.s32 q3, q3, q5
|
||||
vmlal.s16 q2, d30, d0
|
||||
vmlal.s16 q3, d31, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vmlal.s16 q9, d22, d2
|
||||
vmlal.s16 q13, d23, d2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d14, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q3, #14 ; >> 14
|
||||
|
||||
vdup.16 d0, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d1, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q2, d26, d0
|
||||
vmull.s16 q3, d27, d0
|
||||
|
||||
; input[3] * cospi_20_64
|
||||
vmull.s16 q5, d22, d1
|
||||
vmull.s16 q6, d23, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vsub.s32 q2, q2, q5
|
||||
vsub.s32 q3, q3, q6
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q3, #14 ; >> 14
|
||||
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q2, d26, d1
|
||||
vmull.s16 q3, d27, d1
|
||||
|
||||
; input[3] * cospi_12_64
|
||||
vmull.s16 q9, d22, d0
|
||||
vmull.s16 q15, d23, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vadd.s32 q0, q2, q9
|
||||
vadd.s32 q1, q3, q15
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q0, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q1, #14 ; >> 14
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
vdup.16 d0, r7 ; duplicate cospi_16_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q13, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[2] * cospi_16_64
|
||||
vmull.s16 q9, d24, d0
|
||||
vmull.s16 q11, d25, d0
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q13, d16, d0
|
||||
vmull.s16 q15, d17, d0
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64
|
||||
vadd.s32 q9, q2, q9
|
||||
vadd.s32 q11, q3, q11
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q11, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[2] * cospi_16_64
|
||||
vmull.s16 q13, d24, d0
|
||||
vmull.s16 q15, d25, d0
|
||||
vmlal.s16 q2, d24, d0
|
||||
vmlal.s16 q3, d25, d0
|
||||
|
||||
; (input[0] - input[2]) * cospi_16_64
|
||||
vsub.s32 q2, q2, q13
|
||||
vsub.s32 q3, q3, q15
|
||||
vmlsl.s16 q13, d24, d0
|
||||
vmlsl.s16 q15, d25, d0
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q3, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vdup.16 d0, r8 ; duplicate cospi_24_64
|
||||
vdup.16 d1, r9 ; duplicate cospi_8_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q15, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
; input[1] * cospi_24_64
|
||||
vmull.s16 q2, d20, d0
|
||||
vmull.s16 q3, d21, d0
|
||||
|
||||
; input[3] * cospi_8_64
|
||||
vmull.s16 q13, d28, d1
|
||||
vmull.s16 q15, d29, d1
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q8, d20, d1
|
||||
vmull.s16 q12, d21, d1
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vsub.s32 q2, q2, q13
|
||||
vsub.s32 q3, q3, q15
|
||||
vmlsl.s16 q2, d28, d1
|
||||
vmlsl.s16 q3, d29, d1
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vmlal.s16 q8, d28, d0
|
||||
vmlal.s16 q12, d29, d0
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d26, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d27, q3, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q2, d20, d1
|
||||
vmull.s16 q3, d21, d1
|
||||
|
||||
; input[3] * cospi_24_64
|
||||
vmull.s16 q8, d28, d0
|
||||
vmull.s16 q10, d29, d0
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vadd.s32 q0, q2, q8
|
||||
vadd.s32 q1, q3, q10
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d30, q0, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q1, #14 ; >> 14
|
||||
|
||||
vqrshrn.s32 d30, q8, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q12, #14 ; >> 14
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[5] * cospi_16_64
|
||||
vmull.s16 q11, d26, d16
|
||||
vmull.s16 q12, d27, d16
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vsub.s32 q9, q9, q11
|
||||
vsub.s32 q10, q10, q12
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[5] * cospi_16_64
|
||||
vmull.s16 q11, d26, d16
|
||||
vmull.s16 q12, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vadd.s32 q9, q9, q11
|
||||
vadd.s32 q10, q10, q12
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q10, #14 ; >> 14
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
@@ -247,14 +206,11 @@
|
||||
|
||||
|vp9_short_idct8x8_add_neon| PROC
|
||||
push {r4-r9}
|
||||
vld1.s16 {q8}, [r0]!
|
||||
vld1.s16 {q9}, [r0]!
|
||||
vld1.s16 {q10}, [r0]!
|
||||
vld1.s16 {q11}, [r0]!
|
||||
vld1.s16 {q12}, [r0]!
|
||||
vld1.s16 {q13}, [r0]!
|
||||
vld1.s16 {q14}, [r0]!
|
||||
vld1.s16 {q15}, [r0]!
|
||||
vpush {d8-d15}
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
vld1.s16 {q10,q11}, [r0]!
|
||||
vld1.s16 {q12,q13}, [r0]!
|
||||
vld1.s16 {q14,q15}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE8X8
|
||||
@@ -349,8 +305,215 @@
|
||||
vst1.64 {d6}, [r0], r2
|
||||
vst1.64 {d7}, [r0], r2
|
||||
|
||||
vpop {d8-d15}
|
||||
pop {r4-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_add_neon|
|
||||
|
||||
;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
|vp9_short_idct8x8_10_add_neon| PROC
|
||||
push {r4-r9}
|
||||
vpush {d8-d15}
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
vld1.s16 {q10,q11}, [r0]!
|
||||
vld1.s16 {q12,q13}, [r0]!
|
||||
vld1.s16 {q14,q15}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE8X8
|
||||
|
||||
; generate cospi_28_64 = 3196
|
||||
mov r3, #0x0c00
|
||||
add r3, #0x7c
|
||||
|
||||
; generate cospi_4_64 = 16069
|
||||
mov r4, #0x3e00
|
||||
add r4, #0xc5
|
||||
|
||||
; generate cospi_12_64 = 13623
|
||||
mov r5, #0x3500
|
||||
add r5, #0x37
|
||||
|
||||
; generate cospi_20_64 = 9102
|
||||
mov r6, #0x2300
|
||||
add r6, #0x8e
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r7, #0x2d00
|
||||
add r7, #0x41
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r8, #0x1800
|
||||
add r8, #0x7e
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r9, #0x3b00
|
||||
add r9, #0x21
|
||||
|
||||
; First transform rows
|
||||
; stage 1
|
||||
; The following instructions use vqrdmulh to do the
|
||||
; dct_const_round_shift(input[1] * cospi_28_64). vqrdmulh will do doubling
|
||||
; multiply and shift the result by 16 bits instead of 14 bits. So we need
|
||||
; to double the constants before multiplying to compensate this.
|
||||
mov r12, r3, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_28_64*2
|
||||
mov r12, r4, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_4_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_28_64)
|
||||
vqrdmulh.s16 q4, q9, q0
|
||||
|
||||
mov r12, r6, lsl #1
|
||||
rsb r12, #0
|
||||
vdup.16 q0, r12 ; duplicate -cospi_20_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_4_64)
|
||||
vqrdmulh.s16 q7, q9, q1
|
||||
|
||||
mov r12, r5, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_12_64*2
|
||||
|
||||
; dct_const_round_shift(- input[3] * cospi_20_64)
|
||||
vqrdmulh.s16 q5, q11, q0
|
||||
|
||||
mov r12, r7, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_16_64*2
|
||||
|
||||
; dct_const_round_shift(input[3] * cospi_12_64)
|
||||
vqrdmulh.s16 q6, q11, q1
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
mov r12, r8, lsl #1
|
||||
vdup.16 q1, r12 ; duplicate cospi_24_64*2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrdmulh.s16 q9, q8, q0
|
||||
|
||||
mov r12, r9, lsl #1
|
||||
vdup.16 q0, r12 ; duplicate cospi_8_64*2
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_24_64)
|
||||
vqrdmulh.s16 q13, q10, q1
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_8_64)
|
||||
vqrdmulh.s16 q15, q10, q0
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q9, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q9, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
|
||||
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
|
||||
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
|
||||
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
|
||||
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
|
||||
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
|
||||
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
||||
|
||||
; Transpose the matrix
|
||||
TRANSPOSE8X8
|
||||
|
||||
; Then transform columns
|
||||
IDCT8x8_1D
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 5)
|
||||
vrshr.s16 q8, q8, #5
|
||||
vrshr.s16 q9, q9, #5
|
||||
vrshr.s16 q10, q10, #5
|
||||
vrshr.s16 q11, q11, #5
|
||||
vrshr.s16 q12, q12, #5
|
||||
vrshr.s16 q13, q13, #5
|
||||
vrshr.s16 q14, q14, #5
|
||||
vrshr.s16 q15, q15, #5
|
||||
|
||||
; save dest pointer
|
||||
mov r0, r1
|
||||
|
||||
; load destination data
|
||||
vld1.64 {d0}, [r1], r2
|
||||
vld1.64 {d1}, [r1], r2
|
||||
vld1.64 {d2}, [r1], r2
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r2
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r2
|
||||
vld1.64 {d7}, [r1]
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d0
|
||||
vaddw.u8 q9, q9, d1
|
||||
vaddw.u8 q10, q10, d2
|
||||
vaddw.u8 q11, q11, d3
|
||||
vaddw.u8 q12, q12, d4
|
||||
vaddw.u8 q13, q13, d5
|
||||
vaddw.u8 q14, q14, d6
|
||||
vaddw.u8 q15, q15, d7
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d0, q8
|
||||
vqmovun.s16 d1, q9
|
||||
vqmovun.s16 d2, q10
|
||||
vqmovun.s16 d3, q11
|
||||
vqmovun.s16 d4, q12
|
||||
vqmovun.s16 d5, q13
|
||||
vqmovun.s16 d6, q14
|
||||
vqmovun.s16 d7, q15
|
||||
|
||||
; store the data
|
||||
vst1.64 {d0}, [r0], r2
|
||||
vst1.64 {d1}, [r0], r2
|
||||
vst1.64 {d2}, [r0], r2
|
||||
vst1.64 {d3}, [r0], r2
|
||||
vst1.64 {d4}, [r0], r2
|
||||
vst1.64 {d5}, [r0], r2
|
||||
vst1.64 {d6}, [r0], r2
|
||||
vst1.64 {d7}, [r0], r2
|
||||
|
||||
vpop {d8-d15}
|
||||
pop {r4-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct8x8_10_add_neon|
|
||||
|
||||
END
|
||||
|
||||
237
vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
Normal file
237
vp9/common/arm/neon/vp9_short_iht4x4_add_neon.asm
Normal file
@@ -0,0 +1,237 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Parallel 1D IDCT on all the columns of a 4x4 16bits data matrix which are
|
||||
; loaded in d16-d19. d0 must contain cospi_8_64. d1 must contain
|
||||
; cospi_16_64. d2 must contain cospi_24_64. The output will be stored back
|
||||
; into d16-d19 registers. This macro will touch q10- q15 registers and use
|
||||
; them as buffer during calculation.
|
||||
MACRO
|
||||
IDCT4x4_1D
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d2 ; input[1] * cospi_24_64
|
||||
vmull.s16 q10, d17, d0 ; input[1] * cospi_8_64
|
||||
vmull.s16 q13, d23, d1 ; (input[0] + input[2]) * cospi_16_64
|
||||
vmull.s16 q14, d24, d1 ; (input[0] - input[2]) * cospi_16_64
|
||||
vmlsl.s16 q15, d19, d0 ; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vmlal.s16 q10, d19, d2 ; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14
|
||||
vqrshrn.s32 d27, q14, #14
|
||||
vqrshrn.s32 d29, q15, #14
|
||||
vqrshrn.s32 d28, q10, #14
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14
|
||||
vsub.s16 q9, q13, q14
|
||||
vswp d18, d19
|
||||
MEND
|
||||
|
||||
; Parallel 1D IADST on all the columns of a 4x4 16bits data matrix which are
|
||||
; loaded in d16-d19. d3 must contain sinpi_1_9. d4 must contain sinpi_2_9.
|
||||
; d5 must contain sinpi_4_9. d6 must contain sinpi_3_9. r0 must contain the
|
||||
; scalar sinpi_3_9 (it is broadcast into q8 with vdup.32 below). The output
|
||||
; will be stored back into d16-d19 registers. This macro overwrites q8-q15.
; q10-q15 hold the 32-bit intermediates; q8/q9 are reused as temporaries for
; s5/s6 and sinpi_3_9 once x0 and x3 have been consumed.
MACRO
|
||||
IADST4x4_1D
|
||||
vmull.s16 q10, d3, d16 ; s0 = sinpi_1_9 * x0
|
||||
vmull.s16 q11, d4, d16 ; s1 = sinpi_2_9 * x0
|
||||
vmull.s16 q12, d6, d17 ; s2 = sinpi_3_9 * x1
|
||||
vmull.s16 q13, d5, d18 ; s3 = sinpi_4_9 * x2
|
||||
vmull.s16 q14, d3, d18 ; s4 = sinpi_1_9 * x2
|
||||
vmovl.s16 q15, d16 ; expand x0 from 16 bit to 32 bit
|
||||
vaddw.s16 q15, q15, d19 ; x0 + x3
|
||||
vmull.s16 q8, d4, d19 ; s5 = sinpi_2_9 * x3
|
||||
vsubw.s16 q15, q15, d18 ; s7 = x0 + x3 - x2
|
||||
vmull.s16 q9, d5, d19 ; s6 = sinpi_4_9 * x3
|
||||
|
||||
vadd.s32 q10, q10, q13 ; x0 = s0 + s3 + s5
|
||||
vadd.s32 q10, q10, q8
|
||||
vsub.s32 q11, q11, q14 ; x1 = s1 - s4 - s6
|
||||
vdup.32 q8, r0 ; duplicate sinpi_3_9
|
||||
vsub.s32 q11, q11, q9
|
||||
vmul.s32 q15, q15, q8 ; x2 = sinpi_3_9 * s7
|
||||
|
||||
vadd.s32 q13, q10, q12 ; s0 = x0 + x3
|
||||
vadd.s32 q10, q10, q11 ; x0 + x1
|
||||
vadd.s32 q14, q11, q12 ; s1 = x1 + x3
|
||||
vsub.s32 q10, q10, q12 ; s3 = x0 + x1 - x3
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d16, q13, #14
|
||||
vqrshrn.s32 d17, q14, #14
|
||||
vqrshrn.s32 d18, q15, #14
|
||||
vqrshrn.s32 d19, q10, #14
|
||||
MEND
|
||||
|
||||
; Generate cosine constants in d0 - d2 for the IDCT
; (d0 = cospi_8_64, d1 = cospi_16_64, d2 = cospi_24_64).
; Overwrites r0, r3 and r12, which are used to build the scalar values.
|
||||
MACRO
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
; cospi_8_64 = 15137 = 0x3b21
|
||||
mov r0, #0x3b00
|
||||
add r0, #0x21
|
||||
; cospi_16_64 = 11585 = 0x2d41
|
||||
mov r3, #0x2d00
|
||||
add r3, #0x41
|
||||
; cospi_24_64 = 6270 = 0x187e
|
||||
mov r12, #0x1800
|
||||
add r12, #0x7e
|
||||
|
||||
; generate constant vectors
|
||||
vdup.16 d0, r0 ; duplicate cospi_8_64
|
||||
vdup.16 d1, r3 ; duplicate cospi_16_64
|
||||
vdup.16 d2, r12 ; duplicate cospi_24_64
|
||||
MEND
|
||||
|
||||
; Generate sine constants in d3 - d7 for the IADST
; (d3 = sinpi_1_9, d4 = sinpi_2_9, d5 = sinpi_4_9, q3 = d6/d7 = sinpi_3_9).
; Overwrites r0, r3 and r12. On exit r0 holds the scalar sinpi_3_9, which
; IADST4x4_1D reads when it broadcasts the constant to 32-bit lanes.
|
||||
MACRO
|
||||
GENERATE_SINE_CONSTANTS
|
||||
; sinpi_1_9 = 5283 = 0x14A3
|
||||
mov r0, #0x1400
|
||||
add r0, #0xa3
|
||||
; sinpi_2_9 = 9929 = 0x26C9
|
||||
mov r3, #0x2600
|
||||
add r3, #0xc9
|
||||
; sinpi_4_9 = 15212 = 0x3B6C
|
||||
mov r12, #0x3b00
|
||||
add r12, #0x6c
|
||||
|
||||
; generate constant vectors
|
||||
vdup.16 d3, r0 ; duplicate sinpi_1_9
|
||||
|
||||
; sinpi_3_9 = 13377 = 0x3441
; (r0 is reused here, after sinpi_1_9 has been copied into d3)
|
||||
mov r0, #0x3400
|
||||
add r0, #0x41
|
||||
|
||||
vdup.16 d4, r3 ; duplicate sinpi_2_9
|
||||
vdup.16 d5, r12 ; duplicate sinpi_4_9
|
||||
vdup.16 q3, r0 ; duplicate sinpi_3_9
|
||||
MEND
|
||||
|
||||
; Transpose a 4x4 matrix of 16-bit elements in place. The data is loaded in
; d16-d19 (q8-q9) and the transposed result is left in the same registers.
|
||||
MACRO
|
||||
TRANSPOSE4X4
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
vtrn.32 q8, q9
|
||||
MEND
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride, int tx_type)
|
||||
;
|
||||
; r0 int16_t input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride
|
||||
; r3 int tx_type)
|
||||
; This function will only handle tx_type of 1,2,3.
|
||||
|vp9_short_iht4x4_add_neon| PROC
|
||||
|
||||
; load the inputs into d16-d19
|
||||
vld1.s16 {q8,q9}, [r0]!
|
||||
|
||||
; transpose the input data
|
||||
TRANSPOSE4X4
|
||||
|
||||
; decide the type of transform
|
||||
cmp r3, #2
|
||||
beq idct_iadst
|
||||
cmp r3, #3
|
||||
beq iadst_iadst
|
||||
|
||||
iadst_idct
|
||||
; generate constants
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IDCT4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IADST4x4_1D
|
||||
|
||||
b end_vp9_short_iht4x4_add_neon
|
||||
|
||||
idct_iadst
|
||||
; generate constants
|
||||
GENERATE_COSINE_CONSTANTS
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IADST4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IDCT4x4_1D
|
||||
|
||||
b end_vp9_short_iht4x4_add_neon
|
||||
|
||||
iadst_iadst
|
||||
; generate constants
|
||||
GENERATE_SINE_CONSTANTS
|
||||
|
||||
; first transform rows
|
||||
IADST4x4_1D
|
||||
|
||||
; transpose the matrix
|
||||
TRANSPOSE4X4
|
||||
|
||||
; then transform columns
|
||||
IADST4x4_1D
|
||||
|
||||
end_vp9_short_iht4x4_add_neon
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4)
|
||||
vrshr.s16 q8, q8, #4
|
||||
vrshr.s16 q9, q9, #4
|
||||
|
||||
vld1.32 {d26[0]}, [r1], r2
|
||||
vld1.32 {d26[1]}, [r1], r2
|
||||
vld1.32 {d27[0]}, [r1], r2
|
||||
vld1.32 {d27[1]}, [r1]
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d26
|
||||
vaddw.u8 q9, q9, d27
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d26, q8
|
||||
vqmovun.s16 d27, q9
|
||||
|
||||
; do the stores in reverse order with negative post-increment, by changing
|
||||
; the sign of the stride
|
||||
rsb r2, r2, #0
|
||||
vst1.32 {d27[1]}, [r1], r2
|
||||
vst1.32 {d27[0]}, [r1], r2
|
||||
vst1.32 {d26[1]}, [r1], r2
|
||||
vst1.32 {d26[0]}, [r1] ; no post-increment
|
||||
bx lr
|
||||
ENDP ; |vp9_short_iht4x4_add_neon|
|
||||
|
||||
END
|
||||
696
vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
Normal file
696
vp9/common/arm/neon/vp9_short_iht8x8_add_neon.asm
Normal file
@@ -0,0 +1,696 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht8x8_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Generate IADST constants in r0 - r12 for the IADST.
|
||||
MACRO
|
||||
GENERATE_IADST_CONSTANTS
|
||||
; generate cospi_2_64 = 16305
|
||||
mov r0, #0x3f00
|
||||
add r0, #0xb1
|
||||
|
||||
; generate cospi_30_64 = 1606
|
||||
mov r1, #0x600
|
||||
add r1, #0x46
|
||||
|
||||
; generate cospi_10_64 = 14449
|
||||
mov r2, #0x3800
|
||||
add r2, #0x71
|
||||
|
||||
; generate cospi_22_64 = 7723
|
||||
mov r3, #0x1e00
|
||||
add r3, #0x2b
|
||||
|
||||
; generate cospi_18_64 = 10394
|
||||
mov r4, #0x2800
|
||||
add r4, #0x9a
|
||||
|
||||
; generate cospi_14_64 = 12665
|
||||
mov r5, #0x3100
|
||||
add r5, #0x79
|
||||
|
||||
; generate cospi_26_64 = 4756
|
||||
mov r6, #0x1200
|
||||
add r6, #0x94
|
||||
|
||||
; generate cospi_6_64 = 15679
|
||||
mov r7, #0x3d00
|
||||
add r7, #0x3f
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r8, #0x3b00
|
||||
add r8, #0x21
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r9, #0x1800
|
||||
add r9, #0x7e
|
||||
|
||||
; generate 0
|
||||
mov r10, #0
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
MEND
|
||||
|
||||
; Generate IDCT constants in r3 - r9 for the IDCT.
|
||||
MACRO
|
||||
GENERATE_IDCT_CONSTANTS
|
||||
; generate cospi_28_64 = 3196
|
||||
mov r3, #0x0c00
|
||||
add r3, #0x7c
|
||||
|
||||
; generate cospi_4_64 = 16069
|
||||
mov r4, #0x3e00
|
||||
add r4, #0xc5
|
||||
|
||||
; generate cospi_12_64 = 13623
|
||||
mov r5, #0x3500
|
||||
add r5, #0x37
|
||||
|
||||
; generate cospi_20_64 = 9102
|
||||
mov r6, #0x2300
|
||||
add r6, #0x8e
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r7, #0x2d00
|
||||
add r7, #0x41
|
||||
|
||||
; generate cospi_24_64 = 6270
|
||||
mov r8, #0x1800
|
||||
add r8, #0x7e
|
||||
|
||||
; generate cospi_8_64 = 15137
|
||||
mov r9, #0x3b00
|
||||
add r9, #0x21
|
||||
MEND
|
||||
|
||||
; Transpose a 8x8 16bits data matrix. Datas are loaded in q8-q15.
|
||||
MACRO
|
||||
TRANSPOSE8X8
|
||||
vswp d17, d24
|
||||
vswp d23, d30
|
||||
vswp d21, d28
|
||||
vswp d19, d26
|
||||
vtrn.32 q8, q10
|
||||
vtrn.32 q9, q11
|
||||
vtrn.32 q12, q14
|
||||
vtrn.32 q13, q15
|
||||
vtrn.16 q8, q9
|
||||
vtrn.16 q10, q11
|
||||
vtrn.16 q12, q13
|
||||
vtrn.16 q14, q15
|
||||
MEND
|
||||
|
||||
; Parallel 1D IDCT on all the columns of a 8x8 16bits data matrix which are
|
||||
; loaded in q8-q15. The IDCT constants are loaded in r3 - r9. The output
|
||||
; will be stored back into q8-q15 registers. This macro will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
MACRO
|
||||
IDCT8x8_1D
|
||||
; stage 1
|
||||
vdup.16 d0, r3 ; duplicate cospi_28_64
|
||||
vdup.16 d1, r4 ; duplicate cospi_4_64
|
||||
vdup.16 d2, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d3, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[1] * cospi_28_64
|
||||
vmull.s16 q2, d18, d0
|
||||
vmull.s16 q3, d19, d0
|
||||
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q5, d26, d2
|
||||
vmull.s16 q6, d27, d2
|
||||
|
||||
; input[1]*cospi_28_64-input[7]*cospi_4_64
|
||||
vmlsl.s16 q2, d30, d1
|
||||
vmlsl.s16 q3, d31, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vmlsl.s16 q5, d22, d3
|
||||
vmlsl.s16 q6, d23, d3
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d8, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_4_64
|
||||
vmull.s16 q2, d18, d1
|
||||
vmull.s16 q3, d19, d1
|
||||
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q9, d26, d3
|
||||
vmull.s16 q13, d27, d3
|
||||
|
||||
; input[1]*cospi_4_64+input[7]*cospi_28_64
|
||||
vmlal.s16 q2, d30, d0
|
||||
vmlal.s16 q3, d31, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vmlal.s16 q9, d22, d2
|
||||
vmlal.s16 q13, d23, d2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d14, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q3, #14 ; >> 14
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
vdup.16 d0, r7 ; duplicate cospi_16_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q13, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q13, d16, d0
|
||||
vmull.s16 q15, d17, d0
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64
|
||||
vmlal.s16 q2, d24, d0
|
||||
vmlal.s16 q3, d25, d0
|
||||
|
||||
; (input[0] - input[2]) * cospi_16_64
|
||||
vmlsl.s16 q13, d24, d0
|
||||
vmlsl.s16 q15, d25, d0
|
||||
|
||||
vdup.16 d0, r8 ; duplicate cospi_24_64
|
||||
vdup.16 d1, r9 ; duplicate cospi_8_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q15, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64
|
||||
vmull.s16 q2, d20, d0
|
||||
vmull.s16 q3, d21, d0
|
||||
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q8, d20, d1
|
||||
vmull.s16 q12, d21, d1
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vmlsl.s16 q2, d28, d1
|
||||
vmlsl.s16 q3, d29, d1
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vmlal.s16 q8, d28, d0
|
||||
vmlal.s16 q12, d29, d0
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d26, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d27, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d30, q8, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q12, #14 ; >> 14
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
|
||||
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
|
||||
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
|
||||
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
|
||||
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
|
||||
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
|
||||
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
||||
MEND
|
||||
|
||||
; Parallel 1D IADST on all the columns of a 8x8 16bits data matrix which
|
||||
; loaded in q8-q15. IADST constants are loaded in r0 - r12 registers. The
|
||||
; output will be stored back into q8-q15 registers. This macro will touch
|
||||
; q0 - q7 registers and use them as buffer during calculation.
|
||||
MACRO
|
||||
IADST8X8_1D
|
||||
vdup.16 d14, r0 ; duplicate cospi_2_64
|
||||
vdup.16 d15, r1 ; duplicate cospi_30_64
|
||||
|
||||
; cospi_2_64 * x0
|
||||
vmull.s16 q1, d30, d14
|
||||
vmull.s16 q2, d31, d14
|
||||
|
||||
; cospi_30_64 * x0
|
||||
vmull.s16 q3, d30, d15
|
||||
vmull.s16 q4, d31, d15
|
||||
|
||||
vdup.16 d30, r4 ; duplicate cospi_18_64
|
||||
vdup.16 d31, r5 ; duplicate cospi_14_64
|
||||
|
||||
; s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
||||
vmlal.s16 q1, d16, d15
|
||||
vmlal.s16 q2, d17, d15
|
||||
|
||||
; s1 = cospi_30_64 * x0 - cospi_2_64 * x1
|
||||
vmlsl.s16 q3, d16, d14
|
||||
vmlsl.s16 q4, d17, d14
|
||||
|
||||
; cospi_18_64 * x4
|
||||
vmull.s16 q5, d22, d30
|
||||
vmull.s16 q6, d23, d30
|
||||
|
||||
; cospi_14_64 * x4
|
||||
vmull.s16 q7, d22, d31
|
||||
vmull.s16 q8, d23, d31
|
||||
|
||||
; s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
|
||||
vmlal.s16 q5, d24, d31
|
||||
vmlal.s16 q6, d25, d31
|
||||
|
||||
; s5 = cospi_14_64 * x4 - cospi_18_64 * x5
|
||||
vmlsl.s16 q7, d24, d30
|
||||
vmlsl.s16 q8, d25, d30
|
||||
|
||||
; (s0 + s4)
|
||||
vadd.s32 q11, q1, q5
|
||||
vadd.s32 q12, q2, q6
|
||||
|
||||
vdup.16 d0, r2 ; duplicate cospi_10_64
|
||||
vdup.16 d1, r3 ; duplicate cospi_22_64
|
||||
|
||||
; (s0 - s4)
|
||||
vsub.s32 q1, q1, q5
|
||||
vsub.s32 q2, q2, q6
|
||||
|
||||
; x0 = dct_const_round_shift(s0 + s4);
|
||||
vqrshrn.s32 d22, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q12, #14 ; >> 14
|
||||
|
||||
; (s1 + s5)
|
||||
vadd.s32 q12, q3, q7
|
||||
vadd.s32 q15, q4, q8
|
||||
|
||||
; (s1 - s5)
|
||||
vsub.s32 q3, q3, q7
|
||||
vsub.s32 q4, q4, q8
|
||||
|
||||
; x4 = dct_const_round_shift(s0 - s4);
|
||||
vqrshrn.s32 d2, q1, #14 ; >> 14
|
||||
vqrshrn.s32 d3, q2, #14 ; >> 14
|
||||
|
||||
; x1 = dct_const_round_shift(s1 + s5);
|
||||
vqrshrn.s32 d24, q12, #14 ; >> 14
|
||||
vqrshrn.s32 d25, q15, #14 ; >> 14
|
||||
|
||||
; x5 = dct_const_round_shift(s1 - s5);
|
||||
vqrshrn.s32 d6, q3, #14 ; >> 14
|
||||
vqrshrn.s32 d7, q4, #14 ; >> 14
|
||||
|
||||
; cospi_10_64 * x2
|
||||
vmull.s16 q4, d26, d0
|
||||
vmull.s16 q5, d27, d0
|
||||
|
||||
; cospi_22_64 * x2
|
||||
vmull.s16 q2, d26, d1
|
||||
vmull.s16 q6, d27, d1
|
||||
|
||||
vdup.16 d30, r6 ; duplicate cospi_26_64
|
||||
vdup.16 d31, r7 ; duplicate cospi_6_64
|
||||
|
||||
; s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
|
||||
vmlal.s16 q4, d20, d1
|
||||
vmlal.s16 q5, d21, d1
|
||||
|
||||
; s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
|
||||
vmlsl.s16 q2, d20, d0
|
||||
vmlsl.s16 q6, d21, d0
|
||||
|
||||
; cospi_26_64 * x6
|
||||
vmull.s16 q0, d18, d30
|
||||
vmull.s16 q13, d19, d30
|
||||
|
||||
; s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
||||
vmlal.s16 q0, d28, d31
|
||||
vmlal.s16 q13, d29, d31
|
||||
|
||||
; cospi_6_64 * x6
|
||||
vmull.s16 q10, d18, d31
|
||||
vmull.s16 q9, d19, d31
|
||||
|
||||
; s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
||||
vmlsl.s16 q10, d28, d30
|
||||
vmlsl.s16 q9, d29, d30
|
||||
|
||||
; (s3 + s7)
|
||||
vadd.s32 q14, q2, q10
|
||||
vadd.s32 q15, q6, q9
|
||||
|
||||
; (s3 - s7)
|
||||
vsub.s32 q2, q2, q10
|
||||
vsub.s32 q6, q6, q9
|
||||
|
||||
; x3 = dct_const_round_shift(s3 + s7);
|
||||
vqrshrn.s32 d28, q14, #14 ; >> 14
|
||||
vqrshrn.s32 d29, q15, #14 ; >> 14
|
||||
|
||||
; x7 = dct_const_round_shift(s3 - s7);
|
||||
vqrshrn.s32 d4, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d5, q6, #14 ; >> 14
|
||||
|
||||
; (s2 + s6)
|
||||
vadd.s32 q9, q4, q0
|
||||
vadd.s32 q10, q5, q13
|
||||
|
||||
; (s2 - s6)
|
||||
vsub.s32 q4, q4, q0
|
||||
vsub.s32 q5, q5, q13
|
||||
|
||||
vdup.16 d30, r8 ; duplicate cospi_8_64
|
||||
vdup.16 d31, r9 ; duplicate cospi_24_64
|
||||
|
||||
; x2 = dct_const_round_shift(s2 + s6);
|
||||
vqrshrn.s32 d18, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q10, #14 ; >> 14
|
||||
|
||||
; x6 = dct_const_round_shift(s2 - s6);
|
||||
vqrshrn.s32 d8, q4, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q5, #14 ; >> 14
|
||||
|
||||
; cospi_8_64 * x4
|
||||
vmull.s16 q5, d2, d30
|
||||
vmull.s16 q6, d3, d30
|
||||
|
||||
; cospi_24_64 * x4
|
||||
vmull.s16 q7, d2, d31
|
||||
vmull.s16 q0, d3, d31
|
||||
|
||||
; s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
|
||||
vmlal.s16 q5, d6, d31
|
||||
vmlal.s16 q6, d7, d31
|
||||
|
||||
; s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
|
||||
vmlsl.s16 q7, d6, d30
|
||||
vmlsl.s16 q0, d7, d30
|
||||
|
||||
; cospi_8_64 * x7
|
||||
vmull.s16 q1, d4, d30
|
||||
vmull.s16 q3, d5, d30
|
||||
|
||||
; cospi_24_64 * x7
|
||||
vmull.s16 q10, d4, d31
|
||||
vmull.s16 q2, d5, d31
|
||||
|
||||
; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
||||
vmlsl.s16 q1, d8, d31
|
||||
vmlsl.s16 q3, d9, d31
|
||||
|
||||
; s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
||||
vmlal.s16 q10, d8, d30
|
||||
vmlal.s16 q2, d9, d30
|
||||
|
||||
vadd.s16 q8, q11, q9 ; x0 = s0 + s2;
|
||||
|
||||
vsub.s16 q11, q11, q9 ; x2 = s0 - s2;
|
||||
|
||||
vadd.s16 q4, q12, q14 ; x1 = s1 + s3;
|
||||
|
||||
vsub.s16 q12, q12, q14 ; x3 = s1 - s3;
|
||||
|
||||
; (s4 + s6)
|
||||
vadd.s32 q14, q5, q1
|
||||
vadd.s32 q15, q6, q3
|
||||
|
||||
; (s4 - s6)
|
||||
vsub.s32 q5, q5, q1
|
||||
vsub.s32 q6, q6, q3
|
||||
|
||||
; x4 = dct_const_round_shift(s4 + s6);
|
||||
vqrshrn.s32 d18, q14, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q15, #14 ; >> 14
|
||||
|
||||
; x6 = dct_const_round_shift(s4 - s6);
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; (s5 + s7)
|
||||
vadd.s32 q1, q7, q10
|
||||
vadd.s32 q3, q0, q2
|
||||
|
||||
; (s5 - s7)
|
||||
vsub.s32 q7, q7, q10
|
||||
vsub.s32 q0, q0, q2
|
||||
|
||||
; x5 = dct_const_round_shift(s5 + s7);
|
||||
vqrshrn.s32 d28, q1, #14 ; >> 14
|
||||
vqrshrn.s32 d29, q3, #14 ; >> 14
|
||||
|
||||
; x7 = dct_const_round_shift(s5 - s7);
|
||||
vqrshrn.s32 d14, q7, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q0, #14 ; >> 14
|
||||
|
||||
vdup.16 d30, r12 ; duplicate cospi_16_64
|
||||
|
||||
; cospi_16_64 * x2
|
||||
vmull.s16 q2, d22, d30
|
||||
vmull.s16 q3, d23, d30
|
||||
|
||||
; cospi_16_64 * x2
|
||||
vmull.s16 q13, d22, d30
|
||||
vmull.s16 q1, d23, d30
|
||||
|
||||
; cospi_16_64 * x2 + cospi_16_64 * x3;
|
||||
vmlal.s16 q2, d24, d30
|
||||
vmlal.s16 q3, d25, d30
|
||||
|
||||
; cospi_16_64 * x2 - cospi_16_64 * x3;
|
||||
vmlsl.s16 q13, d24, d30
|
||||
vmlsl.s16 q1, d25, d30
|
||||
|
||||
; x2 = dct_const_round_shift(s2);
|
||||
vqrshrn.s32 d4, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d5, q3, #14 ; >> 14
|
||||
|
||||
;x3 = dct_const_round_shift(s3);
|
||||
vqrshrn.s32 d24, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d25, q1, #14 ; >> 14
|
||||
|
||||
; cospi_16_64 * x6
|
||||
vmull.s16 q13, d10, d30
|
||||
vmull.s16 q1, d11, d30
|
||||
|
||||
; cospi_16_64 * x6
|
||||
vmull.s16 q11, d10, d30
|
||||
vmull.s16 q0, d11, d30
|
||||
|
||||
; cospi_16_64 * x6 + cospi_16_64 * x7;
|
||||
vmlal.s16 q13, d14, d30
|
||||
vmlal.s16 q1, d15, d30
|
||||
|
||||
; cospi_16_64 * x6 - cospi_16_64 * x7;
|
||||
vmlsl.s16 q11, d14, d30
|
||||
vmlsl.s16 q0, d15, d30
|
||||
|
||||
; x6 = dct_const_round_shift(s6);
|
||||
vqrshrn.s32 d20, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d21, q1, #14 ; >> 14
|
||||
|
||||
;x7 = dct_const_round_shift(s7);
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q0, #14 ; >> 14
|
||||
|
||||
vdup.16 q5, r10 ; duplicate 0
|
||||
|
||||
vsub.s16 q9, q5, q9 ; output[1] = -x4;
|
||||
vsub.s16 q11, q5, q2 ; output[3] = -x2;
|
||||
vsub.s16 q13, q5, q6 ; output[5] = -x7;
|
||||
vsub.s16 q15, q5, q4 ; output[7] = -x1;
|
||||
MEND
|
||||
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest,
;                               int dest_stride, int tx_type)
;
; r0  int16_t input
; r1  uint8_t *dest
; r2  int dest_stride
; r3  int tx_type)
; This function will only handle tx_type of 1,2,3.
;
; The 8x8 coefficient block is kept in q8-q15 for the whole function.
; The 1-D transform macros use q0-q7 as scratch, and the constant
; generation macros overwrite r0-r10 and r12, so r0-r10 and d8-d15 are
; saved on entry and restored before the reconstruction step (which still
; needs dest in r1 and dest_stride in r2).
|vp9_short_iht8x8_add_neon| PROC

    ; load the inputs into q8-q15
    vld1.s16        {q8,q9}, [r0]!
    vld1.s16        {q10,q11}, [r0]!
    vld1.s16        {q12,q13}, [r0]!
    vld1.s16        {q14,q15}, [r0]!

    push            {r0-r10}
    ; d8-d15 (q4-q7) are callee-saved NEON registers and are clobbered by
    ; IDCT8x8_1D / IADST8X8_1D, so they must be preserved for the caller.
    vpush           {d8-d15}

    ; transpose the input data
    TRANSPOSE8X8

    ; decide the type of transform
    cmp             r3, #2
    beq             idct_iadst
    cmp             r3, #3
    beq             iadst_iadst

iadst_idct
    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; first transform rows
    IDCT8x8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; then transform columns
    IADST8X8_1D

    b               end_vp9_short_iht8x8_add_neon

idct_iadst
    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; then transform columns
    IDCT8x8_1D

    b               end_vp9_short_iht8x8_add_neon

iadst_iadst
    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; then transform columns
    IADST8X8_1D

end_vp9_short_iht8x8_add_neon
    ; restore in the reverse order of the saves above; this also brings
    ; back dest (r1) and dest_stride (r2), which the constant macros
    ; overwrote
    vpop            {d8-d15}
    pop             {r0-r10}

    ; ROUND_POWER_OF_TWO(temp_out[j], 5)
    vrshr.s16       q8, q8, #5
    vrshr.s16       q9, q9, #5
    vrshr.s16       q10, q10, #5
    vrshr.s16       q11, q11, #5
    vrshr.s16       q12, q12, #5
    vrshr.s16       q13, q13, #5
    vrshr.s16       q14, q14, #5
    vrshr.s16       q15, q15, #5

    ; save dest pointer
    mov             r0, r1

    ; load destination data
    vld1.64         {d0}, [r1], r2
    vld1.64         {d1}, [r1], r2
    vld1.64         {d2}, [r1], r2
    vld1.64         {d3}, [r1], r2
    vld1.64         {d4}, [r1], r2
    vld1.64         {d5}, [r1], r2
    vld1.64         {d6}, [r1], r2
    vld1.64         {d7}, [r1]

    ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
    vaddw.u8        q8, q8, d0
    vaddw.u8        q9, q9, d1
    vaddw.u8        q10, q10, d2
    vaddw.u8        q11, q11, d3
    vaddw.u8        q12, q12, d4
    vaddw.u8        q13, q13, d5
    vaddw.u8        q14, q14, d6
    vaddw.u8        q15, q15, d7

    ; clip_pixel
    vqmovun.s16     d0, q8
    vqmovun.s16     d1, q9
    vqmovun.s16     d2, q10
    vqmovun.s16     d3, q11
    vqmovun.s16     d4, q12
    vqmovun.s16     d5, q13
    vqmovun.s16     d6, q14
    vqmovun.s16     d7, q15

    ; store the data
    vst1.64         {d0}, [r0], r2
    vst1.64         {d1}, [r0], r2
    vst1.64         {d2}, [r0], r2
    vst1.64         {d3}, [r0], r2
    vst1.64         {d4}, [r0], r2
    vst1.64         {d5}, [r0], r2
    vst1.64         {d6}, [r0], r2
    vst1.64         {d7}, [r0], r2
    bx              lr
    ENDP  ; |vp9_short_iht8x8_add_neon|
|
||||
|
||||
END
|
||||
@@ -10,9 +10,10 @@
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
void vp9_machine_specific_config(VP9_COMMON *ctx) {
|
||||
void vp9_machine_specific_config(VP9_COMMON *cm) {
|
||||
(void)cm;
|
||||
vp9_rtcd();
|
||||
}
|
||||
|
||||
@@ -31,40 +31,30 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
|
||||
vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO));
|
||||
}
|
||||
|
||||
void vp9_update_mode_info_in_image(VP9_COMMON *cm, MODE_INFO *mi) {
|
||||
int i, j;
|
||||
|
||||
// For each in image mode_info element set the in image flag to 1
|
||||
for (i = 0; i < cm->mi_rows; i++) {
|
||||
MODE_INFO *ptr = mi;
|
||||
for (j = 0; j < cm->mi_cols; j++) {
|
||||
ptr->mbmi.mb_in_image = 1;
|
||||
ptr++; // Next element in the row
|
||||
}
|
||||
|
||||
// Step over border element at start of next row
|
||||
mi += cm->mode_info_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_free_frame_buffers(VP9_COMMON *oci) {
|
||||
void vp9_free_frame_buffers(VP9_COMMON *cm) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp9_free_frame_buffer(&oci->yv12_fb[i]);
|
||||
vp9_free_frame_buffer(&cm->yv12_fb[i]);
|
||||
|
||||
vp9_free_frame_buffer(&oci->post_proc_buffer);
|
||||
vp9_free_frame_buffer(&cm->post_proc_buffer);
|
||||
|
||||
vpx_free(oci->mip);
|
||||
vpx_free(oci->prev_mip);
|
||||
vpx_free(oci->above_seg_context);
|
||||
vpx_free(cm->mip);
|
||||
vpx_free(cm->prev_mip);
|
||||
vpx_free(cm->above_seg_context);
|
||||
vpx_free(cm->last_frame_seg_map);
|
||||
vpx_free(cm->mi_grid_base);
|
||||
vpx_free(cm->prev_mi_grid_base);
|
||||
|
||||
vpx_free(oci->above_context[0]);
|
||||
vpx_free(cm->above_context[0]);
|
||||
for (i = 0; i < MAX_MB_PLANE; i++)
|
||||
oci->above_context[i] = 0;
|
||||
oci->mip = NULL;
|
||||
oci->prev_mip = NULL;
|
||||
oci->above_seg_context = NULL;
|
||||
cm->above_context[i] = 0;
|
||||
cm->mip = NULL;
|
||||
cm->prev_mip = NULL;
|
||||
cm->above_seg_context = NULL;
|
||||
cm->last_frame_seg_map = NULL;
|
||||
cm->mi_grid_base = NULL;
|
||||
cm->prev_mi_grid_base = NULL;
|
||||
}
|
||||
|
||||
static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
|
||||
@@ -72,112 +62,120 @@ static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
|
||||
cm->mb_rows = (aligned_height + 8) >> 4;
|
||||
cm->MBs = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
cm->mi_cols = aligned_width >> LOG2_MI_SIZE;
|
||||
cm->mi_rows = aligned_height >> LOG2_MI_SIZE;
|
||||
cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
|
||||
cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
|
||||
cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
static void setup_mi(VP9_COMMON *cm) {
|
||||
cm->mi = cm->mip + cm->mode_info_stride + 1;
|
||||
cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
|
||||
cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1;
|
||||
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
|
||||
|
||||
vpx_memset(cm->mip, 0,
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->mi);
|
||||
vpx_memset(cm->mi_grid_base, 0,
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) *
|
||||
sizeof(*cm->mi_grid_base));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_border(cm, cm->prev_mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->prev_mi);
|
||||
}
|
||||
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
|
||||
int i, mi_cols;
|
||||
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);
|
||||
const int ss_x = oci->subsampling_x;
|
||||
const int ss_y = oci->subsampling_y;
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
|
||||
const int ss_x = cm->subsampling_x;
|
||||
const int ss_y = cm->subsampling_y;
|
||||
int mi_size;
|
||||
|
||||
vp9_free_frame_buffers(oci);
|
||||
vp9_free_frame_buffers(cm);
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++) {
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
if (vp9_alloc_frame_buffer(&oci->yv12_fb[i], width, height, ss_x, ss_y,
|
||||
cm->fb_idx_ref_cnt[i] = 0;
|
||||
if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
|
||||
VP9BORDERINPIXELS) < 0)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
oci->new_fb_idx = NUM_YV12_BUFFERS - 1;
|
||||
oci->fb_idx_ref_cnt[oci->new_fb_idx] = 1;
|
||||
cm->new_fb_idx = NUM_YV12_BUFFERS - 1;
|
||||
cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
|
||||
|
||||
for (i = 0; i < ALLOWED_REFS_PER_FRAME; i++)
|
||||
oci->active_ref_idx[i] = i;
|
||||
cm->active_ref_idx[i] = i;
|
||||
|
||||
for (i = 0; i < NUM_REF_FRAMES; i++) {
|
||||
oci->ref_frame_map[i] = i;
|
||||
oci->fb_idx_ref_cnt[i] = 1;
|
||||
cm->ref_frame_map[i] = i;
|
||||
cm->fb_idx_ref_cnt[i] = 1;
|
||||
}
|
||||
|
||||
if (vp9_alloc_frame_buffer(&oci->post_proc_buffer, width, height, ss_x, ss_y,
|
||||
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
|
||||
VP9BORDERINPIXELS) < 0)
|
||||
goto fail;
|
||||
|
||||
set_mb_mi(oci, aligned_width, aligned_height);
|
||||
set_mb_mi(cm, aligned_width, aligned_height);
|
||||
|
||||
// Allocation
|
||||
mi_size = oci->mode_info_stride * (oci->mi_rows + MI_BLOCK_SIZE);
|
||||
mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE);
|
||||
|
||||
oci->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!oci->mip)
|
||||
cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!cm->mip)
|
||||
goto fail;
|
||||
|
||||
oci->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!oci->prev_mip)
|
||||
cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
|
||||
if (!cm->prev_mip)
|
||||
goto fail;
|
||||
|
||||
setup_mi(oci);
|
||||
cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
|
||||
if (!cm->mi_grid_base)
|
||||
goto fail;
|
||||
|
||||
cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
|
||||
if (!cm->prev_mi_grid_base)
|
||||
goto fail;
|
||||
|
||||
setup_mi(cm);
|
||||
|
||||
// FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
|
||||
// information is exposed at this level
|
||||
mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);
|
||||
mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
|
||||
|
||||
// 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
|
||||
// block where mi unit size is 8x8.
|
||||
# if CONFIG_ALPHA
|
||||
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 8 * mi_cols, 1);
|
||||
#else
|
||||
oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1);
|
||||
#endif
|
||||
if (!oci->above_context[0])
|
||||
cm->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE *
|
||||
(2 * mi_cols), 1);
|
||||
if (!cm->above_context[0])
|
||||
goto fail;
|
||||
|
||||
oci->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
|
||||
if (!oci->above_seg_context)
|
||||
cm->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
|
||||
if (!cm->above_seg_context)
|
||||
goto fail;
|
||||
|
||||
// Create the segmentation map structure and set to 0.
|
||||
cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
|
||||
if (!cm->last_frame_seg_map)
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
vp9_free_frame_buffers(oci);
|
||||
vp9_free_frame_buffers(cm);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp9_create_common(VP9_COMMON *oci) {
|
||||
vp9_machine_specific_config(oci);
|
||||
void vp9_create_common(VP9_COMMON *cm) {
|
||||
vp9_machine_specific_config(cm);
|
||||
|
||||
vp9_init_mbmode_probs(oci);
|
||||
|
||||
oci->tx_mode = ONLY_4X4;
|
||||
oci->comp_pred_mode = HYBRID_PREDICTION;
|
||||
|
||||
// Initialize reference frame sign bias structure to defaults
|
||||
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
cm->tx_mode = ONLY_4X4;
|
||||
cm->comp_pred_mode = HYBRID_PREDICTION;
|
||||
}
|
||||
|
||||
void vp9_remove_common(VP9_COMMON *oci) {
|
||||
vp9_free_frame_buffers(oci);
|
||||
void vp9_remove_common(VP9_COMMON *cm) {
|
||||
vp9_free_frame_buffers(cm);
|
||||
}
|
||||
|
||||
void vp9_initialize_common() {
|
||||
@@ -188,8 +186,8 @@ void vp9_initialize_common() {
|
||||
|
||||
void vp9_update_frame_size(VP9_COMMON *cm) {
|
||||
int i, mi_cols;
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
|
||||
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2);
|
||||
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2);
|
||||
|
||||
set_mb_mi(cm, aligned_width, aligned_height);
|
||||
setup_mi(cm);
|
||||
@@ -198,4 +196,8 @@ void vp9_update_frame_size(VP9_COMMON *cm) {
|
||||
for (i = 1; i < MAX_MB_PLANE; i++)
|
||||
cm->above_context[i] =
|
||||
cm->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
|
||||
|
||||
// Initialize the previous frame segment map to 0.
|
||||
if (cm->last_frame_seg_map)
|
||||
vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
|
||||
}
|
||||
|
||||
@@ -16,14 +16,13 @@
|
||||
|
||||
void vp9_initialize_common();
|
||||
|
||||
void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi);
|
||||
void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi);
|
||||
void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi);
|
||||
|
||||
void vp9_create_common(VP9_COMMON *oci);
|
||||
void vp9_remove_common(VP9_COMMON *oci);
|
||||
void vp9_create_common(VP9_COMMON *cm);
|
||||
void vp9_remove_common(VP9_COMMON *cm);
|
||||
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height);
|
||||
void vp9_free_frame_buffers(VP9_COMMON *oci);
|
||||
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height);
|
||||
void vp9_free_frame_buffers(VP9_COMMON *cm);
|
||||
|
||||
|
||||
void vp9_update_frame_size(VP9_COMMON *cm);
|
||||
|
||||
@@ -19,9 +19,9 @@
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_common_data.h"
|
||||
#include "vp9/common/vp9_convolve.h"
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
#include "vp9/common/vp9_mv.h"
|
||||
#include "vp9/common/vp9_scale.h"
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
|
||||
@@ -71,7 +71,7 @@ typedef enum {
|
||||
D135_PRED, // Directional 135 deg = 180 - 45
|
||||
D117_PRED, // Directional 117 deg = 180 - 63
|
||||
D153_PRED, // Directional 153 deg = 180 - 27
|
||||
D27_PRED, // Directional 27 deg = round(arctan(1/2) * 180/pi)
|
||||
D207_PRED, // Directional 207 deg = 180 + 27
|
||||
D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
|
||||
TM_PRED, // True-motion
|
||||
NEARESTMV,
|
||||
@@ -89,9 +89,9 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
|
||||
return mode >= NEARESTMV && mode <= NEWMV;
|
||||
}
|
||||
|
||||
#define VP9_INTRA_MODES (TM_PRED + 1)
|
||||
#define INTRA_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
|
||||
#define INTER_MODES (1 + NEWMV - NEARESTMV)
|
||||
|
||||
static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
|
||||
return (mode - NEARESTMV);
|
||||
@@ -115,45 +115,41 @@ typedef enum {
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int b_width_log2(BLOCK_SIZE sb_type) {
|
||||
return b_width_log2_lookup[sb_type];
|
||||
}
|
||||
static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int b_height_log2(BLOCK_SIZE sb_type) {
|
||||
return b_height_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int mi_width_log2(BLOCK_SIZE sb_type) {
|
||||
return mi_width_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int mi_height_log2(BLOCK_SIZE sb_type) {
|
||||
return mi_height_log2_lookup[sb_type];
|
||||
}
|
||||
|
||||
// This structure now relates to 8x8 block regions.
|
||||
typedef struct {
|
||||
MB_PREDICTION_MODE mode, uv_mode;
|
||||
MV_REFERENCE_FRAME ref_frame[2];
|
||||
TX_SIZE txfm_size;
|
||||
int_mv mv[2]; // for each reference frame used
|
||||
TX_SIZE tx_size;
|
||||
int_mv mv[2]; // for each reference frame used
|
||||
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
|
||||
int_mv best_mv, best_second_mv;
|
||||
int_mv best_mv[2];
|
||||
|
||||
uint8_t mb_mode_context[MAX_REF_FRAMES];
|
||||
uint8_t mode_context[MAX_REF_FRAMES];
|
||||
|
||||
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
unsigned char segment_id; // Segment id for current frame
|
||||
unsigned char skip_coeff; // 0=need to decode coeffs, 1=no coefficients
|
||||
unsigned char segment_id; // Segment id for this block.
|
||||
|
||||
// Flags used for prediction status of various bistream signals
|
||||
// Flags used for prediction status of various bit-stream signals
|
||||
unsigned char seg_id_predicted;
|
||||
|
||||
// Indicates if the mb is part of the image (1) vs border (0)
|
||||
// This can be useful in determining whether the MB provides
|
||||
// a valid predictor
|
||||
unsigned char mb_in_image;
|
||||
|
||||
INTERPOLATIONFILTERTYPE interp_filter;
|
||||
|
||||
BLOCK_SIZE_TYPE sb_type;
|
||||
BLOCK_SIZE sb_type;
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct {
|
||||
@@ -161,36 +157,19 @@ typedef struct {
|
||||
union b_mode_info bmi[4];
|
||||
} MODE_INFO;
|
||||
|
||||
static int is_inter_block(const MB_MODE_INFO *mbmi) {
|
||||
static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
|
||||
return mbmi->ref_frame[0] > INTRA_FRAME;
|
||||
}
|
||||
|
||||
static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
|
||||
return mbmi->ref_frame[1] > INTRA_FRAME;
|
||||
}
|
||||
|
||||
enum mv_precision {
|
||||
MV_PRECISION_Q3,
|
||||
MV_PRECISION_Q4
|
||||
};
|
||||
|
||||
#define VP9_REF_SCALE_SHIFT 14
|
||||
#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
|
||||
|
||||
struct scale_factors {
|
||||
int x_scale_fp; // horizontal fixed point scale factor
|
||||
int y_scale_fp; // vertical fixed point scale factor
|
||||
int x_offset_q4;
|
||||
int x_step_q4;
|
||||
int y_offset_q4;
|
||||
int y_step_q4;
|
||||
|
||||
int (*scale_value_x)(int val, const struct scale_factors *scale);
|
||||
int (*scale_value_y)(int val, const struct scale_factors *scale);
|
||||
void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col);
|
||||
MV32 (*scale_mv_q3_to_q4)(const MV *mv, const struct scale_factors *scale);
|
||||
MV32 (*scale_mv_q4)(const MV *mv, const struct scale_factors *scale);
|
||||
|
||||
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
|
||||
};
|
||||
|
||||
#if CONFIG_ALPHA
|
||||
enum { MAX_MB_PLANE = 4 };
|
||||
#else
|
||||
@@ -216,45 +195,27 @@ struct macroblockd_plane {
|
||||
ENTROPY_CONTEXT *left_context;
|
||||
};
|
||||
|
||||
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 2
|
||||
|
||||
struct loopfilter {
|
||||
int filter_level;
|
||||
|
||||
int sharpness_level;
|
||||
int last_sharpness_level;
|
||||
|
||||
uint8_t mode_ref_delta_enabled;
|
||||
uint8_t mode_ref_delta_update;
|
||||
|
||||
// 0 = Intra, Last, GF, ARF
|
||||
signed char ref_deltas[MAX_REF_LF_DELTAS];
|
||||
signed char last_ref_deltas[MAX_REF_LF_DELTAS];
|
||||
|
||||
// 0 = ZERO_MV, MV
|
||||
signed char mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
|
||||
};
|
||||
#define BLOCK_OFFSET(x, i) ((x) + (i) * 16)
|
||||
|
||||
typedef struct macroblockd {
|
||||
struct macroblockd_plane plane[MAX_MB_PLANE];
|
||||
|
||||
struct scale_factors scale_factor[2];
|
||||
|
||||
MODE_INFO *prev_mode_info_context;
|
||||
MODE_INFO *mode_info_context;
|
||||
MODE_INFO *last_mi;
|
||||
MODE_INFO *this_mi;
|
||||
int mode_info_stride;
|
||||
|
||||
MODE_INFO *mic_stream_ptr;
|
||||
|
||||
// A NULL indicates that the 8x8 is not part of the image
|
||||
MODE_INFO **mi_8x8;
|
||||
MODE_INFO **prev_mi_8x8;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
int right_available;
|
||||
|
||||
struct segmentation seg;
|
||||
struct loopfilter lf;
|
||||
|
||||
// partition contexts
|
||||
PARTITION_CONTEXT *above_seg_context;
|
||||
PARTITION_CONTEXT *left_seg_context;
|
||||
@@ -283,10 +244,9 @@ typedef struct macroblockd {
|
||||
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
|
||||
|
||||
int q_index;
|
||||
|
||||
} MACROBLOCKD;
|
||||
|
||||
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
|
||||
static INLINE uint8_t *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
|
||||
switch (subsize) {
|
||||
case BLOCK_64X64:
|
||||
case BLOCK_64X32:
|
||||
@@ -311,9 +271,8 @@ static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsi
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void update_partition_context(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE sb_type,
|
||||
BLOCK_SIZE_TYPE sb_size) {
|
||||
static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type,
|
||||
BLOCK_SIZE sb_size) {
|
||||
const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
|
||||
const int bwl = b_width_log2(sb_type);
|
||||
const int bhl = b_height_log2(sb_type);
|
||||
@@ -331,8 +290,7 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
|
||||
vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
|
||||
}
|
||||
|
||||
static INLINE int partition_plane_context(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE sb_type) {
|
||||
static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type) {
|
||||
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
|
||||
int above = 0, left = 0, i;
|
||||
int boffset = mi_width_log2(BLOCK_64X64) - bsl;
|
||||
@@ -352,10 +310,9 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
|
||||
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
|
||||
}
|
||||
|
||||
static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
|
||||
PARTITION_TYPE partition) {
|
||||
BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize];
|
||||
assert(subsize != BLOCK_SIZE_TYPES);
|
||||
static BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) {
|
||||
const BLOCK_SIZE subsize = subsize_lookup[partition][bsize];
|
||||
assert(subsize < BLOCK_SIZES);
|
||||
return subsize;
|
||||
}
|
||||
|
||||
@@ -363,7 +320,7 @@ extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT];
|
||||
|
||||
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd, int ib) {
|
||||
const MODE_INFO *const mi = xd->mode_info_context;
|
||||
const MODE_INFO *const mi = xd->this_mi;
|
||||
const MB_MODE_INFO *const mbmi = &mi->mbmi;
|
||||
|
||||
if (plane_type != PLANE_TYPE_Y_WITH_DC ||
|
||||
@@ -378,13 +335,13 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
|
||||
static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd) {
|
||||
return plane_type == PLANE_TYPE_Y_WITH_DC ?
|
||||
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
|
||||
mode2txfm_map[xd->this_mi->mbmi.mode] : DCT_DCT;
|
||||
}
|
||||
|
||||
static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
|
||||
const MACROBLOCKD *xd) {
|
||||
return plane_type == PLANE_TYPE_Y_WITH_DC ?
|
||||
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
|
||||
mode2txfm_map[xd->this_mi->mbmi.mode] : DCT_DCT;
|
||||
}
|
||||
|
||||
static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
|
||||
@@ -404,259 +361,147 @@ static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
|
||||
|
||||
|
||||
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
|
||||
return MIN(mbmi->txfm_size, max_uv_txsize_lookup[mbmi->sb_type]);
|
||||
return MIN(mbmi->tx_size, max_uv_txsize_lookup[mbmi->sb_type]);
|
||||
}
|
||||
|
||||
struct plane_block_idx {
|
||||
int plane;
|
||||
int block;
|
||||
};
|
||||
|
||||
// TODO(jkoleszar): returning a struct so it can be used in a const context,
|
||||
// expect to refactor this further later.
|
||||
static INLINE struct plane_block_idx plane_block_idx(int y_blocks,
|
||||
int b_idx) {
|
||||
const int v_offset = y_blocks * 5 / 4;
|
||||
struct plane_block_idx res;
|
||||
|
||||
if (b_idx < y_blocks) {
|
||||
res.plane = 0;
|
||||
res.block = b_idx;
|
||||
} else if (b_idx < v_offset) {
|
||||
res.plane = 1;
|
||||
res.block = b_idx - y_blocks;
|
||||
} else {
|
||||
assert(b_idx < y_blocks * 3 / 2);
|
||||
res.plane = 2;
|
||||
res.block = b_idx - v_offset;
|
||||
}
|
||||
return res;
|
||||
static BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane *pd) {
|
||||
BLOCK_SIZE bs = ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
|
||||
assert(bs < BLOCK_SIZES);
|
||||
return bs;
|
||||
}
|
||||
|
||||
static INLINE int plane_block_width(BLOCK_SIZE_TYPE bsize,
|
||||
static INLINE int plane_block_width(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane* plane) {
|
||||
return 4 << (b_width_log2(bsize) - plane->subsampling_x);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize,
|
||||
static INLINE int plane_block_height(BLOCK_SIZE bsize,
|
||||
const struct macroblockd_plane* plane) {
|
||||
return 4 << (b_height_log2(bsize) - plane->subsampling_y);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_width_log2by4(
|
||||
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
||||
return (b_width_log2(bsize) - plane->subsampling_x);
|
||||
}
|
||||
|
||||
static INLINE int plane_block_height_log2by4(
|
||||
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
||||
return (b_height_log2(bsize) - plane->subsampling_y);
|
||||
}
|
||||
|
||||
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int ss_txfrm_size,
|
||||
BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size,
|
||||
void *arg);
|
||||
|
||||
static INLINE void foreach_transformed_block_in_plane(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
||||
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
|
||||
|
||||
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const MB_MODE_INFO* mbmi = &xd->this_mi->mbmi;
|
||||
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// transform size varies per plane, look it up in a common way.
|
||||
const MB_MODE_INFO* mbmi = &xd->mode_info_context->mbmi;
|
||||
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi)
|
||||
: mbmi->txfm_size;
|
||||
const int block_size_b = bw + bh;
|
||||
const int txfrm_size_b = tx_size * 2;
|
||||
|
||||
// subsampled size of the block
|
||||
const int ss_sum = xd->plane[plane].subsampling_x
|
||||
+ xd->plane[plane].subsampling_y;
|
||||
const int ss_block_size = block_size_b - ss_sum;
|
||||
|
||||
const int step = 1 << txfrm_size_b;
|
||||
|
||||
: mbmi->tx_size;
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
|
||||
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
|
||||
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
|
||||
const int step = 1 << (tx_size << 1);
|
||||
int i;
|
||||
|
||||
assert(txfrm_size_b <= block_size_b);
|
||||
assert(txfrm_size_b <= ss_block_size);
|
||||
|
||||
// If mb_to_right_edge is < 0 we are in a situation in which
|
||||
// the current block size extends into the UMV and we won't
|
||||
// visit the sub blocks that are wholly within the UMV.
|
||||
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
|
||||
int r, c;
|
||||
const int sw = bw - xd->plane[plane].subsampling_x;
|
||||
const int sh = bh - xd->plane[plane].subsampling_y;
|
||||
int max_blocks_wide = 1 << sw;
|
||||
int max_blocks_high = 1 << sh;
|
||||
|
||||
int max_blocks_wide = num_4x4_w;
|
||||
int max_blocks_high = num_4x4_h;
|
||||
|
||||
// xd->mb_to_right_edge is in units of pixels * 8. This converts
|
||||
// it to 4x4 block sizes.
|
||||
if (xd->mb_to_right_edge < 0)
|
||||
max_blocks_wide +=
|
||||
(xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
|
||||
max_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
max_blocks_high +=
|
||||
(xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
|
||||
max_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
i = 0;
|
||||
// Unlike the normal case - in here we have to keep track of the
|
||||
// row and column of the blocks we use so that we know if we are in
|
||||
// the unrestricted motion border.
|
||||
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
|
||||
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
|
||||
for (r = 0; r < num_4x4_h; r += (1 << tx_size)) {
|
||||
for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
|
||||
if (r < max_blocks_high && c < max_blocks_wide)
|
||||
visit(plane, i, bsize, txfrm_size_b, arg);
|
||||
visit(plane, i, plane_bsize, tx_size, arg);
|
||||
i += step;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < (1 << ss_block_size); i += step) {
|
||||
visit(plane, i, bsize, txfrm_size_b, arg);
|
||||
}
|
||||
for (i = 0; i < num_4x4_w * num_4x4_h; i += step)
|
||||
visit(plane, i, plane_bsize, tx_size, arg);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void foreach_transformed_block(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane,
|
||||
visit, arg);
|
||||
}
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++)
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
|
||||
static INLINE void foreach_transformed_block_uv(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane,
|
||||
visit, arg);
|
||||
}
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++)
|
||||
foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
|
||||
// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
|
||||
// calculate the subsampled BLOCK_SIZE_TYPE, but that type isn't defined for
|
||||
// sizes smaller than 16x16 yet.
|
||||
typedef void (*foreach_predicted_block_visitor)(int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int pred_w, int pred_h,
|
||||
void *arg);
|
||||
static INLINE void foreach_predicted_block_in_plane(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int i, x, y;
|
||||
|
||||
// block sizes in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// subsampled size of the block
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
|
||||
|
||||
// size of the predictor to use.
|
||||
int pred_w, pred_h;
|
||||
|
||||
if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) {
|
||||
assert(bsize == BLOCK_8X8);
|
||||
pred_w = 0;
|
||||
pred_h = 0;
|
||||
} else {
|
||||
pred_w = bwl;
|
||||
pred_h = bhl;
|
||||
}
|
||||
assert(pred_w <= bwl);
|
||||
assert(pred_h <= bhl);
|
||||
|
||||
// visit each subblock in raster order
|
||||
i = 0;
|
||||
for (y = 0; y < 1 << bhl; y += 1 << pred_h) {
|
||||
for (x = 0; x < 1 << bwl; x += 1 << pred_w) {
|
||||
visit(plane, i, bsize, pred_w, pred_h, arg);
|
||||
i += 1 << pred_w;
|
||||
}
|
||||
i += (1 << (bwl + pred_h)) - (1 << bwl);
|
||||
}
|
||||
}
|
||||
static INLINE void foreach_predicted_block(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
}
|
||||
static INLINE void foreach_predicted_block_uv(
|
||||
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
||||
foreach_predicted_block_visitor visit, void *arg) {
|
||||
int plane;
|
||||
|
||||
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
||||
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
||||
}
|
||||
}
|
||||
static int raster_block_offset(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block, int stride) {
|
||||
const int bw = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int y = 4 * (block >> bw), x = 4 * (block & ((1 << bw) - 1));
|
||||
static int raster_block_offset(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, int stride) {
|
||||
const int bw = b_width_log2(plane_bsize);
|
||||
const int y = 4 * (raster_block >> bw);
|
||||
const int x = 4 * (raster_block & ((1 << bw) - 1));
|
||||
return y * stride + x;
|
||||
}
|
||||
static int16_t* raster_block_offset_int16(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block, int16_t *base) {
|
||||
const int stride = plane_block_width(bsize, &xd->plane[plane]);
|
||||
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
||||
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, int16_t *base) {
|
||||
const int stride = 4 << b_width_log2(plane_bsize);
|
||||
return base + raster_block_offset(plane_bsize, raster_block, stride);
|
||||
}
|
||||
static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
uint8_t *base, int stride) {
|
||||
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
||||
static uint8_t* raster_block_offset_uint8(BLOCK_SIZE plane_bsize,
|
||||
int raster_block, uint8_t *base,
|
||||
int stride) {
|
||||
return base + raster_block_offset(plane_bsize, raster_block, stride);
|
||||
}
|
||||
|
||||
static int txfrm_block_to_raster_block(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
int ss_txfrm_size) {
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int txwl = ss_txfrm_size / 2;
|
||||
const int tx_cols_log2 = bwl - txwl;
|
||||
static int txfrm_block_to_raster_block(BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size, int block) {
|
||||
const int bwl = b_width_log2(plane_bsize);
|
||||
const int tx_cols_log2 = bwl - tx_size;
|
||||
const int tx_cols = 1 << tx_cols_log2;
|
||||
const int raster_mb = block >> ss_txfrm_size;
|
||||
const int x = (raster_mb & (tx_cols - 1)) << (txwl);
|
||||
const int y = raster_mb >> tx_cols_log2 << (txwl);
|
||||
const int raster_mb = block >> (tx_size << 1);
|
||||
const int x = (raster_mb & (tx_cols - 1)) << tx_size;
|
||||
const int y = (raster_mb >> tx_cols_log2) << tx_size;
|
||||
return x + (y << bwl);
|
||||
}
|
||||
|
||||
static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
|
||||
BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int block,
|
||||
int ss_txfrm_size,
|
||||
static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size, int block,
|
||||
int *x, int *y) {
|
||||
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
||||
const int txwl = ss_txfrm_size / 2;
|
||||
const int tx_cols_log2 = bwl - txwl;
|
||||
const int bwl = b_width_log2(plane_bsize);
|
||||
const int tx_cols_log2 = bwl - tx_size;
|
||||
const int tx_cols = 1 << tx_cols_log2;
|
||||
const int raster_mb = block >> ss_txfrm_size;
|
||||
*x = (raster_mb & (tx_cols - 1)) << (txwl);
|
||||
*y = raster_mb >> tx_cols_log2 << (txwl);
|
||||
const int raster_mb = block >> (tx_size << 1);
|
||||
*x = (raster_mb & (tx_cols - 1)) << tx_size;
|
||||
*y = (raster_mb >> tx_cols_log2) << tx_size;
|
||||
}
|
||||
|
||||
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
|
||||
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
|
||||
const int bw = plane_block_width(bsize, &xd->plane[plane]);
|
||||
const int bh = plane_block_height(bsize, &xd->plane[plane]);
|
||||
static void extend_for_intra(MACROBLOCKD* const xd, BLOCK_SIZE plane_bsize,
|
||||
int plane, int block, TX_SIZE tx_size) {
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
uint8_t *const buf = pd->dst.buf;
|
||||
const int stride = pd->dst.stride;
|
||||
|
||||
int x, y;
|
||||
txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
|
||||
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
|
||||
x = x * 4 - 1;
|
||||
y = y * 4 - 1;
|
||||
// Copy a pixel into the umv if we are in a situation where the block size
|
||||
@@ -664,41 +509,38 @@ static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
|
||||
// TODO(JBB): Should be able to do the full extend in place so we don't have
|
||||
// to do this multiple times.
|
||||
if (xd->mb_to_right_edge < 0) {
|
||||
int umv_border_start = bw
|
||||
+ (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x));
|
||||
const int bw = 4 << b_width_log2(plane_bsize);
|
||||
const int umv_border_start = bw + (xd->mb_to_right_edge >>
|
||||
(3 + pd->subsampling_x));
|
||||
|
||||
if (x + bw > umv_border_start)
|
||||
vpx_memset(
|
||||
xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
||||
+ umv_border_start,
|
||||
*(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
||||
+ umv_border_start - 1),
|
||||
bw);
|
||||
vpx_memset(&buf[y * stride + umv_border_start],
|
||||
buf[y * stride + umv_border_start - 1], bw);
|
||||
}
|
||||
if (xd->mb_to_bottom_edge < 0) {
|
||||
int umv_border_start = bh
|
||||
+ (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y));
|
||||
int i;
|
||||
uint8_t c = *(xd->plane[plane].dst.buf
|
||||
+ (umv_border_start - 1) * xd->plane[plane].dst.stride + x);
|
||||
|
||||
uint8_t *d = xd->plane[plane].dst.buf
|
||||
+ umv_border_start * xd->plane[plane].dst.stride + x;
|
||||
if (xd->mb_to_bottom_edge < 0) {
|
||||
const int bh = 4 << b_height_log2(plane_bsize);
|
||||
const int umv_border_start = bh + (xd->mb_to_bottom_edge >>
|
||||
(3 + pd->subsampling_y));
|
||||
int i;
|
||||
const uint8_t c = buf[(umv_border_start - 1) * stride + x];
|
||||
uint8_t *d = &buf[umv_border_start * stride + x];
|
||||
|
||||
if (y + bh > umv_border_start)
|
||||
for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride)
|
||||
for (i = 0; i < bh; ++i, d += stride)
|
||||
*d = c;
|
||||
}
|
||||
}
|
||||
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
int plane, int tx_size_in_blocks,
|
||||
int eob, int aoff, int loff,
|
||||
static void set_contexts_on_border(MACROBLOCKD *xd,
|
||||
struct macroblockd_plane *pd,
|
||||
BLOCK_SIZE plane_bsize,
|
||||
int tx_size_in_blocks, int has_eob,
|
||||
int aoff, int loff,
|
||||
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
|
||||
struct macroblockd_plane *pd = &xd->plane[plane];
|
||||
int mi_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
|
||||
int mi_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
|
||||
int above_contexts = tx_size_in_blocks;
|
||||
int left_contexts = tx_size_in_blocks;
|
||||
int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd);
|
||||
int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd);
|
||||
int pt;
|
||||
|
||||
// xd->mb_to_right_edge is in units of pixels * 8. This converts
|
||||
@@ -706,26 +548,47 @@ static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
|
||||
if (xd->mb_to_right_edge < 0)
|
||||
mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
// this code attempts to avoid copying into contexts that are outside
|
||||
// our border. Any blocks that do are set to 0...
|
||||
if (above_contexts + aoff > mi_blocks_wide)
|
||||
above_contexts = mi_blocks_wide - aoff;
|
||||
|
||||
if (xd->mb_to_bottom_edge < 0)
|
||||
mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
|
||||
|
||||
if (left_contexts + loff > mi_blocks_high)
|
||||
left_contexts = mi_blocks_high - loff;
|
||||
|
||||
for (pt = 0; pt < above_contexts; pt++)
|
||||
A[pt] = eob > 0;
|
||||
A[pt] = has_eob;
|
||||
for (pt = above_contexts; pt < tx_size_in_blocks; pt++)
|
||||
A[pt] = 0;
|
||||
for (pt = 0; pt < left_contexts; pt++)
|
||||
L[pt] = eob > 0;
|
||||
L[pt] = has_eob;
|
||||
for (pt = left_contexts; pt < tx_size_in_blocks; pt++)
|
||||
L[pt] = 0;
|
||||
}
|
||||
|
||||
static void set_contexts(MACROBLOCKD *xd, struct macroblockd_plane *pd,
|
||||
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
|
||||
int has_eob, int aoff, int loff) {
|
||||
ENTROPY_CONTEXT *const A = pd->above_context + aoff;
|
||||
ENTROPY_CONTEXT *const L = pd->left_context + loff;
|
||||
const int tx_size_in_blocks = 1 << tx_size;
|
||||
|
||||
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
|
||||
set_contexts_on_border(xd, pd, plane_bsize, tx_size_in_blocks, has_eob,
|
||||
aoff, loff, A, L);
|
||||
} else {
|
||||
vpx_memset(A, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
|
||||
vpx_memset(L, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
static int get_tx_eob(struct segmentation *seg, int segment_id,
|
||||
TX_SIZE tx_size) {
|
||||
const int eob_max = 16 << (tx_size << 1);
|
||||
return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
|
||||
}
|
||||
|
||||
#endif // VP9_COMMON_VP9_BLOCKD_H_
|
||||
|
||||
@@ -13,33 +13,33 @@
|
||||
#include "vp9/common/vp9_common_data.h"
|
||||
|
||||
// Log 2 conversion lookup tables for block width and height
|
||||
const int b_width_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int b_width_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4};
|
||||
const int b_height_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int b_height_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4};
|
||||
const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
|
||||
const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_4x4_blocks_high_lookup[BLOCK_SIZES] =
|
||||
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
|
||||
// Log 2 conversion lookup tables for modeinfo width and height
|
||||
const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int mi_width_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
|
||||
const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
|
||||
const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int mi_height_log2_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
|
||||
const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_8x8_blocks_high_lookup[BLOCK_SIZES] =
|
||||
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
|
||||
|
||||
// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
|
||||
const int size_group_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int size_group_lookup[BLOCK_SIZES] =
|
||||
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
|
||||
|
||||
const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] =
|
||||
const int num_pels_log2_lookup[BLOCK_SIZES] =
|
||||
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
|
||||
|
||||
|
||||
const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
|
||||
const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = {
|
||||
{ // 4X4
|
||||
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
|
||||
PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID,
|
||||
@@ -74,51 +74,62 @@ const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
|
||||
}
|
||||
};
|
||||
|
||||
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
|
||||
const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = {
|
||||
{ // PARTITION_NONE
|
||||
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
|
||||
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
|
||||
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
|
||||
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
|
||||
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
|
||||
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
|
||||
BLOCK_64X64,
|
||||
}, { // PARTITION_HORZ
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_64X32,
|
||||
}, { // PARTITION_VERT
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X64,
|
||||
}, { // PARTITION_SPLIT
|
||||
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
|
||||
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
|
||||
BLOCK_32X32,
|
||||
}
|
||||
};
|
||||
|
||||
const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
TX_16X16, TX_16X16, TX_16X16,
|
||||
TX_32X32, TX_32X32, TX_32X32, TX_32X32
|
||||
};
|
||||
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
TX_16X16, TX_16X16, TX_16X16, TX_32X32
|
||||
};
|
||||
|
||||
const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
|
||||
{ BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 },
|
||||
{ BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 },
|
||||
{ BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 },
|
||||
{ BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 },
|
||||
{ BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 }
|
||||
const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
|
||||
// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
|
||||
// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
|
||||
{{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}},
|
||||
{{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}},
|
||||
{{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}},
|
||||
{{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}},
|
||||
{{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}},
|
||||
{{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}},
|
||||
{{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}},
|
||||
{{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}},
|
||||
{{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}},
|
||||
{{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}},
|
||||
{{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}},
|
||||
{{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}},
|
||||
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
|
||||
};
|
||||
|
||||
|
||||
@@ -13,20 +13,20 @@
|
||||
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
|
||||
extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int size_group_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES];
|
||||
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
|
||||
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
|
||||
extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5];
|
||||
extern const int b_width_log2_lookup[BLOCK_SIZES];
|
||||
extern const int b_height_log2_lookup[BLOCK_SIZES];
|
||||
extern const int mi_width_log2_lookup[BLOCK_SIZES];
|
||||
extern const int mi_height_log2_lookup[BLOCK_SIZES];
|
||||
extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZES];
|
||||
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZES];
|
||||
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZES];
|
||||
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZES];
|
||||
extern const int size_group_lookup[BLOCK_SIZES];
|
||||
extern const int num_pels_log2_lookup[BLOCK_SIZES];
|
||||
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES];
|
||||
extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
|
||||
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
|
||||
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
|
||||
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
|
||||
|
||||
#endif // VP9_COMMON_VP9_COMMON_DATA_H
|
||||
#endif // VP9_COMMON_VP9_COMMON_DATA_H
|
||||
|
||||
@@ -14,66 +14,45 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define VP9_FILTER_WEIGHT 128
|
||||
#define VP9_FILTER_SHIFT 7
|
||||
|
||||
/* Assume a bank of 16 filters to choose from. There are two implementations
|
||||
* for filter wrapping behavior, since we want to be able to pick which filter
|
||||
* to start with. We could either:
|
||||
*
|
||||
* 1) make filter_ a pointer to the base of the filter array, and then add an
|
||||
* additional offset parameter, to choose the starting filter.
|
||||
* 2) use a pointer to 2 periods worth of filters, so that even if the original
|
||||
* phase offset is at 15/16, we'll have valid data to read. The filter
|
||||
* tables become [32][8], and the second half is duplicated.
|
||||
* 3) fix the alignment of the filter tables, so that we know the 0/16 is
|
||||
* always 256 byte aligned.
|
||||
*
|
||||
* Implementations 2 and 3 are likely preferable, as they avoid an extra 2
|
||||
* parameters, and switching between them is trivial, with the
|
||||
* ALIGN_FILTERS_256 macro, below.
|
||||
*/
|
||||
#define ALIGN_FILTERS_256 1
|
||||
|
||||
static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x0, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
const int16_t *filter_x_base = filter_x0;
|
||||
int x, y, k;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_x_base =
|
||||
(const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source line */
|
||||
src -= taps / 2 - 1;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_x = filter_x0;
|
||||
|
||||
/* Initial phase offset */
|
||||
int x0_q4 = (filter_x - filter_x_base) / taps;
|
||||
int x_q4 = x0_q4;
|
||||
int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
int src_x = (x_q4 - x0_q4) >> 4;
|
||||
const int src_x = x_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_x = filter_x_base +
|
||||
(x_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[src_x + k] * filter_x[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
||||
|
||||
/* Move to the next source pixel */
|
||||
x_q4 += x_step_q4;
|
||||
filter_x = filter_x_base + (x_q4 & 0xf) * taps;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
@@ -85,37 +64,37 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x0, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
const int16_t *filter_x_base = filter_x0;
|
||||
int x, y, k;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_x_base =
|
||||
(const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source line */
|
||||
src -= taps / 2 - 1;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_x = filter_x0;
|
||||
|
||||
/* Initial phase offset */
|
||||
int x0_q4 = (filter_x - filter_x_base) / taps;
|
||||
int x_q4 = x0_q4;
|
||||
int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Per-pixel src offset */
|
||||
int src_x = (x_q4 - x0_q4) >> 4;
|
||||
const int src_x = x_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_x = filter_x_base +
|
||||
(x_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[src_x + k] * filter_x[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[x] = ROUND_POWER_OF_TWO(dst[x] +
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
|
||||
|
||||
/* Move to the next source pixel */
|
||||
x_q4 += x_step_q4;
|
||||
filter_x = filter_x_base + (x_q4 & 0xf) * taps;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
@@ -127,37 +106,37 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y0, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
int x, y, k;
|
||||
|
||||
const int16_t *filter_y_base = filter_y0;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_y_base =
|
||||
(const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source column */
|
||||
src -= src_stride * (taps / 2 - 1);
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_y = filter_y0;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y0_q4 = (filter_y - filter_y_base) / taps;
|
||||
int y_q4 = y0_q4;
|
||||
int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
int src_y = (y_q4 - y0_q4) >> 4;
|
||||
const int src_y = y_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_y = filter_y_base +
|
||||
(y_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[(src_y + k) * src_stride] * filter_y[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[y * dst_stride] =
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
|
||||
|
||||
/* Move to the next source pixel */
|
||||
y_q4 += y_step_q4;
|
||||
filter_y = filter_y_base + (y_q4 & 0xf) * taps;
|
||||
}
|
||||
++src;
|
||||
++dst;
|
||||
@@ -169,38 +148,37 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y0, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
int x, y, k, sum;
|
||||
int x, y, k;
|
||||
|
||||
const int16_t *filter_y_base = filter_y0;
|
||||
|
||||
#if ALIGN_FILTERS_256
|
||||
filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
#endif
|
||||
/* NOTE: This assumes that the filter table is 256-byte aligned. */
|
||||
/* TODO(agrange) Modify to make independent of table alignment. */
|
||||
const int16_t *const filter_y_base =
|
||||
(const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
|
||||
|
||||
/* Adjust base pointer address for this source column */
|
||||
src -= src_stride * (taps / 2 - 1);
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *filter_y = filter_y0;
|
||||
|
||||
for (x = 0; x < w; ++x) {
|
||||
/* Initial phase offset */
|
||||
int y0_q4 = (filter_y - filter_y_base) / taps;
|
||||
int y_q4 = y0_q4;
|
||||
int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
/* Per-pixel src offset */
|
||||
int src_y = (y_q4 - y0_q4) >> 4;
|
||||
const int src_y = y_q4 >> SUBPEL_BITS;
|
||||
int sum = 0;
|
||||
|
||||
for (sum = 0, k = 0; k < taps; ++k) {
|
||||
/* Pointer to filter to use */
|
||||
const int16_t *const filter_y = filter_y_base +
|
||||
(y_q4 & SUBPEL_MASK) * taps;
|
||||
|
||||
for (k = 0; k < taps; ++k)
|
||||
sum += src[(src_y + k) * src_stride] * filter_y[k];
|
||||
}
|
||||
sum += (VP9_FILTER_WEIGHT >> 1);
|
||||
dst[y * dst_stride] =
|
||||
(dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
|
||||
|
||||
/* Adjust source and filter to use for the next pixel */
|
||||
dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
|
||||
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
|
||||
|
||||
/* Move to the next source pixel */
|
||||
y_q4 += y_step_q4;
|
||||
filter_y = filter_y_base + (y_q4 & 0xf) * taps;
|
||||
}
|
||||
++src;
|
||||
++dst;
|
||||
@@ -213,58 +191,27 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
/* Fixed size intermediate buffer places limits on parameters.
|
||||
* Maximum intermediate_height is 135, for y_step_q4 == 32,
|
||||
* Maximum intermediate_height is 324, for y_step_q4 == 80,
|
||||
* h == 64, taps == 8.
|
||||
* y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
|
||||
*/
|
||||
uint8_t temp[64 * 135];
|
||||
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
|
||||
uint8_t temp[64 * 324];
|
||||
int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
|
||||
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
assert(taps <= 8);
|
||||
assert(y_step_q4 <= 32);
|
||||
assert(x_step_q4 <= 32);
|
||||
assert(y_step_q4 <= 80);
|
||||
assert(x_step_q4 <= 80);
|
||||
|
||||
if (intermediate_height < h)
|
||||
intermediate_height = h;
|
||||
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height, taps);
|
||||
convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, taps);
|
||||
}
|
||||
|
||||
static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h, int taps) {
|
||||
/* Fixed size intermediate buffer places limits on parameters.
|
||||
* Maximum intermediate_height is 135, for y_step_q4 == 32,
|
||||
* h == 64, taps == 8.
|
||||
*/
|
||||
uint8_t temp[64 * 135];
|
||||
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
|
||||
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
assert(taps <= 8);
|
||||
assert(y_step_q4 <= 32);
|
||||
assert(x_step_q4 <= 32);
|
||||
|
||||
if (intermediate_height < h)
|
||||
intermediate_height = h;
|
||||
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height, taps);
|
||||
convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, taps);
|
||||
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
||||
intermediate_height, taps);
|
||||
convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
|
||||
x_step_q4, filter_y, y_step_q4, w, h, taps);
|
||||
}
|
||||
|
||||
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -273,8 +220,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -283,8 +229,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -293,8 +238,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -303,8 +247,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_avg_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -313,8 +256,7 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
convolve_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h, 8);
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -327,16 +269,9 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
assert(w <= 64);
|
||||
assert(h <= 64);
|
||||
|
||||
vp9_convolve8(src, src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
vp9_convolve_avg(temp, 64,
|
||||
dst, dst_stride,
|
||||
NULL, 0, /* These unused parameter should be removed! */
|
||||
NULL, 0, /* These unused parameter should be removed! */
|
||||
w, h);
|
||||
vp9_convolve8(src, src_stride, temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4, w, h);
|
||||
vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
|
||||
}
|
||||
|
||||
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
@@ -347,7 +282,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
int r;
|
||||
|
||||
for (r = h; r > 0; --r) {
|
||||
memcpy(dst, src, w);
|
||||
vpx_memcpy(dst, src, w);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
@@ -361,9 +296,9 @@ void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y) {
|
||||
for (x = 0; x < w; ++x) {
|
||||
dst[x] = (dst[x] + src[x] + 1) >> 1;
|
||||
}
|
||||
for (x = 0; x < w; ++x)
|
||||
dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
|
||||
@@ -7,12 +7,14 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef VP9_COMMON_CONVOLVE_H_
|
||||
#define VP9_COMMON_CONVOLVE_H_
|
||||
#ifndef VP9_COMMON_VP9_CONVOLVE_H_
|
||||
#define VP9_COMMON_VP9_CONVOLVE_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#define FILTER_BITS 7
|
||||
|
||||
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
@@ -24,4 +26,4 @@ struct subpix_fn_table {
|
||||
const int16_t (*filter_y)[8];
|
||||
};
|
||||
|
||||
#endif // VP9_COMMON_CONVOLVE_H_
|
||||
#endif // VP9_COMMON_VP9_CONVOLVE_H_
|
||||
|
||||
@@ -22,23 +22,24 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) {
|
||||
* and uses the passed in member offset to print out the value of an integer
|
||||
* for each mbmi member value in the mi structure.
|
||||
*/
|
||||
static void print_mi_data(VP9_COMMON *common, FILE *file, char *descriptor,
|
||||
static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor,
|
||||
size_t member_offset) {
|
||||
int mi_row;
|
||||
int mi_col;
|
||||
int mi_index = 0;
|
||||
MODE_INFO *mi = common->mi;
|
||||
int rows = common->mi_rows;
|
||||
int cols = common->mi_cols;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible;
|
||||
int rows = cm->mi_rows;
|
||||
int cols = cm->mi_cols;
|
||||
char prefix = descriptor[0];
|
||||
|
||||
log_frame_info(common, descriptor, file);
|
||||
log_frame_info(cm, descriptor, file);
|
||||
mi_index = 0;
|
||||
for (mi_row = 0; mi_row < rows; mi_row++) {
|
||||
fprintf(file, "%c ", prefix);
|
||||
for (mi_col = 0; mi_col < cols; mi_col++) {
|
||||
fprintf(file, "%2d ",
|
||||
*((int*) ((char *) (&mi[mi_index].mbmi) + member_offset)));
|
||||
*((int*) ((char *) (&mi_8x8[mi_index]->mbmi) +
|
||||
member_offset)));
|
||||
mi_index++;
|
||||
}
|
||||
fprintf(file, "\n");
|
||||
@@ -51,23 +52,23 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) {
|
||||
int mi_col;
|
||||
int mi_index = 0;
|
||||
FILE *mvs = fopen(file, "a");
|
||||
MODE_INFO *mi = cm->mi;
|
||||
MODE_INFO **mi_8x8 = cm->mi_grid_visible;
|
||||
int rows = cm->mi_rows;
|
||||
int cols = cm->mi_cols;
|
||||
|
||||
print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
|
||||
print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
|
||||
print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, mb_skip_coeff));
|
||||
print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip_coeff));
|
||||
print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
|
||||
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, txfm_size));
|
||||
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
|
||||
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
|
||||
|
||||
log_frame_info(cm, "Vectors ",mvs);
|
||||
log_frame_info(cm, "Vectors ", mvs);
|
||||
for (mi_row = 0; mi_row < rows; mi_row++) {
|
||||
fprintf(mvs,"V ");
|
||||
fprintf(mvs, "V ");
|
||||
for (mi_col = 0; mi_col < cols; mi_col++) {
|
||||
fprintf(mvs, "%4d:%4d ", mi[mi_index].mbmi.mv[0].as_mv.row,
|
||||
mi[mi_index].mbmi.mv[0].as_mv.col);
|
||||
fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
|
||||
mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
|
||||
mi_index++;
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
#define VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
/*Generated file, included by vp9_entropy.c*/
|
||||
static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
|
||||
@@ -694,3 +696,4 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
|
||||
}
|
||||
};
|
||||
|
||||
#endif // VP9_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
@@ -107,101 +107,171 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]) = {
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]) = {
|
||||
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
|
||||
50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
|
||||
98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
|
||||
100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
|
||||
55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
|
||||
133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
|
||||
26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
|
||||
180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
|
||||
12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
|
||||
226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
|
||||
242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
|
||||
77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
|
||||
230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
|
||||
63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
|
||||
219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
|
||||
190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255,
|
||||
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
|
||||
50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
|
||||
98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
|
||||
100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
|
||||
55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
|
||||
133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
|
||||
26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
|
||||
180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
|
||||
12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
|
||||
226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
|
||||
242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
|
||||
77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
|
||||
230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
|
||||
63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
|
||||
219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159,
|
||||
251,
|
||||
190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]) = {
|
||||
0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
|
||||
34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
|
||||
67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
|
||||
146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
|
||||
22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
|
||||
225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
|
||||
87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
|
||||
88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
|
||||
213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
|
||||
74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
|
||||
200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
|
||||
60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
|
||||
233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
|
||||
62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
|
||||
126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
|
||||
159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255,
|
||||
0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
|
||||
34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
|
||||
67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
|
||||
146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
|
||||
22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
|
||||
225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
|
||||
87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
|
||||
88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
|
||||
213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
|
||||
74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
|
||||
200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
|
||||
60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
|
||||
233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
|
||||
62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
|
||||
126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205,
|
||||
236,
|
||||
159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]) = {
|
||||
0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
|
||||
49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
|
||||
23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
|
||||
25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
|
||||
13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
|
||||
144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
|
||||
89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
|
||||
75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
|
||||
165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
|
||||
167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
|
||||
197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
|
||||
124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
|
||||
156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
|
||||
157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158,
|
||||
188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
|
||||
190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255,
|
||||
0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
|
||||
49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
|
||||
23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
|
||||
25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
|
||||
13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
|
||||
144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
|
||||
89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
|
||||
75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
|
||||
165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
|
||||
167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
|
||||
197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
|
||||
124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
|
||||
156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
|
||||
157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111,
|
||||
158,
|
||||
188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220,
|
||||
175,
|
||||
190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
|
||||
255,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]) = {
|
||||
0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100,
|
||||
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197,
|
||||
71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136,
|
||||
262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451,
|
||||
481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234,
|
||||
484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, 486, 77, 204, 362,
|
||||
608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, 238, 48, 143,
|
||||
80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, 393, 300, 269, 176, 145,
|
||||
52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, 426, 395,
|
||||
364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, 210, 179, 117, 86, 55, 738, 707,
|
||||
614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, 304,
|
||||
273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, 864, 833, 802, 771, 740, 709,
|
||||
678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
|
||||
462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, 743, 619, 495, 371, 247, 123,
|
||||
896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681,
|
||||
650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, 651, 620, 589, 558, 527,
|
||||
496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373,
|
||||
342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, 499, 375, 251, 127,
|
||||
900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, 685, 654, 592, 561,
|
||||
530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, 407, 376, 345,
|
||||
314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, 967, 874, 843, 750,
|
||||
719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, 440, 409,
|
||||
316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, 752, 721, 690, 659,
|
||||
628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, 971,
|
||||
878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, 537, 444, 413, 972,
|
||||
941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
|
||||
446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, 1007, 883, 759, 635, 511,
|
||||
912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791,
|
||||
760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, 1011, 887, 763, 639,
|
||||
916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982,
|
||||
951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, 1016, 985, 954, 923,
|
||||
892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
|
||||
0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160,
|
||||
129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193,
|
||||
68, 131, 37, 100,
|
||||
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38,
|
||||
258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321,
|
||||
102, 352, 8, 197,
|
||||
71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
|
||||
135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293,
|
||||
41, 417, 199, 136,
|
||||
262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105,
|
||||
419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169,
|
||||
295, 420, 106, 451,
|
||||
481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421,
|
||||
75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
|
||||
453, 139, 44, 234,
|
||||
484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108,
|
||||
546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577,
|
||||
486, 77, 204, 362,
|
||||
608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173,
|
||||
610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17,
|
||||
111, 238, 48, 143,
|
||||
80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51,
|
||||
83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424,
|
||||
393, 300, 269, 176, 145,
|
||||
52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301,
|
||||
270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581,
|
||||
550, 519, 488, 457, 426, 395,
|
||||
364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737,
|
||||
706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241,
|
||||
210, 179, 117, 86, 55, 738, 707,
|
||||
614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491,
|
||||
367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676,
|
||||
645, 552, 521, 428, 397, 304,
|
||||
273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553,
|
||||
522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26,
|
||||
864, 833, 802, 771, 740, 709,
|
||||
678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306,
|
||||
275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741,
|
||||
710, 679, 617, 586, 555, 493,
|
||||
462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
|
||||
742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
|
||||
743, 619, 495, 371, 247, 123,
|
||||
896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680,
|
||||
649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929,
|
||||
898, 836, 805, 774, 712, 681,
|
||||
650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154,
|
||||
92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
|
||||
651, 620, 589, 558, 527,
|
||||
496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124,
|
||||
93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590,
|
||||
559, 497, 466, 435, 373,
|
||||
342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715,
|
||||
622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
|
||||
499, 375, 251, 127,
|
||||
900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560,
|
||||
529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716,
|
||||
685, 654, 592, 561,
|
||||
530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903,
|
||||
872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469,
|
||||
438, 407, 376, 345,
|
||||
314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718,
|
||||
687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998,
|
||||
967, 874, 843, 750,
|
||||
719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503,
|
||||
379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657,
|
||||
564, 533, 440, 409,
|
||||
316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534,
|
||||
472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783,
|
||||
752, 721, 690, 659,
|
||||
628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970,
|
||||
939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381,
|
||||
350, 319, 1002, 971,
|
||||
878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631,
|
||||
507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568,
|
||||
537, 444, 413, 972,
|
||||
941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414,
|
||||
1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601,
|
||||
570, 539, 508, 477,
|
||||
446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571,
|
||||
509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479,
|
||||
1007, 883, 759, 635, 511,
|
||||
912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945,
|
||||
914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915,
|
||||
884, 853, 822, 791,
|
||||
760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
|
||||
761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607,
|
||||
1011, 887, 763, 639,
|
||||
916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825,
|
||||
794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733,
|
||||
702, 671, 1013, 982,
|
||||
951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015,
|
||||
891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
|
||||
1016, 985, 954, 923,
|
||||
892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863,
|
||||
1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021,
|
||||
990, 959, 1022, 991, 1023,
|
||||
};
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
const vp9_tree_index vp9_coef_tree[ 22] = {
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
@@ -377,7 +447,7 @@ static const vp9_prob modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
|
||||
|
||||
static void extend_model_to_full_distribution(vp9_prob p,
|
||||
vp9_prob *tree_probs) {
|
||||
const int l = ((p - 1) / 2);
|
||||
const int l = (p - 1) / 2;
|
||||
const vp9_prob (*model)[MODEL_NODES] = modelcoefprobs_pareto8;
|
||||
if (p & 1) {
|
||||
vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
|
||||
@@ -436,11 +506,11 @@ const vp9_extra_bit vp9_extra_bits[12] = {
|
||||
|
||||
#include "vp9/common/vp9_default_coef_probs.h"
|
||||
|
||||
void vp9_default_coef_probs(VP9_COMMON *pc) {
|
||||
vp9_copy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
|
||||
vp9_copy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
|
||||
vp9_copy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
|
||||
vp9_copy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
|
||||
void vp9_default_coef_probs(VP9_COMMON *cm) {
|
||||
vp9_copy(cm->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
|
||||
vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
|
||||
vp9_copy(cm->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
|
||||
vp9_copy(cm->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
|
||||
}
|
||||
|
||||
// Neighborhood 5-tuples for various scans and blocksizes,
|
||||
@@ -569,31 +639,6 @@ void vp9_init_neighbors() {
|
||||
vp9_default_scan_32x32_neighbors);
|
||||
}
|
||||
|
||||
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
|
||||
if (scan == vp9_default_scan_4x4) {
|
||||
return vp9_default_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_row_scan_4x4) {
|
||||
return vp9_row_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_col_scan_4x4) {
|
||||
return vp9_col_scan_4x4_neighbors;
|
||||
} else if (scan == vp9_default_scan_8x8) {
|
||||
return vp9_default_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_row_scan_8x8) {
|
||||
return vp9_row_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_col_scan_8x8) {
|
||||
return vp9_col_scan_8x8_neighbors;
|
||||
} else if (scan == vp9_default_scan_16x16) {
|
||||
return vp9_default_scan_16x16_neighbors;
|
||||
} else if (scan == vp9_row_scan_16x16) {
|
||||
return vp9_row_scan_16x16_neighbors;
|
||||
} else if (scan == vp9_col_scan_16x16) {
|
||||
return vp9_col_scan_16x16_neighbors;
|
||||
} else {
|
||||
assert(scan == vp9_default_scan_32x32);
|
||||
return vp9_default_scan_32x32_neighbors;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_coef_tree_initialize() {
|
||||
vp9_init_neighbors();
|
||||
init_bit_trees();
|
||||
@@ -622,7 +667,6 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
|
||||
int t, i, j, k, l;
|
||||
unsigned int branch_ct[UNCONSTRAINED_NODES][2];
|
||||
vp9_prob coef_probs[UNCONSTRAINED_NODES];
|
||||
int entropy_nodes_adapt = UNCONSTRAINED_NODES;
|
||||
|
||||
for (i = 0; i < BLOCK_TYPES; ++i)
|
||||
for (j = 0; j < REF_TYPES; ++j)
|
||||
@@ -635,7 +679,7 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
|
||||
0);
|
||||
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
|
||||
coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
|
||||
for (t = 0; t < entropy_nodes_adapt; ++t)
|
||||
for (t = 0; t < UNCONSTRAINED_NODES; ++t)
|
||||
dst_coef_probs[i][j][k][l][t] = merge_probs(
|
||||
pre_coef_probs[i][j][k][l][t], coef_probs[t],
|
||||
branch_ct[t], count_sat, update_factor);
|
||||
|
||||
@@ -95,7 +95,7 @@ typedef vp9_prob vp9_coeff_probs[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
|
||||
#define MODULUS_PARAM 13 /* Modulus parameter */
|
||||
|
||||
struct VP9Common;
|
||||
void vp9_default_coef_probs(struct VP9Common *);
|
||||
void vp9_default_coef_probs(struct VP9Common *cm);
|
||||
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]);
|
||||
@@ -154,19 +154,17 @@ extern DECLARE_ALIGNED(16, int16_t,
|
||||
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
|
||||
|
||||
void vp9_coef_tree_initialize(void);
|
||||
void vp9_adapt_coef_probs(struct VP9Common *);
|
||||
void vp9_adapt_coef_probs(struct VP9Common *cm);
|
||||
|
||||
static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd,
|
||||
BLOCK_SIZE_TYPE bsize) {
|
||||
/* Clear entropy contexts */
|
||||
const int bw = 1 << b_width_log2(bsize);
|
||||
const int bh = 1 << b_height_log2(bsize);
|
||||
static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
|
||||
int i;
|
||||
for (i = 0; i < MAX_MB_PLANE; i++) {
|
||||
vpx_memset(xd->plane[i].above_context, 0,
|
||||
sizeof(ENTROPY_CONTEXT) * bw >> xd->plane[i].subsampling_x);
|
||||
vpx_memset(xd->plane[i].left_context, 0,
|
||||
sizeof(ENTROPY_CONTEXT) * bh >> xd->plane[i].subsampling_y);
|
||||
struct macroblockd_plane *const pd = &xd->plane[i];
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
|
||||
vpx_memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) *
|
||||
num_4x4_blocks_wide_lookup[plane_bsize]);
|
||||
vpx_memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) *
|
||||
num_4x4_blocks_high_lookup[plane_bsize]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,9 +190,6 @@ static INLINE int get_coef_context(const int16_t *neighbors,
|
||||
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
|
||||
}
|
||||
|
||||
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
|
||||
|
||||
|
||||
// 128 lists of probabilities are stored for the following ONE node probs:
|
||||
// 1, 3, 5, 7, ..., 253, 255
|
||||
// In between probabilities are interpolated linearly
|
||||
@@ -338,6 +333,63 @@ static INLINE const int16_t* get_iscan_16x16(TX_TYPE tx_type) {
|
||||
}
|
||||
}
|
||||
|
||||
static int get_entropy_context(TX_SIZE tx_size,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
|
||||
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
|
||||
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
above_ec = a[0] != 0;
|
||||
left_ec = l[0] != 0;
|
||||
break;
|
||||
case TX_8X8:
|
||||
above_ec = !!*(uint16_t *)a;
|
||||
left_ec = !!*(uint16_t *)l;
|
||||
break;
|
||||
case TX_16X16:
|
||||
above_ec = !!*(uint32_t *)a;
|
||||
left_ec = !!*(uint32_t *)l;
|
||||
break;
|
||||
case TX_32X32:
|
||||
above_ec = !!*(uint64_t *)a;
|
||||
left_ec = !!*(uint64_t *)l;
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid transform size.");
|
||||
}
|
||||
|
||||
return combine_entropy_contexts(above_ec, left_ec);
|
||||
}
|
||||
|
||||
static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size,
|
||||
PLANE_TYPE type, int block_idx,
|
||||
const int16_t **scan,
|
||||
const int16_t **scan_nb,
|
||||
const uint8_t **band_translate) {
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
get_scan_nb_4x4(get_tx_type_4x4(type, xd, block_idx), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_4x4;
|
||||
break;
|
||||
case TX_8X8:
|
||||
get_scan_nb_8x8(get_tx_type_8x8(type, xd), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_16X16:
|
||||
get_scan_nb_16x16(get_tx_type_16x16(type, xd), scan, scan_nb);
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
case TX_32X32:
|
||||
*scan = vp9_default_scan_32x32;
|
||||
*scan_nb = vp9_default_scan_32x32_neighbors;
|
||||
*band_translate = vp9_coefband_trans_8x8plus;
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid transform size.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
enum { VP9_COEF_UPDATE_PROB = 252 };
|
||||
|
||||
#endif // VP9_COMMON_VP9_ENTROPY_H_
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
|
||||
const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */,
|
||||
{ 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */,
|
||||
{ 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */,
|
||||
@@ -23,21 +23,21 @@ const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
|
||||
{ 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */,
|
||||
{ 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */,
|
||||
{ 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */,
|
||||
{ 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */,
|
||||
{ 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d207 */,
|
||||
{ 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */,
|
||||
{ 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y = tm */
|
||||
};
|
||||
|
||||
static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* block_size < 8x8 */,
|
||||
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* block_size < 16x16 */,
|
||||
{ 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* block_size < 32x32 */,
|
||||
{ 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* block_size >= 32x32 */
|
||||
};
|
||||
|
||||
static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
static const vp9_prob default_if_uv_probs[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */,
|
||||
{ 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */,
|
||||
{ 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */,
|
||||
@@ -45,7 +45,7 @@ static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
|
||||
{ 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */,
|
||||
{ 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */,
|
||||
{ 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */,
|
||||
{ 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */,
|
||||
{ 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d207 */,
|
||||
{ 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */,
|
||||
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */
|
||||
};
|
||||
@@ -98,9 +98,9 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
|
||||
}
|
||||
};
|
||||
|
||||
const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1] = {
|
||||
const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES]
|
||||
[INTRA_MODES]
|
||||
[INTRA_MODES - 1] = {
|
||||
{ /* above = dc */
|
||||
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
|
||||
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
|
||||
@@ -109,7 +109,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */,
|
||||
{ 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */,
|
||||
{ 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */,
|
||||
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */,
|
||||
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d207 */,
|
||||
{ 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */,
|
||||
{ 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */
|
||||
}, { /* above = v */
|
||||
@@ -120,7 +120,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */,
|
||||
{ 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */,
|
||||
{ 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */,
|
||||
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */,
|
||||
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d207 */,
|
||||
{ 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */,
|
||||
{ 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */
|
||||
}, { /* above = h */
|
||||
@@ -131,7 +131,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */,
|
||||
{ 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */,
|
||||
{ 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */,
|
||||
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */,
|
||||
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d207 */,
|
||||
{ 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */,
|
||||
{ 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */
|
||||
}, { /* above = d45 */
|
||||
@@ -142,7 +142,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */,
|
||||
{ 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */,
|
||||
{ 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */,
|
||||
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */,
|
||||
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d207 */,
|
||||
{ 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */,
|
||||
{ 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */
|
||||
}, { /* above = d135 */
|
||||
@@ -153,7 +153,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */,
|
||||
{ 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */,
|
||||
{ 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */,
|
||||
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */,
|
||||
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d207 */,
|
||||
{ 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */,
|
||||
{ 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */
|
||||
}, { /* above = d117 */
|
||||
@@ -164,7 +164,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */,
|
||||
{ 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */,
|
||||
{ 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */,
|
||||
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */,
|
||||
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d207 */,
|
||||
{ 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */,
|
||||
{ 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */
|
||||
}, { /* above = d153 */
|
||||
@@ -175,10 +175,10 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */,
|
||||
{ 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */,
|
||||
{ 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */,
|
||||
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */,
|
||||
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d207 */,
|
||||
{ 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */,
|
||||
{ 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */
|
||||
}, { /* above = d27 */
|
||||
}, { /* above = d207 */
|
||||
{ 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */,
|
||||
{ 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */,
|
||||
{ 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */,
|
||||
@@ -186,7 +186,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */,
|
||||
{ 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */,
|
||||
{ 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */,
|
||||
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */,
|
||||
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d207 */,
|
||||
{ 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */,
|
||||
{ 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */
|
||||
}, { /* above = d63 */
|
||||
@@ -197,7 +197,7 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */,
|
||||
{ 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */,
|
||||
{ 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */,
|
||||
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */,
|
||||
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d207 */,
|
||||
{ 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */,
|
||||
{ 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */
|
||||
}, { /* above = tm */
|
||||
@@ -208,14 +208,14 @@ const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
|
||||
{ 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */,
|
||||
{ 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */,
|
||||
{ 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */,
|
||||
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */,
|
||||
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d207 */,
|
||||
{ 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */,
|
||||
{ 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */
|
||||
}
|
||||
};
|
||||
|
||||
static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
|
||||
[VP9_INTER_MODES - 1] = {
|
||||
[INTER_MODES - 1] = {
|
||||
{2, 173, 34}, // 0 = both zero mv
|
||||
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
|
||||
{7, 166, 63}, // 2 = two predicted mvs
|
||||
@@ -226,7 +226,7 @@ static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
|
||||
};
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
|
||||
const vp9_tree_index vp9_intra_mode_tree[INTRA_MODES * 2 - 2] = {
|
||||
-DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-V_PRED, 6, /* 2 = V_NODE */
|
||||
@@ -235,7 +235,7 @@ const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
|
||||
-D135_PRED, -D117_PRED, /* 5 = D135_NODE */
|
||||
-D45_PRED, 14, /* 6 = D45_NODE */
|
||||
-D63_PRED, 16, /* 7 = D63_NODE */
|
||||
-D153_PRED, -D27_PRED /* 8 = D153_NODE */
|
||||
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
|
||||
};
|
||||
|
||||
const vp9_tree_index vp9_inter_mode_tree[6] = {
|
||||
@@ -250,8 +250,8 @@ const vp9_tree_index vp9_partition_tree[6] = {
|
||||
-PARTITION_VERT, -PARTITION_SPLIT
|
||||
};
|
||||
|
||||
struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
|
||||
struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
|
||||
struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
|
||||
struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
|
||||
|
||||
struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
|
||||
|
||||
@@ -317,8 +317,8 @@ static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
|
||||
192, 128, 64
|
||||
};
|
||||
|
||||
static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
|
||||
[VP9_SWITCHABLE_FILTERS-1] = {
|
||||
static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1]
|
||||
[SWITCHABLE_FILTERS-1] = {
|
||||
{ 235, 162, },
|
||||
{ 36, 255, },
|
||||
{ 34, 3, },
|
||||
@@ -338,11 +338,11 @@ void vp9_init_mbmode_probs(VP9_COMMON *cm) {
|
||||
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
|
||||
}
|
||||
|
||||
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
|
||||
const vp9_tree_index vp9_switchable_interp_tree[SWITCHABLE_FILTERS*2-2] = {
|
||||
-EIGHTTAP, 2,
|
||||
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
|
||||
};
|
||||
struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
|
||||
struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
|
||||
|
||||
void vp9_entropy_mode_init() {
|
||||
vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree);
|
||||
@@ -400,17 +400,17 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
counts->single_ref[i][j]);
|
||||
|
||||
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
|
||||
update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
|
||||
update_mode_probs(INTER_MODES, vp9_inter_mode_tree,
|
||||
counts->inter_mode[i], pre_fc->inter_mode_probs[i],
|
||||
fc->inter_mode_probs[i], NEARESTMV);
|
||||
|
||||
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
|
||||
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
|
||||
update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
|
||||
counts->y_mode[i], pre_fc->y_mode_prob[i],
|
||||
fc->y_mode_prob[i], 0);
|
||||
|
||||
for (i = 0; i < VP9_INTRA_MODES; ++i)
|
||||
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
|
||||
for (i = 0; i < INTRA_MODES; ++i)
|
||||
update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
|
||||
counts->uv_mode[i], pre_fc->uv_mode_prob[i],
|
||||
fc->uv_mode_prob[i], 0);
|
||||
|
||||
@@ -421,8 +421,8 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
fc->partition_prob[INTER_FRAME][i], 0);
|
||||
|
||||
if (cm->mcomp_filter_type == SWITCHABLE) {
|
||||
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
|
||||
update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
|
||||
for (i = 0; i <= SWITCHABLE_FILTERS; i++)
|
||||
update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
|
||||
counts->switchable_interp[i],
|
||||
pre_fc->switchable_interp_prob[i],
|
||||
fc->switchable_interp_prob[i], 0);
|
||||
@@ -440,14 +440,12 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
fc->tx_probs.p8x8[i][j] = update_ct2(pre_fc->tx_probs.p8x8[i][j],
|
||||
branch_ct_8x8p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i],
|
||||
branch_ct_16x16p);
|
||||
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
|
||||
for (j = 0; j < TX_SIZES - 2; ++j)
|
||||
fc->tx_probs.p16x16[i][j] = update_ct2(pre_fc->tx_probs.p16x16[i][j],
|
||||
branch_ct_16x16p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i],
|
||||
branch_ct_32x32p);
|
||||
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
|
||||
for (j = 0; j < TX_SIZES - 1; ++j)
|
||||
fc->tx_probs.p32x32[i][j] = update_ct2(pre_fc->tx_probs.p32x32[i][j],
|
||||
branch_ct_32x32p[j]);
|
||||
@@ -472,14 +470,14 @@ static void set_default_lf_deltas(struct loopfilter *lf) {
|
||||
lf->mode_deltas[1] = 0;
|
||||
}
|
||||
|
||||
void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
|
||||
void vp9_setup_past_independence(VP9_COMMON *cm) {
|
||||
// Reset the segment feature data to the default stats:
|
||||
// Features disabled, 0, with delta coding (Default state).
|
||||
struct loopfilter *const lf = &xd->lf;
|
||||
struct loopfilter *const lf = &cm->lf;
|
||||
|
||||
int i;
|
||||
vp9_clearall_segfeatures(&xd->seg);
|
||||
xd->seg.abs_delta = SEGMENT_DELTADATA;
|
||||
vp9_clearall_segfeatures(&cm->seg);
|
||||
cm->seg.abs_delta = SEGMENT_DELTADATA;
|
||||
if (cm->last_frame_seg_map)
|
||||
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
|
||||
|
||||
@@ -512,10 +510,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
|
||||
cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->mi);
|
||||
|
||||
vp9_update_mode_info_border(cm, cm->prev_mip);
|
||||
vp9_update_mode_info_in_image(cm, cm->prev_mi);
|
||||
|
||||
vp9_zero(cm->ref_frame_sign_bias);
|
||||
|
||||
|
||||
@@ -14,10 +14,9 @@
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
|
||||
#define SUBMVREF_COUNT 5
|
||||
#define TX_SIZE_CONTEXTS 2
|
||||
#define VP9_MODE_UPDATE_PROB 252
|
||||
#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters
|
||||
#define MODE_UPDATE_PROB 252
|
||||
#define SWITCHABLE_FILTERS 3 // number of switchable filters
|
||||
|
||||
// #define MODE_STATS
|
||||
|
||||
@@ -35,32 +34,32 @@ struct tx_counts {
|
||||
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
|
||||
};
|
||||
|
||||
extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES]
|
||||
[VP9_INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
|
||||
extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
|
||||
[INTRA_MODES - 1];
|
||||
|
||||
extern const vp9_tree_index vp9_intra_mode_tree[];
|
||||
extern const vp9_tree_index vp9_inter_mode_tree[];
|
||||
|
||||
extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
|
||||
extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
|
||||
extern struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
|
||||
extern struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
|
||||
|
||||
// probability models for partition information
|
||||
extern const vp9_tree_index vp9_partition_tree[];
|
||||
extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
|
||||
|
||||
extern const vp9_tree_index vp9_switchable_interp_tree
|
||||
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
|
||||
[2 * (SWITCHABLE_FILTERS - 1)];
|
||||
|
||||
extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
|
||||
extern struct vp9_token vp9_switchable_interp_encodings[SWITCHABLE_FILTERS];
|
||||
|
||||
void vp9_entropy_mode_init();
|
||||
|
||||
void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd);
|
||||
void vp9_setup_past_independence(struct VP9Common *cm);
|
||||
|
||||
void vp9_init_mbmode_probs(struct VP9Common *x);
|
||||
void vp9_init_mbmode_probs(struct VP9Common *cm);
|
||||
|
||||
void vp9_adapt_mode_probs(struct VP9Common *);
|
||||
void vp9_adapt_mode_probs(struct VP9Common *cm);
|
||||
|
||||
void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
|
||||
unsigned int (*ct_32x32p)[2]);
|
||||
|
||||
@@ -39,12 +39,12 @@ const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2] = {
|
||||
};
|
||||
struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
|
||||
|
||||
const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
|
||||
const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2] = {
|
||||
-0, -1,
|
||||
};
|
||||
struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
|
||||
|
||||
const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
|
||||
const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2] = {
|
||||
-0, 2,
|
||||
-1, 4,
|
||||
-2, -3
|
||||
@@ -53,8 +53,8 @@ struct vp9_token vp9_mv_fp_encodings[4];
|
||||
|
||||
static const nmv_context default_nmv_context = {
|
||||
{32, 64, 96},
|
||||
{
|
||||
{ /* vert component */
|
||||
{ // NOLINT
|
||||
{ /* vert component */ // NOLINT
|
||||
128, /* sign */
|
||||
{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */
|
||||
{216}, /* class0 */
|
||||
@@ -64,7 +64,7 @@ static const nmv_context default_nmv_context = {
|
||||
160, /* class0_hp bit */
|
||||
128, /* hp */
|
||||
},
|
||||
{ /* hor component */
|
||||
{ /* hor component */ // NOLINT
|
||||
128, /* sign */
|
||||
{216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */
|
||||
{208}, /* class0 */
|
||||
@@ -79,20 +79,59 @@ static const nmv_context default_nmv_context = {
|
||||
|
||||
#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
|
||||
|
||||
static const uint8_t log_in_base_2[] = {
|
||||
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
|
||||
};
|
||||
|
||||
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
|
||||
MV_CLASS_TYPE c = MV_CLASS_0;
|
||||
if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
|
||||
else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
|
||||
else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
|
||||
else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3;
|
||||
else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4;
|
||||
else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
|
||||
else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
|
||||
else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
|
||||
else if (z < CLASS0_SIZE * 2048) c = MV_CLASS_8;
|
||||
else if (z < CLASS0_SIZE * 4096) c = MV_CLASS_9;
|
||||
else if (z < CLASS0_SIZE * 8192) c = MV_CLASS_10;
|
||||
else assert(0);
|
||||
if (z >= CLASS0_SIZE * 4096)
|
||||
c = MV_CLASS_10;
|
||||
else
|
||||
c = log_in_base_2[z >> 3];
|
||||
|
||||
if (offset)
|
||||
*offset = z - mv_class_base(c);
|
||||
return c;
|
||||
@@ -110,9 +149,7 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
|
||||
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
|
||||
int incr, int usehp) {
|
||||
int s, z, c, o, d, e, f;
|
||||
if (!incr)
|
||||
return;
|
||||
assert (v != 0); /* should not be zero */
|
||||
assert(v != 0); /* should not be zero */
|
||||
s = v < 0;
|
||||
comp_counts->sign[s] += incr;
|
||||
z = (s ? -v : v) - 1; /* magnitude - 1 */
|
||||
@@ -123,68 +160,44 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts,
|
||||
d = (o >> 3); /* int mv data */
|
||||
f = (o >> 1) & 3; /* fractional pel mv data */
|
||||
e = (o & 1); /* high precision mv data */
|
||||
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0[d] += incr;
|
||||
comp_counts->class0_fp[d][f] += incr;
|
||||
comp_counts->class0_hp[e] += usehp * incr;
|
||||
} else {
|
||||
int i;
|
||||
int b = c + CLASS0_BITS - 1; // number of bits
|
||||
for (i = 0; i < b; ++i)
|
||||
comp_counts->bits[i][((d >> i) & 1)] += incr;
|
||||
}
|
||||
|
||||
/* Code the fractional pel bits */
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0_fp[d][f] += incr;
|
||||
} else {
|
||||
comp_counts->fp[f] += incr;
|
||||
}
|
||||
|
||||
/* Code the high precision bit */
|
||||
if (usehp) {
|
||||
if (c == MV_CLASS_0) {
|
||||
comp_counts->class0_hp[e] += incr;
|
||||
} else {
|
||||
comp_counts->hp[e] += incr;
|
||||
}
|
||||
comp_counts->hp[e] += usehp * incr;
|
||||
}
|
||||
}
|
||||
|
||||
static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
|
||||
int v;
|
||||
vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount));
|
||||
for (v = 1; v <= MV_MAX; v++) {
|
||||
inc_mv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp);
|
||||
inc_mv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
|
||||
const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
|
||||
++counts->joints[j];
|
||||
|
||||
if (mv_joint_vertical(j))
|
||||
++counts->comps[0].mvcount[MV_MAX + mv->row];
|
||||
if (mv_joint_vertical(j)) {
|
||||
inc_mv_component(mv->row, &counts->comps[0], 1, 1);
|
||||
}
|
||||
|
||||
if (mv_joint_horizontal(j))
|
||||
++counts->comps[1].mvcount[MV_MAX + mv->col];
|
||||
if (mv_joint_horizontal(j)) {
|
||||
inc_mv_component(mv->col, &counts->comps[1], 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
|
||||
return merge_probs2(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
|
||||
}
|
||||
|
||||
void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
|
||||
counts_to_context(&nmv_count->comps[0], usehp);
|
||||
counts_to_context(&nmv_count->comps[1], usehp);
|
||||
}
|
||||
|
||||
static unsigned int adapt_probs(unsigned int i,
|
||||
vp9_tree tree,
|
||||
vp9_prob this_probs[],
|
||||
const vp9_prob last_probs[],
|
||||
const unsigned int num_events[]) {
|
||||
|
||||
|
||||
const unsigned int left = tree[i] <= 0
|
||||
? num_events[-tree[i]]
|
||||
: adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
|
||||
@@ -207,8 +220,6 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
|
||||
nmv_context *pre_ctx = &pre_fc->nmvc;
|
||||
nmv_context_counts *cts = &cm->counts.mv;
|
||||
|
||||
vp9_counts_process(cts, allow_hp);
|
||||
|
||||
adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#define VP9_COMMON_VP9_ENTROPYMV_H_
|
||||
|
||||
#include "vp9/common/vp9_treecoder.h"
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
|
||||
struct VP9Common;
|
||||
@@ -24,7 +24,7 @@ void vp9_init_mv_probs(struct VP9Common *cm);
|
||||
void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
|
||||
int vp9_use_mv_hp(const MV *ref);
|
||||
|
||||
#define VP9_NMV_UPDATE_PROB 252
|
||||
#define NMV_UPDATE_PROB 252
|
||||
|
||||
/* Symbols for coding which components are zero jointly */
|
||||
#define MV_JOINTS 4
|
||||
@@ -73,6 +73,10 @@ extern struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
|
||||
#define MV_MAX ((1 << MV_MAX_BITS) - 1)
|
||||
#define MV_VALS ((MV_MAX << 1) + 1)
|
||||
|
||||
#define MV_IN_USE_BITS 14
|
||||
#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
|
||||
#define MV_LOW (-(1 << MV_IN_USE_BITS))
|
||||
|
||||
extern const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2];
|
||||
extern struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
|
||||
|
||||
@@ -126,6 +130,4 @@ typedef struct {
|
||||
|
||||
void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
|
||||
|
||||
void vp9_counts_process(nmv_context_counts *NMVcount, int usehp);
|
||||
|
||||
#endif // VP9_COMMON_VP9_ENTROPYMV_H_
|
||||
|
||||
@@ -13,15 +13,16 @@
|
||||
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#define LOG2_MI_SIZE 3
|
||||
#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE) // 64 = 2^6
|
||||
#define MI_SIZE_LOG2 3
|
||||
#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6
|
||||
|
||||
#define MI_SIZE (1 << LOG2_MI_SIZE) // pixels per mi-unit
|
||||
#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE) // mi-units per max block
|
||||
#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit
|
||||
#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block
|
||||
|
||||
#define MI_MASK (MI_BLOCK_SIZE - 1)
|
||||
|
||||
typedef enum BLOCK_SIZE_TYPE {
|
||||
|
||||
typedef enum BLOCK_SIZE {
|
||||
BLOCK_4X4,
|
||||
BLOCK_4X8,
|
||||
BLOCK_8X4,
|
||||
@@ -35,15 +36,17 @@ typedef enum BLOCK_SIZE_TYPE {
|
||||
BLOCK_32X64,
|
||||
BLOCK_64X32,
|
||||
BLOCK_64X64,
|
||||
BLOCK_SIZE_TYPES
|
||||
} BLOCK_SIZE_TYPE;
|
||||
BLOCK_SIZES,
|
||||
BLOCK_INVALID = BLOCK_SIZES
|
||||
} BLOCK_SIZE;
|
||||
|
||||
typedef enum PARTITION_TYPE {
|
||||
PARTITION_NONE,
|
||||
PARTITION_HORZ,
|
||||
PARTITION_VERT,
|
||||
PARTITION_SPLIT,
|
||||
PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES
|
||||
PARTITION_TYPES,
|
||||
PARTITION_INVALID = PARTITION_TYPES
|
||||
} PARTITION_TYPE;
|
||||
|
||||
#define PARTITION_PLOFFSET 4 // number of probability models per block size
|
||||
|
||||
@@ -57,15 +57,23 @@ static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
|
||||
|
||||
void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst) {
|
||||
const int et_y = dst->border;
|
||||
const int el_y = dst->border;
|
||||
const int eb_y = dst->border + dst->y_height - src->y_height;
|
||||
const int er_y = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
const int et_uv = dst->border >> (dst->uv_height != dst->y_height);
|
||||
const int el_uv = dst->border >> (dst->uv_width != dst->y_width);
|
||||
const int eb_uv = et_uv + dst->uv_height - src->uv_height;
|
||||
const int er_uv = el_uv + dst->uv_width - src->uv_width;
|
||||
// Extend src frame in buffer
|
||||
// Altref filtering assumes 16 pixel extension
|
||||
const int et_y = 16;
|
||||
const int el_y = 16;
|
||||
// Motion estimation may use src block variance with the block size up
|
||||
// to 64x64, so the right and bottom need to be extended to a multiple of 64
|
||||
// or up to 16, whichever is greater.
|
||||
const int eb_y = MAX(ALIGN_POWER_OF_TWO(src->y_width, 6) - src->y_width,
|
||||
16);
|
||||
const int er_y = MAX(ALIGN_POWER_OF_TWO(src->y_height, 6) - src->y_height,
|
||||
16);
|
||||
const int uv_width_subsampling = (src->uv_width != src->y_width);
|
||||
const int uv_height_subsampling = (src->uv_height != src->y_height);
|
||||
const int et_uv = et_y >> uv_height_subsampling;
|
||||
const int el_uv = el_y >> uv_width_subsampling;
|
||||
const int eb_uv = eb_y >> uv_height_subsampling;
|
||||
const int er_uv = er_y >> uv_width_subsampling;
|
||||
|
||||
#if CONFIG_ALPHA
|
||||
const int et_a = dst->border >> (dst->alpha_height != dst->y_height);
|
||||
|
||||
@@ -8,14 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
|
||||
#include "vp9/common/vp9_filter.h"
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_bilinear_filters[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 },
|
||||
{ 0, 0, 0, 120, 8, 0, 0, 0 },
|
||||
{ 0, 0, 0, 112, 16, 0, 0, 0 },
|
||||
@@ -34,8 +32,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
|
||||
{ 0, 0, 0, 8, 120, 0, 0, 0 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
|
||||
/* Lagrangian interpolation filter */
|
||||
// Lagrangian interpolation filter
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{ 0, 1, -5, 126, 8, -3, 1, 0},
|
||||
{ -1, 3, -10, 122, 18, -6, 2, 0},
|
||||
@@ -54,9 +53,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
|
||||
{ 0, 1, -3, 8, 126, -5, 1, 0}
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
|
||||
= {
|
||||
/* dct based filter */
|
||||
// DCT based filter
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{-1, 3, -7, 127, 8, -3, 1, 0},
|
||||
{-2, 5, -13, 125, 17, -6, 3, -1},
|
||||
@@ -75,9 +74,9 @@ DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
|
||||
{0, 1, -3, 8, 127, -7, 3, -1}
|
||||
};
|
||||
|
||||
// freqmultiplier = 0.5
|
||||
DECLARE_ALIGNED(256, const int16_t,
|
||||
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
|
||||
/* freqmultiplier = 0.5 */
|
||||
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][SUBPEL_TAPS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0},
|
||||
{-3, -1, 32, 64, 38, 1, -3, 0},
|
||||
{-2, -2, 29, 63, 41, 2, -3, 0},
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user