Compare commits
471 Commits
v0.9.5
...
sandbox/aw
Author | SHA1 | Date | |
---|---|---|---|
![]() |
84ae235450 | ||
![]() |
cd103a5721 | ||
![]() |
05d9421e8b | ||
![]() |
c619f6cb0f | ||
![]() |
e1a8b6c8d5 | ||
![]() |
8fcb801d15 | ||
![]() |
d889035fe6 | ||
![]() |
9409e38050 | ||
![]() |
f64f425a50 | ||
![]() |
0d2abe3084 | ||
![]() |
1312a7a2e2 | ||
![]() |
487c0299c9 | ||
![]() |
a3399291ad | ||
![]() |
918fb5487e | ||
![]() |
63f15987a5 | ||
![]() |
e749ae510f | ||
![]() |
8608de1c6f | ||
![]() |
ab48305fb6 | ||
![]() |
5e7a3bb69a | ||
![]() |
33cefd6f6e | ||
![]() |
8861174624 | ||
![]() |
88841f1059 | ||
![]() |
70f30aa95d | ||
![]() |
538f110407 | ||
![]() |
e689a27d62 | ||
![]() |
fd09009227 | ||
![]() |
1aadcedcfb | ||
![]() |
4fd81a99f8 | ||
![]() |
d1abe62d1c | ||
![]() |
a9ce3e3834 | ||
![]() |
4b43167ad1 | ||
![]() |
6e156a4cd7 | ||
![]() |
921a32a306 | ||
![]() |
de4e9e3b44 | ||
![]() |
d4cdb683a4 | ||
![]() |
08702002e8 | ||
![]() |
aec5487cdd | ||
![]() |
2de858b9fc | ||
![]() |
9e9f61a317 | ||
![]() |
02423b2e92 | ||
![]() |
c32e0ecc59 | ||
![]() |
f212a98ee7 | ||
![]() |
91036996ac | ||
![]() |
610dd90288 | ||
![]() |
15f03c2f13 | ||
![]() |
f5c0d95e8c | ||
![]() |
af1acc851b | ||
![]() |
3d6815817c | ||
![]() |
fd7040d2b6 | ||
![]() |
82315be75d | ||
![]() |
8520b5c785 | ||
![]() |
ba11e24d47 | ||
![]() |
cec76a36d6 | ||
![]() |
9d138379a2 | ||
![]() |
f56b9ee92e | ||
![]() |
607f8420f3 | ||
![]() |
53e9987b4d | ||
![]() |
7d335868df | ||
![]() |
26b6a3b088 | ||
![]() |
0e43668546 | ||
![]() |
534ea700bd | ||
![]() |
b843aa4eda | ||
![]() |
f0c22a3f33 | ||
![]() |
49c31dc2b4 | ||
![]() |
6fdc9aa79f | ||
![]() |
4be062bbc3 | ||
![]() |
f5e433464b | ||
![]() |
beaafefcf1 | ||
![]() |
8edaf6e2f2 | ||
![]() |
4cde2ab765 | ||
![]() |
edfc93aeba | ||
![]() |
577910b464 | ||
![]() |
2fced87e75 | ||
![]() |
73065b67e4 | ||
![]() |
2cbd962088 | ||
![]() |
769c74c0ac | ||
![]() |
500fec2d5f | ||
![]() |
a61785b6a1 | ||
![]() |
bfe803bda3 | ||
![]() |
429dc676b1 | ||
![]() |
185557344a | ||
![]() |
de5182eef3 | ||
![]() |
8431e768c9 | ||
![]() |
de50520a8c | ||
![]() |
346b3e7ce9 | ||
![]() |
71bcd9f1af | ||
![]() |
6795e256c1 | ||
![]() |
8c48c943e7 | ||
![]() |
aa4a90c880 | ||
![]() |
2ec0cfbe99 | ||
![]() |
d0ec28b3d3 | ||
![]() |
e54dcfe88d | ||
![]() |
52f6e28e9e | ||
![]() |
3788b3564c | ||
![]() |
27972d2c1d | ||
![]() |
5c60a646f3 | ||
![]() |
75051c8b59 | ||
![]() |
5db0eeea21 | ||
![]() |
6e73748492 | ||
![]() |
170b87390e | ||
![]() |
2ae91fbef0 | ||
![]() |
e34e417d94 | ||
![]() |
3c9dd6c3ef | ||
![]() |
c5c5dcd0be | ||
![]() |
29c46b64a2 | ||
![]() |
3dc382294b | ||
![]() |
3f6f7289aa | ||
![]() |
b2aa401776 | ||
![]() |
76ec21928c | ||
![]() |
9c836daf65 | ||
![]() |
3ae2465788 | ||
![]() |
7ab08e1fee | ||
![]() |
128d2c23b3 | ||
![]() |
6daacdb785 | ||
![]() |
ed40ff9e2d | ||
![]() |
f3e9e2a0f8 | ||
![]() |
a0306ea660 | ||
![]() |
c5a049babd | ||
![]() |
5c24071504 | ||
![]() |
43baf7ff21 | ||
![]() |
7b8e7f0f3a | ||
![]() |
4561109a69 | ||
![]() |
7966dd5287 | ||
![]() |
fa836faede | ||
![]() |
56efffdcd1 | ||
![]() |
fb037ec05b | ||
![]() |
419f638910 | ||
![]() |
95adf3df77 | ||
![]() |
859abd6b5d | ||
![]() |
8432a1729f | ||
![]() |
e8f7b0f7f5 | ||
![]() |
244e2e1451 | ||
![]() |
5091e01ea1 | ||
![]() |
ddd260eb62 | ||
![]() |
e6948bf0f9 | ||
![]() |
de87c420ef | ||
![]() |
0eccee4378 | ||
![]() |
5d1d9911cb | ||
![]() |
1016b856d1 | ||
![]() |
fe9a604b1e | ||
![]() |
bc9c30a003 | ||
![]() |
9fc8cb39aa | ||
![]() |
8e87d58712 | ||
![]() |
0491c2cfc8 | ||
![]() |
3fae3283e6 | ||
![]() |
d05c4d8841 | ||
![]() |
e38c1680d6 | ||
![]() |
77ed11c506 | ||
![]() |
4a742e5c79 | ||
![]() |
27c04aaa67 | ||
![]() |
0bc31f1887 | ||
![]() |
fb37eda3e2 | ||
![]() |
05d75b4353 | ||
![]() |
eed2ce58e3 | ||
![]() |
84f7f20985 | ||
![]() |
1de99a2a81 | ||
![]() |
36be4f7f06 | ||
![]() |
a1cfcb413d | ||
![]() |
6f5189c044 | ||
![]() |
06ce0d8830 | ||
![]() |
987ac89403 | ||
![]() |
cfaee9f7c6 | ||
![]() |
3e6d476ac3 | ||
![]() |
d96ba65a23 | ||
![]() |
4decd27947 | ||
![]() |
31dab574cc | ||
![]() |
da761c9a22 | ||
![]() |
e4fa638653 | ||
![]() |
1fae7018a8 | ||
![]() |
d8fc974ac0 | ||
![]() |
6da2018789 | ||
![]() |
1771722b2f | ||
![]() |
8e17e82d9e | ||
![]() |
861175ef00 | ||
![]() |
d53492bba4 | ||
![]() |
658454a04c | ||
![]() |
b862c108dd | ||
![]() |
aee120afb9 | ||
![]() |
8ae92aef66 | ||
![]() |
e6db21ecc4 | ||
![]() |
418f4219fa | ||
![]() |
7af0d906e3 | ||
![]() |
945dad277d | ||
![]() |
c764c2a20f | ||
![]() |
3ed8fe8778 | ||
![]() |
cbf923b12c | ||
![]() |
d371ca93e5 | ||
![]() |
597d02b508 | ||
![]() |
fb5a692d27 | ||
![]() |
c6ef75690f | ||
![]() |
b2ae57f1b6 | ||
![]() |
562f1470ce | ||
![]() |
ac10665ad8 | ||
![]() |
07f7b66fae | ||
![]() |
c351aa7f1b | ||
![]() |
f42d52e6bd | ||
![]() |
da9402fbf6 | ||
![]() |
da227b901d | ||
![]() |
0c2cfff9b0 | ||
![]() |
0030303b69 | ||
![]() |
7725a7eb56 | ||
![]() |
27dad21548 | ||
![]() |
94d4fee08f | ||
![]() |
2debd5b5f7 | ||
![]() |
404e998eb7 | ||
![]() |
d3dfcde0f7 | ||
![]() |
d419b93e3e | ||
![]() |
0ff10bb1f7 | ||
![]() |
bb6bcbccda | ||
![]() |
353246bd60 | ||
![]() |
9d0b2cbbce | ||
![]() |
1ef86980b9 | ||
![]() |
4f8a166058 | ||
![]() |
6f53e59641 | ||
![]() |
02321de0f2 | ||
![]() |
41e6eceb28 | ||
![]() |
7d8199f0c3 | ||
![]() |
19054ab6da | ||
![]() |
fffa2a61d7 | ||
![]() |
c2b43164bd | ||
![]() |
9954d05ca6 | ||
![]() |
a39b5af10b | ||
![]() |
315e3c2518 | ||
![]() |
85e79ce288 | ||
![]() |
c96031da69 | ||
![]() |
cb14764fab | ||
![]() |
e5aaac24bb | ||
![]() |
cc17629f30 | ||
![]() |
13db80c282 | ||
![]() |
40dcae9c2e | ||
![]() |
615c90c948 | ||
![]() |
ddacf1cf69 | ||
![]() |
48140167cd | ||
![]() |
3273c7b679 | ||
![]() |
eaadfb5869 | ||
![]() |
adaf2b697c | ||
![]() |
58d2e70fc5 | ||
![]() |
0905af38fc | ||
![]() |
bb9c95ea53 | ||
![]() |
350ffe8dae | ||
![]() |
b601eb8cda | ||
![]() |
63fc44dfa5 | ||
![]() |
6bf7e2cc37 | ||
![]() |
ffc6aeef14 | ||
![]() |
c0a9cbebe1 | ||
![]() |
bf5f585b0d | ||
![]() |
209def2d72 | ||
![]() |
4aa12b6c5f | ||
![]() |
a870315629 | ||
![]() |
72ebafff51 | ||
![]() |
e5904f2d5e | ||
![]() |
07a7c08aef | ||
![]() |
a5ecaca6a7 | ||
![]() |
b18df82e1d | ||
![]() |
4e7e79f770 | ||
![]() |
385c2a76d1 | ||
![]() |
9e7fec216e | ||
![]() |
317f0da91e | ||
![]() |
4a15e55793 | ||
![]() |
60fde4d342 | ||
![]() |
6d19d40718 | ||
![]() |
f6214d1db8 | ||
![]() |
2d03f073a7 | ||
![]() |
408a8adc15 | ||
![]() |
8f279596cb | ||
![]() |
f3cb9ae459 | ||
![]() |
7cbe684ef5 | ||
![]() |
e9f513d74a | ||
![]() |
dcb23e2aaa | ||
![]() |
11a222f5d9 | ||
![]() |
73207a1d8b | ||
![]() |
27000ed6d9 | ||
![]() |
8a5c255b3d | ||
![]() |
bb30ffc4dc | ||
![]() |
3ee4e1e79f | ||
![]() |
3c18a2bb2e | ||
![]() |
cac54404b9 | ||
![]() |
c4887da39c | ||
![]() |
35bb74a6bd | ||
![]() |
e8e09d33df | ||
![]() |
82266a1ac9 | ||
![]() |
be3e0ff7c3 | ||
![]() |
0def48b60f | ||
![]() |
a3f71ccff6 | ||
![]() |
2caa36aa4f | ||
![]() |
999e155f55 | ||
![]() |
53d8e9dc97 | ||
![]() |
907e98fbb5 | ||
![]() |
58f19cc697 | ||
![]() |
dcaaadd8ed | ||
![]() |
af7d23c9b4 | ||
![]() |
2168a94495 | ||
![]() |
4e149bb447 | ||
![]() |
3bf235a4c9 | ||
![]() |
a69c18980f | ||
![]() |
336aa0b7da | ||
![]() |
eb8b4d9a99 | ||
![]() |
0ee525d6de | ||
![]() |
d3e9409bb0 | ||
![]() |
0822a62f40 | ||
![]() |
0cdfef1e22 | ||
![]() |
8064583d26 | ||
![]() |
419553258d | ||
![]() |
815e1e9fe4 | ||
![]() |
06e7320c3e | ||
![]() |
e867516843 | ||
![]() |
2f0331c90c | ||
![]() |
67fb3a5155 | ||
![]() |
f97f2b1bb6 | ||
![]() |
ce6c954d2e | ||
![]() |
edcf74c6ad | ||
![]() |
d6d5d43708 | ||
![]() |
57136a268a | ||
![]() |
cb791aaa2f | ||
![]() |
339c512762 | ||
![]() |
15f9bea73b | ||
![]() |
a1a4d23797 | ||
![]() |
3aafb47729 | ||
![]() |
8f711db4e8 | ||
![]() |
415371c9d9 | ||
![]() |
2c1b06e672 | ||
![]() |
72e22b0bb8 | ||
![]() |
c8338ebf7a | ||
![]() |
b082790c7d | ||
![]() |
eda7d538bf | ||
![]() |
55acda98f7 | ||
![]() |
96fd758ea9 | ||
![]() |
6ff2b0883a | ||
![]() |
e88d7ab245 | ||
![]() |
f50f2fd2a7 | ||
![]() |
1546e6a8c9 | ||
![]() |
48c28fc42c | ||
![]() |
3675b2291c | ||
![]() |
cf7c4732e5 | ||
![]() |
405499d835 | ||
![]() |
c28b10adeb | ||
![]() |
e0846c9c8c | ||
![]() |
ba976eaa9b | ||
![]() |
3af3593c8e | ||
![]() |
f7e2f1fedf | ||
![]() |
dd314351e6 | ||
![]() |
6dbdfe3422 | ||
![]() |
8b0cf5f79d | ||
![]() |
1942eeb886 | ||
![]() |
431dac08d1 | ||
![]() |
b095d9df3c | ||
![]() |
de4e8185e9 | ||
![]() |
a864678cdb | ||
![]() |
3fb4abf3d1 | ||
![]() |
516ea8460b | ||
![]() |
bf53ec492d | ||
![]() |
e463b95b4e | ||
![]() |
a5a8d92976 | ||
![]() |
95dbe9ccfd | ||
![]() |
0f5264b584 | ||
![]() |
74e8446e58 | ||
![]() |
8c4552fb36 | ||
![]() |
d3c7365b46 | ||
![]() |
e2de094c99 | ||
![]() |
bd9b383db2 | ||
![]() |
30830d5a7c | ||
![]() |
20b855c33e | ||
![]() |
4b6219cb33 | ||
![]() |
092b5bef37 | ||
![]() |
6cb708d501 | ||
![]() |
c49f49b113 | ||
![]() |
fc6ce744a6 | ||
![]() |
b0da9b399d | ||
![]() |
2a87491fb0 | ||
![]() |
64baa8df2e | ||
![]() |
81cdeb7117 | ||
![]() |
4fbd0227f5 | ||
![]() |
08706a3ea7 | ||
![]() |
3ac73173a4 | ||
![]() |
23aa13d92c | ||
![]() |
7fb0f86863 | ||
![]() |
64f3d91579 | ||
![]() |
825adc464f | ||
![]() |
41f4458a03 | ||
![]() |
3809d7bbd9 | ||
![]() |
398aa81849 | ||
![]() |
b1aa54ab26 | ||
![]() |
b7b1e6fb55 | ||
![]() |
136bd2455e | ||
![]() |
97a86c5b13 | ||
![]() |
0ced701487 | ||
![]() |
e0cf330cde | ||
![]() |
cb9698951c | ||
![]() |
c63fc881e1 | ||
![]() |
160f3c7e9e | ||
![]() |
d88da98614 | ||
![]() |
718c19711a | ||
![]() |
f661fa1f24 | ||
![]() |
062980cc48 | ||
![]() |
7c03a1c308 | ||
![]() |
9520f4b3cc | ||
![]() |
2fa5d5a26d | ||
![]() |
d283d9bb30 | ||
![]() |
8534071de0 | ||
![]() |
ccb0348473 | ||
![]() |
cec6a596b5 | ||
![]() |
c3bbb29164 | ||
![]() |
5e76dfcc70 | ||
![]() |
9c8ad79fdc | ||
![]() |
3430820bbe | ||
![]() |
fd9f9dc054 | ||
![]() |
19e32ac7c7 | ||
![]() |
78cbe51bc3 | ||
![]() |
19255b8fe0 | ||
![]() |
ad6150f769 | ||
![]() |
1753f0d208 | ||
![]() |
70b885a0e8 | ||
![]() |
ed5ab7fa49 | ||
![]() |
9a6740af80 | ||
![]() |
f7670acc68 | ||
![]() |
f874391e02 | ||
![]() |
7ee516d2b3 | ||
![]() |
8d94796cad | ||
![]() |
79e2b1f39b | ||
![]() |
99d02c0f9f | ||
![]() |
69ee697fef | ||
![]() |
4fedfa75f8 | ||
![]() |
faaa57b945 | ||
![]() |
d49da085c0 | ||
![]() |
e180255375 | ||
![]() |
f4709d2895 | ||
![]() |
373f5c3144 | ||
![]() |
73189f21b3 | ||
![]() |
8c2dfde3ed | ||
![]() |
ef2f27f10e | ||
![]() |
0a49747b01 | ||
![]() |
58083cb34d | ||
![]() |
213f7b0907 | ||
![]() |
692b10858d | ||
![]() |
9b1ece2cca | ||
![]() |
5f0e0617ba | ||
![]() |
647df00f30 | ||
![]() |
513f8e6814 | ||
![]() |
6adbe09058 | ||
![]() |
458f4fedd2 | ||
![]() |
4d1b0d2a2d | ||
![]() |
9fb80f7170 | ||
![]() |
f7e187d362 | ||
![]() |
5551ef0ef4 | ||
![]() |
bd05d9e480 | ||
![]() |
507eb4b577 | ||
![]() |
0e7b60617f | ||
![]() |
a5397dbaf1 | ||
![]() |
77e6b4504b | ||
![]() |
4b9dc57260 | ||
![]() |
0a29bd9793 | ||
![]() |
b8f43aec66 | ||
![]() |
c377bf0eec | ||
![]() |
90c505f218 | ||
![]() |
9f61a83bf9 | ||
![]() |
ff4a71f4c2 | ||
![]() |
20745f8442 | ||
![]() |
0684c647ef | ||
![]() |
dcee88ea37 | ||
![]() |
6614563b8f | ||
![]() |
f57fc7bcc6 | ||
![]() |
9d93dabee0 | ||
![]() |
7e3a1e7361 | ||
![]() |
c4d7e5e67e | ||
![]() |
2b4913eb0d | ||
![]() |
a097e18964 | ||
![]() |
f26fe7d93b | ||
![]() |
3d84da6b8d | ||
![]() |
71ecb5d7d9 | ||
![]() |
15acc84f10 | ||
![]() |
8d0f7a01e6 |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -60,3 +60,9 @@
|
||||
/vpx_config.h
|
||||
/vpx_version.h
|
||||
TAGS
|
||||
vpxdec
|
||||
vpxenc
|
||||
.project
|
||||
.cproject
|
||||
*.csv
|
||||
*.oclpj
|
||||
|
2
.mailmap
2
.mailmap
@@ -1,2 +1,4 @@
|
||||
Adrian Grange <agrange@google.com>
|
||||
Johann Koenig <johannkoenig@google.com>
|
||||
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
|
||||
Tom Finegan <tomfinegan@google.com>
|
||||
|
9
AUTHORS
9
AUTHORS
@@ -4,13 +4,18 @@
|
||||
Aaron Watry <awatry@gmail.com>
|
||||
Adrian Grange <agrange@google.com>
|
||||
Alex Converse <alex.converse@gmail.com>
|
||||
Andoni Morales Alastruey <ylatuya@gmail.com>
|
||||
Andres Mejia <mcitadel@gmail.com>
|
||||
Attila Nagy <attilanagy@google.com>
|
||||
Fabio Pedretti <fabio.ped@libero.it>
|
||||
Frank Galligan <fgalligan@google.com>
|
||||
Fredrik Söderquist <fs@opera.com>
|
||||
Fritz Koenig <frkoenig@google.com>
|
||||
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
|
||||
Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
Guillermo Ballester Valor <gbvalor@gmail.com>
|
||||
Henrik Lundin <hlundin@google.com>
|
||||
James Berry <jamesberry@google.com>
|
||||
James Zern <jzern@google.com>
|
||||
Jan Kratochvil <jan.kratochvil@redhat.com>
|
||||
Jeff Muizelaar <jmuizelaar@mozilla.com>
|
||||
@@ -23,10 +28,14 @@ Luca Barbato <lu_zero@gentoo.org>
|
||||
Makoto Kato <makoto.kt@gmail.com>
|
||||
Martin Ettl <ettl.martin78@googlemail.com>
|
||||
Michael Kohler <michaelkohler@live.com>
|
||||
Mikhal Shemer <mikhal@google.com>
|
||||
Pascal Massimino <pascal.massimino@gmail.com>
|
||||
Patrik Westin <patrik.westin@gmail.com>
|
||||
Paul Wilkins <paulwilkins@google.com>
|
||||
Pavol Rusnak <stick@gk2.sk>
|
||||
Philip Jägenstedt <philipj@opera.com>
|
||||
Scott LaVarnway <slavarnway@google.com>
|
||||
Tero Rintaluoma <teror@google.com>
|
||||
Timothy B. Terriberry <tterribe@xiph.org>
|
||||
Tom Finegan <tomfinegan@google.com>
|
||||
Yaowu Xu <yaowu@google.com>
|
||||
|
77
CHANGELOG
77
CHANGELOG
@@ -1,3 +1,80 @@
|
||||
2011-03-07 v0.9.6 "Bali"
|
||||
Our second named release, focused on a faster, higher quality, encoder.
|
||||
|
||||
- Upgrading:
|
||||
This release is backwards compatible with Aylesbury (v0.9.5). Users
|
||||
of older releases should refer to the Upgrading notes in this
|
||||
document for that release.
|
||||
|
||||
- Enhancements:
|
||||
vpxenc --psnr shows a summary when encode completes
|
||||
--tune=ssim option to enable activity masking
|
||||
improved postproc visualizations for development
|
||||
updated support for Apple iOS to SDK 4.2
|
||||
query decoder to determine which reference frames were updated
|
||||
implemented error tracking in the decoder
|
||||
fix pipe support on windows
|
||||
|
||||
- Speed:
|
||||
Primary focus was on good quality mode, speed 0. Average improvement
|
||||
on x86 about 40%, up to 100% on user-generated content at that speed.
|
||||
Best quality mode speed improved 35%, and realtime speed 10-20%. This
|
||||
release also saw significant improvement in realtime encoding speed
|
||||
on ARM platforms.
|
||||
|
||||
Improved encoder threading
|
||||
Dont pick encoder filter level when loopfilter is disabled.
|
||||
Avoid double copying of key frames into alt and golden buffer
|
||||
FDCT optimizations.
|
||||
x86 sse2 temporal filter
|
||||
SSSE3 version of fast quantizer
|
||||
vp8_rd_pick_best_mbsegmentation code restructure
|
||||
Adjusted breakout RD for SPLITMV
|
||||
Changed segmentation check order
|
||||
Improved rd_pick_intra4x4block
|
||||
Adds armv6 optimized variance calculation
|
||||
ARMv6 optimized sad16x16
|
||||
ARMv6 optimized half pixel variance calculations
|
||||
Full search SAD function optimization in SSE4.1
|
||||
Improve MV prediction accuracy to achieve performance gain
|
||||
Improve MV prediction in vp8_pick_inter_mode() for speed>3
|
||||
|
||||
- Quality:
|
||||
Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release
|
||||
also includes support for "activity masking," which greatly improves
|
||||
SSIM at the expense of PSNR. For now, this feature is available with
|
||||
the --tune=ssim option. Further experimentation in this area
|
||||
is ongoing. This release also introduces a new rate control mode
|
||||
called "CQ," which changes the allocation of bits within a clip to
|
||||
the sections where they will have the most visual impact.
|
||||
|
||||
Tuning for the more exact quantizer.
|
||||
Relax rate control for last few frames
|
||||
CQ Mode
|
||||
Limit key frame quantizer for forced key frames.
|
||||
KF/GF Pulsing
|
||||
Add simple version of activity masking.
|
||||
make rdmult adaptive for intra in quantizer RDO
|
||||
cap the best quantizer for 2nd order DC
|
||||
change the threshold of DC check for encode breakout
|
||||
|
||||
- Bug Fixes:
|
||||
Fix crash on Sparc Solaris.
|
||||
Fix counter of fixed keyframe distance
|
||||
ARNR filter pointer update bug fix
|
||||
Fixed use of motion percentage in KF/GF group calc
|
||||
Changed condition for using RD in Intra Mode
|
||||
Fix encoder real-time only configuration.
|
||||
Fix ARM encoder crash with multiple token partitions
|
||||
Fixed bug first cluster timecode of webm file is wrong.
|
||||
Fixed various encoder bugs with odd-sized images
|
||||
vp8e_get_preview fixed when spatial resampling enabled
|
||||
quantizer: fix assertion in fast quantizer path
|
||||
Allocate source buffers to be multiples of 16
|
||||
Fix for manual Golden frame frequency
|
||||
Fix drastic undershoot in long form content
|
||||
|
||||
|
||||
2010-10-28 v0.9.5 "Aylesbury"
|
||||
Our first named release, focused on a faster decoder, and a better encoder.
|
||||
|
||||
|
4
README
4
README
@@ -45,18 +45,14 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
armv5te-linux-rvct
|
||||
armv5te-linux-gcc
|
||||
armv5te-symbian-gcc
|
||||
armv5te-wince-vs8
|
||||
armv6-darwin-gcc
|
||||
armv6-linux-rvct
|
||||
armv6-linux-gcc
|
||||
armv6-symbian-gcc
|
||||
armv6-wince-vs8
|
||||
iwmmxt-linux-rvct
|
||||
iwmmxt-linux-gcc
|
||||
iwmmxt-wince-vs8
|
||||
iwmmxt2-linux-rvct
|
||||
iwmmxt2-linux-gcc
|
||||
iwmmxt2-wince-vs8
|
||||
armv7-linux-rvct
|
||||
armv7-linux-gcc
|
||||
mips32-linux-gcc
|
||||
|
45
args.c
45
args.c
@@ -135,6 +135,17 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
|
||||
def->long_name, long_val);
|
||||
|
||||
fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
|
||||
|
||||
if(def->enums)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
|
||||
fprintf(fp, " %-37s\t ", "");
|
||||
|
||||
for(listptr = def->enums; listptr->name; listptr++)
|
||||
fprintf(fp, "%s%s", listptr->name,
|
||||
listptr[1].name ? ", " : "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,3 +229,37 @@ struct vpx_rational arg_parse_rational(const struct arg *arg)
|
||||
|
||||
return rat;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum(const struct arg *arg)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
long int rawval;
|
||||
char *endptr;
|
||||
|
||||
/* First see if the value can be parsed as a raw value */
|
||||
rawval = strtol(arg->val, &endptr, 10);
|
||||
if (arg->val[0] != '\0' && endptr[0] == '\0')
|
||||
{
|
||||
/* Got a raw value, make sure it's valid */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(listptr->val == rawval)
|
||||
return rawval;
|
||||
}
|
||||
|
||||
/* Next see if it can be parsed as a string */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(!strcmp(arg->val, listptr->name))
|
||||
return listptr->val;
|
||||
|
||||
die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum_or_int(const struct arg *arg)
|
||||
{
|
||||
if(arg->def->enums)
|
||||
return arg_parse_enum(arg);
|
||||
return arg_parse_int(arg);
|
||||
}
|
||||
|
12
args.h
12
args.h
@@ -22,14 +22,23 @@ struct arg
|
||||
const struct arg_def *def;
|
||||
};
|
||||
|
||||
struct arg_enum_list
|
||||
{
|
||||
const char *name;
|
||||
int val;
|
||||
};
|
||||
#define ARG_ENUM_LIST_END {0}
|
||||
|
||||
typedef struct arg_def
|
||||
{
|
||||
const char *short_name;
|
||||
const char *long_name;
|
||||
int has_val;
|
||||
const char *desc;
|
||||
const struct arg_enum_list *enums;
|
||||
} arg_def_t;
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d}
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
|
||||
#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
|
||||
#define ARG_DEF_LIST_END {0}
|
||||
|
||||
struct arg arg_init(char **argv);
|
||||
@@ -41,4 +50,5 @@ char **argv_dup(int argc, const char **argv);
|
||||
unsigned int arg_parse_uint(const struct arg *arg);
|
||||
int arg_parse_int(const struct arg *arg);
|
||||
struct vpx_rational arg_parse_rational(const struct arg *arg);
|
||||
int arg_parse_enum_or_int(const struct arg *arg);
|
||||
#endif
|
||||
|
@@ -1,20 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<VisualStudioToolFile
|
||||
Name="armasm"
|
||||
Version="8.00"
|
||||
>
|
||||
<Rules>
|
||||
<CustomBuildRule
|
||||
Name="ARMASM"
|
||||
DisplayName="Armasm Assembler"
|
||||
CommandLine="armasm -o "$(IntDir)\$(InputName).obj" $(InputPath) -32 -ARCH 5
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
FileExtensions="*.asm"
|
||||
ExecutionDescription="Assembling $(InputName).asm"
|
||||
ShowOnlyRuleProperties="false"
|
||||
>
|
||||
<Properties>
|
||||
</Properties>
|
||||
</CustomBuildRule>
|
||||
</Rules>
|
||||
</VisualStudioToolFile>
|
@@ -1,20 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<VisualStudioToolFile
|
||||
Name="armasm"
|
||||
Version="8.00"
|
||||
>
|
||||
<Rules>
|
||||
<CustomBuildRule
|
||||
Name="ARMASM"
|
||||
DisplayName="Armasm Assembler"
|
||||
CommandLine="armasm -o "$(IntDir)\$(InputName).obj" $(InputPath) -32 -ARCH 6
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
FileExtensions="*.asm"
|
||||
ExecutionDescription="Assembling $(InputName).asm"
|
||||
ShowOnlyRuleProperties="false"
|
||||
>
|
||||
<Properties>
|
||||
</Properties>
|
||||
</CustomBuildRule>
|
||||
</Rules>
|
||||
</VisualStudioToolFile>
|
@@ -1,20 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<VisualStudioToolFile
|
||||
Name="armasm"
|
||||
Version="8.00"
|
||||
>
|
||||
<Rules>
|
||||
<CustomBuildRule
|
||||
Name="ARMASM"
|
||||
DisplayName="Armasm Assembler"
|
||||
CommandLine="armasm -o "$(IntDir)\$(InputName).obj" $(InputPath) -32 -cpu XSCALE
"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
FileExtensions="*.asm"
|
||||
ExecutionDescription="Assembling $(InputName).asm"
|
||||
ShowOnlyRuleProperties="false"
|
||||
>
|
||||
<Properties>
|
||||
</Properties>
|
||||
</CustomBuildRule>
|
||||
</Rules>
|
||||
</VisualStudioToolFile>
|
@@ -1,13 +0,0 @@
|
||||
@echo off
|
||||
REM Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
REM
|
||||
REM Use of this source code is governed by a BSD-style license
|
||||
REM that can be found in the LICENSE file in the root of the source
|
||||
REM tree. An additional intellectual property rights grant can be found
|
||||
REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
|
||||
cl /I ".\\" /I "..\vp6_decoder_sdk" /I "..\vp6_decoder_sdk\vpx_ports" /D "NDEBUG" /D "_WIN32_WCE=0x420" /D "UNDER_CE" /D "WIN32_PLATFORM_PSPC" /D "WINCE" /D "_LIB" /D "ARM" /D "_ARM_" /D "_UNICODE" /D "UNICODE" /FD /EHsc /MT /GS- /fp:fast /GR- /Fo"Pocket_PC_2003__ARMV4_\%1/" /Fd"Pocket_PC_2003__ARMV4_\%1/vc80.pdb" /W3 /nologo /c /TC ..\vp6_decoder_sdk\vp6_decoder\algo\common\arm\dec_asm_offsets_arm.c
|
||||
obj_int_extract.exe rvds "Pocket_PC_2003__ARMV4_\%1/dec_asm_offsets_arm.obj"
|
@@ -1,88 +0,0 @@
|
||||
Microsoft Visual Studio Solution File, Format Version 9.00
|
||||
# Visual Studio 2005
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcproj", "{BA5FE66F-38DD-E034-F542-B1578C5FB950}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} = {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_int_extract", "obj_int_extract.vcproj", "{E1360C65-D375-4335-8057-7ED99CC3F9B2}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx", "vpx.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xma", "xma.vcproj", "{A955FC4A-73F1-44F7-135E-30D84D32F022}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2}
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} = {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Mixed Platforms = Debug|Mixed Platforms
|
||||
Debug|Pocket PC 2003 (ARMV4) = Debug|Pocket PC 2003 (ARMV4)
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Mixed Platforms = Release|Mixed Platforms
|
||||
Release|Pocket PC 2003 (ARMV4) = Release|Pocket PC 2003 (ARMV4)
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Mixed Platforms.ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Mixed Platforms.Build.0 = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Win32.ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Win32.Build.0 = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Mixed Platforms.ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Mixed Platforms.Build.0 = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Win32.Build.0 = Release|Win32
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 (ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4)
|
||||
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4)
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
@@ -152,8 +152,8 @@ endif
|
||||
# Rule to extract assembly constants from C sources
|
||||
#
|
||||
obj_int_extract: build/make/obj_int_extract.c
|
||||
$(if $(quiet),echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -o $@ $<
|
||||
$(if $(quiet),@echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
|
||||
CLEAN-OBJS += obj_int_extract
|
||||
|
||||
#
|
||||
@@ -255,7 +255,7 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
|
||||
endif
|
||||
|
||||
#
|
||||
# Configuration dependant rules
|
||||
# Configuration dependent rules
|
||||
#
|
||||
$(call pairmap,install_map_templates,$(INSTALL_MAPS))
|
||||
|
||||
@@ -331,11 +331,8 @@ ifneq ($(call enabled,DIST-SRCS),)
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/yasm.rules
|
||||
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
|
||||
#
|
||||
# This isn't really ARCH_ARM dependent, it's dependant on whether we're
|
||||
# using assembly code or not (CONFIG_OPTIMIZATIONS maybe). Just use
|
||||
# this for now.
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/obj_int_extract.c
|
||||
# Include obj_int_extract if we use offsets from asm_*_offsets
|
||||
DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
|
||||
DIST-SRCS-yes += $(target:-$(TOOLCHAIN)=).mk
|
||||
endif
|
||||
|
@@ -17,15 +17,17 @@ for i; do
|
||||
on_of=1
|
||||
elif [ "$i" == "-v" ]; then
|
||||
verbose=1
|
||||
elif [ "$i" == "-g" ]; then
|
||||
args="${args} --debug"
|
||||
elif [ "$on_of" == "1" ]; then
|
||||
outfile=$i
|
||||
on_of=0
|
||||
on_of=0
|
||||
elif [ -f "$i" ]; then
|
||||
infiles="$infiles $i"
|
||||
elif [ "${i:0:2}" == "-l" ]; then
|
||||
libs="$libs ${i#-l}"
|
||||
elif [ "${i:0:2}" == "-L" ]; then
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
else
|
||||
args="${args} ${i}"
|
||||
fi
|
||||
|
@@ -78,11 +78,12 @@ Build options:
|
||||
--log=yes|no|FILE file configure log is written to [config.err]
|
||||
--target=TARGET target platform tuple [generic-gnu]
|
||||
--cpu=CPU optimize for a specific cpu rather than a family
|
||||
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
|
||||
${toggle_extra_warnings} emit harmless warnings (always non-fatal)
|
||||
${toggle_werror} treat warnings as errors, if possible
|
||||
(not available with all compilers)
|
||||
${toggle_optimizations} turn on/off compiler optimization flags
|
||||
${toggle_pic} turn on/off Position Independant Code
|
||||
${toggle_pic} turn on/off Position Independent Code
|
||||
${toggle_ccache} turn on/off compiler cache
|
||||
${toggle_debug} enable/disable debug mode
|
||||
${toggle_gprof} enable/disable gprof profiling instrumentation
|
||||
@@ -442,6 +443,9 @@ process_common_cmdline() {
|
||||
;;
|
||||
--cpu=*) tune_cpu="$optval"
|
||||
;;
|
||||
--extra-cflags=*)
|
||||
extra_cflags="${optval}"
|
||||
;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
echo "${CMDLINE_SELECT} ${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null || die_unknown $opt
|
||||
@@ -547,6 +551,10 @@ process_common_toolchain() {
|
||||
tgt_isa=universal
|
||||
tgt_os=darwin9
|
||||
;;
|
||||
*darwin10*)
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin10
|
||||
;;
|
||||
*mingw32*|*cygwin*)
|
||||
[ -z "$tgt_isa" ] && tgt_isa=x86
|
||||
tgt_os=win32
|
||||
@@ -606,10 +614,20 @@ process_common_toolchain() {
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.5"
|
||||
;;
|
||||
*-darwin10-*)
|
||||
add_cflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_cflags "-mmacosx-version-min=10.6"
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.6"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle Solaris variants. Solaris 10 needs -lposix4
|
||||
case ${toolchain} in
|
||||
sparc-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
add_cflags "-DMUST_BE_ALIGNED"
|
||||
;;
|
||||
*-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
;;
|
||||
@@ -650,12 +668,12 @@ process_common_toolchain() {
|
||||
elif enabled armv7
|
||||
then
|
||||
check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-ftree-vectorize
|
||||
check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a
|
||||
check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a
|
||||
else
|
||||
check_add_cflags -march=${tgt_isa}
|
||||
check_add_asflags -march=${tgt_isa}
|
||||
fi
|
||||
|
||||
enabled debug && add_asflags -g
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2gas.pl"
|
||||
;;
|
||||
rvct)
|
||||
@@ -680,16 +698,24 @@ process_common_toolchain() {
|
||||
arch_int=${tgt_isa##armv}
|
||||
arch_int=${arch_int%%te}
|
||||
check_add_asflags --pd "\"ARCHITECTURE SETA ${arch_int}\""
|
||||
enabled debug && add_asflags -g
|
||||
add_cflags --gnu
|
||||
add_cflags --enum_is_int
|
||||
add_cflags --wchar32
|
||||
;;
|
||||
esac
|
||||
|
||||
case ${tgt_os} in
|
||||
none*)
|
||||
disable multithread
|
||||
disable os_support
|
||||
;;
|
||||
darwin*)
|
||||
SDK_PATH=/Developer/Platforms/iPhoneOS.platform/Developer
|
||||
TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
|
||||
CC=${TOOLCHAIN_PATH}/gcc
|
||||
AR=${TOOLCHAIN_PATH}/ar
|
||||
LD=${TOOLCHAIN_PATH}/arm-apple-darwin9-gcc-4.2.1
|
||||
LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-gcc-4.2.1
|
||||
AS=${TOOLCHAIN_PATH}/as
|
||||
STRIP=${TOOLCHAIN_PATH}/strip
|
||||
NM=${TOOLCHAIN_PATH}/nm
|
||||
@@ -703,19 +729,18 @@ process_common_toolchain() {
|
||||
add_cflags -arch ${tgt_isa}
|
||||
add_ldflags -arch_only ${tgt_isa}
|
||||
|
||||
add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.1.sdk"
|
||||
add_cflags "-isysroot ${SDK_PATH}/SDKs/iPhoneOS4.3.sdk"
|
||||
|
||||
# This should be overridable
|
||||
alt_libc=${SDK_PATH}/SDKs/iPhoneOS3.1.sdk
|
||||
alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.3.sdk
|
||||
|
||||
# Add the paths for the alternate libc
|
||||
# for d in usr/include usr/include/gcc/darwin/4.0/; do
|
||||
for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin9/4.0.1/include/; do
|
||||
for d in usr/include usr/include/gcc/darwin/4.2/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
|
||||
try_dir="${alt_libc}/${d}"
|
||||
[ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
|
||||
done
|
||||
|
||||
for d in lib usr/lib; do
|
||||
for d in lib usr/lib usr/lib/system; do
|
||||
try_dir="${alt_libc}/${d}"
|
||||
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
|
||||
done
|
||||
@@ -732,13 +757,9 @@ process_common_toolchain() {
|
||||
|| die "Must supply --libc when targetting *-linux-rvct"
|
||||
|
||||
# Set up compiler
|
||||
add_cflags --gnu
|
||||
add_cflags --enum_is_int
|
||||
add_cflags --library_interface=aeabi_glibc
|
||||
add_cflags --no_hide_all
|
||||
add_cflags --wchar32
|
||||
add_cflags --dwarf2
|
||||
add_cflags --gnu
|
||||
|
||||
# Set up linker
|
||||
add_ldflags --sysv --no_startup --no_ref_cpp_init
|
||||
@@ -824,6 +845,7 @@ process_common_toolchain() {
|
||||
soft_enable sse2
|
||||
soft_enable sse3
|
||||
soft_enable ssse3
|
||||
soft_enable sse4_1
|
||||
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
@@ -844,7 +866,7 @@ process_common_toolchain() {
|
||||
setup_gnu_toolchain
|
||||
add_cflags -use-msasm -use-asm
|
||||
add_ldflags -i-static
|
||||
enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3
|
||||
enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2
|
||||
enabled x86_64 && AR=xiar
|
||||
case ${tune_cpu} in
|
||||
atom*)
|
||||
@@ -862,6 +884,8 @@ process_common_toolchain() {
|
||||
link_with_cc=gcc
|
||||
tune_cflags="-march="
|
||||
setup_gnu_toolchain
|
||||
#for 32 bit x86 builds, -O3 did not turn on this flag
|
||||
enabled optimizations && check_add_cflags -fomit-frame-pointer
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -879,7 +903,7 @@ process_common_toolchain() {
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
add_asflags -f win${bits}
|
||||
enabled debug && add_asflags -g dwarf2
|
||||
enabled debug && add_asflags -g cv8
|
||||
;;
|
||||
linux*|solaris*)
|
||||
add_asflags -f elf${bits}
|
||||
@@ -934,7 +958,39 @@ process_common_toolchain() {
|
||||
enabled small && check_add_cflags -O2 || check_add_cflags -O3
|
||||
fi
|
||||
|
||||
# Position Independant Code (PIC) support, for building relocatable
|
||||
if enabled opencl; then
|
||||
disable multithread
|
||||
echo " disabling multithread"
|
||||
soft_enable opencl #Provide output to make user comfortable
|
||||
enable runtime_cpu_detect
|
||||
|
||||
#Use dlopen() to load OpenCL when possible.
|
||||
case ${toolchain} in
|
||||
*darwin10*)
|
||||
check_add_cflags -D__APPLE__
|
||||
add_extralibs -framework OpenCL
|
||||
;;
|
||||
*-win32-gcc)
|
||||
if check_header dlfcn.h; then
|
||||
add_extralibs -ldl
|
||||
enable dlopen
|
||||
else
|
||||
#This shouldn't be a hard-coded path in the long term
|
||||
add_extralibs -L/cygdrive/c/Windows/System32 -lOpenCL
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if check_header dlfcn.h; then
|
||||
add_extralibs -ldl
|
||||
enable dlopen
|
||||
else
|
||||
add_extralibs -lOpenCL
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Position Independent Code (PIC) support, for building relocatable
|
||||
# shared objects
|
||||
enabled gcc && enabled pic && check_add_cflags -fPIC
|
||||
|
||||
@@ -961,6 +1017,12 @@ EOF
|
||||
add_cflags -D_LARGEFILE_SOURCE
|
||||
add_cflags -D_FILE_OFFSET_BITS=64
|
||||
fi
|
||||
|
||||
# append any user defined extra cflags
|
||||
if [ -n "${extra_cflags}" ] ; then
|
||||
check_add_cflags ${extra_cflags} || \
|
||||
die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
|
||||
fi
|
||||
}
|
||||
|
||||
process_toolchain() {
|
||||
|
@@ -32,7 +32,8 @@ Options:
|
||||
--name=project_name Name of the project (required)
|
||||
--proj-guid=GUID GUID to use for the project
|
||||
--module-def=filename File containing export definitions (for DLLs)
|
||||
--ver=version Version (7,8) of visual studio to generate for
|
||||
--ver=version Version (7,8,9) of visual studio to generate for
|
||||
--src-path-bare=dir Path to root of source tree
|
||||
-Ipath/to/include Additional include directories
|
||||
-DFLAG[=value] Preprocessor macros to define
|
||||
-Lpath/to/lib Additional library search paths
|
||||
@@ -132,7 +133,7 @@ generate_filter() {
|
||||
open_tag Filter \
|
||||
Name=$name \
|
||||
Filter=$pats \
|
||||
UniqueIdentifier=`generate_uuid`
|
||||
UniqueIdentifier=`generate_uuid` \
|
||||
|
||||
file_list_sz=${#file_list[@]}
|
||||
for i in ${!file_list[@]}; do
|
||||
@@ -145,31 +146,21 @@ generate_filter() {
|
||||
if [ "$pat" == "asm" ] && $asm_use_custom_step; then
|
||||
for plat in "${platforms[@]}"; do
|
||||
for cfg in Debug Release; do
|
||||
open_tag FileConfiguration \
|
||||
Name="${cfg}|${plat}"
|
||||
open_tag FileConfiguration \
|
||||
Name="${cfg}|${plat}" \
|
||||
|
||||
tag Tool \
|
||||
Name="VCCustomBuildTool" \
|
||||
Description="Assembling \$(InputFileName)" \
|
||||
CommandLine="$(eval echo \$asm_${cfg}_cmdline)"\
|
||||
Outputs="\$(InputName).obj"
|
||||
CommandLine="$(eval echo \$asm_${cfg}_cmdline)" \
|
||||
Outputs="\$(InputName).obj" \
|
||||
|
||||
close_tag FileConfiguration
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
if [ "${f##*.}" == "cpp" ]; then
|
||||
for plat in "${platforms[@]}"; do
|
||||
for cfg in Debug Release; do
|
||||
open_tag FileConfiguration \
|
||||
Name="${cfg}|${plat}"
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
CompileAs="2"
|
||||
close_tag FileConfiguration
|
||||
done
|
||||
done
|
||||
fi
|
||||
close_tag File
|
||||
close_tag File
|
||||
|
||||
break
|
||||
fi
|
||||
@@ -185,57 +176,63 @@ unset target
|
||||
for opt in "$@"; do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--help|-h) show_help
|
||||
;;
|
||||
--target=*) target="${optval}"
|
||||
;;
|
||||
--out=*) outfile="$optval"
|
||||
;;
|
||||
--name=*) name="${optval}"
|
||||
;;
|
||||
--proj-guid=*) guid="${optval}"
|
||||
;;
|
||||
--module-def=*)
|
||||
link_opts="${link_opts} ModuleDefinitionFile=${optval}"
|
||||
;;
|
||||
--exe) proj_kind="exe"
|
||||
;;
|
||||
--lib) proj_kind="lib"
|
||||
;;
|
||||
--static-crt) use_static_runtime=true
|
||||
;;
|
||||
--ver=*) vs_ver="$optval"
|
||||
case $optval in
|
||||
[789])
|
||||
;;
|
||||
*) die Unrecognized Visual Studio Version in $opt
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I*) opt="${opt%/}"
|
||||
incs="${incs}${incs:+;}"${opt##-I}""
|
||||
yasmincs="${yasmincs} ${opt}"
|
||||
;;
|
||||
-D*) defines="${defines}${defines:+;}${opt##-D}"
|
||||
;;
|
||||
-L*) # fudge . to $(OutDir)
|
||||
if [ "${opt##-L}" == "." ]; then
|
||||
libdirs="${libdirs}${libdirs:+;}"\$(OutDir)""
|
||||
else
|
||||
# Also try directories for this platform/configuration
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}""
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)/\$(ConfigurationName)""
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)""
|
||||
fi
|
||||
;;
|
||||
-l*) libs="${libs}${libs:+ }${opt##-l}.lib"
|
||||
;;
|
||||
-*) die_unknown $opt
|
||||
;;
|
||||
*) file_list[${#file_list[@]}]="$opt"
|
||||
case "$opt" in
|
||||
*.asm) uses_asm=true;;
|
||||
esac
|
||||
--help|-h) show_help
|
||||
;;
|
||||
--target=*) target="${optval}"
|
||||
;;
|
||||
--out=*) outfile="$optval"
|
||||
;;
|
||||
--name=*) name="${optval}"
|
||||
;;
|
||||
--proj-guid=*) guid="${optval}"
|
||||
;;
|
||||
--module-def=*) link_opts="${link_opts} ModuleDefinitionFile=${optval}"
|
||||
;;
|
||||
--exe) proj_kind="exe"
|
||||
;;
|
||||
--lib) proj_kind="lib"
|
||||
;;
|
||||
--src-path-bare=*) src_path_bare="$optval"
|
||||
;;
|
||||
--static-crt) use_static_runtime=true
|
||||
;;
|
||||
--ver=*)
|
||||
vs_ver="$optval"
|
||||
case "$optval" in
|
||||
[789])
|
||||
;;
|
||||
*) die Unrecognized Visual Studio Version in $opt
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I*)
|
||||
opt="${opt%/}"
|
||||
incs="${incs}${incs:+;}"${opt##-I}""
|
||||
yasmincs="${yasmincs} ${opt}"
|
||||
;;
|
||||
-D*) defines="${defines}${defines:+;}${opt##-D}"
|
||||
;;
|
||||
-L*) # fudge . to $(OutDir)
|
||||
if [ "${opt##-L}" == "." ]; then
|
||||
libdirs="${libdirs}${libdirs:+;}"\$(OutDir)""
|
||||
else
|
||||
# Also try directories for this platform/configuration
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}""
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)/\$(ConfigurationName)""
|
||||
libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)""
|
||||
fi
|
||||
;;
|
||||
-l*) libs="${libs}${libs:+ }${opt##-l}.lib"
|
||||
;;
|
||||
-*) die_unknown $opt
|
||||
;;
|
||||
*)
|
||||
file_list[${#file_list[@]}]="$opt"
|
||||
case "$opt" in
|
||||
*.asm) uses_asm=true
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
done
|
||||
outfile=${outfile:-/dev/stdout}
|
||||
@@ -278,11 +275,7 @@ done
|
||||
|
||||
# List Keyword for this target
|
||||
case "$target" in
|
||||
x86*)
|
||||
keyword="ManagedCProj"
|
||||
;;
|
||||
arm*|iwmmx*)
|
||||
keyword="Win32Proj"
|
||||
x86*) keyword="ManagedCProj"
|
||||
;;
|
||||
*) die "Unsupported target $target!"
|
||||
esac
|
||||
@@ -298,402 +291,255 @@ case "$target" in
|
||||
asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
|
||||
asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)""
|
||||
;;
|
||||
arm*|iwmmx*)
|
||||
case "${name}" in
|
||||
obj_int_extract) platforms[0]="Win32"
|
||||
;;
|
||||
*) platforms[0]="Pocket PC 2003 (ARMV4)"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*) die "Unsupported target $target!"
|
||||
esac
|
||||
|
||||
# List Command-line Arguments for this target
|
||||
case "$target" in
|
||||
arm*|iwmmx*)
|
||||
if [ "$name" == "example" ];then
|
||||
ARGU="--codec vp6 --flipuv --progress _bnd.vp6"
|
||||
fi
|
||||
if [ "$name" == "xma" ];then
|
||||
ARGU="--codec vp6 -h 240 -w 320 -v"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
generate_vcproj() {
|
||||
case "$proj_kind" in
|
||||
exe) vs_ConfigurationType=1
|
||||
;;
|
||||
*) vs_ConfigurationType=4
|
||||
;;
|
||||
exe) vs_ConfigurationType=1
|
||||
;;
|
||||
*) vs_ConfigurationType=4
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "<?xml version=\"1.0\" encoding=\"Windows-1252\"?>"
|
||||
open_tag VisualStudioProject \
|
||||
ProjectType="Visual C++" \
|
||||
Version="${vs_ver_id}" \
|
||||
Name="${name}" \
|
||||
ProjectGUID="{${guid}}" \
|
||||
RootNamespace="${name}" \
|
||||
Keyword="${keyword}"
|
||||
open_tag VisualStudioProject \
|
||||
ProjectType="Visual C++" \
|
||||
Version="${vs_ver_id}" \
|
||||
Name="${name}" \
|
||||
ProjectGUID="{${guid}}" \
|
||||
RootNamespace="${name}" \
|
||||
Keyword="${keyword}" \
|
||||
|
||||
open_tag Platforms
|
||||
open_tag Platforms
|
||||
for plat in "${platforms[@]}"; do
|
||||
tag Platform Name="$plat"
|
||||
tag Platform Name="$plat"
|
||||
done
|
||||
close_tag Platforms
|
||||
|
||||
open_tag ToolFiles
|
||||
open_tag ToolFiles
|
||||
case "$target" in
|
||||
x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules"
|
||||
;;
|
||||
arm*|iwmmx*)
|
||||
if [ "$name" == "vpx" ];then
|
||||
case "$target" in
|
||||
armv5*)
|
||||
tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules"
|
||||
;;
|
||||
armv6*)
|
||||
tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv6.rules"
|
||||
;;
|
||||
iwmmxt*)
|
||||
tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmxscale.rules"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
close_tag ToolFiles
|
||||
|
||||
open_tag Configurations
|
||||
open_tag Configurations
|
||||
for plat in "${platforms[@]}"; do
|
||||
plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
open_tag Configuration \
|
||||
Name="Debug|$plat" \
|
||||
OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
|
||||
IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
|
||||
ConfigurationType="$vs_ConfigurationType" \
|
||||
CharacterSet="1"
|
||||
|
||||
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
|
||||
case "$name" in
|
||||
vpx) tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
|
||||
tag Tool \
|
||||
Name="VCMIDLTool" \
|
||||
TargetEnvironment="1"
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
ExecutionBucket="7" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
|
||||
MinimalRebuild="true" \
|
||||
RuntimeLibrary="1" \
|
||||
BufferSecurityCheck="false" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
CompileAs="1"
|
||||
tag Tool \
|
||||
Name="VCResourceCompilerTool" \
|
||||
PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \
|
||||
Culture="1033" \
|
||||
AdditionalIncludeDirectories="\$(IntDir)" \
|
||||
;;
|
||||
example|xma) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
ExecutionBucket="7" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
|
||||
MinimalRebuild="true" \
|
||||
RuntimeLibrary="1" \
|
||||
BufferSecurityCheck="false" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
CompileAs="1"
|
||||
tag Tool \
|
||||
Name="VCResourceCompilerTool" \
|
||||
PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \
|
||||
Culture="1033" \
|
||||
AdditionalIncludeDirectories="\$(IntDir)" \
|
||||
;;
|
||||
obj_int_extract) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE" \
|
||||
RuntimeLibrary="1" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
open_tag Configuration \
|
||||
Name="Debug|$plat" \
|
||||
OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
|
||||
IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
|
||||
ConfigurationType="$vs_ConfigurationType" \
|
||||
CharacterSet="1" \
|
||||
|
||||
case "$target" in
|
||||
x86*) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
WarningLevel="3" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
DebugInformationFormat="1" \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat $src_path_bare" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$proj_kind" in
|
||||
exe)
|
||||
case "$target" in
|
||||
x86*) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$debug_libs \$(NoInherit)" \
|
||||
AdditionalLibraryDirectories="$libdirs" \
|
||||
GenerateDebugInformation="true" \
|
||||
ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
|
||||
|
||||
;;
|
||||
arm*|iwmmx*)
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
GenerateDebugInformation="true"
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$debug_libs" \
|
||||
OutputFile="\$(OutDir)/${name}.exe" \
|
||||
LinkIncremental="2" \
|
||||
AdditionalLibraryDirectories="${libdirs};"..\lib/$plat_no_ws"" \
|
||||
DelayLoadDLLs="\$(NOINHERIT)" \
|
||||
GenerateDebugInformation="true" \
|
||||
ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
|
||||
SubSystem="9" \
|
||||
StackReserveSize="65536" \
|
||||
StackCommitSize="4096" \
|
||||
EntryPointSymbol="mainWCRTStartup" \
|
||||
TargetMachine="3"
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$debug_libs \$(NoInherit)" \
|
||||
AdditionalLibraryDirectories="$libdirs" \
|
||||
GenerateDebugInformation="true" \
|
||||
ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
lib)
|
||||
case "$target" in
|
||||
arm*|iwmmx*) tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
AdditionalOptions=" /subsystem:windowsce,4.20 /machine:ARM" \
|
||||
OutputFile="\$(OutDir)/${name}.lib" \
|
||||
;;
|
||||
*) tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \
|
||||
;;
|
||||
x86*)
|
||||
tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \
|
||||
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
dll) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="\$(NoInherit)" \
|
||||
LinkIncremental="2" \
|
||||
GenerateDebugInformation="true" \
|
||||
AssemblyDebug="1" \
|
||||
TargetMachine="1" \
|
||||
$link_opts
|
||||
dll)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="\$(NoInherit)" \
|
||||
LinkIncremental="2" \
|
||||
GenerateDebugInformation="true" \
|
||||
AssemblyDebug="1" \
|
||||
TargetMachine="1" \
|
||||
$link_opts \
|
||||
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
|
||||
case "$name" in
|
||||
vpx) tag DeploymentTool \
|
||||
ForceDirty="-1" \
|
||||
RegisterOutput="0"
|
||||
;;
|
||||
example|xma) tag DeploymentTool \
|
||||
ForceDirty="-1" \
|
||||
RegisterOutput="0"
|
||||
tag DebuggerTool \
|
||||
Arguments="${ARGU}"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
close_tag Configuration
|
||||
|
||||
open_tag Configuration \
|
||||
Name="Release|$plat" \
|
||||
OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
|
||||
IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
|
||||
ConfigurationType="$vs_ConfigurationType" \
|
||||
CharacterSet="1" \
|
||||
WholeProgramOptimization="0"
|
||||
open_tag Configuration \
|
||||
Name="Release|$plat" \
|
||||
OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \
|
||||
IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \
|
||||
ConfigurationType="$vs_ConfigurationType" \
|
||||
CharacterSet="1" \
|
||||
WholeProgramOptimization="0" \
|
||||
|
||||
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
|
||||
case "$name" in
|
||||
vpx) tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
|
||||
tag Tool \
|
||||
Name="VCMIDLTool" \
|
||||
TargetEnvironment="1"
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
ExecutionBucket="7" \
|
||||
Optimization="2" \
|
||||
FavorSizeOrSpeed="1" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
|
||||
RuntimeLibrary="0" \
|
||||
BufferSecurityCheck="false" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
CompileAs="1"
|
||||
tag Tool \
|
||||
Name="VCResourceCompilerTool" \
|
||||
PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \
|
||||
Culture="1033" \
|
||||
AdditionalIncludeDirectories="\$(IntDir)" \
|
||||
;;
|
||||
example|xma) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
ExecutionBucket="7" \
|
||||
Optimization="2" \
|
||||
FavorSizeOrSpeed="1" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
|
||||
RuntimeLibrary="0" \
|
||||
BufferSecurityCheck="false" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
CompileAs="1"
|
||||
tag Tool \
|
||||
Name="VCResourceCompilerTool" \
|
||||
PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \
|
||||
Culture="1033" \
|
||||
AdditionalIncludeDirectories="\$(IntDir)" \
|
||||
;;
|
||||
obj_int_extract) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE" \
|
||||
RuntimeLibrary="0" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
DebugInformationFormat="0" \
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
DebugInformationFormat="0" \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat $src_path_bare" \
|
||||
|
||||
case "$target" in
|
||||
x86*) tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
Detect64BitPortabilityProblems="true"
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
|
||||
;;
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$proj_kind" in
|
||||
exe)
|
||||
case "$target" in
|
||||
x86*) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$libs \$(NoInherit)" \
|
||||
AdditionalLibraryDirectories="$libdirs" \
|
||||
;;
|
||||
arm*|iwmmx*)
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
LinkIncremental="1" \
|
||||
GenerateDebugInformation="false" \
|
||||
SubSystem="0" \
|
||||
OptimizeReferences="0" \
|
||||
EnableCOMDATFolding="0" \
|
||||
TargetMachine="0"
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*) tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$libs" \
|
||||
OutputFile="\$(OutDir)/${name}.exe" \
|
||||
LinkIncremental="1" \
|
||||
AdditionalLibraryDirectories="${libdirs};"..\lib/$plat_no_ws"" \
|
||||
DelayLoadDLLs="\$(NOINHERIT)" \
|
||||
GenerateDebugInformation="true" \
|
||||
ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \
|
||||
SubSystem="9" \
|
||||
StackReserveSize="65536" \
|
||||
StackCommitSize="4096" \
|
||||
OptimizeReferences="2" \
|
||||
EnableCOMDATFolding="2" \
|
||||
EntryPointSymbol="mainWCRTStartup" \
|
||||
TargetMachine="3"
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="$libs \$(NoInherit)" \
|
||||
AdditionalLibraryDirectories="$libdirs" \
|
||||
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
lib)
|
||||
lib)
|
||||
case "$target" in
|
||||
arm*|iwmmx*) tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
AdditionalOptions=" /subsystem:windowsce,4.20 /machine:ARM" \
|
||||
OutputFile="\$(OutDir)/${name}.lib" \
|
||||
;;
|
||||
*) tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
dll) # note differences to debug version: LinkIncremental, AssemblyDebug
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="\$(NoInherit)" \
|
||||
LinkIncremental="1" \
|
||||
GenerateDebugInformation="true" \
|
||||
TargetMachine="1" \
|
||||
$link_opts
|
||||
esac
|
||||
x86*)
|
||||
tag Tool \
|
||||
Name="VCLibrarianTool" \
|
||||
OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \
|
||||
|
||||
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
|
||||
case "$name" in
|
||||
vpx) tag DeploymentTool \
|
||||
ForceDirty="-1" \
|
||||
RegisterOutput="0"
|
||||
;;
|
||||
example|xma) tag DeploymentTool \
|
||||
ForceDirty="-1" \
|
||||
RegisterOutput="0"
|
||||
tag DebuggerTool \
|
||||
Arguments="${ARGU}"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
dll) # note differences to debug version: LinkIncremental, AssemblyDebug
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
AdditionalDependencies="\$(NoInherit)" \
|
||||
LinkIncremental="1" \
|
||||
GenerateDebugInformation="true" \
|
||||
TargetMachine="1" \
|
||||
$link_opts \
|
||||
|
||||
;;
|
||||
esac
|
||||
|
||||
close_tag Configuration
|
||||
done
|
||||
close_tag Configurations
|
||||
|
||||
open_tag Files
|
||||
generate_filter srcs "Source Files" "cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
generate_filter hdrs "Header Files" "h;hpp;hxx;hm;inl;inc;xsd"
|
||||
open_tag Files
|
||||
generate_filter srcs "Source Files" "c;def;odl;idl;hpj;bat;asm;asmx"
|
||||
generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd"
|
||||
generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
generate_filter resrcs "Build Files" "mk"
|
||||
close_tag Files
|
||||
|
@@ -139,9 +139,6 @@ process_global() {
|
||||
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
|
||||
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
|
||||
|
||||
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
|
||||
echo "${indent}${proj_guid}.${config}.Deploy.0 = ${config}"
|
||||
fi
|
||||
done
|
||||
IFS=${IFS_bak}
|
||||
done
|
||||
|
File diff suppressed because it is too large
Load Diff
15
build/x86-msvs/obj_int_extract.bat
Normal file
15
build/x86-msvs/obj_int_extract.bat
Normal file
@@ -0,0 +1,15 @@
|
||||
REM Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
REM
|
||||
REM Use of this source code is governed by a BSD-style license
|
||||
REM that can be found in the LICENSE file in the root of the source
|
||||
REM tree. An additional intellectual property rights grant can be found
|
||||
REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "asm_com_offsets.obj" > "asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "asm_dec_offsets.obj" > "asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "asm_enc_offsets.obj" > "asm_enc_offsets.asm"
|
39
configure
vendored
39
configure
vendored
@@ -40,7 +40,8 @@ Advanced options:
|
||||
${toggle_runtime_cpu_detect} runtime cpu detection
|
||||
${toggle_shared} shared library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
|
||||
${toggle_opencl} support for OpenCL-assisted VP8 decoding (experimental)
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
|
||||
Codecs:
|
||||
Codecs can be selectively enabled or disabled individually, or by family:
|
||||
@@ -78,22 +79,21 @@ EOF
|
||||
# alphabetically by architecture, generic-gnu last.
|
||||
all_platforms="${all_platforms} armv5te-linux-rvct"
|
||||
all_platforms="${all_platforms} armv5te-linux-gcc"
|
||||
all_platforms="${all_platforms} armv5te-none-rvct"
|
||||
all_platforms="${all_platforms} armv5te-symbian-gcc"
|
||||
all_platforms="${all_platforms} armv5te-wince-vs8"
|
||||
all_platforms="${all_platforms} armv6-darwin-gcc"
|
||||
all_platforms="${all_platforms} armv6-linux-rvct"
|
||||
all_platforms="${all_platforms} armv6-linux-gcc"
|
||||
all_platforms="${all_platforms} armv6-none-rvct"
|
||||
all_platforms="${all_platforms} armv6-symbian-gcc"
|
||||
all_platforms="${all_platforms} armv6-wince-vs8"
|
||||
all_platforms="${all_platforms} iwmmxt-linux-rvct"
|
||||
all_platforms="${all_platforms} iwmmxt-linux-gcc"
|
||||
all_platforms="${all_platforms} iwmmxt-wince-vs8"
|
||||
all_platforms="${all_platforms} iwmmxt2-linux-rvct"
|
||||
all_platforms="${all_platforms} iwmmxt2-linux-gcc"
|
||||
all_platforms="${all_platforms} iwmmxt2-wince-vs8"
|
||||
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} mips32-linux-gcc"
|
||||
all_platforms="${all_platforms} ppc32-darwin8-gcc"
|
||||
all_platforms="${all_platforms} ppc32-darwin9-gcc"
|
||||
@@ -106,6 +106,7 @@ all_platforms="${all_platforms} x86-darwin8-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin8-icc"
|
||||
all_platforms="${all_platforms} x86-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin9-icc"
|
||||
all_platforms="${all_platforms} x86-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-icc"
|
||||
all_platforms="${all_platforms} x86-solaris-gcc"
|
||||
@@ -114,6 +115,7 @@ all_platforms="${all_platforms} x86-win32-vs7"
|
||||
all_platforms="${all_platforms} x86-win32-vs8"
|
||||
all_platforms="${all_platforms} x86-win32-vs9"
|
||||
all_platforms="${all_platforms} x86_64-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-icc"
|
||||
all_platforms="${all_platforms} x86_64-solaris-gcc"
|
||||
@@ -157,6 +159,7 @@ enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
for d in vp8; do
|
||||
@@ -199,6 +202,7 @@ ARCH_EXT_LIST="
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4_1
|
||||
|
||||
altivec
|
||||
"
|
||||
@@ -209,6 +213,7 @@ HAVE_LIST="
|
||||
alt_tree_layout
|
||||
pthread_h
|
||||
sys_mman_h
|
||||
dlopen
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
@@ -248,7 +253,9 @@ CONFIG_LIST="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
opencl
|
||||
postproc_visualizer
|
||||
os_support
|
||||
"
|
||||
CMDLINE_SELECT="
|
||||
extra_warnings
|
||||
@@ -287,7 +294,8 @@ CMDLINE_SELECT="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
opencl
|
||||
postproc_visualizer
|
||||
"
|
||||
|
||||
process_cmdline() {
|
||||
@@ -295,7 +303,7 @@ process_cmdline() {
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
|
||||
*) process_common_cmdline $opt
|
||||
*) process_common_cmdline "$opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
@@ -324,8 +332,6 @@ post_process_cmdline() {
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
done
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -376,6 +382,7 @@ process_targets() {
|
||||
if [ -f "${source_path}/build/make/version.sh" ]; then
|
||||
local ver=`"$source_path/build/make/version.sh" --bare $source_path`
|
||||
DIST_DIR="${DIST_DIR}-${ver}"
|
||||
VERSION_STRING=${ver}
|
||||
ver=${ver%%-*}
|
||||
VERSION_PATCH=${ver##*.}
|
||||
ver=${ver%.*}
|
||||
@@ -384,6 +391,8 @@ process_targets() {
|
||||
VERSION_MAJOR=${ver%.*}
|
||||
fi
|
||||
enabled child || cat <<EOF >> config.mk
|
||||
|
||||
PREFIX=${prefix}
|
||||
ifeq (\$(MAKECMDGOALS),dist)
|
||||
DIST_DIR?=${DIST_DIR}
|
||||
else
|
||||
@@ -391,6 +400,8 @@ DIST_DIR?=\$(DESTDIR)${prefix}
|
||||
endif
|
||||
LIBSUBDIR=${libdir##${prefix}/}
|
||||
|
||||
VERSION_STRING=${VERSION_STRING}
|
||||
|
||||
VERSION_MAJOR=${VERSION_MAJOR}
|
||||
VERSION_MINOR=${VERSION_MINOR}
|
||||
VERSION_PATCH=${VERSION_PATCH}
|
||||
@@ -485,7 +496,7 @@ process_toolchain() {
|
||||
check_add_cflags -Wpointer-arith
|
||||
check_add_cflags -Wtype-limits
|
||||
check_add_cflags -Wcast-qual
|
||||
enabled extra_warnings || check_add_cflags -Wno-unused
|
||||
enabled extra_warnings || check_add_cflags -Wno-unused-function
|
||||
fi
|
||||
|
||||
if enabled icc; then
|
||||
@@ -535,6 +546,10 @@ process_toolchain() {
|
||||
|
||||
# Other toolchain specific defaults
|
||||
case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
|
||||
|
||||
if enabled postproc_visualizer; then
|
||||
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
@@ -546,4 +561,6 @@ process "$@"
|
||||
cat <<EOF > ${BUILD_PFX}vpx_config.c
|
||||
static const char* const cfg = "$CONFIGURE_ARGS";
|
||||
const char *vpx_codec_build_config(void) {return cfg;}
|
||||
static const char* const libdir = "$libdir";
|
||||
const char *vpx_codec_lib_dir(void) {return libdir;}
|
||||
EOF
|
||||
|
3
docs.mk
3
docs.mk
@@ -34,7 +34,8 @@ TXT_DOX = $(call enabled,TXT_DOX)
|
||||
|
||||
EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc
|
||||
|
||||
doxyfile: libs.doxy_template libs.doxy examples.doxy
|
||||
doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy)
|
||||
doxyfile: libs.doxy_template libs.doxy
|
||||
@echo " [CREATE] $@"
|
||||
@cat $^ > $@
|
||||
@echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@
|
||||
|
12
examples.mk
12
examples.mk
@@ -17,6 +17,7 @@ vpxdec.SRCS += md5_utils.c md5_utils.h
|
||||
vpxdec.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxdec.SRCS += vpx/vpx_integer.h
|
||||
vpxdec.SRCS += args.c args.h vpx_ports/config.h
|
||||
vpxdec.SRCS += tools_common.c tools_common.h
|
||||
vpxdec.SRCS += nestegg/halloc/halloc.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/align.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/halloc.c
|
||||
@@ -28,6 +29,7 @@ vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
|
||||
vpxdec.DESCRIPTION = Full featured decoder
|
||||
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
|
||||
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
|
||||
vpxenc.SRCS += tools_common.c tools_common.h
|
||||
vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxenc.SRCS += libmkv/EbmlIDs.h
|
||||
@@ -91,8 +93,16 @@ vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
|
||||
|
||||
|
||||
# Handle extra library flags depending on codec configuration
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
|
||||
# We should not link to math library (libm) on RVCT
|
||||
# when building for bare-metal targets
|
||||
ifeq ($(CONFIG_OS_SUPPORT), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
else
|
||||
ifeq ($(CONFIG_GCC), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
endif
|
||||
endif
|
||||
#
|
||||
# End of specified files. The rest of the build rules should happen
|
||||
# automagically from here.
|
||||
|
@@ -19,7 +19,7 @@
|
||||
#define VPX_CODEC_DISABLE_COMPAT 1
|
||||
#include "vpx/vpx_decoder.h"
|
||||
#include "vpx/vp8dx.h"
|
||||
#define interface (&vpx_codec_vp8_dx_algo)
|
||||
#define interface (vpx_codec_vp8_dx())
|
||||
@EXTRA_INCLUDES
|
||||
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
#define VPX_CODEC_DISABLE_COMPAT 1
|
||||
#include "vpx/vpx_decoder.h"
|
||||
#include "vpx/vp8dx.h"
|
||||
#define interface (&vpx_codec_vp8_dx_algo)
|
||||
#define interface (vpx_codec_vp8_dx())
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES
|
||||
|
||||
|
||||
|
@@ -19,7 +19,7 @@
|
||||
#define VPX_CODEC_DISABLE_COMPAT 1
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#define interface (&vpx_codec_vp8_cx_algo)
|
||||
#define interface (vpx_codec_vp8_cx())
|
||||
#define fourcc 0x30385056
|
||||
@EXTRA_INCLUDES
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
#define VPX_CODEC_DISABLE_COMPAT 1
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#define interface (&vpx_codec_vp8_cx_algo)
|
||||
#define interface (vpx_codec_vp8_cx())
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES
|
||||
|
||||
|
||||
|
@@ -33,7 +33,7 @@ Initializing The Codec
|
||||
----------------------
|
||||
The decoder is initialized by the following code. This is an example for
|
||||
the VP8 decoder, but the code is analogous for all algorithms. Replace
|
||||
`&vpx_codec_vp8_dx_algo` with a pointer to the interface exposed by the
|
||||
`vpx_codec_vp8_dx()` with a pointer to the interface exposed by the
|
||||
algorithm you want to use. The `cfg` argument is left as NULL in this
|
||||
example, because we want the algorithm to determine the stream
|
||||
configuration (width/height) and allocate memory automatically. This
|
||||
|
@@ -78,8 +78,8 @@ if(frame_cnt + 1 == 22) {
|
||||
} else if(frame_cnt + 1 == 44) {
|
||||
vpx_active_map_t active;
|
||||
|
||||
active.rows = 240/16;
|
||||
active.cols = 320/16;
|
||||
active.rows = cfg.g_h/16;
|
||||
active.cols = cfg.g_w/16;
|
||||
|
||||
/* pass in null map to disable active_map*/
|
||||
active.active_map = NULL;
|
||||
|
126
libs.mk
126
libs.mk
@@ -9,7 +9,13 @@
|
||||
##
|
||||
|
||||
|
||||
ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
|
||||
# ARM assembly files are written in RVCT-style. We use some make magic to
|
||||
# filter those files to allow GCC compilation
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
|
||||
else
|
||||
ASM:=.asm
|
||||
endif
|
||||
|
||||
CODEC_SRCS-yes += libs.mk
|
||||
|
||||
@@ -117,6 +123,18 @@ endif
|
||||
else
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/libvpx.a
|
||||
INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a
|
||||
|
||||
#Install the OpenCL kernels if CL enabled.
|
||||
ifeq ($(CONFIG_OPENCL),yes)
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/vp8/common/opencl/filter_cl.cl
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/vp8/common/opencl/idctllm_cl.cl
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/vp8/common/opencl/loopfilter.cl
|
||||
#only install decoder CL files if VP8 decoder enabled
|
||||
ifeq ($(CONFIG_VP8_DECODER),yes)
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/vp8/decoder/opencl/dequantize_cl.cl
|
||||
endif
|
||||
endif #CONFIG_OPENCL=yes
|
||||
|
||||
endif
|
||||
|
||||
CODEC_SRCS=$(call enabled,CODEC_SRCS)
|
||||
@@ -126,28 +144,22 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
|
||||
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
ifeq ($(HAVE_ARMV5TE),yes)
|
||||
ARM_ARCH=v5
|
||||
endif
|
||||
ifeq ($(HAVE_ARMV6),yes)
|
||||
ARM_ARCH=v6
|
||||
endif
|
||||
obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
|
||||
@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/obj_int_extract.bat .
|
||||
@cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat .
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
|
||||
--exe\
|
||||
--target=$(TOOLCHAIN)\
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--name=obj_int_extract\
|
||||
--proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2\
|
||||
--out=$@ $^\
|
||||
-I".";"$(SRC_PATH_BARE)"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--out=$@ $^ \
|
||||
-I. \
|
||||
-I"$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj
|
||||
PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
|
||||
endif
|
||||
|
||||
vpx.def: $(call enabled,CODEC_EXPORTS)
|
||||
@echo " [CREATE] $@"
|
||||
@@ -158,15 +170,16 @@ CLEAN-OBJS += vpx.def
|
||||
|
||||
vpx.vcproj: $(CODEC_SRCS) vpx.def
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
|
||||
--lib\
|
||||
--target=$(TOOLCHAIN)\
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--lib \
|
||||
--target=$(TOOLCHAIN) \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--name=vpx\
|
||||
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74\
|
||||
--module-def=vpx.def\
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $(CFLAGS) $^\
|
||||
--name=vpx \
|
||||
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \
|
||||
--module-def=vpx.def \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--out=$@ $(CFLAGS) $^ \
|
||||
--src-path-bare="$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-$(BUILD_LIBVPX) += vpx.vcproj
|
||||
|
||||
@@ -203,6 +216,26 @@ $(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)):
|
||||
|
||||
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
|
||||
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
|
||||
|
||||
LIBS-$(BUILD_LIBVPX) += vpx.pc
|
||||
vpx.pc: config.mk libs.mk
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
|
||||
$(qexec)echo 'prefix=$(PREFIX)' >> $@
|
||||
$(qexec)echo 'exec_prefix=$${prefix}' >> $@
|
||||
$(qexec)echo 'libdir=$${prefix}/lib' >> $@
|
||||
$(qexec)echo 'includedir=$${prefix}/include' >> $@
|
||||
$(qexec)echo '' >> $@
|
||||
$(qexec)echo 'Name: vpx' >> $@
|
||||
$(qexec)echo 'Description: WebM Project VPx codec implementation' >> $@
|
||||
$(qexec)echo 'Version: $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)' >> $@
|
||||
$(qexec)echo 'Requires:' >> $@
|
||||
$(qexec)echo 'Conflicts:' >> $@
|
||||
$(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@
|
||||
$(qexec)echo 'Cflags: -I$${includedir}' >> $@
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
|
||||
INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
|
||||
CLEAN-OBJS += vpx.pc
|
||||
endif
|
||||
|
||||
LIBS-$(LIPO_LIBVPX) += libvpx.a
|
||||
@@ -230,9 +263,44 @@ endif
|
||||
#
|
||||
# Add assembler dependencies for configuration and offsets
|
||||
#
|
||||
#$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(BUILD_PFX)vpx_asm_offsets.asm
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %.asm.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
ifeq ($(CONFIG_EXTERNAL_BUILD),) # Visual Studio uses obj_int_extract.bat
|
||||
ifeq ($(ARCH_ARM), yes)
|
||||
asm_com_offsets.asm: obj_int_extract
|
||||
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
CLEAN-OBJS += asm_com_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64), yes)
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
asm_enc_offsets.asm: obj_int_extract
|
||||
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
CLEAN-OBJS += asm_enc_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_ARM), yes)
|
||||
ifeq ($(CONFIG_VP8_DECODER), yes)
|
||||
asm_dec_offsets.asm: obj_int_extract
|
||||
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
CLEAN-OBJS += asm_dec_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
|
||||
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
|
||||
|
@@ -31,7 +31,7 @@
|
||||
The WebM project is an open source project supported by its community. For
|
||||
questions about this SDK, please mail the apps-devel@webmproject.org list.
|
||||
To contribute, see http://www.webmproject.org/code/contribute and mail
|
||||
vpx-devel@webmproject.org.
|
||||
codec-devel@webmproject.org.
|
||||
*/
|
||||
|
||||
/*!\page changelog CHANGELOG
|
||||
|
@@ -20,8 +20,6 @@
|
||||
* Still in the public domain.
|
||||
*/
|
||||
|
||||
#include <sys/types.h> /* for stupid systems */
|
||||
|
||||
#include <string.h> /* for memcpy() */
|
||||
|
||||
#include "md5_utils.h"
|
||||
|
30
solution.mk
30
solution.mk
@@ -9,38 +9,13 @@
|
||||
##
|
||||
|
||||
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
ARM_DEVELOP=no
|
||||
ARM_DEVELOP:=$(if $(filter %vpx.vcproj,$(wildcard *.vcproj)),yes)
|
||||
|
||||
ifeq ($(ARM_DEVELOP),yes)
|
||||
vpx.sln:
|
||||
@echo " [COPY] $@"
|
||||
@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/vpx.sln .
|
||||
PROJECTS-yes += vpx.sln
|
||||
else
|
||||
vpx.sln: $(wildcard *.vcproj)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
|
||||
$(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \
|
||||
$(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--target=$(TOOLCHAIN)\
|
||||
--out=$@ $^
|
||||
vpx.sln.mk: vpx.sln
|
||||
@true
|
||||
|
||||
PROJECTS-yes += vpx.sln vpx.sln.mk
|
||||
-include vpx.sln.mk
|
||||
endif
|
||||
|
||||
else
|
||||
vpx.sln: $(wildcard *.vcproj)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
|
||||
$(if $(filter %vpx.vcproj,$^),\
|
||||
$(foreach vcp,$(filter-out %vpx.vcproj,$^),\
|
||||
$(foreach vcp,$(filter-out %vpx.vcproj %obj_int_extract.vcproj,$^),\
|
||||
--dep=$(vcp:.vcproj=):vpx)) \
|
||||
--dep=vpx:obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $^
|
||||
vpx.sln.mk: vpx.sln
|
||||
@@ -48,7 +23,6 @@ vpx.sln.mk: vpx.sln
|
||||
|
||||
PROJECTS-yes += vpx.sln vpx.sln.mk
|
||||
-include vpx.sln.mk
|
||||
endif
|
||||
|
||||
# Always install this file, as it is an unconditional post-build rule.
|
||||
INSTALL_MAPS += src/% $(SRC_PATH_BARE)/%
|
||||
|
@@ -7,18 +7,18 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _VPX_REF_BUILD_PREFIX_h
|
||||
#define _VPX_REF_BUILD_PREFIX_h
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#include <stdio.h>
|
||||
#include "tools_common.h"
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
FILE* set_binary_mode(FILE *stream)
|
||||
{
|
||||
(void)stream;
|
||||
#ifdef _WIN32
|
||||
_setmode(_fileno(stream), _O_BINARY);
|
||||
#endif
|
||||
return stream;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* include guards */
|
@@ -7,7 +7,10 @@
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TOOLS_COMMON_H
|
||||
#define TOOLS_COMMON_H
|
||||
|
||||
/* Sets a stdio stream into binary mode */
|
||||
FILE* set_binary_mode(FILE *stream);
|
||||
|
||||
|
||||
#define ALLOC_FAILURE -2
|
||||
#endif
|
12
usage.dox
12
usage.dox
@@ -25,7 +25,7 @@
|
||||
codec may write into to store details about a single instance of that codec.
|
||||
Most of the context is implementation specific, and thus opaque to the
|
||||
application. The context structure as seen by the application is of fixed
|
||||
size, and thus can be allocated eith with automatic storage or dynamically
|
||||
size, and thus can be allocated with automatic storage or dynamically
|
||||
on the heap.
|
||||
|
||||
Most operations require an initialized codec context. Codec context
|
||||
@@ -74,7 +74,7 @@
|
||||
the ABI is versioned. The ABI version number must be passed at
|
||||
initialization time to ensure the application is using a header file that
|
||||
matches the library. The current ABI version number is stored in the
|
||||
prepropcessor macros #VPX_CODEC_ABI_VERSION, #VPX_ENCODER_ABI_VERSION, and
|
||||
preprocessor macros #VPX_CODEC_ABI_VERSION, #VPX_ENCODER_ABI_VERSION, and
|
||||
#VPX_DECODER_ABI_VERSION. For convenience, each initialization function has
|
||||
a wrapper macro that inserts the correct version number. These macros are
|
||||
named like the initialization methods, but without the _ver suffix.
|
||||
@@ -125,7 +125,7 @@
|
||||
|
||||
The special value <code>0</code> is reserved to represent an infinite
|
||||
deadline. In this case, the codec will perform as much processing as
|
||||
possible to yeild the highest quality frame.
|
||||
possible to yield the highest quality frame.
|
||||
|
||||
By convention, the value <code>1</code> is used to mean "return as fast as
|
||||
possible."
|
||||
@@ -135,7 +135,7 @@
|
||||
|
||||
/*! \page usage_xma External Memory Allocation
|
||||
Applications that wish to have fine grained control over how and where
|
||||
decoders allocate memory \ref MAY make use of the e_xternal Memory Allocation
|
||||
decoders allocate memory \ref MAY make use of the eXternal Memory Allocation
|
||||
(XMA) interface. Not all codecs support the XMA \ref usage_features.
|
||||
|
||||
To use a decoder in XMA mode, the decoder \ref MUST be initialized with the
|
||||
@@ -143,7 +143,7 @@
|
||||
allocate is heavily dependent on the size of the encoded video frames. The
|
||||
size of the video must be known before requesting the decoder's memory map.
|
||||
This stream information can be obtained with the vpx_codec_peek_stream_info()
|
||||
function, which does not require a contructed decoder context. If the exact
|
||||
function, which does not require a constructed decoder context. If the exact
|
||||
stream is not known, a stream info structure can be created that reflects
|
||||
the maximum size that the decoder instance is required to support.
|
||||
|
||||
@@ -175,7 +175,7 @@
|
||||
\section usage_xma_seg_szalign Segment Size and Alignment
|
||||
The sz (size) and align (alignment) parameters describe the required size
|
||||
and alignment of the requested segment. Alignment will always be a power of
|
||||
two. Applications \ref MUST honor the aligment requested. Failure to do so
|
||||
two. Applications \ref MUST honor the alignment requested. Failure to do so
|
||||
could result in program crashes or may incur a speed penalty.
|
||||
|
||||
\section usage_xma_seg_flags Segment Flags
|
||||
|
@@ -16,12 +16,11 @@
|
||||
#include "findnearmv.h"
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
#include "vpxerrors.h"
|
||||
|
||||
|
||||
extern void vp8_init_scan_order_mask();
|
||||
|
||||
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
|
||||
@@ -71,7 +70,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,13 +87,13 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
oci->mb_rows = height >> 4;
|
||||
@@ -106,7 +105,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
if (!oci->mip)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
@@ -117,10 +116,10 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
if (!oci->above_context)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -131,32 +130,32 @@ void vp8_setup_version(VP8_COMMON *cm)
|
||||
case 0:
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 1:
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 1;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->mcomp_filter_type = BILINEAR;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 2:
|
||||
cm->no_lpf = 1;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->mcomp_filter_type = BILINEAR;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 3:
|
||||
cm->no_lpf = 1;
|
||||
cm->simpler_lpf = 1;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->mcomp_filter_type = BILINEAR;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
}
|
||||
@@ -171,7 +170,7 @@ void vp8_create_common(VP8_COMMON *oci)
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->simpler_lpf = 0;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->mcomp_filter_type = SIXTAP;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clr_type = REG_YUV;
|
||||
|
@@ -11,21 +11,13 @@
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
#include "g_common.h"
|
||||
#include "pragmas.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/pragmas.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
{
|
||||
@@ -106,31 +98,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon2 = vp8_recon2b_neon;
|
||||
rtcd->recon.recon4 = vp8_recon4b_neon;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_neon;
|
||||
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_media)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_neon)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
vp8_build_intra_predictors_mby_s_ptr =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@@ -15,19 +15,19 @@
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is stored transposed in the output array to make it easy for second pass filtering.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
@@ -38,10 +38,10 @@
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; output_height*2
|
||||
mov r3, r3, lsl #1 ; height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-byte aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save output_ptr for each row
|
||||
mov r11, r1 ; save dst_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
@@ -140,17 +140,17 @@
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 int output_pitch,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
|
@@ -243,8 +243,6 @@ skip_secondpass_hloop
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
_filter8_coeff_
|
||||
|
@@ -10,128 +10,29 @@
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
static const short bilinear_filters[8][2] =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if 0
|
||||
void vp8_filter_block2d_bil_first_pass_6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_bil_second_pass_6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i,j;
|
||||
int Temp;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[output_width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
/*src_ptr += src_pixels_per_line - output_width;*/
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
/* pixel_step = 1; */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,8 +49,8 @@ void vp8_bilinear_predict4x4_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
@@ -167,8 +68,8 @@ void vp8_bilinear_predict8x8_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
@@ -186,8 +87,8 @@ void vp8_bilinear_predict8x4_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
@@ -205,8 +106,8 @@ void vp8_bilinear_predict16x16_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
|
35
vp8/common/arm/bilinearfilter_arm.h
Normal file
35
vp8/common/arm/bilinearfilter_arm.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BILINEARFILTER_ARM_H
|
||||
#define BILINEARFILTER_ARM_H
|
||||
|
||||
/* First pass of the two-pass bilinear filter (ARMv6 assembly routine).
 *
 * The C wrapper vp8_filter_block2d_bil_armv6 calls this as the horizontal
 * 1-D pass, passing Height + 1 as `height` and a temporary unsigned short
 * buffer as `dst_ptr`. Per the assembly source's own comment, the result
 * is written transposed so the second pass can walk it easily.
 *
 *   src_ptr    - source pixels
 *   dst_ptr    - 16-bit intermediate output buffer
 *   src_pitch  - source stride (the assembly subtracts `width` from it to
 *                get the per-row source increment)
 *   height     - number of rows to filter (outer-loop counter in the asm)
 *   width      - number of pixels per row
 *   vp8_filter - filter coefficients; the asm skips filtering when the first
 *                loaded coefficient word equals 128
 */
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
/* Second pass of the two-pass bilinear filter (ARMv6 assembly routine).
 *
 * The C wrapper vp8_filter_block2d_bil_armv6 calls this as the vertical
 * 1-D pass, feeding it the temporary buffer produced by the first pass.
 * The assembly uses `width` as its outer-loop counter because the
 * intermediate data it reads is transposed.
 *
 *   src_ptr    - 16-bit intermediate data from the first pass
 *   dst_ptr    - final 8-bit output pixels
 *   dst_pitch  - output pitch; presumably the destination row stride in
 *                bytes -- TODO confirm against the asm body
 *   height     - output block height
 *   width      - output block width
 *   vp8_filter - filter coefficients for this pass
 */
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#endif /* BILINEARFILTER_ARM_H */
|
@@ -11,26 +11,10 @@
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
|
||||
{
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
@@ -93,11 +77,11 @@ void vp8_sixtap_predict_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
@@ -129,47 +113,6 @@ void vp8_sixtap_predict_armv6
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void vp8_sixtap_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
|
||||
/*if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
|
||||
}*/
|
||||
/* Hfilter is null. Second pass only */
|
||||
/*else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
|
||||
else*/
|
||||
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
|
||||
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
|
||||
/*}*/
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
@@ -182,10 +125,10 @@ void vp8_sixtap_predict8x8_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
@@ -224,10 +167,10 @@ void vp8_sixtap_predict16x16_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
|
@@ -11,8 +11,8 @@
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
@@ -41,13 +41,13 @@ void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -57,7 +57,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -65,13 +65,13 @@ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -81,7 +81,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -94,10 +94,10 @@ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -122,10 +122,10 @@ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -148,10 +148,10 @@ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -161,7 +161,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -169,10 +169,10 @@ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -182,7 +182,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -195,7 +195,7 @@ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -220,7 +220,7 @@ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
|
@@ -350,10 +350,7 @@ filt_blk2d_spo16x16_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter16_coeff_
|
||||
DCD bifilter16_coeff
|
||||
bifilter16_coeff
|
||||
|
@@ -123,10 +123,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter4_coeff_
|
||||
DCD bifilter4_coeff
|
||||
bifilter4_coeff
|
||||
|
@@ -128,10 +128,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter8x4_coeff_
|
||||
DCD bifilter8x4_coeff
|
||||
bifilter8x4_coeff
|
||||
|
@@ -176,10 +176,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter8_coeff_
|
||||
DCD bifilter8_coeff
|
||||
bifilter8_coeff
|
||||
|
@@ -397,7 +397,8 @@
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
AREA loopfilter_dat, DATA, READONLY
|
||||
;-----------------
|
||||
|
||||
_lf_coeff_
|
||||
DCD lf_coeff
|
||||
lf_coeff
|
||||
|
@@ -104,10 +104,7 @@
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_lfhy_coeff_
|
||||
DCD lfhy_coeff
|
||||
lfhy_coeff
|
||||
|
@@ -145,10 +145,7 @@
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_vlfy_coeff_
|
||||
DCD vlfy_coeff
|
||||
vlfy_coeff
|
||||
|
@@ -505,7 +505,8 @@
|
||||
bx lr
|
||||
ENDP ; |vp8_mbloop_filter_neon|
|
||||
|
||||
AREA mbloopfilter_dat, DATA, READONLY
|
||||
;-----------------
|
||||
|
||||
_mblf_coeff_
|
||||
DCD mblf_coeff
|
||||
mblf_coeff
|
||||
|
@@ -10,8 +10,8 @@
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "recon.h"
|
||||
#include "blockd.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
|
||||
|
||||
|
@@ -113,10 +113,7 @@
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_idct_coeff_
|
||||
DCD idct_coeff
|
||||
idct_coeff
|
||||
|
@@ -476,10 +476,7 @@ secondpass_only_inner_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter16_coeff_
|
||||
DCD filter16_coeff
|
||||
filter16_coeff
|
||||
|
@@ -407,10 +407,7 @@ secondpass_filter4x4_only
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter4_coeff_
|
||||
DCD filter4_coeff
|
||||
filter4_coeff
|
||||
|
@@ -458,10 +458,7 @@ secondpass_filter8x4_only
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
|
@@ -509,10 +509,7 @@ filt_blk2d_spo8x8_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
|
@@ -53,6 +53,9 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
|
||||
|
||||
extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_neon);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon
|
||||
#define vp8_recon_recon vp8_recon_b_neon
|
||||
@@ -74,6 +77,13 @@ extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
#undef vp8_recon_recon_mb
|
||||
#define vp8_recon_recon_mb vp8_recon_mb_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby
|
||||
#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby_s
|
||||
#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@@ -10,10 +10,10 @@
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "blockd.h"
|
||||
#include "reconintra.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "recon.h"
|
||||
#include "vp8/common/recon.h"
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern void vp8_build_intra_predictors_mby_neon_func(
|
||||
|
49
vp8/common/asm_com_offsets.c
Normal file
49
vp8/common/asm_com_offsets.c
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
/* Compile-time assertion. Declares and defines a static function
 * assert_<name> whose switch statement carries the labels `case 0` and
 * `case !!(cond)`. When cond is false both labels evaluate to 0, and the
 * duplicate case label makes the compiler reject the translation unit.
 * UNUSED is not defined in the visible part of this file -- presumably it
 * is supplied by an included header; TODO confirm. */
#define ct_assert(name,cond) \
|
||||
static void assert_##name(void) UNUSED;\
|
||||
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
|
||||
|
||||
#define DEFINE(sym, val) int sym = val;
|
||||
|
||||
/*
|
||||
#define BLANK() asm volatile("\n->" : : )
|
||||
*/
|
||||
|
||||
/*
|
||||
* int main(void)
|
||||
* {
|
||||
*/
|
||||
|
||||
//vpx_scale
|
||||
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
/*
|
||||
* return 0;
|
||||
* }
|
||||
*/
|
@@ -12,8 +12,6 @@
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
|
||||
|
||||
const unsigned char vp8_block2left[25] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
|
@@ -14,12 +14,17 @@
|
||||
|
||||
void vpx_log(const char *format, ...);
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "../../vpx_ports/config.h"
|
||||
#include "../../vpx_scale/yv12config.h"
|
||||
#include "mv.h"
|
||||
#include "treecoder.h"
|
||||
#include "subpixel.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "../../vpx_ports/mem.h"
|
||||
|
||||
#include "../../vpx_config.h"
|
||||
#if CONFIG_OPENCL
|
||||
#include "opencl/vp8_opencl.h"
|
||||
#endif
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
@@ -28,11 +33,6 @@ void vpx_log(const char *format, ...);
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define Y1CONTEXT 0
|
||||
#define UCONTEXT 1
|
||||
#define VCONTEXT 2
|
||||
#define Y2CONTEXT 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
@@ -48,6 +48,11 @@ typedef struct
|
||||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
@@ -58,8 +63,6 @@ typedef struct
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const int vp8_block2type[25];
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
@@ -75,19 +78,19 @@ typedef enum
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
DC_PRED = 0, /* average of above and left pixels */
|
||||
V_PRED = 1, /* vertical prediction */
|
||||
H_PRED = 2, /* horizontal prediction */
|
||||
TM_PRED = 3, /* Truemotion prediction */
|
||||
B_PRED = 4, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
NEARESTMV = 5,
|
||||
NEARMV = 6,
|
||||
ZEROMV = 7,
|
||||
NEWMV = 8,
|
||||
SPLITMV = 9,
|
||||
|
||||
MB_MODE_COUNT
|
||||
MB_MODE_COUNT = 10
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
/* Macroblock level features */
|
||||
@@ -189,24 +192,47 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *diff;
|
||||
short *reference;
|
||||
short *qcoeff_base;
|
||||
int qcoeff_offset;
|
||||
|
||||
short *dqcoeff_base;
|
||||
int dqcoeff_offset;
|
||||
|
||||
unsigned char *predictor_base;
|
||||
int predictor_offset;
|
||||
|
||||
short *diff_base;
|
||||
int diff_offset;
|
||||
|
||||
short *dequant;
|
||||
|
||||
#if CONFIG_OPENCL
|
||||
cl_command_queue cl_commands; //pointer to macroblock CL command queue
|
||||
|
||||
cl_mem cl_diff_mem;
|
||||
cl_mem cl_predictor_mem;
|
||||
cl_mem cl_qcoeff_mem;
|
||||
cl_mem cl_dqcoeff_mem;
|
||||
cl_mem cl_eobs_mem;
|
||||
|
||||
cl_mem cl_dequant_mem; //Block-specific, not shared
|
||||
|
||||
cl_bool sixtap_filter; //Subpixel Prediction type (true=sixtap, false=bilinear)
|
||||
|
||||
#endif
|
||||
|
||||
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
|
||||
unsigned char **base_pre;
|
||||
unsigned char **base_pre; //previous frame, same Macroblock, base pointer
|
||||
int pre;
|
||||
int pre_stride;
|
||||
|
||||
unsigned char **base_dst;
|
||||
unsigned char **base_dst; //destination base pointer
|
||||
int dst;
|
||||
int dst_stride;
|
||||
|
||||
int eob;
|
||||
int eob; //only used in encoder? Decoder uses MBD.eobs
|
||||
|
||||
char *eobs_base; //beginning of MB.eobs
|
||||
|
||||
B_MODE_INFO bmi;
|
||||
|
||||
@@ -216,16 +242,26 @@ typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
/* not used DECLARE_ALIGNED(16, short, reference[384]); */
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
#if CONFIG_OPENCL
|
||||
cl_command_queue cl_commands; //Each macroblock gets its own command queue.
|
||||
cl_mem cl_diff_mem;
|
||||
cl_mem cl_predictor_mem;
|
||||
cl_mem cl_qcoeff_mem;
|
||||
cl_mem cl_dqcoeff_mem;
|
||||
cl_mem cl_eobs_mem;
|
||||
|
||||
cl_bool sixtap_filter;
|
||||
#endif
|
||||
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
YV12_BUFFER_CONFIG dst; /* Destination buffer for current frame */
|
||||
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
@@ -275,6 +311,7 @@ typedef struct
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
vp8_subpix_fn_t subpixel_predict8x4;
|
||||
vp8_subpix_fn_t subpixel_predict8x8;
|
||||
@@ -282,6 +319,8 @@ typedef struct
|
||||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
struct VP8_COMMON_RTCD *rtcd;
|
||||
#endif
|
||||
|
@@ -1,570 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef bool_coder_h
|
||||
#define bool_coder_h 1
|
||||
|
||||
/* Arithmetic bool coder with largish probability range.
|
||||
Timothy S Murphy 6 August 2004 */
|
||||
|
||||
/* So as not to force users to drag in too much of my idiosyncratic C++ world,
|
||||
I avoid fancy storage management. */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef unsigned char vp8bc_index_t; // probability index
|
||||
|
||||
/* There are a couple of slight variants in the details of finite-precision
|
||||
arithmetic coding. May be safely ignored by most users. */
|
||||
|
||||
enum vp8bc_rounding
|
||||
{
|
||||
vp8bc_down = 0, // just like VP8
|
||||
vp8bc_down_full = 1, // handles minimum probability correctly
|
||||
vp8bc_up = 2
|
||||
};
|
||||
|
||||
#if _MSC_VER
|
||||
|
||||
/* Note that msvc by default does not inline _anything_ (regardless of the
|
||||
setting of inline_depth) and that a command-line option (-Ob1 or -Ob2)
|
||||
is required to inline even the smallest functions. */
|
||||
|
||||
# pragma inline_depth( 255) // I mean it when I inline something
|
||||
# pragma warning( disable : 4099) // No class vs. struct harassment
|
||||
# pragma warning( disable : 4250) // dominance complaints
|
||||
# pragma warning( disable : 4284) // operator-> in templates
|
||||
# pragma warning( disable : 4800) // bool conversion
|
||||
|
||||
// don't let prefix ++,-- stand in for postfix, disaster would ensue
|
||||
|
||||
# pragma warning( error : 4620 4621)
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
#if __cplusplus
|
||||
|
||||
// Sometimes one wishes to be definite about integer lengths.
|
||||
|
||||
struct int_types
|
||||
{
|
||||
typedef const bool cbool;
|
||||
typedef const signed char cchar;
|
||||
typedef const short cshort;
|
||||
typedef const int cint;
|
||||
typedef const int clong;
|
||||
|
||||
typedef const double cdouble;
|
||||
typedef const size_t csize_t;
|
||||
|
||||
typedef unsigned char uchar; // 8 bits
|
||||
typedef const uchar cuchar;
|
||||
|
||||
typedef short int16;
|
||||
typedef unsigned short uint16;
|
||||
typedef const int16 cint16;
|
||||
typedef const uint16 cuint16;
|
||||
|
||||
typedef int int32;
|
||||
typedef unsigned int uint32;
|
||||
typedef const int32 cint32;
|
||||
typedef const uint32 cuint32;
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned int ulong;
|
||||
typedef const uint cuint;
|
||||
typedef const ulong culong;
|
||||
|
||||
|
||||
// All structs consume space, may as well have a vptr.
|
||||
|
||||
virtual ~int_types();
|
||||
};
|
||||
|
||||
|
||||
struct bool_coder_spec;
|
||||
struct bool_coder;
|
||||
struct bool_writer;
|
||||
struct bool_reader;
|
||||
|
||||
|
||||
struct bool_coder_namespace : int_types
|
||||
{
|
||||
typedef vp8bc_index_t Index;
|
||||
typedef bool_coder_spec Spec;
|
||||
typedef const Spec c_spec;
|
||||
|
||||
enum Rounding
|
||||
{
|
||||
Down = vp8bc_down,
|
||||
down_full = vp8bc_down_full,
|
||||
Up = vp8bc_up
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
// Archivable specification of a bool coder includes rounding spec
|
||||
// and probability mapping table. The latter replaces a uchar j
|
||||
// (0 <= j < 256) with an arbitrary uint16 tbl[j] = p.
|
||||
// p/65536 is then the probability of a zero.
|
||||
|
||||
struct bool_coder_spec : bool_coder_namespace
|
||||
{
|
||||
friend struct bool_coder;
|
||||
friend struct bool_writer;
|
||||
friend struct bool_reader;
|
||||
friend struct bool_coder_spec_float;
|
||||
friend struct bool_coder_spec_explicit_table;
|
||||
friend struct bool_coder_spec_exponential_table;
|
||||
friend struct BPsrc;
|
||||
private:
|
||||
uint w; // precision
|
||||
Rounding r;
|
||||
|
||||
uint ebits, mbits, ebias;
|
||||
uint32 mmask;
|
||||
|
||||
Index max_index, half_index;
|
||||
|
||||
uint32 mantissa(Index i) const
|
||||
{
|
||||
assert(i < half_index);
|
||||
return (1 << mbits) + (i & mmask);
|
||||
}
|
||||
uint exponent(Index i) const
|
||||
{
|
||||
assert(i < half_index);
|
||||
return ebias - (i >> mbits);
|
||||
}
|
||||
|
||||
uint16 Ptbl[256]; // kinda clunky, but so is storage management.
|
||||
|
||||
/* Cost in bits of encoding a zero at every probability, scaled by 2^20.
|
||||
Assumes that index is at most 8 bits wide. */
|
||||
|
||||
uint32 Ctbl[256];
|
||||
|
||||
uint32 split(Index i, uint32 R) const // 1 <= split <= max( 1, R-1)
|
||||
{
|
||||
if (!ebias)
|
||||
return 1 + (((R - 1) * Ptbl[i]) >> 16);
|
||||
|
||||
if (i >= half_index)
|
||||
return R - split(max_index - i, R);
|
||||
|
||||
return 1 + (((R - 1) * mantissa(i)) >> exponent(i));
|
||||
}
|
||||
|
||||
uint32 max_range() const
|
||||
{
|
||||
return (1 << w) - (r == down_full ? 0 : 1);
|
||||
}
|
||||
uint32 min_range() const
|
||||
{
|
||||
return (1 << (w - 1)) + (r == down_full ? 1 : 0);
|
||||
}
|
||||
uint32 Rinc() const
|
||||
{
|
||||
return r == Up ? 1 : 0;
|
||||
}
|
||||
|
||||
void check_prec() const;
|
||||
|
||||
bool float_init(uint Ebits, uint Mbits);
|
||||
|
||||
void cost_init();
|
||||
|
||||
bool_coder_spec(
|
||||
uint prec, Rounding rr, uint Ebits = 0, uint Mbits = 0
|
||||
)
|
||||
: w(prec), r(rr)
|
||||
{
|
||||
float_init(Ebits, Mbits);
|
||||
}
|
||||
public:
|
||||
// Read complete spec from file.
|
||||
bool_coder_spec(FILE *);
|
||||
|
||||
// Write spec to file.
|
||||
void dump(FILE *) const;
|
||||
|
||||
// return probability index best approximating prob.
|
||||
Index operator()(double prob) const;
|
||||
|
||||
// probability corresponding to index
|
||||
double operator()(Index i) const;
|
||||
|
||||
Index complement(Index i) const
|
||||
{
|
||||
return max_index - i;
|
||||
}
|
||||
|
||||
Index max_index() const
|
||||
{
|
||||
return max_index;
|
||||
}
|
||||
Index half_index() const
|
||||
{
|
||||
return half_index;
|
||||
}
|
||||
|
||||
uint32 cost_zero(Index i) const
|
||||
{
|
||||
return Ctbl[i];
|
||||
}
|
||||
uint32 cost_one(Index i) const
|
||||
{
|
||||
return Ctbl[ max_index - i];
|
||||
}
|
||||
uint32 cost_bit(Index i, bool b) const
|
||||
{
|
||||
return Ctbl[b? max_index-i:i];
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Pseudo floating-point probability specification.
|
||||
|
||||
At least one of Ebits and Mbits must be nonzero.
|
||||
|
||||
Since all arithmetic is done at 32 bits, Ebits is at most 5.
|
||||
|
||||
Total significant bits in index is Ebits + Mbits + 1.
|
||||
|
||||
Below the halfway point (i.e. when the top significant bit is 0),
|
||||
the index is (e << Mbits) + m.
|
||||
|
||||
The exponent e is between 0 and (2**Ebits) - 1,
|
||||
the mantissa m is between 0 and (2**Mbits) - 1.
|
||||
|
||||
Prepending an implicit 1 to the mantissa, the probability is then
|
||||
|
||||
(2**Mbits + m) >> (2**Ebits + 1 + Mbits - e),
|
||||
|
||||
which has (1/2)**(2**Ebits + 1) as a minimum
|
||||
and (1/2) * [1 - 2**-(Mbits + 1)] as a maximum.
|
||||
|
||||
When the index is above the halfway point, the probability is the
|
||||
complement of the probability associated to the complement of the index.
|
||||
|
||||
Note that the probability increases with the index and that, because of
|
||||
the symmetry, we cannot encode probability exactly 1/2; though we
|
||||
can get as close to 1/2 as we like, provided we have enough Mbits.
|
||||
|
||||
The latter is of course not a problem in practice, one never has
|
||||
exact probabilities and entropy errors are second order, that is, the
|
||||
"overcoding" of a zero will be largely compensated for by the
|
||||
"undercoding" of a one (or vice-versa).
|
||||
|
||||
Compared to arithmetic probability specs (a la VP8), this will do better
|
||||
at very high and low probabilities and worse at probabilities near 1/2,
|
||||
as well as facilitating the usage of wider or narrower probability indices.
|
||||
*/
|
||||
|
||||
struct bool_coder_spec_float : bool_coder_spec
|
||||
{
|
||||
bool_coder_spec_float(
|
||||
uint Ebits = 3, uint Mbits = 4, Rounding rr = down_full, uint prec = 12
|
||||
)
|
||||
: bool_coder_spec(prec, rr, Ebits, Mbits)
|
||||
{
|
||||
cost_init();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct bool_coder_spec_explicit_table : bool_coder_spec
|
||||
{
|
||||
bool_coder_spec_explicit_table(
|
||||
cuint16 probability_table[256] = 0, // default is tbl[i] = i << 8.
|
||||
Rounding = down_full,
|
||||
uint precision = 16
|
||||
);
|
||||
};
|
||||
|
||||
// Construct table via multiplicative interpolation between
|
||||
// p[128] = 1/2 and p[0] = (1/2)^x.
|
||||
// Since we are working with 16-bit precision, x is at most 16.
|
||||
// For probabilities to increase with i, we must have x > 1.
|
||||
// For 0 <= i <= 128, p[i] = (1/2)^{ 1 + [1 - (i/128)]*[x-1] }.
|
||||
// Finally, p[128+i] = 1 - p[128 - i].
|
||||
|
||||
struct bool_coder_spec_exponential_table : bool_coder_spec
|
||||
{
|
||||
bool_coder_spec_exponential_table(uint x, Rounding = down_full, uint prec = 16);
|
||||
};
|
||||
|
||||
|
||||
// Commonalities between writer and reader.
|
||||
|
||||
struct bool_coder : bool_coder_namespace
|
||||
{
|
||||
friend struct bool_writer;
|
||||
friend struct bool_reader;
|
||||
friend struct BPsrc;
|
||||
private:
|
||||
uint32 Low, Range;
|
||||
cuint32 min_range;
|
||||
cuint32 rinc;
|
||||
c_spec spec;
|
||||
|
||||
void _reset()
|
||||
{
|
||||
Low = 0;
|
||||
Range = spec.max_range();
|
||||
}
|
||||
|
||||
bool_coder(c_spec &s)
|
||||
: min_range(s.min_range()),
|
||||
rinc(s.Rinc()),
|
||||
spec(s)
|
||||
{
|
||||
_reset();
|
||||
}
|
||||
|
||||
uint32 half() const
|
||||
{
|
||||
return 1 + ((Range - 1) >> 1);
|
||||
}
|
||||
public:
|
||||
c_spec &Spec() const
|
||||
{
|
||||
return spec;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct bool_writer : bool_coder
|
||||
{
|
||||
friend struct BPsrc;
|
||||
private:
|
||||
uchar *Bstart, *Bend, *B;
|
||||
int bit_lag;
|
||||
bool is_toast;
|
||||
void carry();
|
||||
void reset()
|
||||
{
|
||||
_reset();
|
||||
bit_lag = 32 - spec.w;
|
||||
is_toast = 0;
|
||||
}
|
||||
void raw(bool value, uint32 split);
|
||||
public:
|
||||
bool_writer(c_spec &, uchar *Dest, size_t Len);
|
||||
virtual ~bool_writer();
|
||||
|
||||
void operator()(Index p, bool v)
|
||||
{
|
||||
raw(v, spec.split(p, Range));
|
||||
}
|
||||
|
||||
uchar *buf() const
|
||||
{
|
||||
return Bstart;
|
||||
}
|
||||
size_t bytes_written() const
|
||||
{
|
||||
return B - Bstart;
|
||||
}
|
||||
|
||||
// Call when done with input, flushes internal state.
|
||||
// DO NOT write any more data after calling this.
|
||||
|
||||
bool_writer &flush();
|
||||
|
||||
void write_bits(int n, uint val)
|
||||
{
|
||||
if (n)
|
||||
{
|
||||
uint m = 1 << (n - 1);
|
||||
|
||||
do
|
||||
{
|
||||
raw((bool)(val & m), half());
|
||||
}
|
||||
while (m >>= 1);
|
||||
}
|
||||
}
|
||||
|
||||
# if 0
|
||||
// We are agnostic about storage management.
|
||||
// By default, overflows throw an assert but user can
|
||||
// override to provide an expanding buffer using ...
|
||||
|
||||
virtual void overflow(uint Len) const;
|
||||
|
||||
// ... this function copies already-written data into new buffer
|
||||
// and retains new buffer location.
|
||||
|
||||
void new_buffer(uchar *dest, uint Len);
|
||||
|
||||
// Note that storage management is the user's responsibility.
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
// This could be adjusted to use a little less lookahead.
|
||||
|
||||
struct bool_reader : bool_coder
|
||||
{
|
||||
friend struct BPsrc;
|
||||
private:
|
||||
cuchar *const Bstart; // for debugging
|
||||
cuchar *B;
|
||||
cuchar *const Bend;
|
||||
cuint shf;
|
||||
uint bct;
|
||||
bool raw(uint32 split);
|
||||
public:
|
||||
bool_reader(c_spec &s, cuchar *src, size_t Len);
|
||||
|
||||
bool operator()(Index p)
|
||||
{
|
||||
return raw(spec.split(p, Range));
|
||||
}
|
||||
|
||||
uint read_bits(int num_bits)
|
||||
{
|
||||
uint v = 0;
|
||||
|
||||
while (--num_bits >= 0)
|
||||
v += v + (raw(half()) ? 1 : 0);
|
||||
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
||||
/* C interface */
|
||||
|
||||
typedef struct bool_coder_spec bool_coder_spec;
|
||||
typedef struct bool_writer bool_writer;
|
||||
typedef struct bool_reader bool_reader;
|
||||
|
||||
typedef const bool_coder_spec c_bool_coder_spec;
|
||||
typedef const bool_writer c_bool_writer;
|
||||
typedef const bool_reader c_bool_reader;
|
||||
|
||||
|
||||
/* Optionally override default precision when constructing coder_specs.
|
||||
Just pass a zero pointer if you don't care.
|
||||
Precision is at most 16 bits for table specs, at most 23 otherwise. */
|
||||
|
||||
struct vp8bc_prec
|
||||
{
|
||||
enum vp8bc_rounding r; /* see top header file for def */
|
||||
unsigned int prec; /* range precision in bits */
|
||||
};
|
||||
|
||||
typedef const struct vp8bc_prec vp8bc_c_prec;
|
||||
|
||||
/* bool_coder_spec contains mapping of uchars to actual probabilities
|
||||
(16 bit uints) as well as (usually immaterial) selection of
|
||||
exact finite-precision algorithm used (for now, the latter can only
|
||||
be overridden using the C++ interface).
|
||||
See comments above the corresponding C++ constructors for discussion,
|
||||
especially of exponential probability table generation. */
|
||||
|
||||
bool_coder_spec *vp8bc_vp8spec(); // just like vp8
|
||||
|
||||
bool_coder_spec *vp8bc_literal_spec(
|
||||
const unsigned short prob_map[256], // 0 is like vp8 w/more precision
|
||||
vp8bc_c_prec*
|
||||
);
|
||||
|
||||
bool_coder_spec *vp8bc_float_spec(
|
||||
unsigned int exponent_bits, unsigned int mantissa_bits, vp8bc_c_prec*
|
||||
);
|
||||
|
||||
bool_coder_spec *vp8bc_exponential_spec(unsigned int min_exp, vp8bc_c_prec *);
|
||||
|
||||
bool_coder_spec *vp8bc_spec_from_file(FILE *);
|
||||
|
||||
|
||||
void vp8bc_destroy_spec(c_bool_coder_spec *);
|
||||
|
||||
void vp8bc_spec_to_file(c_bool_coder_spec *, FILE *);
|
||||
|
||||
|
||||
/* Nearest index to supplied probability of zero, 0 <= prob <= 1. */
|
||||
|
||||
vp8bc_index_t vp8bc_index(c_bool_coder_spec *, double prob);
|
||||
|
||||
vp8bc_index_t vp8bc_index_from_counts(
|
||||
c_bool_coder_spec *p, unsigned int zero_ct, unsigned int one_ct
|
||||
);
|
||||
|
||||
/* In case you want to look */
|
||||
|
||||
double vp8bc_probability(c_bool_coder_spec *, vp8bc_index_t);
|
||||
|
||||
/* Opposite index */
|
||||
|
||||
vp8bc_index_t vp8bc_complement(c_bool_coder_spec *, vp8bc_index_t);
|
||||
|
||||
/* Cost in bits of encoding a zero at given probability, scaled by 2^20.
|
||||
(assumes that an int holds at least 32 bits). */
|
||||
|
||||
unsigned int vp8bc_cost_zero(c_bool_coder_spec *, vp8bc_index_t);
|
||||
|
||||
unsigned int vp8bc_cost_one(c_bool_coder_spec *, vp8bc_index_t);
|
||||
unsigned int vp8bc_cost_bit(c_bool_coder_spec *, vp8bc_index_t, int);
|
||||
|
||||
|
||||
/* bool_writer interface */
|
||||
|
||||
/* Length = 0 disables checking for writes beyond buffer end. */
|
||||
|
||||
bool_writer *vp8bc_create_writer(
|
||||
c_bool_coder_spec *, unsigned char *Destination, size_t Length
|
||||
);
|
||||
|
||||
/* Flushes out any buffered data and returns total # of bytes written. */
|
||||
|
||||
size_t vp8bc_destroy_writer(bool_writer *);
|
||||
|
||||
void vp8bc_write_bool(bool_writer *, int boolean_val, vp8bc_index_t false_prob);
|
||||
|
||||
void vp8bc_write_bits(
|
||||
bool_writer *, unsigned int integer_value, int number_of_bits
|
||||
);
|
||||
|
||||
c_bool_coder_spec *vp8bc_writer_spec(c_bool_writer *);
|
||||
|
||||
|
||||
/* bool_reader interface */
|
||||
|
||||
/* Length = 0 disables checking for reads beyond buffer end. */
|
||||
|
||||
bool_reader *vp8bc_create_reader(
|
||||
c_bool_coder_spec *, const unsigned char *Source, size_t Length
|
||||
);
|
||||
void vp8bc_destroy_reader(bool_reader *);
|
||||
|
||||
int vp8bc_read_bool(bool_reader *, vp8bc_index_t false_prob);
|
||||
|
||||
unsigned int vp8bc_read_bits(bool_reader *, int number_of_bits);
|
||||
|
||||
c_bool_coder_spec *vp8bc_reader_spec(c_bool_reader *);
|
||||
|
||||
#if __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* bool_coder_h */
|
@@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef CODEC_COMMON_INTERFACE_H
|
||||
#define CODEC_COMMON_INTERFACE_H
|
||||
|
||||
#define __export
|
||||
#define _export
|
||||
#define dll_export __declspec( dllexport )
|
||||
#define dll_import __declspec( dllimport )
|
||||
|
||||
// Playback ERROR Codes.
|
||||
#define NO_DECODER_ERROR 0
|
||||
#define REMOTE_DECODER_ERROR -1
|
||||
|
||||
#define DFR_BAD_DCT_COEFF -100
|
||||
#define DFR_ZERO_LENGTH_FRAME -101
|
||||
#define DFR_FRAME_SIZE_INVALID -102
|
||||
#define DFR_OUTPUT_BUFFER_OVERFLOW -103
|
||||
#define DFR_INVALID_FRAME_HEADER -104
|
||||
#define FR_INVALID_MODE_TOKEN -110
|
||||
#define ETR_ALLOCATION_ERROR -200
|
||||
#define ETR_INVALID_ROOT_PTR -201
|
||||
#define SYNCH_ERROR -400
|
||||
#define BUFFER_UNDERFLOW_ERROR -500
|
||||
#define PB_IB_OVERFLOW_ERROR -501
|
||||
|
||||
// External error triggers
|
||||
#define PB_HEADER_CHECKSUM_ERROR -601
|
||||
#define PB_DATA_CHECKSUM_ERROR -602
|
||||
|
||||
// DCT Error Codes
|
||||
#define DDCT_EXPANSION_ERROR -700
|
||||
#define DDCT_INVALID_TOKEN_ERROR -701
|
||||
|
||||
// exception_errors
|
||||
#define GEN_EXCEPTIONS -800
|
||||
#define EX_UNQUAL_ERROR -801
|
||||
|
||||
// Unrecoverable error codes
|
||||
#define FATAL_PLAYBACK_ERROR -1000
|
||||
#define GEN_ERROR_CREATING_CDC -1001
|
||||
#define GEN_THREAD_CREATION_ERROR -1002
|
||||
#define DFR_CREATE_BMP_FAILED -1003
|
||||
|
||||
// YUV buffer configuration structure
|
||||
typedef struct
|
||||
{
|
||||
int y_width;
|
||||
int y_height;
|
||||
int y_stride;
|
||||
|
||||
int uv_width;
|
||||
int uv_height;
|
||||
int uv_stride;
|
||||
|
||||
unsigned char *y_buffer;
|
||||
unsigned char *u_buffer;
|
||||
unsigned char *v_buffer;
|
||||
|
||||
} YUV_BUFFER_CONFIG;
|
||||
typedef enum
|
||||
{
|
||||
C_SET_KEY_FRAME,
|
||||
C_SET_FIXED_Q,
|
||||
C_SET_FIRSTPASS_FILE,
|
||||
C_SET_EXPERIMENTAL_MIN,
|
||||
C_SET_EXPERIMENTAL_MAX = C_SET_EXPERIMENTAL_MIN + 255,
|
||||
C_SET_CHECKPROTECT,
|
||||
C_SET_TESTMODE,
|
||||
C_SET_INTERNAL_SIZE,
|
||||
C_SET_RECOVERY_FRAME,
|
||||
C_SET_REFERENCEFRAME,
|
||||
C_SET_GOLDENFRAME
|
||||
|
||||
#ifndef VP50_COMP_INTERFACE
|
||||
// Specialist test facilities.
|
||||
// C_VCAP_PARAMS, // DO NOT USE FOR NOW WITH VFW CODEC
|
||||
#endif
|
||||
|
||||
} C_SETTING;
|
||||
|
||||
typedef unsigned long C_SET_VALUE;
|
||||
|
||||
|
||||
#endif
|
@@ -36,6 +36,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
@@ -106,23 +114,20 @@ static void init_bit_trees()
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
|
||||
|
||||
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
|
||||
|
||||
vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, bcc1, 1, 5},
|
||||
{ cat2, Pcat2, bcc2, 2, 7},
|
||||
{ cat3, Pcat3, bcc3, 3, 11},
|
||||
{ cat4, Pcat4, bcc4, 4, 19},
|
||||
{ cat5, Pcat5, bcc5, 5, 35},
|
||||
{ cat6, Pcat6, bcc6, 11, 67},
|
||||
{ 0, 0, 0, 0, 0}
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
#include "defaultcoefcounts.h"
|
||||
|
||||
|
@@ -24,10 +24,10 @@
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define vp8_coef_tokens 12
|
||||
@@ -42,7 +42,6 @@ typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
vp8bc_index_t *prob_bc;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
@@ -95,6 +94,7 @@ struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern short vp8_default_zig_zag_mask[16];
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
|
@@ -18,6 +18,8 @@ enum
|
||||
{
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
|
||||
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
|
@@ -13,10 +13,12 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void extend_plane_borders
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* pitch */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
@@ -25,7 +27,6 @@ static void extend_plane_borders
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
@@ -34,68 +35,73 @@ static void extend_plane_borders
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = s - el;
|
||||
dest_ptr2 = s + w;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
for (i = 0; i < h - 0 + 1; i++)
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
/* Some linkers will complain if we call vpx_memset with el set to a
|
||||
* constant 0.
|
||||
*/
|
||||
if (el)
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += sp;
|
||||
dest_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
/* Now copy the top and bottom source lines into each line of the respective borders */
|
||||
src_ptr1 = s - el;
|
||||
src_ptr2 = s + sp * (h - 1) - el;
|
||||
dest_ptr1 = s + sp * (-et) - el;
|
||||
dest_ptr2 = s + sp * (h) - el;
|
||||
linesize = el + er + w + 1;
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
|
||||
for (i = 0; i < (int)et; i++)
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += sp;
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < (int)eb; i++)
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += sp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int er = 0xf & (16 - (width & 0xf));
|
||||
int eb = 0xf & (16 - (height & 0xf));
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
/* check for non multiples of 16 */
|
||||
if (er != 0 || eb != 0)
|
||||
{
|
||||
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
|
||||
/* adjust for uv */
|
||||
height = (height + 1) >> 1;
|
||||
width = (width + 1) >> 1;
|
||||
er = 0x7 & (8 - (width & 0x7));
|
||||
eb = 0x7 & (8 - (height & 0x7));
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
|
||||
if (er || eb)
|
||||
{
|
||||
extend_plane_borders(ybf->u_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
|
||||
extend_plane_borders(ybf->v_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
|
||||
}
|
||||
}
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
|
||||
{
|
||||
|
@@ -14,8 +14,8 @@
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
void Extend(YV12_BUFFER_CONFIG *ybf);
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height);
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst);
|
||||
|
||||
#endif
|
||||
|
536
vp8/common/filter.c
Normal file
536
vp8/common/filter.c
Normal file
@@ -0,0 +1,536 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define REGISTER_FILTER 1
|
||||
/*
 * Clamp the lvalue x in place to the inclusive range [min, max].
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else.
 * Note: x is evaluated more than once, so it must be free of side effects.
 */
#define CLAMP(x,min,max) \
    do { \
        if ((x) < (min)) (x) = (min); \
        else if ((x) > (max)) (x) = (max); \
    } while (0)
|
||||
|
||||
#if REGISTER_FILTER
|
||||
#define FILTER0 filter0
|
||||
#define FILTER1 filter1
|
||||
#define FILTER2 filter2
|
||||
#define FILTER3 filter3
|
||||
#define FILTER4 filter4
|
||||
#define FILTER5 filter5
|
||||
#else
|
||||
#define FILTER0 vp8_filter[0]
|
||||
#define FILTER1 vp8_filter[1]
|
||||
#define FILTER2 vp8_filter[2]
|
||||
#define FILTER3 vp8_filter[3]
|
||||
#define FILTER4 vp8_filter[4]
|
||||
#define FILTER5 vp8_filter[5]
|
||||
#endif
|
||||
|
||||
#define SRC_INCREMENT src_increment
|
||||
|
||||
#include "filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
/*
 * Two-tap bilinear kernels, one row per offset index 0..7.
 * The two taps of every row add up to 128, and the second tap grows by 16
 * with each index.
 */
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
    { 128,   0 },   /* index 0 */
    { 112,  16 },   /* index 1 */
    {  96,  32 },   /* index 2 */
    {  80,  48 },   /* index 3 */
    {  64,  64 },   /* index 4 */
    {  48,  80 },   /* index 5 */
    {  32,  96 },   /* index 6 */
    {  16, 112 }    /* index 7 */
};
|
||||
|
||||
/*
 * Six-tap kernels, one row per offset index 0..7.
 * The six taps of every row add up to 128; row 0 passes the centre sample
 * through unchanged.
 */
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
    /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0,   0, 128,   0,   0, 0 },
    { 0,  -6, 123,  12,  -1, 0 },
    { 2, -11, 108,  36,  -8, 1 },   /* New 1/4 pel 6 tap filter */
    { 0,  -9,  93,  50,  -6, 0 },
    { 3, -16,  77,  77, -16, 3 },   /* New 1/2 pel 6 tap filter */
    { 0,  -6,  50,  93,  -9, 0 },
    { 1,  -8,  36, 108, -11, 2 },   /* New 1/4 pel 6 tap filter */
    { 0,  -1,  12, 123,  -6, 0 },
};
|
||||
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
#if REGISTER_FILTER
|
||||
short filter0 = vp8_filter[0];
|
||||
short filter1 = vp8_filter[1];
|
||||
short filter2 = vp8_filter[2];
|
||||
short filter3 = vp8_filter[3];
|
||||
short filter4 = vp8_filter[4];
|
||||
short filter5 = vp8_filter[5];
|
||||
#endif
|
||||
|
||||
int ps2 = 2*(int)pixel_step;
|
||||
int ps3 = 3*(int)pixel_step;
|
||||
|
||||
unsigned int src_increment = src_pixels_per_line - output_width;
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-1*ps2] * FILTER0);
|
||||
Temp += ((int)src_ptr[-1*(int)pixel_step] * FILTER1) +
|
||||
((int)src_ptr[0] * FILTER2) +
|
||||
((int)src_ptr[pixel_step] * FILTER3) +
|
||||
((int)src_ptr[ps2] * FILTER4) +
|
||||
((int)src_ptr[ps3] * FILTER5) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
CLAMP(Temp, 0, 255);
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += SRC_INCREMENT;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
#if REGISTER_FILTER
|
||||
short filter0 = vp8_filter[0];
|
||||
short filter1 = vp8_filter[1];
|
||||
short filter2 = vp8_filter[2];
|
||||
short filter3 = vp8_filter[3];
|
||||
short filter4 = vp8_filter[4];
|
||||
short filter5 = vp8_filter[5];
|
||||
#endif
|
||||
|
||||
int ps2 = ((int)pixel_step) << 1;
|
||||
int ps3 = ps2 + (int)pixel_step;
|
||||
unsigned int src_increment = src_pixels_per_line - output_width;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-1*ps2] * FILTER0) +
|
||||
((int)src_ptr[-1*(int)pixel_step] * FILTER1) +
|
||||
((int)src_ptr[0] * FILTER2) +
|
||||
((int)src_ptr[pixel_step] * FILTER3) +
|
||||
((int)src_ptr[ps2] * FILTER4) +
|
||||
((int)src_ptr[ps3] * FILTER5) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
CLAMP(Temp, 0, 255);
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_increment;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Six-tap 2-D filtering of one 4x4 block.
 *
 * The horizontal pass starts two source rows above the block and produces
 * 9 rows of 4 samples; the vertical pass then starts at the third of those
 * rows (offset 8) and writes 4 rows of 4 bytes to output_ptr.
 */
static void filter_block2d(unsigned char *src_ptr,
                           unsigned char *output_ptr,
                           unsigned int src_pixels_per_line,
                           int output_pitch,
                           const short *HFilter,
                           const short *VFilter)
{
    int FData[9*4];   /* intermediate rows produced by the horizontal pass */
    unsigned char *first_row = src_ptr - (2 * src_pixels_per_line);

    /* Horizontal pass over 9 rows. */
    filter_block2d_first_pass(first_row, FData, src_pixels_per_line, 1, 9, 4, HFilter);

    /* Vertical pass over the intermediate buffer (row length 4). */
    filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr : Pointer to source block.
 *                  unsigned int src_pitch : Stride of source block.
 *                  unsigned int dst_pitch : Stride of destination block.
 *                  const short *HFilter   : Array of 2 horizontal filter taps.
 *                  const short *VFilter   : Array of 2 vertical filter taps.
 *                  int Width              : Block width.
 *                  int Height             : Block height.
 *
 *  OUTPUTS       : unsigned char *dst_ptr : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The intermediate buffer holds 17 rows of 16 samples, so
 *                  the largest block that can be handled here is 16x16.
 *
 ****************************************************************************/
static void filter_block2d_bil(unsigned char *src_ptr,
                               unsigned char *dst_ptr,
                               unsigned int src_pitch,
                               unsigned int dst_pitch,
                               const short *HFilter,
                               const short *VFilter,
                               int Width,
                               int Height)
{
    /* Horizontal-pass output: Height + 1 rows of Width samples. */
    unsigned short FData[17*16];

    /* Horizontal pass; one extra row feeds the vertical taps. */
    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);

    /* Vertical pass into the destination. */
    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
unsigned char temp1[16];
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@@ -9,10 +9,14 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __PREDICTDC_H
|
||||
#define __PREDICTDC_H
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
void uvvp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
|
||||
void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
#endif
|
||||
extern const short vp8_bilinear_filters[8][2];
|
||||
extern const short vp8_sub_pel_filters[8][6];
|
||||
|
||||
#endif //FILTER_H
|
@@ -1,540 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
|
||||
/*
 * Two-tap bilinear kernels, one row per offset index 0..7.
 * The two taps of every row add up to 128, and the second tap grows by 16
 * with each index.
 */
static const int bilinear_filters[8][2] =
{
    { 128,   0 },   /* index 0 */
    { 112,  16 },   /* index 1 */
    {  96,  32 },   /* index 2 */
    {  80,  48 },   /* index 3 */
    {  64,  64 },   /* index 4 */
    {  48,  80 },   /* index 5 */
    {  32,  96 },   /* index 6 */
    {  16, 112 }    /* index 7 */
};
|
||||
|
||||
|
||||
/*
 * Six-tap kernels, one row per offset index 0..7.
 * The six taps of every row add up to 128; row 0 passes the centre sample
 * through unchanged.
 */
static const short sub_pel_filters[8][6] =
{
    /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0,   0, 128,   0,   0, 0 },
    { 0,  -6, 123,  12,  -1, 0 },
    { 2, -11, 108,  36,  -8, 1 },   /* New 1/4 pel 6 tap filter */
    { 0,  -9,  93,  50,  -6, 0 },
    { 3, -16,  77,  77, -16, 3 },   /* New 1/2 pel 6 tap filter */
    { 0,  -6,  50,  93,  -9, 0 },
    { 1,  -8,  36, 108, -11, 2 },   /* New 1/4 pel 6 tap filter */
    { 0,  -1,  12, 123,  -6, 0 },
};
|
||||
|
||||
void vp8_filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Six-tap 2-D filtering of one 4x4 block.
 *
 * The horizontal pass starts two source rows above the block and produces
 * 9 rows of 4 samples; the vertical pass then starts at the third of those
 * rows (offset 8) and writes 4 rows of 4 bytes to output_ptr.
 */
void vp8_filter_block2d(unsigned char *src_ptr,
                        unsigned char *output_ptr,
                        unsigned int src_pixels_per_line,
                        int output_pitch,
                        const short *HFilter,
                        const short *VFilter)
{
    int FData[9*4];   /* intermediate rows produced by the horizontal pass */
    unsigned char *first_row = src_ptr - (2 * src_pixels_per_line);

    /* Horizontal pass over 9 rows. */
    vp8_filter_block2d_first_pass(first_row, FData, src_pixels_per_line, 1, 9, 4, HFilter);

    /* Vertical pass over the intermediate buffer (row length 4). */
    vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
|
||||
|
||||
|
||||
/*
 * Accumulates absolute neighbour differences over a 4x4 block.
 *
 * For each of the 16 samples, the absolute difference to the sample on its
 * right is added to *HVar and the absolute difference to the sample one
 * row below is added to *VVar. Both totals are added to whatever the
 * caller already stored there; nothing is reset here. A fifth column and a
 * fifth row of the source are read.
 */
void vp8_block_variation_c(unsigned char *src_ptr,
                           int src_pixels_per_line,
                           int *HVar,
                           int *VVar)
{
    unsigned char *line = src_ptr;
    int row;

    for (row = 0; row < 4; row++, line += src_pixels_per_line)
    {
        unsigned char *below = line + src_pixels_per_line;
        int col;

        for (col = 0; col < 4; col++)
        {
            *HVar += abs((int)line[col] - (int)line[col + 1]);
            *VVar += abs((int)line[col] - (int)below[col]);
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
void vp8_sixtap_predict_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data bufffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement first-pass
|
||||
* of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement second-pass
|
||||
* of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
 *
 *  ROUTINE       : vp8_filter_block2d_bil
 *
 *  INPUTS        : unsigned char *src_ptr           : Pointer to source block.
 *                  unsigned int src_pixels_per_line : Stride of input block.
 *                  unsigned int dst_pitch           : Stride of destination.
 *                  const int *HFilter : Array of 2 horizontal filter taps.
 *                  const int *VFilter : Array of 2 vertical filter taps.
 *                  int Width          : Block width.
 *                  int Height         : Block height.
 *
 *  OUTPUTS       : unsigned char *output_ptr : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The intermediate buffer holds 17 rows of 16 samples, so
 *                  the largest block that can be handled here is 16x16.
 *
 ****************************************************************************/
void vp8_filter_block2d_bil(unsigned char *src_ptr,
                            unsigned char *output_ptr,
                            unsigned int src_pixels_per_line,
                            unsigned int dst_pitch,
                            const int *HFilter,
                            const int *VFilter,
                            int Width,
                            int Height)
{
    /* Horizontal-pass output: Height + 1 rows of Width samples. */
    unsigned short FData[17*16];

    /* Horizontal pass (step 1); one extra row feeds the vertical taps. */
    vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);

    /* Vertical pass: the intermediate buffer has stride and step Width. */
    vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
unsigned char temp1[16];
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
@@ -11,47 +11,16 @@
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
#define FINDNEAR_SEARCH_SITES 3
|
||||
/* Lookup table with 4 rows of 16 entries.
   NOTE(review): presumably one row per macroblock split configuration, with
   the leading entries giving the index of the first 4x4 block of each
   partition and the rest zero padding -- confirm against the table's users. */
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
|
||||
/* A motion vector that can be accessed either as an MV structure or as a
   single unsigned integer. */
typedef union
|
||||
{
|
||||
unsigned int as_int; /* whole vector as one integer (used for zero and equality tests) */
|
||||
MV as_mv; /* the same storage viewed as an MV; assumes MV is no wider than unsigned int -- TODO confirm */
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
/*
 * Store the motion vector of macroblock x into *mvp, negating both
 * components when the sign bias of x's reference frame differs from the
 * sign bias of refframe.
 */
static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
    const int flip = ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe];
    MV biased = x->mbmi.mv.as_mv;

    if (flip)
    {
        biased.row = -biased.row;
        biased.col = -biased.col;
    }

    mvp->as_mv = biased;
}
|
||||
|
||||
|
||||
/*
 * Clamp a motion vector in place so that each component stays within the
 * macroblock-to-edge distances held in xd, extended by LEFT_TOP_MARGIN on
 * the left/top side and RIGHT_BOTTOM_MARGIN on the right/bottom side.
 */
void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
{
    const int col_min = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
    const int col_max = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
    const int row_min = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
    const int row_max = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;

    /* Horizontal component. */
    if (mv->col < col_min)
    {
        mv->col = col_min;
    }
    else if (mv->col > col_max)
    {
        mv->col = col_max;
    }

    /* Vertical component. */
    if (mv->row < row_min)
    {
        mv->row = row_min;
    }
    else if (mv->row > row_max)
    {
        mv->row = row_max;
    }
}
|
||||
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
@@ -82,7 +51,7 @@ void vp8_find_near_mvs
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(above, refframe, mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
@@ -97,7 +66,7 @@ void vp8_find_near_mvs
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(left, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
@@ -119,7 +88,7 @@ void vp8_find_near_mvs
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
|
@@ -17,6 +17,41 @@
|
||||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
/* A motion vector that can be accessed either as an MV structure or as a
   single unsigned integer. */
typedef union
|
||||
{
|
||||
unsigned int as_int; /* whole vector as one integer (used for zero and equality tests) */
|
||||
MV as_mv; /* the same storage viewed as an MV; assumes MV is no wider than unsigned int -- TODO confirm */
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
/*
 * Negate both components of *mvp in place when the sign bias of the
 * neighbouring macroblock's reference frame (refmb_ref_frame_sign_bias)
 * differs from the sign bias of refframe; otherwise leave *mvp as it is.
 */
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
    /* Same bias on both sides: the vector already points the right way. */
    if (refmb_ref_frame_sign_bias == ref_frame_sign_bias[refframe])
    {
        return;
    }

    mvp->as_mv.row = -mvp->as_mv.row;
    mvp->as_mv.col = -mvp->as_mv.col;
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
/*
 * Clamp a motion vector in place so that each component stays within the
 * macroblock-to-edge distances held in xd, extended by LEFT_TOP_MARGIN on
 * the left/top side and RIGHT_BOTTOM_MARGIN on the right/bottom side.
 */
static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
{
    const int col_min = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
    const int col_max = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
    const int row_min = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
    const int row_max = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;

    /* Horizontal component. */
    if (mv->col < col_min)
    {
        mv->col = col_min;
    }
    else if (mv->col > col_max)
    {
        mv->col = col_max;
    }

    /* Vertical component. */
    if (mv->row < row_min)
    {
        mv->row = row_min;
    }
    else if (mv->row > row_max)
    {
        mv->row = row_max;
    }
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
@@ -35,8 +70,6 @@ const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
|
||||
|
||||
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
#endif
|
||||
|
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef FOURCC_HPP
|
||||
#define FOURCC_HPP
|
||||
|
||||
#include <iosfwd>
|
||||
#include <cstring>
|
||||
|
||||
|
||||
#if defined(__POWERPC__) || defined(__APPLE__) || defined(__MERKS__)
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
/*
 * Four-character code held both as 4 chars and as an unsigned long that
 * shares the same storage.
 *
 * unsigned long can be wider than 4 bytes (8 bytes on LP64 platforms), so
 * every constructor zeroes the whole integer before any characters are
 * copied in.  Without that, the bytes beyond code[3] would be indeterminate
 * and the integer-based comparison and conversions would be unreliable.
 */
class four_cc
{
public:

    four_cc();                        /* zero-valued code */
    four_cc(const char*);             /* copies exactly 4 chars; no terminator needed */
    explicit four_cc(unsigned long);

    bool operator==(const four_cc&) const;
    bool operator!=(const four_cc&) const;

    bool operator==(const char*) const;   /* compares the first 4 chars */
    bool operator!=(const char*) const;

    operator unsigned long() const;
    unsigned long as_long() const;

    four_cc& operator=(unsigned long);

    char operator[](int) const;       /* index is not range-checked */

    std::ostream& put(std::ostream&) const;

    bool printable() const;

private:

    union
    {
        char code[4];
        unsigned long code_as_long;
    };

};


inline four_cc::four_cc()
    : code_as_long(0)
{
}

inline four_cc::four_cc(unsigned long x)
    : code_as_long(x)
{
}

inline four_cc::four_cc(const char* str)
    : code_as_long(0)   /* clear the bytes that memcpy below does not touch */
{
    memcpy(code, str, 4);
}


inline bool four_cc::operator==(const four_cc& rhs) const
{
    return code_as_long == rhs.code_as_long;
}

inline bool four_cc::operator!=(const four_cc& rhs) const
{
    return !operator==(rhs);
}

inline bool four_cc::operator==(const char* rhs) const
{
    return (memcmp(code, rhs, 4) == 0);
}

inline bool four_cc::operator!=(const char* rhs) const
{
    return !operator==(rhs);
}


inline four_cc::operator unsigned long() const
{
    return code_as_long;
}

inline unsigned long four_cc::as_long() const
{
    return code_as_long;
}

inline char four_cc::operator[](int i) const
{
    return code[i];
}

inline four_cc& four_cc::operator=(unsigned long val)
{
    code_as_long = val;
    return *this;
}

/* Stream insertion delegates to four_cc::put. */
inline std::ostream& operator<<(std::ostream& os, const four_cc& rhs)
{
    return rhs.put(os);
}
|
||||
|
||||
#endif
|
@@ -10,21 +10,16 @@
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "g_common.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_arch_opencl_common_init(VP8_COMMON *ctx);
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
@@ -45,6 +40,10 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon4 = vp8_recon4b_c;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_c;
|
||||
rtcd->recon.recon_mby = vp8_recon_mby_c;
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s;
|
||||
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
|
||||
@@ -65,17 +64,16 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb = vp8_blend_mb_c;
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
|
||||
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
|
||||
rtcd->postproc.blend_b = vp8_blend_b_c;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* Pure C: */
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_arch_x86_common_init(ctx);
|
||||
@@ -85,4 +83,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
vp8_arch_arm_common_init(ctx);
|
||||
#endif
|
||||
|
||||
#if CONFIG_OPENCL && (ENABLE_CL_IDCT_DEQUANT || ENABLE_CL_SUBPIXEL || ENABLE_CL_LOOPFILTER)
|
||||
vp8_arch_opencl_common_init(ctx);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@@ -31,6 +31,10 @@
|
||||
#include "arm/idct_arm.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_OPENCL
|
||||
#include "opencl/idct_cl.h"
|
||||
#endif
|
||||
|
||||
#ifndef vp8_idct_idct1
|
||||
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_c
|
||||
#endif
|
||||
|
@@ -13,6 +13,10 @@
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#if CONFIG_OPENCL
|
||||
#include "opencl/loopfilter_cl.h"
|
||||
#endif
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
|
||||
@@ -28,13 +32,13 @@ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -44,7 +48,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -52,13 +56,13 @@ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -68,7 +72,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -81,10 +85,10 @@ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -109,10 +113,10 @@ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -137,8 +141,6 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
|
||||
int block_inside_limit = 0;
|
||||
int HEVThresh;
|
||||
const int yhedge_boost = 2;
|
||||
const int uvhedge_boost = 2;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
@@ -182,15 +184,9 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl + yhedge_boost;
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
lfi[i].mbflim[j] = filt_lvl + 2;
|
||||
lfi[i].flim[j] = filt_lvl;
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl + uvhedge_boost;
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
lfi[i].uvflim[j] = filt_lvl;
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -249,57 +245,52 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
/*lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
lfi[i].mbflim[j] = filt_lvl+2;*/
|
||||
/*lfi[i].flim[j] = filt_lvl;*/
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
/*lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
/*lfi[i].uvflim[j] = filt_lvl;*/
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level)
|
||||
{
|
||||
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
|
||||
|
||||
if (mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
/* Apply delta for reference frame */
|
||||
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
|
||||
/* Apply delta for mode */
|
||||
if (mbmi->ref_frame == INTRA_FRAME)
|
||||
{
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
if (mbmi->mode == B_PRED)
|
||||
*filter_level += mbd->mode_lf_deltas[0];
|
||||
filter_level += mbd->mode_lf_deltas[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero motion mode */
|
||||
if (mbmi->mode == ZEROMV)
|
||||
*filter_level += mbd->mode_lf_deltas[1];
|
||||
filter_level += mbd->mode_lf_deltas[1];
|
||||
|
||||
/* Split MB motion mode */
|
||||
else if (mbmi->mode == SPLITMV)
|
||||
*filter_level += mbd->mode_lf_deltas[3];
|
||||
filter_level += mbd->mode_lf_deltas[3];
|
||||
|
||||
/* All other inter motion modes (Nearest, Near, New) */
|
||||
else
|
||||
*filter_level += mbd->mode_lf_deltas[2];
|
||||
filter_level += mbd->mode_lf_deltas[2];
|
||||
}
|
||||
|
||||
/* Range check */
|
||||
if (*filter_level > MAX_LOOP_FILTER)
|
||||
*filter_level = MAX_LOOP_FILTER;
|
||||
else if (*filter_level < 0)
|
||||
*filter_level = 0;
|
||||
if (filter_level > MAX_LOOP_FILTER)
|
||||
filter_level = MAX_LOOP_FILTER;
|
||||
else if (filter_level < 0)
|
||||
filter_level = 0;
|
||||
}
|
||||
return filter_level;
|
||||
}
|
||||
|
||||
|
||||
@@ -325,6 +316,13 @@ void vp8_loop_filter_frame
|
||||
int i;
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
#if CONFIG_OPENCL && ENABLE_CL_LOOPFILTER
|
||||
if ( cl_initialized == CL_SUCCESS ){
|
||||
vp8_loop_filter_frame_cl(cm,mbd,default_filt_lvl);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
@@ -373,7 +371,7 @@ void vp8_loop_filter_frame
|
||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
@@ -407,6 +405,7 @@ void vp8_loop_filter_frame
|
||||
}
|
||||
|
||||
|
||||
/* Encoder only... */
|
||||
void vp8_loop_filter_frame_yonly
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
@@ -473,7 +472,7 @@ void vp8_loop_filter_frame_yonly
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
|
||||
/* Apply any context driven MB level adjustment */
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
@@ -502,7 +501,7 @@ void vp8_loop_filter_frame_yonly
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Encoder only... */
|
||||
void vp8_loop_filter_partial_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
|
@@ -32,12 +32,6 @@ typedef struct
|
||||
DECLARE_ALIGNED(16, signed char, flim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, thr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvlim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbthr[16]);
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
|
@@ -49,7 +49,6 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
|
||||
}
|
||||
|
||||
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
signed char ps1, qs1;
|
||||
@@ -94,6 +93,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
|
||||
*op1 = u ^ 0x80;
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
|
@@ -11,16 +11,21 @@
|
||||
|
||||
#include "blockd.h"
|
||||
|
||||
#include "stdio.h"
|
||||
#include "vpx_config.h"
|
||||
#if CONFIG_OPENCL
|
||||
#include "opencl/vp8_opencl.h"
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
PRED = 0,
|
||||
DEST = 1
|
||||
} BLOCKSET;
|
||||
|
||||
void vp8_setup_block
|
||||
static void setup_block
|
||||
(
|
||||
BLOCKD *b,
|
||||
int mv_stride,
|
||||
unsigned char **base,
|
||||
int Stride,
|
||||
int offset,
|
||||
@@ -43,87 +48,183 @@ void vp8_setup_block
|
||||
|
||||
}
|
||||
|
||||
void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
|
||||
static void setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
{
|
||||
int block;
|
||||
|
||||
unsigned char **y, **u, **v;
|
||||
unsigned char **buf_base;
|
||||
int y_off, u_off, v_off;
|
||||
|
||||
if (bs == DEST)
|
||||
{
|
||||
buf_base = &x->dst.buffer_alloc;
|
||||
y_off = x->dst.y_buffer - x->dst.buffer_alloc;
|
||||
u_off = x->dst.u_buffer - x->dst.buffer_alloc;
|
||||
v_off = x->dst.v_buffer - x->dst.buffer_alloc;
|
||||
y = &x->dst.y_buffer;
|
||||
u = &x->dst.u_buffer;
|
||||
v = &x->dst.v_buffer;
|
||||
y_off = 0;
|
||||
|
||||
//y = buf_base;
|
||||
//y_off = x->dst.y_buffer - x->dst.buffer_alloc;
|
||||
|
||||
u = buf_base;
|
||||
v = buf_base;
|
||||
|
||||
u_off = x->dst.u_buffer - x->dst.buffer_alloc;
|
||||
v_off = x->dst.v_buffer - x->dst.buffer_alloc;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
buf_base = &x->pre.buffer_alloc;
|
||||
y = &x->pre.y_buffer;
|
||||
u = &x->pre.u_buffer;
|
||||
v = &x->pre.v_buffer;
|
||||
y_off = u_off = v_off = 0;
|
||||
|
||||
//y = buf_base;
|
||||
//y_off = x->pre.y_buffer - x->pre.buffer_alloc;
|
||||
//u = buf_base;
|
||||
//u_off = x->pre.u_buffer - x->pre.buffer_alloc;
|
||||
//v = buf_base;
|
||||
//v_off = x->pre.v_buffer - x->pre.buffer_alloc;
|
||||
}
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
|
||||
setup_block(&x->block[block], y, x->dst.y_stride,
|
||||
y_off + ((block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4), bs);
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
int block_off = ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
|
||||
vp8_setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
setup_block(&x->block[block], u, x->dst.uv_stride,
|
||||
u_off + block_off, bs);
|
||||
|
||||
setup_block(&x->block[block+4], v, x->dst.uv_stride,
|
||||
v_off + block_off, bs);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
||||
{
|
||||
int r, c;
|
||||
unsigned int offset;
|
||||
|
||||
#if CONFIG_OPENCL && !ONE_CQ_PER_MB
|
||||
cl_command_queue y_cq, u_cq, v_cq;
|
||||
int err;
|
||||
if (cl_initialized == CL_SUCCESS){
|
||||
//Create command queue for Y/U/V Planes
|
||||
y_cq = clCreateCommandQueue(cl_data.context, cl_data.device_id, 0, &err);
|
||||
if (!y_cq || err != CL_SUCCESS) {
|
||||
printf("Error: Failed to create a command queue!\n");
|
||||
cl_destroy(NULL, VP8_CL_TRIED_BUT_FAILED);
|
||||
}
|
||||
u_cq = clCreateCommandQueue(cl_data.context, cl_data.device_id, 0, &err);
|
||||
if (!u_cq || err != CL_SUCCESS) {
|
||||
printf("Error: Failed to create a command queue!\n");
|
||||
cl_destroy(NULL, VP8_CL_TRIED_BUT_FAILED);
|
||||
}
|
||||
v_cq = clCreateCommandQueue(cl_data.context, cl_data.device_id, 0, &err);
|
||||
if (!v_cq || err != CL_SUCCESS) {
|
||||
printf("Error: Failed to create a command queue!\n");
|
||||
cl_destroy(NULL, VP8_CL_TRIED_BUT_FAILED);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 16 Y blocks */
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
x->block[r*4+c].diff = &x->diff[r * 4 * 16 + c * 4];
|
||||
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
offset = r * 4 * 16 + c * 4;
|
||||
x->block[r*4+c].diff_offset = offset;
|
||||
x->block[r*4+c].predictor_offset = offset;
|
||||
#if CONFIG_OPENCL && !ONE_CQ_PER_MB
|
||||
if (cl_initialized == CL_SUCCESS)
|
||||
x->block[r*4+c].cl_commands = y_cq;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* 4 U Blocks */
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[16+r*2+c].diff = &x->diff[256 + r * 4 * 8 + c * 4];
|
||||
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
offset = 256 + r * 4 * 8 + c * 4;
|
||||
x->block[16+r*2+c].diff_offset = offset;
|
||||
x->block[16+r*2+c].predictor_offset = offset;
|
||||
|
||||
#if CONFIG_OPENCL && !ONE_CQ_PER_MB
|
||||
if (cl_initialized == CL_SUCCESS)
|
||||
x->block[16+r*2+c].cl_commands = u_cq;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* 4 V Blocks */
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[20+r*2+c].diff = &x->diff[320+ r * 4 * 8 + c * 4];
|
||||
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
offset = 320+ r * 4 * 8 + c * 4;
|
||||
x->block[20+r*2+c].diff_offset = offset;
|
||||
x->block[20+r*2+c].predictor_offset = offset;
|
||||
|
||||
#if CONFIG_OPENCL && !ONE_CQ_PER_MB
|
||||
if (cl_initialized == CL_SUCCESS)
|
||||
x->block[20+r*2+c].cl_commands = v_cq;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
x->block[24].diff = &x->diff[384];
|
||||
x->block[24].diff_offset = 384;
|
||||
|
||||
for (r = 0; r < 25; r++)
|
||||
{
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].qcoeff_base = x->qcoeff;
|
||||
x->block[r].qcoeff_offset = r * 16;
|
||||
x->block[r].dqcoeff_base = x->dqcoeff;
|
||||
x->block[r].dqcoeff_offset = r * 16;
|
||||
|
||||
x->block[r].predictor_base = x->predictor;
|
||||
x->block[r].diff_base = x->diff;
|
||||
x->block[r].eobs_base = x->eobs;
|
||||
|
||||
#if CONFIG_OPENCL
|
||||
if (cl_initialized == CL_SUCCESS){
|
||||
/* Copy command queue reference from macroblock */
|
||||
#if ONE_CQ_PER_MB
|
||||
x->block[r].cl_commands = x->cl_commands;
|
||||
#endif
|
||||
|
||||
/* Set up CL memory buffers as appropriate */
|
||||
x->block[r].cl_diff_mem = x->cl_diff_mem;
|
||||
x->block[r].cl_dqcoeff_mem = x->cl_dqcoeff_mem;
|
||||
x->block[r].cl_eobs_mem = x->cl_eobs_mem;
|
||||
x->block[r].cl_predictor_mem = x->cl_predictor_mem;
|
||||
x->block[r].cl_qcoeff_mem = x->cl_qcoeff_mem;
|
||||
}
|
||||
|
||||
//Copy filter type to block.
|
||||
x->block[r].sixtap_filter = x->sixtap_filter;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
|
||||
/* handle the destination pitch features */
|
||||
vp8_setup_macroblock(x, DEST);
|
||||
vp8_setup_macroblock(x, PRED);
|
||||
setup_macroblock(x, DEST);
|
||||
setup_macroblock(x, PRED);
|
||||
}
|
||||
|
@@ -18,6 +18,7 @@ extern "C"
|
||||
#endif
|
||||
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "type_aliases.h"
|
||||
#include "ppflags.h"
|
||||
@@ -45,7 +46,8 @@ extern "C"
|
||||
typedef enum
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
@@ -149,6 +151,7 @@ extern "C"
|
||||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
|
||||
// allow internal resizing ( currently disabled in the build !!!!!)
|
||||
int allow_spatial_resampling;
|
||||
@@ -186,9 +189,10 @@ extern "C"
|
||||
int arnr_strength ;
|
||||
int arnr_type ;
|
||||
|
||||
|
||||
struct vpx_fixed_buf two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
} VP8_CONFIG;
|
||||
|
||||
|
||||
@@ -204,7 +208,7 @@ extern "C"
|
||||
// and not just a copy of the pointer..
|
||||
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
|
||||
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
|
@@ -105,7 +105,7 @@ typedef struct VP8Common
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
@@ -120,7 +120,6 @@ typedef struct VP8Common
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int simpler_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
|
||||
int base_qindex;
|
||||
@@ -200,7 +199,7 @@ typedef struct VP8Common
|
||||
} VP8_COMMON;
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level);
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
|
@@ -51,7 +51,7 @@ extern "C"
|
||||
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
233
vp8/common/opencl/blockd_cl.c
Normal file
233
vp8/common/opencl/blockd_cl.c
Normal file
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "../../decoder/onyxd_int.h"
|
||||
#include "../../../vpx_ports/config.h"
|
||||
#include "../../common/idct.h"
|
||||
#include "blockd_cl.h"
|
||||
#include "../../decoder/opencl/dequantize_cl.h"
|
||||
|
||||
|
||||
/*
 * Hand the selected macroblock-level host buffers to VP8_CL_SET_BUF together
 * with their matching cl_mem objects on the macroblock's command queue.
 * (VP8_CL_SET_BUF is defined elsewhere; presumably it uploads the host data
 * to the device buffer -- confirm against vp8_opencl.h.)
 *
 * x     - macroblock descriptor whose diff / predictor / qcoeff / dqcoeff /
 *         eobs arrays and pre / dst frame buffers are passed on.
 * flags - bitwise OR of DIFF, PREDICTOR, QCOEFF, DQCOEFF, EOBS, PRE_BUF and
 *         DST_BUF. DEQUANT is not handled at macroblock level.
 *
 * Returns cl_initialized unchanged when OpenCL is not initialised, otherwise
 * CL_SUCCESS after all requested VP8_CL_SET_BUF invocations have been issued
 * (the macro receives `err` and may return early on its own).
 */
int vp8_cl_mb_prep(MACROBLOCKD *x, int flags){
    int err;

    if (cl_initialized != CL_SUCCESS){
        return cl_initialized;
    }

    /*
     * Every macro invocation is braced: VP8_CL_SET_BUF is a function-like
     * macro, and an unbraced `if` would only guard the first statement of a
     * multi-statement expansion. The fifth macro argument is intentionally
     * left empty, exactly as in the original calls.
     */
    if (flags & DIFF){
        VP8_CL_SET_BUF(x->cl_commands, x->cl_diff_mem, sizeof(cl_short)*400, x->diff,
            ,err
        );
    }

    if (flags & PREDICTOR){
        VP8_CL_SET_BUF(x->cl_commands, x->cl_predictor_mem, sizeof(cl_uchar)*384, x->predictor,
            ,err
        );
    }

    if (flags & QCOEFF){
        VP8_CL_SET_BUF(x->cl_commands, x->cl_qcoeff_mem, sizeof(cl_short)*400, x->qcoeff,
            ,err
        );
    }

    if (flags & DQCOEFF){
        VP8_CL_SET_BUF(x->cl_commands, x->cl_dqcoeff_mem, sizeof(cl_short)*400, x->dqcoeff,
            ,err
        );
    }

    if (flags & EOBS){
        VP8_CL_SET_BUF(x->cl_commands, x->cl_eobs_mem, sizeof(cl_char)*25, x->eobs,
            ,err
        );
    }

    /* Whole-frame buffers: the size comes from the buffer descriptor itself. */
    if (flags & PRE_BUF){
        VP8_CL_SET_BUF(x->cl_commands, x->pre.buffer_mem, x->pre.buffer_size, x->pre.buffer_alloc,
            ,err
        );
    }

    if (flags & DST_BUF){
        VP8_CL_SET_BUF(x->cl_commands, x->dst.buffer_mem, x->dst.buffer_size, x->dst.buffer_alloc,
            ,err
        );
    }

    return CL_SUCCESS;
}
|
||||
|
||||
/*
 * Enqueue read-backs of the selected macroblock-level OpenCL buffers into the
 * corresponding host arrays of `x`.
 *
 * x     - macroblock descriptor providing the command queue, the cl_mem
 *         objects and the host destinations.
 * flags - bitwise OR of DIFF, PREDICTOR, QCOEFF, DQCOEFF, EOBS, PRE_BUF and
 *         DST_BUF selecting which buffers to read.
 *
 * Returns cl_initialized unchanged when OpenCL is not initialised, otherwise
 * CL_SUCCESS once every requested read has been enqueued; each enqueue result
 * is handed to VP8_CL_CHECK_SUCCESS together with `err`.
 *
 * NOTE(review): all reads are enqueued with blocking_read == CL_FALSE and this
 * function does not wait on the queue itself -- confirm that the caller (or
 * VP8_CL_CHECK_SUCCESS) synchronises before the host data is consumed.
 */
int vp8_cl_mb_finish(MACROBLOCKD *x, int flags){
    int err;
    /* Byte counts of the fixed-size per-macroblock arrays. */
    const size_t coeff_bytes = sizeof(cl_short)*400;
    const size_t pred_bytes  = sizeof(cl_uchar)*384;
    const size_t eobs_bytes  = sizeof(cl_char)*25;

    if (cl_initialized != CL_SUCCESS)
        return cl_initialized;

    if (flags & DIFF){
        err = clEnqueueReadBuffer(x->cl_commands, x->cl_diff_mem, CL_FALSE, 0,
                                  coeff_bytes, x->diff, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & PREDICTOR){
        err = clEnqueueReadBuffer(x->cl_commands, x->cl_predictor_mem, CL_FALSE, 0,
                                  pred_bytes, x->predictor, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & QCOEFF){
        err = clEnqueueReadBuffer(x->cl_commands, x->cl_qcoeff_mem, CL_FALSE, 0,
                                  coeff_bytes, x->qcoeff, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & DQCOEFF){
        err = clEnqueueReadBuffer(x->cl_commands, x->cl_dqcoeff_mem, CL_FALSE, 0,
                                  coeff_bytes, x->dqcoeff, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & EOBS){
        err = clEnqueueReadBuffer(x->cl_commands, x->cl_eobs_mem, CL_FALSE, 0,
                                  eobs_bytes, x->eobs, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    /* Frame buffers carry their own size in the buffer descriptor. */
    if (flags & PRE_BUF){
        err = clEnqueueReadBuffer(x->cl_commands, x->pre.buffer_mem, CL_FALSE, 0,
                                  x->pre.buffer_size, x->pre.buffer_alloc, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & DST_BUF){
        err = clEnqueueReadBuffer(x->cl_commands, x->dst.buffer_mem, CL_FALSE, 0,
                                  x->dst.buffer_size, x->dst.buffer_alloc, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( x->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    return CL_SUCCESS;
}
|
||||
|
||||
/*
 * Hand the selected block-level host buffers to VP8_CL_SET_BUF together with
 * their matching cl_mem objects on the block's command queue.
 * (VP8_CL_SET_BUF is defined elsewhere; presumably it uploads the host data
 * to the device buffer -- confirm against vp8_opencl.h.)
 *
 * b     - block descriptor; the *_base pointers and `dequant` are the host
 *         sources, the cl_*_mem members the device buffers.
 * flags - bitwise OR of DIFF, PREDICTOR, QCOEFF, DQCOEFF, EOBS and DEQUANT.
 *         PRE_BUF and DST_BUF are not handled at block level.
 *
 * Returns cl_initialized unchanged when OpenCL is not initialised, otherwise
 * CL_SUCCESS after all requested VP8_CL_SET_BUF invocations have been issued
 * (the macro receives `err` and may return early on its own).
 */
int vp8_cl_block_prep(BLOCKD *b, int flags){
    int err;

    if (cl_initialized != CL_SUCCESS){
        return cl_initialized;
    }

    /*
     * Every macro invocation is braced: VP8_CL_SET_BUF is a function-like
     * macro, and an unbraced `if` would only guard the first statement of a
     * multi-statement expansion. The fifth macro argument is intentionally
     * left empty, exactly as in the original calls.
     */
    if (flags & DIFF){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_diff_mem, sizeof(cl_short)*400, b->diff_base,
            ,err
        );
    }

    if (flags & PREDICTOR){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_predictor_mem, sizeof(cl_uchar)*384, b->predictor_base,
            ,err
        );
    }

    if (flags & QCOEFF){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_qcoeff_mem, sizeof(cl_short)*400, b->qcoeff_base,
            ,err
        );
    }

    if (flags & DQCOEFF){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_dqcoeff_mem, sizeof(cl_short)*400, b->dqcoeff_base,
            ,err
        );
    }

    if (flags & EOBS){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_eobs_mem, sizeof(cl_char)*25, b->eobs_base,
            ,err
        );
    }

    if (flags & DEQUANT){
        VP8_CL_SET_BUF(b->cl_commands, b->cl_dequant_mem, sizeof(cl_short)*16 ,b->dequant,
            ,err
        );
    }

    return CL_SUCCESS;
}
|
||||
|
||||
/*
 * Enqueue read-backs of the selected block-level OpenCL buffers into the host
 * memory referenced by `b`.
 *
 * b     - block descriptor providing the command queue, the cl_mem objects
 *         and the host destinations (*_base pointers and `dequant`).
 * flags - bitwise OR of DIFF, PREDICTOR, QCOEFF, DQCOEFF, EOBS and DEQUANT
 *         selecting which buffers to read.
 *
 * Returns cl_initialized unchanged when OpenCL is not initialised, otherwise
 * CL_SUCCESS once every requested read has been enqueued; each enqueue result
 * is handed to VP8_CL_CHECK_SUCCESS together with `err`.
 *
 * NOTE(review): all reads are enqueued with blocking_read == CL_FALSE and this
 * function does not wait on the queue itself -- confirm that the caller (or
 * VP8_CL_CHECK_SUCCESS) synchronises before the host data is consumed.
 */
int vp8_cl_block_finish(BLOCKD *b, int flags){
    int err;
    /* Byte counts of the fixed-size arrays transferred per block. */
    const size_t coeff_bytes   = sizeof(cl_short)*400;
    const size_t pred_bytes    = sizeof(cl_uchar)*384;
    const size_t eobs_bytes    = sizeof(cl_char)*25;
    const size_t dequant_bytes = sizeof(cl_short)*16;

    if (cl_initialized != CL_SUCCESS)
        return cl_initialized;

    if (flags & DIFF){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_diff_mem, CL_FALSE, 0,
                                  coeff_bytes, b->diff_base, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & PREDICTOR){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_predictor_mem, CL_FALSE, 0,
                                  pred_bytes, b->predictor_base, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & QCOEFF){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_qcoeff_mem, CL_FALSE, 0,
                                  coeff_bytes, b->qcoeff_base, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & DQCOEFF){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_dqcoeff_mem, CL_FALSE, 0,
                                  coeff_bytes, b->dqcoeff_base, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & EOBS){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_eobs_mem, CL_FALSE, 0,
                                  eobs_bytes, b->eobs_base, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    if (flags & DEQUANT){
        err = clEnqueueReadBuffer(b->cl_commands, b->cl_dequant_mem, CL_FALSE, 0,
                                  dequant_bytes, b->dequant, 0, NULL, NULL);
        VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read from GPU!\n",
            , err
        );
    }

    return CL_SUCCESS;
}
|
64
vp8/common/opencl/blockd_cl.h
Normal file
64
vp8/common/opencl/blockd_cl.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BLOCKD_OPENCL_H
|
||||
#define BLOCKD_OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "vp8_opencl.h"
|
||||
#include "../blockd.h"
|
||||
|
||||
#define DIFF 0x0001
|
||||
#define PREDICTOR 0x0002
|
||||
#define QCOEFF 0x0004
|
||||
#define DQCOEFF 0x0008
|
||||
#define EOBS 0x0010
|
||||
#define DEQUANT 0x0020
|
||||
#define PRE_BUF 0x0040
|
||||
#define DST_BUF 0x0080
|
||||
|
||||
#define BLOCK_COPY_ALL 0xffff
|
||||
|
||||
/*
|
||||
#define BLOCK_MEM_SIZE 6
|
||||
enum {
|
||||
DIFF_MEM = 0,
|
||||
PRED_MEM = 1,
|
||||
QCOEFF_MEM = 2,
|
||||
DQCOEFF_MEM = 3,
|
||||
EOBS_MEM = 4,
|
||||
DEQUANT_MEM = 5
|
||||
} BLOCK_MEM_TYPES;
|
||||
|
||||
|
||||
struct cl_block_mem{
|
||||
cl_mem gpu_mem;
|
||||
size_t size;
|
||||
void *host_mem;
|
||||
};
|
||||
|
||||
typedef struct cl_block_mem block_mem;
|
||||
*/
|
||||
|
||||
extern int vp8_cl_block_finish(BLOCKD *b, int flags);
|
||||
extern int vp8_cl_block_prep(BLOCKD *b, int flags);
|
||||
|
||||
extern int vp8_cl_mb_prep(MACROBLOCKD *x, int flags);
|
||||
extern int vp8_cl_mb_finish(MACROBLOCKD *x, int flags);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
106
vp8/common/opencl/dynamic_cl.c
Normal file
106
vp8/common/opencl/dynamic_cl.c
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8_opencl.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
CL_FUNCTIONS cl;
|
||||
void *dll = NULL;
|
||||
int cl_loaded = VP8_CL_NOT_INITIALIZED;
|
||||
|
||||
int close_cl(){
|
||||
int ret = dlclose(dll);
|
||||
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "Error closing OpenCL library: %s", dlerror());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int load_cl(char *lib_name){
|
||||
|
||||
//printf("Loading OpenCL library\n");
|
||||
dll = dlopen(lib_name, RTLD_NOW|RTLD_LOCAL);
|
||||
if (dll != NULL){
|
||||
//printf("Found CL library\n");
|
||||
} else {
|
||||
//printf("Didn't find CL library\n");
|
||||
return VP8_CL_TRIED_BUT_FAILED;
|
||||
}
|
||||
|
||||
CL_LOAD_FN("clGetPlatformIDs", cl.getPlatformIDs);
|
||||
CL_LOAD_FN("clGetPlatformInfo", cl.getPlatformInfo);
|
||||
CL_LOAD_FN("clGetDeviceIDs", cl.getDeviceIDs);
|
||||
CL_LOAD_FN("clGetDeviceInfo", cl.getDeviceInfo);
|
||||
CL_LOAD_FN("clCreateContext", cl.createContext);
|
||||
// CL_LOAD_FN("clCreateContextFromType", cl.createContextFromType);
|
||||
// CL_LOAD_FN("clRetainContext", cl.retainContext);
|
||||
CL_LOAD_FN("clReleaseContext", cl.releaseContext);
|
||||
// CL_LOAD_FN("clGetContextInfo", cl.getContextInfo);
|
||||
CL_LOAD_FN("clCreateCommandQueue", cl.createCommandQueue);
|
||||
// CL_LOAD_FN("clRetainCommandQueue", cl.retainCommandQueue);
|
||||
CL_LOAD_FN("clReleaseCommandQueue", cl.releaseCommandQueue);
|
||||
// CL_LOAD_FN("clGetCommandQueueInfo", cl.getCommandQueue);
|
||||
CL_LOAD_FN("clCreateBuffer", cl.createBuffer);
|
||||
// CL_LOAD_FN("clCreateImage2D", cl.createImage2D);
|
||||
// CL_LOAD_FN("clCreateImage3D", cl.createImage3D);
|
||||
// CL_LOAD_FN("clRetainMemObject", cl.retainMemObject);
|
||||
CL_LOAD_FN("clReleaseMemObject", cl.releaseMemObject);
|
||||
// CL_LOAD_FN("clGetSupportedImageFormats", cl.getSupportedImageFormats);
|
||||
// CL_LOAD_FN("clGetMemObjectInfo", cl.getMemObjectInfo);
|
||||
// CL_LOAD_FN("clGetImageInfo", cl.getImageInfo);
|
||||
// CL_LOAD_FN("clCreateSampler", cl.createSampler);
|
||||
// CL_LOAD_FN("clRetainSampler", cl.retainSampler);
|
||||
// CL_LOAD_FN("clReleaseSampler", cl.releaseSampler);
|
||||
// CL_LOAD_FN("clGetSamplerInfo", cl.getSamplerInfo);
|
||||
CL_LOAD_FN("clCreateProgramWithSource", cl.createProgramWithSource);
|
||||
// CL_LOAD_FN("clCreateProgramWithBinary", cl.createProgramWithBinary);
|
||||
// CL_LOAD_FN("clRetainProgram", cl.retainProgram);
|
||||
CL_LOAD_FN("clReleaseProgram", cl.releaseProgram);
|
||||
CL_LOAD_FN("clBuildProgram", cl.buildProgram);
|
||||
// CL_LOAD_FN("clUnloadCompiler", cl.unloadCompiler);
|
||||
CL_LOAD_FN("clGetProgramInfo", cl.getProgramInfo);
|
||||
CL_LOAD_FN("clGetProgramBuildInfo", cl.getProgramBuildInfo);
|
||||
CL_LOAD_FN("clCreateKernel", cl.createKernel);
|
||||
// CL_LOAD_FN("clCreateKernelsInProgram", cl.createKernelsInProgram);
|
||||
// CL_LOAD_FN("clRetainKernel", cl.retainKernel);
|
||||
CL_LOAD_FN("clReleaseKernel", cl.releaseKernel);
|
||||
CL_LOAD_FN("clSetKernelArg", cl.setKernelArg);
|
||||
// CL_LOAD_FN("clGetKernelInfo", cl.getKernelInfo);
|
||||
CL_LOAD_FN("clGetKernelWorkGroupInfo", cl.getKernelWorkGroupInfo);
|
||||
// CL_LOAD_FN("clWaitForEvents", cl.waitForEvents);
|
||||
// CL_LOAD_FN("clGetEventInfo", cl.getEventInfo);
|
||||
// CL_LOAD_FN("clRetainEvent", cl.retainEvent);
|
||||
// CL_LOAD_FN("clReleaseEvent", cl.releaseEvent);
|
||||
// CL_LOAD_FN("clGetEventProfilingInfo", cl.getEventProfilingInfo);
|
||||
CL_LOAD_FN("clFlush", cl.flush);
|
||||
CL_LOAD_FN("clFinish", cl.finish);
|
||||
CL_LOAD_FN("clEnqueueReadBuffer", cl.enqueueReadBuffer);
|
||||
CL_LOAD_FN("clEnqueueWriteBuffer", cl.enqueueWriteBuffer);
|
||||
CL_LOAD_FN("clEnqueueCopyBuffer", cl.enqueueCopyBuffer);
|
||||
// CL_LOAD_FN("clEnqueueReadImage", cl.enqueueReadImage);
|
||||
// CL_LOAD_FN("clEnqueueWriteImage", cl.enqueueWriteImage);
|
||||
// CL_LOAD_FN("clEnqueueCopyImage", cl.enqueueCopyImage);
|
||||
// CL_LOAD_FN("clEnqueueCopyImageToBuffer", cl.enqueueCopyImageToBuffer);
|
||||
// CL_LOAD_FN("clEnqueueCopyBufferToImage", cl.enqueueCopyBufferToImage);
|
||||
// CL_LOAD_FN("clEnqueueMapBuffer", cl.enqueueMapBuffer);
|
||||
// CL_LOAD_FN("clEnqueueMapImage", cl.enqueueMapImage);
|
||||
// CL_LOAD_FN("clEnqueueUnmapMemObject", cl.enqueueUnmapMemObject);
|
||||
CL_LOAD_FN("clEnqueueNDRangeKernel", cl.enqueueNDRAngeKernel);
|
||||
// CL_LOAD_FN("clEnqueueTask", cl.enqueueTask);
|
||||
// CL_LOAD_FN("clEnqueueNativeKernel", cl.enqueueNativeKernel);
|
||||
// CL_LOAD_FN("clEnqueueMarker", cl.enqueueMarker);
|
||||
// CL_LOAD_FN("clEnqueueWaitForEvents", cl.enqueueWaitForEvents);
|
||||
CL_LOAD_FN("clEnqueueBarrier", cl.enqueueBarrier);
|
||||
// CL_LOAD_FN("clGetExtensionFunctionAddress", cl.getExtensionFunctionAddress);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
253
vp8/common/opencl/dynamic_cl.h
Normal file
253
vp8/common/opencl/dynamic_cl.h
Normal file
@@ -0,0 +1,253 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef DYNAMIC_CL_H
|
||||
#define DYNAMIC_CL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
int load_cl(char *lib_name);
|
||||
int close_cl();
|
||||
|
||||
extern int cl_loaded;
|
||||
|
||||
typedef cl_int(*fn_clGetPlatformIDs_t)(cl_uint, cl_platform_id *, cl_uint *);
|
||||
typedef cl_int(*fn_clGetPlatformInfo_t)(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clGetDeviceIDs_t)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
|
||||
typedef cl_int(*fn_clGetDeviceInfo_t)(cl_device_id, cl_device_info, size_t, void *, size_t *);
|
||||
typedef cl_context(*fn_clCreateContext_t)(const cl_context_properties *, cl_uint, const cl_device_id *, void (*pfn_notify)(const char *, const void *, size_t, void *), void *, cl_int *);
|
||||
typedef cl_context(*fn_clCreateContextFromType_t)(const cl_context_properties *, cl_device_type, void (*pfn_notify)(const char *, const void *, size_t, void *), void *, cl_int *);
|
||||
typedef cl_int(*fn_clRetainContext_t)(cl_context);
|
||||
typedef cl_int(*fn_clReleaseContext_t)(cl_context);
|
||||
typedef cl_int(*fn_clGetContextInfo_t)(cl_context, cl_context_info, size_t, void *, size_t *);
|
||||
typedef cl_command_queue(*fn_clCreateCommandQueue_t)(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
|
||||
typedef cl_int(*fn_clRetainCommandQueue_t)(cl_command_queue);
|
||||
typedef cl_int(*fn_clReleaseCommandQueue_t)(cl_command_queue);
|
||||
typedef cl_int(*fn_clGetCommandQueueInfo_t)(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
|
||||
typedef cl_mem(*fn_clCreateBuffer_t)(cl_context, cl_mem_flags, size_t, void *, cl_int *);
|
||||
typedef cl_mem(*fn_clCreateImage2D_t)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, size_t, void *, cl_int *);
|
||||
typedef cl_mem(*fn_clCreateImage3D_t)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, size_t, size_t, size_t, void *, cl_int *);
|
||||
typedef cl_int(*fn_clRetainMemObject_t)(cl_mem);
|
||||
typedef cl_int(*fn_clReleaseMemObject_t)(cl_mem);
|
||||
typedef cl_int(*fn_clGetSupportedImageFormats_t)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format *, cl_uint *);
|
||||
typedef cl_int(*fn_clGetMemObjectInfo_t)(cl_mem, cl_mem_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clGetImageInfo_t)(cl_mem, cl_image_info, size_t, void *, size_t *);
|
||||
typedef cl_sampler(*fn_clCreateSampler_t)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int *);
|
||||
typedef cl_int(*fn_clRetainSampler_t)(cl_sampler);
|
||||
typedef cl_int(*fn_clReleaseSampler_t)(cl_sampler);
|
||||
typedef cl_int(*fn_clGetSamplerInfo_t)(cl_sampler, cl_sampler_info, size_t, void *, size_t *);
|
||||
typedef cl_program(*fn_clCreateProgramWithSource_t)(cl_context, cl_uint, const char **, const size_t *, cl_int *);
|
||||
typedef cl_program(*fn_clCreateProgramWithBinary_t)(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
|
||||
typedef cl_int(*fn_clRetainProgram_t)(cl_program);
|
||||
typedef cl_int(*fn_clReleaseProgram_t)(cl_program);
|
||||
typedef cl_int(*fn_clBuildProgram_t)(cl_program, cl_uint, const cl_device_id *, const char *, void (*pfn_notify)(cl_program,void*), void *);
|
||||
typedef cl_int(*fn_clUnloadCompiler_t)(void);
|
||||
typedef cl_int(*fn_clGetProgramInfo_t)(cl_program, cl_program_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clGetProgramBuildInfo_t)(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
|
||||
typedef cl_kernel(*fn_clCreateKernel_t)(cl_program, const char *, cl_int *);
|
||||
typedef cl_int(*fn_clCreateKernelsInProgram_t)(cl_program, cl_uint, cl_kernel *, cl_uint *);
|
||||
typedef cl_int(*fn_clRetainKernel_t)(cl_kernel);
|
||||
typedef cl_int(*fn_clReleaseKernel_t)(cl_kernel);
|
||||
typedef cl_int(*fn_clSetKernelArg_t)(cl_kernel, cl_uint, size_t, const void *);
|
||||
typedef cl_int(*fn_clGetKernelInfo_t)(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clGetKernelWorkGroupInfo_t)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clWaitForEvents_t)(cl_uint, const cl_event *);
|
||||
typedef cl_int(*fn_clGetEventInfo_t)(cl_event, cl_event_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clRetainEvent_t)(cl_event);
|
||||
typedef cl_int(*fn_clReleaseEvent_t)(cl_event);
|
||||
typedef cl_int(*fn_clGetEventProfilingInfo_t)(cl_event, cl_profiling_info, size_t, void *, size_t *);
|
||||
typedef cl_int(*fn_clFlush_t)(cl_command_queue);
|
||||
typedef cl_int(*fn_clFinish_t)(cl_command_queue);
|
||||
typedef cl_int(*fn_clEnqueueReadBuffer_t)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueWriteBuffer_t)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueCopyBuffer_t)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueReadImage_t)(cl_command_queue, cl_mem, cl_bool, const size_t *, const size_t *, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueWriteImage_t)(cl_command_queue, cl_mem, cl_bool, const size_t *, const size_t *, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueCopyImage_t)(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueCopyImageToBuffer_t)(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, size_t, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueCopyBufferToImage_t)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef void*(*fn_clEnqueueMapBuffer_t)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event *, cl_event *, cl_int *);
|
||||
typedef void*(*fn_clEnqueueMapImage_t)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t *, const size_t *, size_t *, size_t *, cl_uint, const cl_event *, cl_event *, cl_int *);
|
||||
typedef cl_int(*fn_clEnqueueUnmapMemObject_t)(cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueNDRangeKernel_t)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueTask_t)(cl_command_queue, cl_kernel, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueNativeKernel_t)(cl_command_queue, void (*user_func)(void *), void *, size_t, cl_uint, const cl_mem *, const void **, cl_uint, const cl_event *, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueMarker_t)(cl_command_queue, cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueWaitForEvents_t)(cl_command_queue, cl_uint, const cl_event *);
|
||||
typedef cl_int(*fn_clEnqueueBarrier_t)(cl_command_queue);
|
||||
typedef void*(*fn_clGetExtensionFunctionAddress_t)(const char *);
|
||||
|
||||
typedef struct CL_FUNCTIONS {
|
||||
fn_clGetPlatformIDs_t getPlatformIDs;
|
||||
fn_clGetPlatformInfo_t getPlatformInfo;
|
||||
fn_clGetDeviceIDs_t getDeviceIDs;
|
||||
fn_clGetDeviceInfo_t getDeviceInfo;
|
||||
fn_clCreateContext_t createContext;
|
||||
fn_clCreateContextFromType_t createContextFromType;
|
||||
fn_clRetainContext_t retainContext;
|
||||
fn_clReleaseContext_t releaseContext;
|
||||
fn_clGetContextInfo_t getContextInfo;
|
||||
fn_clCreateCommandQueue_t createCommandQueue;
|
||||
fn_clRetainCommandQueue_t retainCommandQueue;
|
||||
fn_clReleaseCommandQueue_t releaseCommandQueue;
|
||||
fn_clGetCommandQueueInfo_t getCommandQueue;
|
||||
fn_clCreateBuffer_t createBuffer;
|
||||
fn_clCreateImage2D_t createImage2D;
|
||||
fn_clCreateImage3D_t createImage3D;
|
||||
fn_clRetainMemObject_t retainMemObject;
|
||||
fn_clReleaseMemObject_t releaseMemObject;
|
||||
fn_clGetSupportedImageFormats_t getSupportedImageFormats;
|
||||
fn_clGetMemObjectInfo_t getMemObjectInfo;
|
||||
fn_clGetImageInfo_t getImageInfo;
|
||||
fn_clCreateSampler_t createSampler;
|
||||
fn_clRetainSampler_t retainSampler;
|
||||
fn_clReleaseSampler_t releaseSampler;
|
||||
fn_clGetSamplerInfo_t getSamplerInfo;
|
||||
fn_clCreateProgramWithSource_t createProgramWithSource;
|
||||
fn_clCreateProgramWithBinary_t createProgramWithBinary;
|
||||
fn_clRetainProgram_t retainProgram;
|
||||
fn_clReleaseProgram_t releaseProgram;
|
||||
fn_clBuildProgram_t buildProgram;
|
||||
fn_clUnloadCompiler_t unloadCompiler;
|
||||
fn_clGetProgramInfo_t getProgramInfo;
|
||||
fn_clGetProgramBuildInfo_t getProgramBuildInfo;
|
||||
fn_clCreateKernel_t createKernel;
|
||||
fn_clCreateKernelsInProgram_t createKernelsInProgram;
|
||||
fn_clRetainKernel_t retainKernel;
|
||||
fn_clReleaseKernel_t releaseKernel;
|
||||
fn_clSetKernelArg_t setKernelArg;
|
||||
fn_clGetKernelInfo_t getKernelInfo;
|
||||
fn_clGetKernelWorkGroupInfo_t getKernelWorkGroupInfo;
|
||||
fn_clWaitForEvents_t waitForEvents;
|
||||
fn_clGetEventInfo_t getEventInfo;
|
||||
fn_clRetainEvent_t retainEvent;
|
||||
fn_clReleaseEvent_t releaseEvent;
|
||||
fn_clGetEventProfilingInfo_t getEventProfilingInfo;
|
||||
fn_clFlush_t flush;
|
||||
fn_clFinish_t finish;
|
||||
fn_clEnqueueReadBuffer_t enqueueReadBuffer;
|
||||
fn_clEnqueueWriteBuffer_t enqueueWriteBuffer;
|
||||
fn_clEnqueueCopyBuffer_t enqueueCopyBuffer;
|
||||
fn_clEnqueueReadImage_t enqueueReadImage;
|
||||
fn_clEnqueueWriteImage_t enqueueWriteImage;
|
||||
fn_clEnqueueCopyImage_t enqueueCopyImage;
|
||||
fn_clEnqueueCopyImageToBuffer_t enqueueCopyImageToBuffer;
|
||||
fn_clEnqueueCopyBufferToImage_t enqueueCopyBufferToImage;
|
||||
fn_clEnqueueMapBuffer_t enqueueMapBuffer;
|
||||
fn_clEnqueueMapImage_t enqueueMapImage;
|
||||
fn_clEnqueueUnmapMemObject_t enqueueUnmapMemObject;
|
||||
fn_clEnqueueNDRangeKernel_t enqueueNDRAngeKernel;
|
||||
fn_clEnqueueTask_t enqueueTask;
|
||||
fn_clEnqueueNativeKernel_t enqueueNativeKernel;
|
||||
fn_clEnqueueMarker_t enqueueMarker;
|
||||
fn_clEnqueueWaitForEvents_t enqueueWaitForEvents;
|
||||
fn_clEnqueueBarrier_t enqueueBarrier;
|
||||
fn_clGetExtensionFunctionAddress_t getExtensionFunctionAddress;
|
||||
} CL_FUNCTIONS;
|
||||
|
||||
extern CL_FUNCTIONS cl;
|
||||
|
||||
#define clGetPlatformIDs cl.getPlatformIDs
|
||||
#define clGetPlatformInfo cl.getPlatformInfo
|
||||
#define clGetDeviceIDs cl.getDeviceIDs
|
||||
#define clGetDeviceInfo cl.getDeviceInfo
|
||||
#define clCreateContext cl.createContext
|
||||
#define clCreateContextFromType cl.createContextFromType
|
||||
#define clRetainContext cl.retainContext
|
||||
#define clReleaseContext cl.releaseContext
|
||||
#define clGetContextInfo cl.getContextInfo
|
||||
#define clCreateCommandQueue cl.createCommandQueue
|
||||
#define clRetainCommandQueue cl.retainCommandQueue
|
||||
#define clReleaseCommandQueue cl.releaseCommandQueue
|
||||
#define clGetCommandQueueInfo cl.getCommandQueue
|
||||
#define clCreateBuffer cl.createBuffer
|
||||
#define clCreateSubBuffer cl.createSubBuffer
|
||||
#define clCreateImage2D cl.createImage2D
|
||||
#define clCreateImage3D cl.createImage3D
|
||||
#define clRetainMemObject cl.retainMemObject
|
||||
#define clReleaseMemObject cl.releaseMemObject
|
||||
#define clGetSupportedImageFormats cl.getSupportedImageFormats
|
||||
#define clGetMemObjectInfo cl.getMemObjectInfo
|
||||
#define clGetImageInfo cl.getImageInfo
|
||||
#define clSetMemObjectDestructorCallback cl.setMemObjectDestructorCallback
|
||||
#define clCreateSampler cl.createSampler
|
||||
#define clRetainSampler cl.retainSampler
|
||||
#define clReleaseSampler cl.releaseSampler
|
||||
#define clGetSamplerInfo cl.getSamplerInfo
|
||||
#define clCreateProgramWithSource cl.createProgramWithSource
|
||||
#define clCreateProgramWithBinary cl.createProgramWithBinary
|
||||
#define clRetainProgram cl.retainProgram
|
||||
#define clReleaseProgram cl.releaseProgram
|
||||
#define clBuildProgram cl.buildProgram
|
||||
#define clUnloadCompiler cl.unloadCompiler
|
||||
#define clGetProgramInfo cl.getProgramInfo
|
||||
#define clGetProgramBuildInfo cl.getProgramBuildInfo
|
||||
#define clCreateKernel cl.createKernel
|
||||
#define clCreateKernelsInProgram cl.createKernelsInProgram
|
||||
#define clRetainKernel cl.retainKernel
|
||||
#define clReleaseKernel cl.releaseKernel
|
||||
#define clSetKernelArg cl.setKernelArg
|
||||
#define clGetKernelInfo cl.getKernelInfo
|
||||
#define clGetKernelWorkGroupInfo cl.getKernelWorkGroupInfo
|
||||
#define clWaitForEvents cl.waitForEvents
|
||||
#define clGetEventInfo cl.getEventInfo
|
||||
#define clCreateUserEvent cl.createUserEvent
|
||||
#define clRetainEvent cl.retainEvent
|
||||
#define clReleaseEvent cl.releaseEvent
|
||||
#define clSetUserEventStatus cl.setUserEventStatus
|
||||
#define clSetEventCallback cl.setEventCallback
|
||||
#define clGetEventProfilingInfo cl.getEventProfilingInfo
|
||||
#define clFlush cl.flush
|
||||
#define clFinish cl.finish
|
||||
#define clEnqueueReadBuffer cl.enqueueReadBuffer
|
||||
#define clEnqueueReadBufferRect cl.enqueueReadBufferRect
|
||||
#define clEnqueueWriteBuffer cl.enqueueWriteBuffer
|
||||
#define clEnqueueWriteBufferRect cl.enqueueWriteBufferRect
|
||||
#define clEnqueueCopyBuffer cl.enqueueCopyBuffer
|
||||
#define clEnqueueCopyBufferRect cl.enqueueCopyBufferRect
|
||||
#define clEnqueueReadImage cl.enqueueReadImage
|
||||
#define clEnqueueWriteImage cl.enqueueWriteImage
|
||||
#define clEnqueueCopyImage cl.enqueueCopyImage
|
||||
#define clEnqueueCopyImageToBuffer cl.enqueueCopyImageToBuffer
|
||||
#define clEnqueueCopyBufferToImage cl.enqueueCopyBufferToImage
|
||||
#define clEnqueueMapBuffer cl.enqueueMapBuffer
|
||||
#define clEnqueueMapImage cl.enqueueMapImage
|
||||
#define clEnqueueUnmapMemObject cl.enqueueUnmapMemObject
|
||||
#define clEnqueueNDRangeKernel cl.enqueueNDRAngeKernel
|
||||
#define clEnqueueTask cl.enqueueTask
|
||||
#define clEnqueueNativeKernel cl.enqueueNativeKernel
|
||||
#define clEnqueueMarker cl.enqueueMarker
|
||||
#define clEnqueueWaitForEvents cl.enqueueWaitForEvents
|
||||
#define clEnqueueBarrier cl.enqueueBarrier
|
||||
#define clGetExtensionFunctionAddress cl.getExtensionFunctionAddress
|
||||
|
||||
/*
 * Resolve the symbol `name` from the shared-library handle `dll` (a variable
 * that must be in scope at the expansion site) and store the result in `ref`.
 * When the symbol cannot be found the library is closed and the *enclosing
 * function* returns CL_INVALID_PLATFORM.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement: it can be used safely as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define CL_LOAD_FN(name, ref) \
    do { \
        (ref) = dlsym(dll, (name)); \
        if ((ref) == NULL) { \
            dlclose(dll); \
            return CL_INVALID_PLATFORM; \
        } \
    } while (0)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* DYNAMIC_CL_H */
|
824
vp8/common/opencl/filter_cl.c
Normal file
824
vp8/common/opencl/filter_cl.c
Normal file
@@ -0,0 +1,824 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
//ACW: Remove me after debugging.
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vp8_opencl.h"
|
||||
#include "filter_cl.h"
|
||||
#include "../blockd.h"
|
||||
|
||||
#define SIXTAP_FILTER_LEN 6
|
||||
|
||||
const char *filterCompileOptions = "-Ivp8/common/opencl -DVP8_FILTER_WEIGHT=128 -DVP8_FILTER_SHIFT=7 -DFILTER_OFFSET";
|
||||
const char *filter_cl_file_name = "vp8/common/opencl/filter_cl.cl";
|
||||
|
||||
#define STATIC_MEM 1
|
||||
#if STATIC_MEM
|
||||
static cl_mem int_mem = NULL;
|
||||
#endif
|
||||
|
||||
void cl_destroy_filter(){
|
||||
|
||||
if (cl_data.filter_program)
|
||||
clReleaseProgram(cl_data.filter_program);
|
||||
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_block_variation_kernel);
|
||||
#if !TWO_PASS_SIXTAP
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_sixtap_predict_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_sixtap_predict8x8_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_sixtap_predict8x4_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_sixtap_predict16x16_kernel);
|
||||
#else
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_filter_block2d_first_pass_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_filter_block2d_second_pass_kernel);
|
||||
#endif
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_bilinear_predict4x4_kernel);
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_bilinear_predict8x4_kernel);
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_bilinear_predict8x8_kernel);
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_bilinear_predict16x16_kernel);
|
||||
|
||||
#if MEM_COPY_KERNEL
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_memcpy_kernel);
|
||||
#endif
|
||||
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_filter_block2d_bil_first_pass_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_filter_block2d_bil_second_pass_kernel);
|
||||
|
||||
#if STATIC_MEM
|
||||
if (int_mem != NULL)
|
||||
clReleaseMemObject(int_mem);
|
||||
int_mem = NULL;
|
||||
#endif
|
||||
|
||||
cl_data.filter_program = NULL;
|
||||
}
|
||||
|
||||
int cl_init_filter() {
|
||||
int err;
|
||||
|
||||
|
||||
// Create the filter compute program from the file-defined source code
|
||||
if ( cl_load_program(&cl_data.filter_program, filter_cl_file_name,
|
||||
filterCompileOptions) != CL_SUCCESS )
|
||||
return VP8_CL_TRIED_BUT_FAILED;
|
||||
|
||||
// Create the compute kernel in the program we wish to run
|
||||
#if TWO_PASS_SIXTAP
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_filter_block2d_first_pass_kernel,"vp8_filter_block2d_first_pass_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_filter_block2d_second_pass_kernel,"vp8_filter_block2d_second_pass_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_filter_block2d_first_pass_kernel,vp8_filter_block2d_first_pass_kernel_size);
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_filter_block2d_second_pass_kernel,vp8_filter_block2d_second_pass_kernel_size);
|
||||
#else
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_sixtap_predict_kernel,"vp8_sixtap_predict_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_sixtap_predict_kernel,vp8_sixtap_predict_kernel_size);
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_sixtap_predict8x8_kernel,"vp8_sixtap_predict8x8_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_sixtap_predict8x8_kernel,vp8_sixtap_predict8x8_kernel_size);
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_sixtap_predict8x4_kernel,"vp8_sixtap_predict8x4_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_sixtap_predict8x4_kernel,vp8_sixtap_predict8x4_kernel_size);
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_sixtap_predict16x16_kernel,"vp8_sixtap_predict16x16_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_sixtap_predict16x16_kernel,vp8_sixtap_predict16x16_kernel_size);
|
||||
#endif
|
||||
|
||||
//VP8_CL_CALC_LOCAL_SIZE(vp8_filter_block2d_bil_first_pass_kernel,vp8_filter_block2d_bil_first_pass_kernel_size);
|
||||
//VP8_CL_CALC_LOCAL_SIZE(vp8_filter_block2d_bil_second_pass_kernel,vp8_filter_block2d_bil_second_pass_kernel_size);
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_filter_block2d_bil_first_pass_kernel,"vp8_filter_block2d_bil_first_pass_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_filter_block2d_bil_second_pass_kernel,"vp8_filter_block2d_bil_second_pass_kernel");
|
||||
|
||||
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_bilinear_predict4x4_kernel,"vp8_bilinear_predict4x4_kernel");
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_bilinear_predict8x4_kernel,"vp8_bilinear_predict8x4_kernel");
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_bilinear_predict8x8_kernel,"vp8_bilinear_predict8x8_kernel");
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_bilinear_predict16x16_kernel,"vp8_bilinear_predict16x16_kernel");
|
||||
|
||||
#if MEM_COPY_KERNEL
|
||||
VP8_CL_CREATE_KERNEL(cl_data,filter_program,vp8_memcpy_kernel,"vp8_memcpy_kernel");
|
||||
VP8_CL_CALC_LOCAL_SIZE(vp8_memcpy_kernel,vp8_memcpy_kernel_size);
|
||||
#endif
|
||||
|
||||
#if STATIC_MEM
|
||||
VP8_CL_CREATE_BUF(NULL, int_mem, NULL, sizeof(cl_int)*21*16, NULL, ,err);
|
||||
#endif
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_first_pass_cl(
|
||||
cl_command_queue cq,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
cl_mem int_mem,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int int_height,
|
||||
unsigned int int_width,
|
||||
int xoffset
|
||||
){
|
||||
int err;
|
||||
size_t global = int_width*int_height;
|
||||
size_t local = cl_data.vp8_filter_block2d_first_pass_kernel_size;
|
||||
if (local > global)
|
||||
local = global;
|
||||
|
||||
err = clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 0, sizeof (cl_mem), &src_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 1, sizeof (int), &src_offset);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 2, sizeof (cl_mem), &int_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 3, sizeof (cl_uint), &src_pixels_per_line);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 4, sizeof (cl_uint), &int_height);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 5, sizeof (cl_int), &int_width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_first_pass_kernel, 6, sizeof (int), &xoffset);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",
|
||||
,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel( cq, cl_data.vp8_filter_block2d_first_pass_kernel, 1, NULL, &global, &local , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_second_pass_cl(
|
||||
cl_command_queue cq,
|
||||
cl_mem int_mem,
|
||||
int int_offset,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
int yoffset
|
||||
){
|
||||
int err;
|
||||
size_t global = output_width*output_height;
|
||||
size_t local = cl_data.vp8_filter_block2d_second_pass_kernel_size;
|
||||
if (local > global){
|
||||
//printf("Local is now %ld\n",global);
|
||||
local = global;
|
||||
}
|
||||
|
||||
/* Set kernel arguments */
|
||||
err = clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 0, sizeof (cl_mem), &int_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 1, sizeof (int), &int_offset);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 2, sizeof (cl_mem), &dst_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 3, sizeof (int), &dst_offset);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 4, sizeof (int), &dst_pitch);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 5, sizeof (int), &output_width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 6, sizeof (int), &output_width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 7, sizeof (int), &output_height);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 8, sizeof (int), &output_width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_second_pass_kernel, 9, sizeof (int), &yoffset);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",
|
||||
,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel( cq, cl_data.vp8_filter_block2d_second_pass_kernel, 1, NULL, &global, &local , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_sixtap_single_pass(
|
||||
cl_command_queue cq,
|
||||
cl_kernel kernel,
|
||||
size_t local,
|
||||
size_t global,
|
||||
cl_mem src_mem,
|
||||
cl_mem dst_mem,
|
||||
unsigned char *src_base,
|
||||
int src_offset,
|
||||
size_t src_len,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
int dst_offset,
|
||||
int dst_pitch,
|
||||
size_t dst_len
|
||||
){
|
||||
int err;
|
||||
|
||||
#if !STATIC_MEM
|
||||
cl_mem int_mem;
|
||||
#endif
|
||||
|
||||
int free_src = 0, free_dst = 0;
|
||||
|
||||
if (local > global){
|
||||
local = global;
|
||||
}
|
||||
|
||||
/* Make space for kernel input/output data.
|
||||
* Initialize the buffer as well if needed.
|
||||
*/
|
||||
if (src_mem == NULL){
|
||||
VP8_CL_CREATE_BUF( cq, src_mem,, sizeof (unsigned char) * src_len, src_base-2,,);
|
||||
src_offset = 2;
|
||||
free_src = 1;
|
||||
} else {
|
||||
src_offset -= 2*src_pixels_per_line;
|
||||
}
|
||||
|
||||
if (dst_mem == NULL){
|
||||
VP8_CL_CREATE_BUF( cq, dst_mem,, sizeof (unsigned char) * dst_len + dst_offset, dst_base,, );
|
||||
free_dst = 1;
|
||||
}
|
||||
|
||||
#if !STATIC_MEM
|
||||
CL_CREATE_BUF( cq, int_mem,, sizeof(cl_int)*FData_height*FData_width, NULL,, );
|
||||
#endif
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &src_mem);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof (int), &src_offset);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof (cl_int), &src_pixels_per_line);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof (cl_int), &xoffset);
|
||||
err |= clSetKernelArg(kernel, 4, sizeof (cl_int), &yoffset);
|
||||
err |= clSetKernelArg(kernel, 5, sizeof (cl_mem), &dst_mem);
|
||||
err |= clSetKernelArg(kernel, 6, sizeof (cl_int), &dst_offset);
|
||||
err |= clSetKernelArg(kernel, 7, sizeof (int), &dst_pitch);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",
|
||||
,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel( cq, kernel, 1, NULL, &global, &local , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
|
||||
if (free_src == 1)
|
||||
clReleaseMemObject(src_mem);
|
||||
|
||||
if (free_dst == 1){
|
||||
/* Read back the result data from the device */
|
||||
err = clEnqueueReadBuffer(cq, dst_mem, CL_FALSE, 0, sizeof (unsigned char) * dst_len + dst_offset, dst_base, 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to read output array!\n",
|
||||
,
|
||||
);
|
||||
clReleaseMemObject(dst_mem);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_run_cl(
|
||||
cl_command_queue cq,
|
||||
cl_mem src_mem,
|
||||
cl_mem dst_mem,
|
||||
unsigned char *src_base,
|
||||
int src_offset,
|
||||
size_t src_len,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
int dst_offset,
|
||||
int dst_pitch,
|
||||
size_t dst_len,
|
||||
unsigned int FData_height,
|
||||
unsigned int FData_width,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
int int_offset
|
||||
)
|
||||
{
|
||||
int err;
|
||||
|
||||
#if !STATIC_MEM
|
||||
cl_mem int_mem;
|
||||
#endif
|
||||
|
||||
int free_src = 0, free_dst = 0;
|
||||
|
||||
/* Make space for kernel input/output data.
|
||||
* Initialize the buffer as well if needed.
|
||||
*/
|
||||
if (src_mem == NULL){
|
||||
VP8_CL_CREATE_BUF( cq, src_mem,, sizeof (unsigned char) * src_len, src_base-2,,);
|
||||
src_offset = 2;
|
||||
free_src = 1;
|
||||
} else {
|
||||
src_offset -= 2*src_pixels_per_line;
|
||||
}
|
||||
|
||||
if (dst_mem == NULL){
|
||||
VP8_CL_CREATE_BUF( cq, dst_mem,, sizeof (unsigned char) * dst_len + dst_offset, dst_base,, );
|
||||
free_dst = 1;
|
||||
}
|
||||
|
||||
#if !STATIC_MEM
|
||||
CL_CREATE_BUF( cq, int_mem,, sizeof(cl_int)*FData_height*FData_width, NULL,, );
|
||||
#endif
|
||||
|
||||
vp8_filter_block2d_first_pass_cl(
|
||||
cq, src_mem, src_offset, int_mem, src_pixels_per_line,
|
||||
FData_height, FData_width, xoffset
|
||||
);
|
||||
|
||||
vp8_filter_block2d_second_pass_cl(cq,int_mem,int_offset,dst_mem,dst_offset,dst_pitch,
|
||||
output_height,output_width,yoffset);
|
||||
|
||||
if (free_src == 1)
|
||||
clReleaseMemObject(src_mem);
|
||||
|
||||
if (free_dst == 1){
|
||||
/* Read back the result data from the device */
|
||||
err = clEnqueueReadBuffer(cq, dst_mem, CL_FALSE, 0, sizeof (unsigned char) * dst_len + dst_offset, dst_base, 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to read output array!\n",
|
||||
,
|
||||
);
|
||||
clReleaseMemObject(dst_mem);
|
||||
}
|
||||
|
||||
#if !STATIC_MEM
|
||||
clReleaseMemObject(int_mem);
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict4x4_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
int output_width=4, output_height=4, FData_height=9, FData_width=4;
|
||||
|
||||
//Size of output to transfer
|
||||
int dst_len = DST_LEN(dst_pitch,output_height,output_width);
|
||||
int src_len = SIXTAP_SRC_LEN(FData_width,FData_height,src_pixels_per_line);
|
||||
|
||||
#if TWO_PASS_SIXTAP
|
||||
int int_offset = 8;
|
||||
unsigned char *src_ptr = src_base + src_offset;
|
||||
|
||||
vp8_sixtap_run_cl(cq, src_mem, dst_mem,
|
||||
(src_ptr-2*src_pixels_per_line),src_offset, src_len,
|
||||
src_pixels_per_line, xoffset,yoffset,dst_base,dst_offset,
|
||||
dst_pitch,dst_len,FData_height,FData_width,output_height,
|
||||
output_width,int_offset
|
||||
);
|
||||
#else
|
||||
vp8_sixtap_single_pass(
|
||||
cq,
|
||||
cl_data.vp8_sixtap_predict_kernel,
|
||||
cl_data.vp8_sixtap_predict_kernel_size,
|
||||
FData_height*FData_width,
|
||||
src_mem,
|
||||
dst_mem,
|
||||
src_base,
|
||||
src_offset,
|
||||
src_len,
|
||||
src_pixels_per_line,
|
||||
xoffset,
|
||||
yoffset,
|
||||
dst_base,
|
||||
dst_offset,
|
||||
dst_pitch,
|
||||
dst_len
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
int output_width=8, output_height=8, FData_height=13, FData_width=8;
|
||||
|
||||
//Size of output to transfer
|
||||
int dst_len = DST_LEN(dst_pitch,output_height,output_width);
|
||||
int src_len = SIXTAP_SRC_LEN(FData_width,FData_height,src_pixels_per_line);
|
||||
|
||||
#if TWO_PASS_SIXTAP
|
||||
int int_offset = 16;
|
||||
unsigned char *src_ptr = src_base + src_offset;
|
||||
|
||||
vp8_sixtap_run_cl(cq, src_mem, dst_mem,
|
||||
(src_ptr-2*src_pixels_per_line),src_offset, src_len,
|
||||
src_pixels_per_line, xoffset,yoffset,dst_base,dst_offset,
|
||||
dst_pitch,dst_len,FData_height,FData_width,output_height,
|
||||
output_width,int_offset
|
||||
);
|
||||
#else
|
||||
vp8_sixtap_single_pass(
|
||||
cq,
|
||||
cl_data.vp8_sixtap_predict8x8_kernel,
|
||||
cl_data.vp8_sixtap_predict8x8_kernel_size,
|
||||
FData_height*FData_width,
|
||||
src_mem,
|
||||
dst_mem,
|
||||
src_base,
|
||||
src_offset,
|
||||
src_len,
|
||||
src_pixels_per_line,
|
||||
xoffset,
|
||||
yoffset,
|
||||
dst_base,
|
||||
dst_offset,
|
||||
dst_pitch,
|
||||
dst_len
|
||||
);
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
int output_width=8, output_height=4, FData_height=9, FData_width=8;
|
||||
|
||||
//Size of output to transfer
|
||||
int dst_len = DST_LEN(dst_pitch,output_height,output_width);
|
||||
int src_len = SIXTAP_SRC_LEN(FData_width,FData_height,src_pixels_per_line);
|
||||
|
||||
#if TWO_PASS_SIXTAP
|
||||
int int_offset = 16;
|
||||
unsigned char *src_ptr = src_base + src_offset;
|
||||
|
||||
vp8_sixtap_run_cl(cq, src_mem, dst_mem,
|
||||
(src_ptr-2*src_pixels_per_line),src_offset, src_len,
|
||||
src_pixels_per_line, xoffset,yoffset,dst_base,dst_offset,
|
||||
dst_pitch,dst_len,FData_height,FData_width,output_height,
|
||||
output_width,int_offset
|
||||
);
|
||||
#else
|
||||
vp8_sixtap_single_pass(
|
||||
cq,
|
||||
cl_data.vp8_sixtap_predict8x4_kernel,
|
||||
cl_data.vp8_sixtap_predict8x4_kernel_size,
|
||||
FData_height*FData_width,
|
||||
src_mem,
|
||||
dst_mem,
|
||||
src_base,
|
||||
src_offset,
|
||||
src_len,
|
||||
src_pixels_per_line,
|
||||
xoffset,
|
||||
yoffset,
|
||||
dst_base,
|
||||
dst_offset,
|
||||
dst_pitch,
|
||||
dst_len
|
||||
);
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
int output_width=16, output_height=16, FData_height=21, FData_width=16;
|
||||
|
||||
//Size of output to transfer
|
||||
int dst_len = DST_LEN(dst_pitch,output_height,output_width);
|
||||
int src_len = SIXTAP_SRC_LEN(FData_width,FData_height,src_pixels_per_line);
|
||||
|
||||
#if TWO_PASS_SIXTAP
|
||||
int int_offset = 32;
|
||||
unsigned char *src_ptr = src_base + src_offset;
|
||||
|
||||
vp8_sixtap_run_cl(cq, src_mem, dst_mem,
|
||||
(src_ptr-2*src_pixels_per_line),src_offset, src_len,
|
||||
src_pixels_per_line, xoffset,yoffset,dst_base,dst_offset,
|
||||
dst_pitch,dst_len,FData_height,FData_width,output_height,
|
||||
output_width,int_offset
|
||||
);
|
||||
#else
|
||||
vp8_sixtap_single_pass(
|
||||
cq,
|
||||
cl_data.vp8_sixtap_predict16x16_kernel,
|
||||
cl_data.vp8_sixtap_predict16x16_kernel_size,
|
||||
FData_height*FData_width,
|
||||
src_mem,
|
||||
dst_mem,
|
||||
src_base,
|
||||
src_offset,
|
||||
src_len,
|
||||
src_pixels_per_line,
|
||||
xoffset,
|
||||
yoffset,
|
||||
dst_base,
|
||||
dst_offset,
|
||||
dst_pitch,
|
||||
dst_len
|
||||
);
|
||||
#endif
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void vp8_filter_block2d_bil_first_pass_cl(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
cl_mem int_mem,
|
||||
int src_pixels_per_line,
|
||||
int height,
|
||||
int width,
|
||||
int xoffset
|
||||
)
|
||||
{
|
||||
int err;
|
||||
size_t global = width*height;
|
||||
int free_src = 0;
|
||||
|
||||
if (src_mem == NULL){
|
||||
int src_len = BIL_SRC_LEN(width,height,src_pixels_per_line);
|
||||
|
||||
/*Make space for kernel input/output data. Initialize the buffer as well if needed. */
|
||||
VP8_CL_CREATE_BUF(cq, src_mem, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
|
||||
sizeof (unsigned char) * src_len, src_base+src_offset,,
|
||||
);
|
||||
src_offset = 0; //Set to zero as long as src_mem starts at base+offset
|
||||
free_src = 1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 0, sizeof (cl_mem), &src_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 1, sizeof (int), &src_offset);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 2, sizeof (cl_mem), &int_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 3, sizeof (int), &src_pixels_per_line);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 4, sizeof (int), &height);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 5, sizeof (int), &width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_first_pass_kernel, 6, sizeof (int), &xoffset);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",
|
||||
,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel( cq, cl_data.vp8_filter_block2d_bil_first_pass_kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
|
||||
if (free_src == 1)
|
||||
clReleaseMemObject(src_mem);
|
||||
}
|
||||
|
||||
|
||||
void vp8_filter_block2d_bil_second_pass_cl(
|
||||
cl_command_queue cq,
|
||||
cl_mem int_mem,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch,
|
||||
int height,
|
||||
int width,
|
||||
int yoffset
|
||||
)
|
||||
{
|
||||
int err;
|
||||
size_t global = width*height;
|
||||
|
||||
//Size of output data
|
||||
int dst_len = DST_LEN(dst_pitch,height,width);
|
||||
|
||||
int free_dst = 0;
|
||||
if (dst_mem == NULL){
|
||||
VP8_CL_CREATE_BUF(cq, dst_mem, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR,
|
||||
sizeof (unsigned char) * dst_len + dst_offset, dst_base,,
|
||||
);
|
||||
free_dst = 1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 0, sizeof (cl_mem), &int_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 1, sizeof (cl_mem), &dst_mem);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 2, sizeof (int), &dst_offset);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 3, sizeof (int), &dst_pitch);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 4, sizeof (int), &height);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 5, sizeof (int), &width);
|
||||
err |= clSetKernelArg(cl_data.vp8_filter_block2d_bil_second_pass_kernel, 6, sizeof (int), &yoffset);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",
|
||||
,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel( cq, cl_data.vp8_filter_block2d_bil_second_pass_kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
|
||||
if (free_dst == 1){
|
||||
/* Read back the result data from the device */
|
||||
err = clEnqueueReadBuffer(cq, dst_mem, CL_FALSE, 0, sizeof (unsigned char) * dst_len + dst_offset, dst_base, 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to read output array!\n",
|
||||
,
|
||||
);
|
||||
clReleaseMemObject(dst_mem);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict4x4_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
const int height = 4, width = 4;
|
||||
|
||||
#if !STATIC_MEM
|
||||
int err;
|
||||
cl_mem int_mem = NULL;
|
||||
VP8_CL_CREATE_BUF(NULL, int_mem, NULL, sizeof(cl_int)*21*16, NULL, ,);
|
||||
#endif
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_cl(cq, src_base, src_mem, src_offset, int_mem, src_pixels_per_line, height + 1, width, xoffset);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_cl(cq, int_mem, dst_base, dst_mem, dst_offset, dst_pitch, height, width, yoffset);
|
||||
|
||||
#if !STATIC_MEM
|
||||
clReleaseMemObject(int_mem);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
const int height = 8, width = 8;
|
||||
|
||||
#if !STATIC_MEM
|
||||
int err;
|
||||
cl_mem int_mem = NULL;
|
||||
VP8_CL_CREATE_BUF(NULL, int_mem, NULL, sizeof(cl_int)*21*16, NULL, ,);
|
||||
#endif
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_cl(cq, src_base, src_mem, src_offset, int_mem, src_pixels_per_line, height + 1, width, xoffset);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_cl(cq, int_mem, dst_base, dst_mem, dst_offset, dst_pitch, height, width, yoffset);
|
||||
|
||||
#if !STATIC_MEM
|
||||
clReleaseMemObject(int_mem);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
const int height = 4, width = 8;
|
||||
|
||||
#if !STATIC_MEM
|
||||
int err;
|
||||
cl_mem int_mem = NULL;
|
||||
VP8_CL_CREATE_BUF(NULL, int_mem, NULL, sizeof(cl_int)*21*16, NULL, ,);
|
||||
#endif
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_cl(cq, src_base, src_mem, src_offset, int_mem, src_pixels_per_line, height + 1, width, xoffset);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_cl(cq, int_mem, dst_base, dst_mem, dst_offset, dst_pitch, height, width, yoffset);
|
||||
|
||||
#if !STATIC_MEM
|
||||
clReleaseMemObject(int_mem);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_cl
|
||||
(
|
||||
cl_command_queue cq,
|
||||
unsigned char *src_base,
|
||||
cl_mem src_mem,
|
||||
int src_offset,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_base,
|
||||
cl_mem dst_mem,
|
||||
int dst_offset,
|
||||
int dst_pitch
|
||||
) {
|
||||
|
||||
const int height = 16, width = 16;
|
||||
|
||||
#if !STATIC_MEM
|
||||
int err;
|
||||
cl_mem int_mem = NULL;
|
||||
VP8_CL_CREATE_BUF(NULL, int_mem, NULL, sizeof(cl_int)*21*16, NULL, ,);
|
||||
#endif
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_cl(cq, src_base, src_mem, src_offset, int_mem, src_pixels_per_line, height + 1, width, xoffset);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_cl(cq, int_mem, dst_base, dst_mem, dst_offset, dst_pitch, height, width, yoffset);
|
||||
|
||||
#if !STATIC_MEM
|
||||
clReleaseMemObject(int_mem);
|
||||
#endif
|
||||
|
||||
}
|
562
vp8/common/opencl/filter_cl.cl
Normal file
562
vp8/common/opencl/filter_cl.cl
Normal file
@@ -0,0 +1,562 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
/* Two-tap bilinear weight pairs; every pair sums to 128. */
__constant int bilinear_filters[8][2] = {
    { 128,   0 }, { 112,  16 }, {  96,  32 }, {  80,  48 },
    {  64,  64 }, {  48,  80 }, {  32,  96 }, {  16, 112 }
};
|
||||
|
||||
/*
 * Six-tap sub-pixel filter coefficients, selected by the filter_offset
 * kernel argument.  Each row holds six taps that sum to 128, followed by two
 * zero entries: the table was originally 8x6 and is padded to 8x8 for
 * vector operations.
 */
__constant short sub_pel_filters[8][8] = {
    { 0,   0, 128,   0,   0, 0, 0, 0 }, /* 1/8 pel positions follow the alpha -0.5 bicubic */
    { 0,  -6, 123,  12,  -1, 0, 0, 0 },
    { 2, -11, 108,  36,  -8, 1, 0, 0 }, /* New 1/4 pel 6 tap filter */
    { 0,  -9,  93,  50,  -6, 0, 0, 0 },
    { 3, -16,  77,  77, -16, 3, 0, 0 }, /* New 1/2 pel 6 tap filter */
    { 0,  -6,  50,  93,  -9, 0, 0, 0 },
    { 1,  -8,  36, 108, -11, 2, 0, 0 }, /* New 1/4 pel 6 tap filter */
    { 0,  -1,  12, 123,  -6, 0, 0, 0 },
};
|
||||
|
||||
|
||||
/*
 * Horizontal six-tap pass.  Each work-item whose global id is below
 * output_width*output_height computes one intermediate sample from six
 * neighbouring source bytes (positions -2..+3), rounds, shifts by
 * VP8_FILTER_SHIFT, clamps to 0..255 and stores it as an int at
 * output_ptr[id].  Other work-items do nothing.
 */
kernel void vp8_filter_block2d_first_pass_kernel(
    __global unsigned char *src_base,
    int src_offset,
    __global int *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
){
    uint gid = get_global_id(0);

    /* Work-items beyond the block have nothing to compute. */
    if (gid >= (output_width*output_height))
        return;

    global unsigned char *pixels = &src_base[src_offset];
    __constant short *taps = sub_pel_filters[filter_offset];

    /* Source index of the centre sample: the linear id plus the row padding
     * (stride minus block width) for each completed row. */
    int centre = gid + (gid/output_width * (src_pixels_per_line - output_width));

    int acc = (VP8_FILTER_WEIGHT >> 1); /* Rounding */
    acc += (int)(pixels[centre - 2] * taps[0]);
    acc += (int)(pixels[centre - 1] * taps[1]);
    acc += (int)(pixels[centre]     * taps[2]);
    acc += (int)(pixels[centre + 1] * taps[3]);
    acc += (int)(pixels[centre + 2] * taps[4]);
    acc += (int)(pixels[centre + 3] * taps[5]);

    /* Normalize back to 0-255 */
    acc = acc >> VP8_FILTER_SHIFT;
    acc = (acc < 0) ? 0 : ((acc > 255) ? 255 : acc);

    output_ptr[gid] = acc;
}
|
||||
|
||||
/*
 * Vertical six-tap pass.  Each work-item whose global id is below
 * output_width*output_height combines six intermediate samples spaced
 * pixel_step apart (positions -2..+3 steps), rounds, shifts by
 * VP8_FILTER_SHIFT, clamps to 0..255 and writes one byte to the output
 * block, whose rows are output_pitch bytes apart.
 */
kernel void vp8_filter_block2d_second_pass_kernel
(
    __global int *src_base,
    int src_offset,
    __global unsigned char *output_base,
    int output_offset,
    int output_pitch,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
) {
    uint gid = get_global_id(0);

    /* Work-items beyond the block have nothing to compute. */
    if (gid >= (output_width * output_height))
        return;

    global int *samples = &src_base[src_offset];
    global unsigned char *dest = &output_base[output_offset];
    __constant short *taps = sub_pel_filters[filter_offset];

    int step2 = 2*(int)pixel_step;
    int step3 = 3*(int)pixel_step;

    /* Padding between the end of one intermediate row and the next. */
    unsigned int row_padding = src_pixels_per_line - output_width;

    int row = gid/output_width;

    /* Index of the centre sample in the intermediate buffer and of the
     * destination byte in the output block. */
    int centre = gid + (row * row_padding);
    int dest_index = gid%output_width + (row * output_pitch);

    /* Apply filter */
    int acc = (VP8_FILTER_WEIGHT >> 1); /* Rounding */
    acc += ((int)samples[centre - step2] * taps[0]);
    acc += ((int)samples[centre -(int)pixel_step] * taps[1]);
    acc += ((int)samples[centre] * taps[2]);
    acc += ((int)samples[centre + pixel_step] * taps[3]);
    acc += ((int)samples[centre + step2] * taps[4]);
    acc += ((int)samples[centre + step3] * taps[5]);

    /* Normalize back to 0-255 */
    acc = acc >> VP8_FILTER_SHIFT;
    acc = (acc < 0) ? 0 : ((acc > 255) ? 255 : acc);

    dest[dest_index] = (unsigned char)acc;
}
|
||||
|
||||
|
||||
/*
 * Horizontal pass of the 2-tap bilinear filter; each work-item blends one
 * source pixel with its right-hand neighbour and stores the int result.
 *
 * src_base/src_offset   source bytes and index of the block origin
 * output_ptr            densely packed intermediate buffer
 *                       (row * output_width + column)
 * src_pixels_per_line   source row stride
 * output_height/width   dimensions of the intermediate block
 * filter_offset         row of bilinear_filters to apply
 *
 * Work-items whose global id is >= output_width*output_height do nothing.
 */
kernel void vp8_filter_block2d_bil_first_pass_kernel(
    __global unsigned char *src_base,
    int src_offset,
    __global int *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
)
{
    const uint tid = get_global_id(0);

    if (tid >= output_width * output_height)
        return;

    global unsigned char *src_ptr = &src_base[src_offset];
    __constant int *vp8_filter = bilinear_filters[filter_offset];

    const unsigned int row = tid / output_width;
    const unsigned int col = tid % output_width;

    /* output_width plus the per-row stride slack is just the source stride. */
    src_offset = row * src_pixels_per_line + col;
    const unsigned int out_offset = output_width * row + col;

    /* Apply bilinear filter */
    const int blended = ((int)src_ptr[src_offset] * vp8_filter[0]) +
                        ((int)src_ptr[src_offset+1] * vp8_filter[1]) +
                        (VP8_FILTER_WEIGHT / 2);

    output_ptr[out_offset] = blended >> VP8_FILTER_SHIFT;
}
|
||||
|
||||
/*
 * Vertical pass of the 2-tap bilinear filter; each work-item blends one
 * intermediate sample with the sample one row below it and writes one byte.
 *
 * src_ptr                    densely packed intermediate samples
 *                            (row stride == output_width)
 * output_base/output_offset  destination bytes and index of the block origin
 * output_pitch               destination row stride
 * output_height/width        dimensions of the output block
 * filter_offset              row of bilinear_filters to apply
 *
 * Work-items whose global id is >= output_width*output_height do nothing.
 */
kernel void vp8_filter_block2d_bil_second_pass_kernel
(
    __global int *src_ptr,
    __global unsigned char *output_base,
    int output_offset,
    int output_pitch,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
)
{
    const uint tid = get_global_id(0);

    if (tid >= output_width * output_height)
        return;

    global unsigned char *output_ptr = &output_base[output_offset];
    __constant int *vp8_filter = bilinear_filters[filter_offset];

    const unsigned int row = tid / output_width;
    const unsigned int col = tid % output_width;

    const int src_offset = row*(output_width) + col;
    const int out_offset = row*output_pitch + col;

    /* Apply filter: current sample and the one directly below it. */
    const int sum = ((int)src_ptr[src_offset] * vp8_filter[0]) +
                    ((int)src_ptr[src_offset+output_width] * vp8_filter[1]) +
                    (VP8_FILTER_WEIGHT / 2);

    output_ptr[out_offset] = (unsigned int)(sum >> VP8_FILTER_SHIFT);
}
|
||||
|
||||
|
||||
|
||||
|
||||
//Called from reconinter_cl.c
|
||||
/*
 * Strided 2-D byte copy: the work-item at (global id 0, global id 1) =
 * (column, row) copies one byte from src to dst.
 *
 * src_base/src_offset/src_stride  source bytes, block origin, row stride
 * dst_base/dst_offset/dst_stride  destination bytes, block origin, row stride
 * num_bytes, num_iter             not read by this kernel
 *
 * NOTE(review): the range checks compare each id against its own global
 * size, so they cannot fail in practice; the copied extent is therefore
 * whatever NDRange the host enqueues. num_bytes/num_iter look like the
 * intended bounds -- confirm against the caller before relying on them.
 */
kernel void vp8_memcpy_kernel(
    global unsigned char *src_base,
    int src_offset,
    int src_stride,
    global unsigned char *dst_base,
    int dst_offset,
    int dst_stride,
    int num_bytes,
    int num_iter
){
    global unsigned char *src = &src_base[src_offset];
    global unsigned char *dst = &dst_base[dst_offset];

    const int row = get_global_id(1);
    if (!(row < get_global_size(1)))
        return;

    const int col = get_global_id(0);
    if (!(col < get_global_size(0)))
        return;

    dst[row*dst_stride + col] = src[row*src_stride + col];
}
|
||||
|
||||
//Not used currently.
|
||||
/*
 * Fill a run of shorts with newval, one element per work-item.
 *
 * mem     buffer to fill
 * offset  index (in shorts) of the first element to write
 * newval  value to store
 * size    extent of the run; size/2 elements are written
 *         (assumes size is a byte count -- TODO confirm with the caller)
 *
 * The guard admits size/2 work-items, so each one must write its own
 * element. Indexing with tid/2 made neighbouring work-items store to the
 * same slot and left the upper half of the run untouched.
 */
void vp8_memset_short(
    global short *mem,
    int offset,
    short newval,
    unsigned int size
)
{
    int tid = get_global_id(0);

    if (tid < (size/2)){
        mem[offset+tid] = newval;
    }
}
|
||||
|
||||
|
||||
|
||||
/*
 * 4x4 bilinear sub-pixel prediction: horizontal pass into int_mem, then
 * vertical pass from int_mem into the destination.
 *
 * src_base/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                         bilinear_filters rows for the
 *                                          horizontal and vertical passes
 * dst_base/dst_offset/dst_pitch            destination bytes, origin, stride
 * int_mem                                  scratch for the (Height+1) x Width
 *                                          intermediate samples
 */
__kernel void vp8_bilinear_predict4x4_kernel
(
    __global unsigned char *src_base,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_base,
    int dst_offset,
    int dst_pitch,
    __global int *int_mem
)
{
    int Height = 4, Width = 4;

    /* First filter 1-D horizontally... */
    vp8_filter_block2d_bil_first_pass_kernel(src_base, src_offset, int_mem, src_pixels_per_line, Height + 1, Width, xoffset);

    /* The vertical pass reads the sample one row below its own, which a
     * different work-item wrote. Wait until every first-pass store to
     * int_mem is visible (this only orders work-items of one work-group). */
    barrier(CLK_GLOBAL_MEM_FENCE);

    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass_kernel(int_mem, dst_base, dst_offset, dst_pitch, Height, Width, yoffset);
}
|
||||
|
||||
/*
 * 8x8 bilinear sub-pixel prediction: horizontal pass into int_mem, then
 * vertical pass from int_mem into the destination.
 *
 * src_base/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                         bilinear_filters rows for the
 *                                          horizontal and vertical passes
 * dst_base/dst_offset/dst_pitch            destination bytes, origin, stride
 * int_mem                                  scratch for the (Height+1) x Width
 *                                          intermediate samples
 */
__kernel void vp8_bilinear_predict8x8_kernel
(
    __global unsigned char *src_base,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_base,
    int dst_offset,
    int dst_pitch,
    __global int *int_mem
)
{
    int Height = 8, Width = 8;

    /* First filter 1-D horizontally... */
    vp8_filter_block2d_bil_first_pass_kernel(src_base, src_offset, int_mem, src_pixels_per_line, Height + 1, Width, xoffset);

    /* The vertical pass reads the sample one row below its own, which a
     * different work-item wrote. Wait until every first-pass store to
     * int_mem is visible (this only orders work-items of one work-group). */
    barrier(CLK_GLOBAL_MEM_FENCE);

    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass_kernel(int_mem, dst_base, dst_offset, dst_pitch, Height, Width, yoffset);
}
|
||||
|
||||
/*
 * 8-wide, 4-high bilinear sub-pixel prediction: horizontal pass into
 * int_mem, then vertical pass from int_mem into the destination.
 *
 * src_base/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                         bilinear_filters rows for the
 *                                          horizontal and vertical passes
 * dst_base/dst_offset/dst_pitch            destination bytes, origin, stride
 * int_mem                                  scratch for the (Height+1) x Width
 *                                          intermediate samples
 */
__kernel void vp8_bilinear_predict8x4_kernel
(
    __global unsigned char *src_base,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_base,
    int dst_offset,
    int dst_pitch,
    __global int *int_mem
)
{
    int Height = 4, Width = 8;

    /* First filter 1-D horizontally... */
    vp8_filter_block2d_bil_first_pass_kernel(src_base, src_offset, int_mem, src_pixels_per_line, Height + 1, Width, xoffset);

    /* The vertical pass reads the sample one row below its own, which a
     * different work-item wrote. Wait until every first-pass store to
     * int_mem is visible (this only orders work-items of one work-group). */
    barrier(CLK_GLOBAL_MEM_FENCE);

    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass_kernel(int_mem, dst_base, dst_offset, dst_pitch, Height, Width, yoffset);
}
|
||||
|
||||
/*
 * 16x16 bilinear sub-pixel prediction: horizontal pass into int_mem, then
 * vertical pass from int_mem into the destination.
 *
 * src_base/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                         bilinear_filters rows for the
 *                                          horizontal and vertical passes
 * dst_base/dst_offset/dst_pitch            destination bytes, origin, stride
 * int_mem                                  scratch for the (Height+1) x Width
 *                                          intermediate samples
 */
__kernel void vp8_bilinear_predict16x16_kernel
(
    __global unsigned char *src_base,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_base,
    int dst_offset,
    int dst_pitch,
    __global int *int_mem
)
{
    int Height = 16, Width = 16;

    /* First filter 1-D horizontally... */
    vp8_filter_block2d_bil_first_pass_kernel(src_base, src_offset, int_mem, src_pixels_per_line, Height + 1, Width, xoffset);

    /* The vertical pass reads the sample one row below its own, which a
     * different work-item wrote. Wait until every first-pass store to
     * int_mem is visible (this only orders work-items of one work-group). */
    barrier(CLK_GLOBAL_MEM_FENCE);

    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass_kernel(int_mem, dst_base, dst_offset, dst_pitch, Height, Width, yoffset);
}
|
||||
|
||||
/*
 * Horizontal pass of the 6-tap sub-pixel filter, writing clamped int samples
 * into a local scratch buffer, followed by a local-memory barrier.
 *
 * src_base/src_offset   source bytes and index of the block origin
 * output_ptr            local buffer, densely packed (index == sample number)
 * src_pixels_per_line   source row stride
 * pixel_step            not read by this function
 * output_height/width   dimensions of the intermediate block
 * filter_offset         row of sub_pel_filters to apply
 *
 * When the NDRange spans more than one work-group, every participating
 * work-item computes all samples itself; otherwise each work-item computes
 * only the sample matching its global id. Work-items whose global id is
 * >= output_width*output_height compute nothing but still hit the barrier.
 */
void vp8_filter_block2d_first_pass(
    global unsigned char *src_base,
    int src_offset,
    local int *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
){
    const uint tid = get_global_id(0);
    const uint num_samples = output_width*output_height;

    int nthreads = get_global_size(0);
    int ngroups = nthreads / get_local_size(0);

    /* Capture the block origin now; src_offset is reused per sample below. */
    global unsigned char *src_ptr = &src_base[src_offset];

    __constant short *vp8_filter = sub_pel_filters[filter_offset];

    if (tid < num_samples){
        const short filter0 = vp8_filter[0];
        const short filter1 = vp8_filter[1];
        const short filter2 = vp8_filter[2];
        const short filter3 = vp8_filter[3];
        const short filter4 = vp8_filter[4];
        const short filter5 = vp8_filter[5];

        /* Default: just this work-item's own sample. */
        uint first = tid;
        uint last = tid + 1;

        if (ngroups > 1){
            //This is generally only true on Apple CPU-CL, which gives a group
            //size of 1, regardless of the CPU core count.
            first = 0;
            last = num_samples;
        }

        for (uint i = first; i < last; i++){
            src_offset = i + (i/output_width * (src_pixels_per_line - output_width));

            int Temp = (int)(src_ptr[src_offset - 2] * filter0) +
                       (int)(src_ptr[src_offset - 1] * filter1) +
                       (int)(src_ptr[src_offset] * filter2) +
                       (int)(src_ptr[src_offset + 1] * filter3) +
                       (int)(src_ptr[src_offset + 2] * filter4) +
                       (int)(src_ptr[src_offset + 3] * filter5) +
                       (VP8_FILTER_WEIGHT >> 1); /* Rounding */

            /* Normalize back to 0-255 */
            Temp >>= VP8_FILTER_SHIFT;
            Temp = (Temp < 0) ? 0 : ((Temp > 255) ? 255 : Temp);

            output_ptr[i] = Temp;
        }
    }

    //Add a fence so that no 2nd pass stuff starts before 1st pass writes are done.
    barrier(CLK_LOCAL_MEM_FENCE);
}
|
||||
|
||||
/*
 * Second pass of the 6-tap sub-pixel filter, reading int samples from a
 * local scratch buffer; each work-item produces one output byte.
 *
 * src_ptr                    local intermediate samples, positioned at the
 *                            first sample of the output block
 * output_base/output_offset  destination bytes and index of the block origin
 * output_pitch               destination row stride
 * src_pixels_per_line        intermediate row stride
 * pixel_step                 distance between successive taps
 * output_height/width        dimensions of the output block
 * filter_offset              row of sub_pel_filters to apply
 *
 * Work-items whose global id is >= output_width*output_height do nothing.
 */
void vp8_filter_block2d_second_pass
(
    local int *src_ptr,
    global unsigned char *output_base,
    int output_offset,
    int output_pitch,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    int filter_offset
) {
    const uint i = get_global_id(0);

    if (i >= (output_width * output_height))
        return;

    global unsigned char *output_ptr = &output_base[output_offset];
    __constant short *vp8_filter = sub_pel_filters[filter_offset];

    const int step = (int)pixel_step;
    const int row = i/output_width;

    const int src_offset = i + (row * (src_pixels_per_line - output_width));
    const int out_offset = i%output_width + (row * output_pitch); //Not same as output_offset...

    /* Apply filter: taps at -2, -1, 0, +1, +2, +3 pixel steps. */
    int acc = (VP8_FILTER_WEIGHT >> 1); /* Rounding */
    acc += (int)src_ptr[src_offset - 2*step] * vp8_filter[0];
    acc += (int)src_ptr[src_offset - step] * vp8_filter[1];
    acc += (int)src_ptr[src_offset] * vp8_filter[2];
    acc += (int)src_ptr[src_offset + pixel_step] * vp8_filter[3];
    acc += (int)src_ptr[src_offset + 2*step] * vp8_filter[4];
    acc += (int)src_ptr[src_offset + 3*step] * vp8_filter[5];

    /* Normalize back to 0-255 */
    acc >>= VP8_FILTER_SHIFT;
    acc = (acc < 0) ? 0 : ((acc > 255) ? 255 : acc);

    output_ptr[out_offset] = (unsigned char)acc;
}
|
||||
|
||||
/*
 * 4x4 six-tap sub-pixel prediction: horizontal pass into local scratch,
 * then vertical pass into the destination.
 *
 * src_ptr/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                        sub_pel_filters rows for the
 *                                         horizontal and vertical passes
 * dst_ptr/dst_offset/dst_pitch            destination bytes, origin, stride
 */
__kernel void vp8_sixtap_predict_kernel
(
    __global unsigned char *src_ptr,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_ptr,
    int dst_offset,
    int dst_pitch
)
{
    /* Horizontal-pass output: 9 rows of 4 samples. */
    local int FData[9*4];

    /* First filter 1-D horizontally (pixel step 1, 9 rows, 4 columns)... */
    vp8_filter_block2d_first_pass(src_ptr, src_offset, FData, src_pixels_per_line, 1, 9, 4, xoffset);

    /* ...then filter vertically, starting two rows (8 samples) into the
     * scratch so the taps above the first output row stay in bounds. */
    vp8_filter_block2d_second_pass(FData + 8, dst_ptr, dst_offset, dst_pitch, 4, 4, 4, 4, yoffset);
}
|
||||
|
||||
/*
 * 8x8 six-tap sub-pixel prediction: horizontal pass into local scratch,
 * then vertical pass into the destination.
 *
 * src_ptr/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                        sub_pel_filters rows for the
 *                                         horizontal and vertical passes
 * dst_ptr/dst_offset/dst_pitch            destination bytes, origin, stride
 */
__kernel void vp8_sixtap_predict8x8_kernel
(
    __global unsigned char *src_ptr,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_ptr,
    int dst_offset,
    int dst_pitch
)
{
    /* Temp data buffer used in filtering; the horizontal pass fills
     * 13 rows of 8 samples. */
    local int FData[13*16];

    /* First filter 1-D horizontally (pixel step 1, 13 rows, 8 columns)... */
    vp8_filter_block2d_first_pass(src_ptr, src_offset, FData, src_pixels_per_line, 1, 13, 8, xoffset);

    /* ...then filter vertically, starting two rows (16 samples) into the
     * scratch so the taps above the first output row stay in bounds. */
    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_offset, dst_pitch, 8, 8, 8, 8, yoffset);
}
|
||||
|
||||
/*
 * 8-wide, 4-high six-tap sub-pixel prediction: horizontal pass into local
 * scratch, then vertical pass into the destination.
 *
 * src_ptr/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                        sub_pel_filters rows for the
 *                                         horizontal and vertical passes
 * dst_ptr/dst_offset/dst_pitch            destination bytes, origin, stride
 */
__kernel void vp8_sixtap_predict8x4_kernel
(
    __global unsigned char *src_ptr,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_ptr,
    int dst_offset,
    int dst_pitch
)
{
    /* Temp data buffer used in filtering; the horizontal pass fills
     * 9 rows of 8 samples. */
    local int FData[13*16];

    /* First filter 1-D horizontally (pixel step 1, 9 rows, 8 columns)... */
    vp8_filter_block2d_first_pass(src_ptr, src_offset, FData, src_pixels_per_line, 1, 9, 8, xoffset);

    /* ...then filter vertically, starting two rows (16 samples) into the
     * scratch so the taps above the first output row stay in bounds. */
    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_offset, dst_pitch, 8, 8, 4, 8, yoffset);
}
|
||||
|
||||
/*
 * 16x16 six-tap sub-pixel prediction: horizontal pass into local scratch,
 * then vertical pass into the destination.
 *
 * src_ptr/src_offset/src_pixels_per_line  source bytes, block origin, stride
 * xoffset, yoffset                        sub_pel_filters rows for the
 *                                         horizontal and vertical passes
 * dst_ptr/dst_offset/dst_pitch            destination bytes, origin, stride
 */
__kernel void vp8_sixtap_predict16x16_kernel
(
    __global unsigned char *src_ptr,
    int src_offset,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    __global unsigned char *dst_ptr,
    int dst_offset,
    int dst_pitch
)
{
    /* Temp data buffer used in filtering; the horizontal pass fills
     * 21 rows of 16 samples. */
    local int FData[21*24];

    /* First filter 1-D horizontally (pixel step 1, 21 rows, 16 columns)... */
    vp8_filter_block2d_first_pass(src_ptr, src_offset, FData, src_pixels_per_line, 1, 21, 16, xoffset);

    /* ...then filter vertically, starting two rows (32 samples) into the
     * scratch so the taps above the first output row stay in bounds. */
    vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_offset, dst_pitch, 16, 16, 16, 16, yoffset);
}
|
74
vp8/common/opencl/filter_cl.h
Normal file
74
vp8/common/opencl/filter_cl.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef FILTER_CL_H_
|
||||
#define FILTER_CL_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "vp8_opencl.h"
|
||||
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
#define REGISTER_FILTER 1
|
||||
/* Clamp the lvalue x into [min, max] in place. Every argument is
 * parenthesized so expression arguments (e.g. `a | b`) keep their meaning.
 * The expansion is still a bare if/else statement ending in ';' (kept for
 * compatibility with existing call sites), so do not use it as the
 * unbraced body of another if/else. x is evaluated more than once. */
#define CLAMP(x,min,max) if ((x) < (min)) (x) = (min); else if ((x) > (max)) (x) = (max);
|
||||
#define PRE_CALC_PIXEL_STEPS 1
|
||||
#define PRE_CALC_SRC_INCREMENT 1
|
||||
|
||||
/* PS2 / PS3: two and three pixel steps. Either refer to precomputed
 * variables or compute them from pixel_step at the point of use. The
 * computed forms are fully parenthesized so they stay one operand in any
 * surrounding expression (e.g. `n / PS2`). */
#if PRE_CALC_PIXEL_STEPS
#define PS2 two_pixel_steps
#define PS3 three_pixel_steps
#else
#define PS2 (2*(int)pixel_step)
#define PS3 (3*(int)pixel_step)
#endif
|
||||
|
||||
#if REGISTER_FILTER
|
||||
#define FILTER0 filter0
|
||||
#define FILTER1 filter1
|
||||
#define FILTER2 filter2
|
||||
#define FILTER3 filter3
|
||||
#define FILTER4 filter4
|
||||
#define FILTER5 filter5
|
||||
#else
|
||||
#define FILTER0 vp8_filter[0]
|
||||
#define FILTER1 vp8_filter[1]
|
||||
#define FILTER2 vp8_filter[2]
|
||||
#define FILTER3 vp8_filter[3]
|
||||
#define FILTER4 vp8_filter[4]
|
||||
#define FILTER5 vp8_filter[5]
|
||||
#endif
|
||||
|
||||
#if PRE_CALC_SRC_INCREMENT
|
||||
#define SRC_INCREMENT src_increment
|
||||
#else
|
||||
#define SRC_INCREMENT (src_pixels_per_line - output_width)
|
||||
#endif
|
||||
|
||||
#define FILTER_OFFSET //Filter data stored as CL constant memory
|
||||
#define FILTER_REF sub_pel_filters[filter_offset]
|
||||
|
||||
extern const char *filterCompileOptions;
|
||||
extern const char *filter_cl_file_name;
|
||||
|
||||
//Copy the -2*pixel_step (and ps*3) bytes because the filter algorithm
|
||||
//accesses negative indexes
|
||||
/* Buffer-length helpers. All arguments are parenthesized so callers may
 * pass expressions (e.g. `stride + pad`) without changing the result. */

/* Source span touched by a six-tap pass: the dense output count, plus the
 * stride slack for each completed row, plus 5 extra taps. */
#define SIXTAP_SRC_LEN(out_width,out_height,src_px) ((out_width)*(out_height) + (((out_width)*(out_height)-1)/(out_width))*((src_px) - (out_width)) + 5)
/* Source span for a bilinear pass: out_height full rows plus out_width. */
#define BIL_SRC_LEN(out_width,out_height,src_px) ((out_height) * (src_px) + (out_width))
/* Destination span: dst_height full rows of dst_pitch plus dst_width. */
#define DST_LEN(dst_pitch,dst_height,dst_width) ((dst_pitch) * (dst_height) + (dst_width))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* FILTER_CL_H_ */
|
45
vp8/common/opencl/idct_cl.h
Normal file
45
vp8/common/opencl/idct_cl.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef IDCT_OPENCL_H
|
||||
#define IDCT_OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "vp8_opencl.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#define prototype_second_order_cl(sym) \
|
||||
void sym(BLOCKD *b)
|
||||
|
||||
#define prototype_idct_cl(sym) \
|
||||
void sym(BLOCKD *b, int pitch)
|
||||
|
||||
/* Declares the signature shared by the DC-only IDCT-and-add entry points.
 * The last line deliberately has no trailing line continuation, so the
 * macro ends here regardless of what follows it in the file. */
#define prototype_idct_scalar_add_cl(sym) \
    void sym(BLOCKD *b, cl_int use_diff, int diff_offset, int qcoeff_offset, \
        int pred_offset, unsigned char *output, cl_mem out_mem, int out_offset, size_t out_size, \
        int pitch, int stride)
|
||||
|
||||
|
||||
extern prototype_idct_cl(vp8_short_idct4x4llm_1_cl);
|
||||
extern prototype_idct_cl(vp8_short_idct4x4llm_cl);
|
||||
extern prototype_idct_scalar_add_cl(vp8_dc_only_idct_add_cl);
|
||||
|
||||
extern prototype_second_order_cl(vp8_short_inv_walsh4x4_1_cl);
|
||||
extern prototype_second_order_cl(vp8_short_inv_walsh4x4_cl);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
325
vp8/common/opencl/idctllm_cl.c
Normal file
325
vp8/common/opencl/idctllm_cl.c
Normal file
@@ -0,0 +1,325 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
//ACW: Remove me after debugging.
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "idct_cl.h"
|
||||
#include "idctllm_cl.h"
|
||||
#include "blockd_cl.h"
|
||||
|
||||
void cl_destroy_idct(){
|
||||
|
||||
if (cl_data.idct_program)
|
||||
clReleaseProgram(cl_data.idct_program);
|
||||
|
||||
cl_data.idct_program = NULL;
|
||||
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_short_inv_walsh4x4_1_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_short_inv_walsh4x4_1st_pass_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_short_inv_walsh4x4_2nd_pass_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_dc_only_idct_add_kernel);
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_short_idct4x4llm_1_kernel);
|
||||
//VP8_CL_RELEASE_KERNEL(cl_data.vp8_short_idct4x4llm_kernel);
|
||||
|
||||
}
|
||||
|
||||
int cl_init_idct() {
|
||||
int err;
|
||||
|
||||
// Create the filter compute program from the file-defined source code
|
||||
if (cl_load_program(&cl_data.idct_program, idctllm_cl_file_name,
|
||||
idctCompileOptions) != CL_SUCCESS)
|
||||
return VP8_CL_TRIED_BUT_FAILED;
|
||||
|
||||
// Create the compute kernel in the program we wish to run
|
||||
VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_short_inv_walsh4x4_1_kernel,"vp8_short_inv_walsh4x4_1_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_short_inv_walsh4x4_1st_pass_kernel,"vp8_short_inv_walsh4x4_1st_pass_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_short_inv_walsh4x4_2nd_pass_kernel,"vp8_short_inv_walsh4x4_2nd_pass_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_dc_only_idct_add_kernel,"vp8_dc_only_idct_add_kernel");
|
||||
|
||||
////idct4x4llm kernels are only useful for the encoder
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_short_idct4x4llm_1_kernel,"vp8_short_idct4x4llm_1_kernel");
|
||||
//VP8_CL_CREATE_KERNEL(cl_data,idct_program,vp8_short_idct4x4llm_kernel,"vp8_short_idct4x4llm_kernel");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
/* Larger of x and y. Arguments are parenthesized so operator expressions
 * (e.g. `a & b`) are compared as a whole; each argument may still be
 * evaluated twice, so avoid side effects in them. */
#define max(x,y) (((x) > (y)) ? (x) : (y))
|
||||
//#define NO_CL
|
||||
|
||||
/* Only useful for encoder... Untested... */
|
||||
/*
 * Run the 4x4 inverse DCT for block b on the OpenCL device, reading the
 * block's dequantized coefficients and writing its diff buffer.
 *
 * b      block whose dqcoeff/diff buffers and command queue are used
 * pitch  passed through to the kernel / C implementation
 *
 * If OpenCL was not initialized the C implementation is called instead.
 * The C implementation is also handed to each CL helper macro as its
 * fallback argument.
 *
 * NOTE(review): the read-back is enqueued non-blocking, so `output` is
 * presumably only valid after the queue is finished elsewhere -- confirm.
 */
void vp8_short_idct4x4llm_cl(BLOCKD *b, int pitch)
{
    int err;

    short *input = b->dqcoeff_base + b->dqcoeff_offset;
    short *output = &b->diff_base[b->diff_offset];

    cl_mem src_mem, dst_mem;
    cl_kernel kernel = cl_data.vp8_short_idct4x4llm_kernel;

    /* 16 coefficients in; three pitch/2-short rows plus four shorts out. */
    const size_t src_bytes = sizeof(short)*16;
    const size_t dst_bytes = sizeof(short)*(4+(pitch/2)*3);

    //1 instance for now. This should be split into 2-pass * 4 thread.
    size_t global = 1;

    if (cl_initialized != CL_SUCCESS){
        vp8_short_idct4x4llm_c(input,output,pitch);
        return;
    }

    VP8_CL_CREATE_BUF(b->cl_commands, src_mem,,
            src_bytes, input,
            vp8_short_idct4x4llm_c(input,output,pitch),
    );

    VP8_CL_CREATE_BUF(b->cl_commands, dst_mem,,
            dst_bytes, output,
            vp8_short_idct4x4llm_c(input,output,pitch),
    );

    /* Set arguments and run kernel */
    err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &src_mem);
    err |= clSetKernelArg(kernel, 1, sizeof (cl_mem), &dst_mem);
    err |= clSetKernelArg(kernel, 2, sizeof (int), &pitch);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",
        vp8_short_idct4x4llm_c(input,output,pitch),
    );

    /* Execute the kernel */
    err = clEnqueueNDRangeKernel(b->cl_commands, kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);
        vp8_short_idct4x4llm_c(input,output,pitch),
    );

    /* Read back the result data from the device */
    err = clEnqueueReadBuffer(b->cl_commands, dst_mem, CL_FALSE, 0, dst_bytes, output, 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS(b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to read output array!\n",
        vp8_short_idct4x4llm_c(input,output,pitch),
    );

    clReleaseMemObject(src_mem);
    clReleaseMemObject(dst_mem);
}
|
||||
|
||||
/* Only useful for encoder... Untested... */
|
||||
/*
 * Run the single-coefficient 4x4 inverse DCT for block b on the OpenCL
 * device, reading the first dequantized coefficient and writing the block's
 * diff buffer.
 *
 * b      block whose dqcoeff/diff buffers and command queue are used
 * pitch  passed through to the kernel / C implementation
 *
 * If OpenCL was not initialized the C implementation is called instead.
 * The C implementation is also handed to each CL helper macro as its
 * fallback argument. A trace line is printed on every CL-path call.
 *
 * NOTE(review): the read-back is enqueued non-blocking, so `output` is
 * presumably only valid after the queue is finished elsewhere -- confirm.
 */
void vp8_short_idct4x4llm_1_cl(BLOCKD *b, int pitch)
{
    int err;
    size_t global = 4;

    short *input = b->dqcoeff_base + b->dqcoeff_offset;
    short *output = &b->diff_base[b->diff_offset];

    cl_mem src_mem, dst_mem;
    cl_kernel kernel = cl_data.vp8_short_idct4x4llm_1_kernel;

    /* Three pitch/2-short rows plus four shorts of output. */
    const size_t dst_bytes = sizeof(short)*(4+(pitch/2)*3);

    if (cl_initialized != CL_SUCCESS){
        vp8_short_idct4x4llm_1_c(input,output,pitch);
        return;
    }

    printf("vp8_short_idct4x4llm_1_cl\n");

    VP8_CL_CREATE_BUF(b->cl_commands, src_mem,,
            sizeof(short), input,
            vp8_short_idct4x4llm_1_c(input,output,pitch),
    );

    VP8_CL_CREATE_BUF(b->cl_commands, dst_mem,,
            dst_bytes, output,
            vp8_short_idct4x4llm_1_c(input,output,pitch),
    );

    /* Set arguments and run kernel */
    err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &src_mem);
    err |= clSetKernelArg(kernel, 1, sizeof (cl_mem), &dst_mem);
    err |= clSetKernelArg(kernel, 2, sizeof (int), &pitch);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",
        vp8_short_idct4x4llm_1_c(input,output,pitch),
    );

    /* Execute the kernel */
    err = clEnqueueNDRangeKernel(b->cl_commands, kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);
        vp8_short_idct4x4llm_1_c(input,output,pitch),
    );

    /* Read back the result data from the device */
    err = clEnqueueReadBuffer(b->cl_commands, dst_mem, CL_FALSE, 0, dst_bytes, output, 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS(b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to read output array!\n",
        vp8_short_idct4x4llm_1_c(input,output,pitch),
    );

    clReleaseMemObject(src_mem);
    clReleaseMemObject(dst_mem);
}
|
||||
|
||||
/*
 * Enqueue the DC-only IDCT-and-add kernel for block b (16 work-items).
 *
 * b              supplies the command queue and the predictor, diff, qcoeff
 *                and dequant device buffers
 * use_diff       forwarded to the kernel
 * diff_offset, qcoeff_offset, pred_offset
 *                offsets forwarded to the kernel
 * dst_base       host destination; only used when dst_mem is NULL
 * dst_mem        device destination buffer, or NULL to have a temporary one
 *                created from dst_base
 * dst_offset     offset forwarded to the kernel
 * dest_size      byte size of the destination (temporary buffer / read-back)
 * pitch, stride  forwarded to the kernel
 *
 * When a temporary destination buffer was created here, its contents are
 * read back into dst_base (non-blocking) and the buffer is released.
 */
void vp8_dc_only_idct_add_cl(BLOCKD *b, cl_int use_diff, int diff_offset,
        int qcoeff_offset, int pred_offset,
        unsigned char *dst_base, cl_mem dst_mem, int dst_offset, size_t dest_size,
        int pitch, int stride
)
{
    int err;
    size_t global = 16;
    cl_kernel kernel = cl_data.vp8_dc_only_idct_add_kernel;

    /* Set when this call created dst_mem and must read it back / free it. */
    int owns_dst_mem = 0;

    if (dst_mem == NULL){
        VP8_CL_CREATE_BUF(b->cl_commands, dst_mem,,
                dest_size, dst_base,,
        );
        owns_dst_mem = 1;
    }

    /* Set arguments and run kernel */
    err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &b->cl_predictor_mem);
    err |= clSetKernelArg(kernel, 1, sizeof (int), &pred_offset);
    err |= clSetKernelArg(kernel, 2, sizeof (cl_mem), &dst_mem);
    err |= clSetKernelArg(kernel, 3, sizeof (int), &dst_offset);
    err |= clSetKernelArg(kernel, 4, sizeof (int), &pitch);
    err |= clSetKernelArg(kernel, 5, sizeof (int), &stride);
    err |= clSetKernelArg(kernel, 6, sizeof (cl_int), &use_diff);
    err |= clSetKernelArg(kernel, 7, sizeof (cl_mem), &b->cl_diff_mem);
    err |= clSetKernelArg(kernel, 8, sizeof (int), &diff_offset);
    err |= clSetKernelArg(kernel, 9, sizeof (cl_mem), &b->cl_qcoeff_mem);
    err |= clSetKernelArg(kernel, 10, sizeof (int), &qcoeff_offset);
    err |= clSetKernelArg(kernel, 11, sizeof (cl_mem), &b->cl_dequant_mem);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",,
    );

    /* Execute the kernel */
    err = clEnqueueNDRangeKernel(b->cl_commands, kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);,
    );

    if (owns_dst_mem){
        /* Read back the result data from the device */
        err = clEnqueueReadBuffer(b->cl_commands, dst_mem, CL_FALSE, 0,
                dest_size, dst_base, 0, NULL, NULL);

        VP8_CL_CHECK_SUCCESS(b->cl_commands, err != CL_SUCCESS,
            "Error: Failed to read output array!\n",,
        );

        clReleaseMemObject(dst_mem);
    }
}
|
||||
|
||||
/*
 * Enqueue the two-pass 4x4 inverse Walsh transform for block b
 * (4 work-items per pass).
 *
 * The first pass reads b's dqcoeff device buffer and writes its diff device
 * buffer; the second pass operates on the diff buffer only. If OpenCL was
 * not initialized the C implementation runs on the host buffers instead;
 * the same C call is handed to each CL check macro as its fallback argument.
 */
void vp8_short_inv_walsh4x4_cl(BLOCKD *b)
{
    int err;
    size_t global = 4;
    cl_kernel first_pass = cl_data.vp8_short_inv_walsh4x4_1st_pass_kernel;
    cl_kernel second_pass = cl_data.vp8_short_inv_walsh4x4_2nd_pass_kernel;

    if (cl_initialized != CL_SUCCESS){
        vp8_short_inv_walsh4x4_c(b->dqcoeff_base+b->dqcoeff_offset,&b->diff_base[b->diff_offset]);
        return;
    }

    /* First pass: set arguments and run kernel */
    err = clSetKernelArg(first_pass, 0, sizeof (cl_mem), &b->cl_dqcoeff_mem);
    err |= clSetKernelArg(first_pass, 1, sizeof(int), &b->dqcoeff_offset);
    err |= clSetKernelArg(first_pass, 2, sizeof (cl_mem), &b->cl_diff_mem);
    err |= clSetKernelArg(first_pass, 3, sizeof(int), &b->diff_offset);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",
        vp8_short_inv_walsh4x4_c(b->dqcoeff_base+b->dqcoeff_offset, &b->diff_base[b->diff_offset]),
    );

    err = clEnqueueNDRangeKernel(b->cl_commands, first_pass, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);
        vp8_short_inv_walsh4x4_c(b->dqcoeff_base+b->dqcoeff_offset, &b->diff_base[b->diff_offset]),
    );

    /* Second pass: set arguments and run kernel */
    err = clSetKernelArg(second_pass, 0, sizeof (cl_mem), &b->cl_diff_mem);
    err |= clSetKernelArg(second_pass, 1, sizeof(int), &b->diff_offset);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",
        vp8_short_inv_walsh4x4_c(b->dqcoeff_base+b->dqcoeff_offset, &b->diff_base[b->diff_offset]),
    );

    err = clEnqueueNDRangeKernel(b->cl_commands, second_pass, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);
        vp8_short_inv_walsh4x4_c(b->dqcoeff_base+b->dqcoeff_offset, &b->diff_base[b->diff_offset]),
    );
}
|
||||
|
||||
/*
 * Enqueue the single-coefficient 4x4 inverse Walsh transform for block b
 * (4 work-items), reading b's dqcoeff device buffer and writing its diff
 * device buffer.
 *
 * If OpenCL was not initialized the C implementation runs on the host
 * buffers instead; the same C call is handed to each CL check macro as its
 * fallback argument.
 */
void vp8_short_inv_walsh4x4_1_cl(BLOCKD *b)
{
    int err;
    size_t global = 4;
    cl_kernel kernel = cl_data.vp8_short_inv_walsh4x4_1_kernel;

    if (cl_initialized != CL_SUCCESS){
        vp8_short_inv_walsh4x4_1_c(b->dqcoeff_base + b->dqcoeff_offset,
            &b->diff_base[b->diff_offset]);
        return;
    }

    /* Set arguments and run kernel */
    err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &b->cl_dqcoeff_mem);
    err |= clSetKernelArg(kernel, 1, sizeof (int), &b->dqcoeff_offset);
    err |= clSetKernelArg(kernel, 2, sizeof (cl_mem), &b->cl_diff_mem);
    err |= clSetKernelArg(kernel, 3, sizeof (int), &b->diff_offset);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to set kernel arguments!\n",
        vp8_short_inv_walsh4x4_1_c(b->dqcoeff_base + b->dqcoeff_offset,
            &b->diff_base[b->diff_offset]),
    );

    /* Execute the kernel */
    err = clEnqueueNDRangeKernel(b->cl_commands, kernel, 1, NULL, &global, NULL , 0, NULL, NULL);
    VP8_CL_CHECK_SUCCESS( b->cl_commands, err != CL_SUCCESS,
        "Error: Failed to execute kernel!\n",
        printf("err = %d\n",err);
        vp8_short_inv_walsh4x4_1_c(b->dqcoeff_base + b->dqcoeff_offset,
            &b->diff_base[b->diff_offset]),
    );
}
|
309
vp8/common/opencl/idctllm_cl.cl
Normal file
309
vp8/common/opencl/idctllm_cl.cl
Normal file
@@ -0,0 +1,309 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
__constant int cospi8sqrt2minus1 = 20091;
|
||||
__constant int sinpi8sqrt2 = 35468;
|
||||
__constant int rounding = 0;
|
||||
|
||||
|
||||
kernel void vp8_short_idct4x4llm_1st_pass_kernel(global short*,global short *,int);
|
||||
kernel void vp8_short_idct4x4llm_2nd_pass_kernel(global short*,int);
|
||||
|
||||
|
||||
/*
 * Full 4x4 inverse DCT: the first pass transforms input into output, the
 * second pass then transforms output in place. Every work-item that runs
 * this kernel performs both passes over the whole block.
 */
__kernel void vp8_short_idct4x4llm_kernel(
    __global short *input,
    __global short *output,
    int pitch
)
{
    /* input -> output */
    vp8_short_idct4x4llm_1st_pass_kernel(input, output, pitch);

    /* output -> output */
    vp8_short_idct4x4llm_2nd_pass_kernel(output, pitch);
}
|
||||
|
||||
/*
 * First IDCT pass. For each of the four columns of the 4-wide input block
 * (elements col, col+4, col+8, col+12) it writes four results to
 * op[col + k * (pitch >> 1)], k = 0..3.
 */
__kernel void vp8_short_idct4x4llm_1st_pass_kernel(
    __global short *ip,
    __global short *op,
    int pitch
)
{
    /* Output row stride in shorts (pitch is presumably given in bytes). */
    const int row_stride = pitch >> 1;
    int col;

    for (col = 0; col < 4; col++)
    {
        /* Load the whole column before any store of this iteration. */
        const int in0  = ip[col];
        const int in4  = ip[col + 4];
        const int in8  = ip[col + 8];
        const int in12 = ip[col + 12];

        const int a1 = in0 + in8;
        const int b1 = in0 - in8;

        const int c1 = ((in4 * sinpi8sqrt2 + rounding) >> 16)
                     - (in12 + ((in12 * cospi8sqrt2minus1 + rounding) >> 16));

        const int d1 = (in4 + ((in4 * cospi8sqrt2minus1 + rounding) >> 16))
                     + ((in12 * sinpi8sqrt2 + rounding) >> 16);

        op[col + row_stride * 0] = a1 + d1;
        op[col + row_stride * 3] = a1 - d1;

        op[col + row_stride * 1] = b1 + c1;
        op[col + row_stride * 2] = b1 - c1;
    }
}
|
||||
|
||||
/*
 * Second IDCT pass, done in place. Each of the four rows (spaced pitch >> 1
 * shorts apart) is transformed across its four elements, and the results
 * are rounded with (x + 4) >> 3.
 */
__kernel void vp8_short_idct4x4llm_2nd_pass_kernel(
    __global short *output,
    int pitch
)
{
    /* Row stride in shorts (pitch is presumably given in bytes). */
    const int row_stride = pitch >> 1;
    int r;

    for (r = 0; r < 4; r++)
    {
        __global short *row = output + r * row_stride;

        /* Read the full row before overwriting it. */
        const int x0 = row[0];
        const int x1 = row[1];
        const int x2 = row[2];
        const int x3 = row[3];

        const int a1 = x0 + x2;
        const int b1 = x0 - x2;

        const int c1 = ((x1 * sinpi8sqrt2 + rounding) >> 16)
                     - (x3 + ((x3 * cospi8sqrt2minus1 + rounding) >> 16));

        const int d1 = (x1 + ((x1 * cospi8sqrt2minus1 + rounding) >> 16))
                     + ((x3 * sinpi8sqrt2 + rounding) >> 16);

        row[0] = (a1 + d1 + 4) >> 3;
        row[3] = (a1 - d1 + 4) >> 3;

        row[1] = (b1 + c1 + 4) >> 3;
        row[2] = (b1 - c1 + 4) >> 3;
    }
}
|
||||
|
||||
/*
 * DC-only IDCT: fills a 4x4 output block with (input[0] + 4) >> 3.
 * Work-items 0..3 each write one row of four values; other work-items
 * write nothing.
 */
__kernel void vp8_short_idct4x4llm_1_kernel(
    __global short *input,
    __global short *output,
    int pitch
)
{
    /* Row stride in shorts (pitch is presumably given in bytes). */
    const int row_stride = pitch >> 1;
    const int dc = (input[0] + 4) >> 3;
    const int tid = get_global_id(0);
    int col;

    if (tid >= 4)
        return;

    for (col = 0; col < 4; col++)
        output[row_stride * tid + col] = dc;
}
|
||||
|
||||
/*
 * Adds a rounded DC value to a 4x4 block of predictor pixels and stores the
 * result, saturated to 0..255, in the destination.
 *
 * The DC value is diff_base[diff_offset] when use_diff == 1, otherwise
 * qcoeff_base[qcoeff_offset] * dequant[0]; it is then rounded with
 * (dc + 4) >> 3. Work-items 0..15 each handle one pixel: row = tid / 4,
 * column = tid % 4. Predictor rows are `pitch` bytes apart, destination
 * rows `stride` bytes apart.
 */
__kernel void vp8_dc_only_idct_add_kernel(
    __global unsigned char *pred_base,
    int pred_offset,
    __global unsigned char *dst_base,
    int dst_offset,
    int pitch,
    int stride,
    int use_diff,
    global short *diff_base,
    int diff_offset,
    global short *qcoeff_base,
    int qcoeff_offset,
    global short *dequant
)
{
    const int tid = get_global_id(0);
    int dc;
    int row, col;
    int pixel;

    if (tid >= 16)
        return;

    if (use_diff == 1)
        dc = diff_base[diff_offset];
    else
        dc = qcoeff_base[qcoeff_offset] * dequant[0];

    dc = (dc + 4) >> 3;

    row = tid / 4;
    col = tid % 4;

    pixel = dc + pred_base[pred_offset + row * pitch + col];

    /* Saturate to the unsigned 8-bit range. */
    pixel = (pixel < 0) ? 0 : ((pixel > 255) ? 255 : pixel);

    dst_base[dst_offset + row * stride + col] = (unsigned char) pixel;
}
|
||||
|
||||
|
||||
/*
 * First (column) pass of the 4x4 inverse Walsh-Hadamard transform.
 *
 * src_base + src_offset    : 16 input coefficients, 4 per row.
 * output_base + out_offset : 16 output values, 4 per row.
 *
 * Work-items 0..3 each transform one column (elements tid, tid+4, tid+8,
 * tid+12); other work-items do nothing.
 *
 * The output block is addressed with out_offset, matching
 * vp8_short_inv_walsh4x4_2nd_pass_kernel, which reads this pass's results
 * from output_base + out_offset.
 */
__kernel void vp8_short_inv_walsh4x4_1st_pass_kernel(
    __global short *src_base,
    int src_offset,
    __global short *output_base,
    int out_offset
)
{
    __global short *ip = src_base + src_offset;
    __global short *op = output_base + out_offset;
    const int tid = get_global_id(0);
    int a1, b1, c1, d1;

    if (tid < 4)
    {
        a1 = ip[tid] + ip[tid + 12];
        b1 = ip[tid + 4] + ip[tid + 8];
        c1 = ip[tid + 4] - ip[tid + 8];
        d1 = ip[tid] - ip[tid + 12];

        op[tid] = a1 + b1;
        op[tid + 4] = c1 + d1;
        op[tid + 8] = a1 - b1;
        op[tid + 12] = d1 - c1;
    }
}
|
||||
|
||||
/*
 * Second (row) pass of the 4x4 inverse Walsh-Hadamard transform, in place
 * on the 16 shorts at output_base + out_offset. Work-items 0..3 each
 * transform one row of four values and round with (x + 3) >> 3.
 */
__kernel void vp8_short_inv_walsh4x4_2nd_pass_kernel(
    __global short *output_base,
    int out_offset
)
{
    const int tid = get_global_id(0);

    if (tid >= 4)
        return;

    __global short *row = output_base + out_offset + 4 * tid;

    /* All four inputs are read before any output is stored. */
    const int a1 = row[0] + row[3];
    const int b1 = row[1] + row[2];
    const int c1 = row[1] - row[2];
    const int d1 = row[0] - row[3];

    row[0] = (a1 + b1 + 3) >> 3;
    row[1] = (c1 + d1 + 3) >> 3;
    row[2] = (a1 - b1 + 3) >> 3;
    row[3] = (d1 - c1 + 3) >> 3;
}
|
||||
|
||||
/*
 * DC-only inverse Walsh-Hadamard transform: fills the 16 shorts at
 * dst_data + dst_offset with (src[0] + 3) >> 3, where src is
 * src_data + src_offset. Work-items 0..3 each store one group of four.
 */
__kernel void vp8_short_inv_walsh4x4_1_kernel(
    __global short *src_data,
    int src_offset,
    __global short *dst_data,
    int dst_offset
){
    const int tid = get_global_id(0);
    __global short *input = src_data + src_offset;
    __global short *output = dst_data + dst_offset;

    if (tid >= 4)
        return;

    const short dc = (short)((input[0] + 3) >> 3);

    /* Broadcast dc to all four lanes; vstore4 writes at output + 4 * tid. */
    vstore4((short4)(dc), tid, output);
}
|
26
vp8/common/opencl/idctllm_cl.h
Normal file
26
vp8/common/opencl/idctllm_cl.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_opencl.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
/*
 * Clamp the lvalue x into [min, max] in place.
 *
 * Arguments are parenthesized so expression arguments expand correctly.
 * The expansion is an if/else statement that already ends in ';' (kept for
 * compatibility with existing call sites), so do not use it as the
 * unbraced body of an outer if/else. x, min and max may be evaluated more
 * than once: avoid arguments with side effects.
 */
#define CLAMP(x,min,max) if ((x) < (min)) (x) = (min); else if ((x) > (max)) (x) = (max);
|
||||
|
||||
//External functions that are fallbacks if CL is unavailable
|
||||
extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch);
|
||||
extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
|
||||
extern void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride);
|
||||
extern void vp8_short_inv_walsh4x4_c(short *input, short *output);
|
||||
extern void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
|
||||
|
||||
const char *idctCompileOptions = "-Ivp8/common/opencl";
|
||||
const char *idctllm_cl_file_name = "vp8/common/opencl/idctllm_cl.cl";
|
||||
|
427
vp8/common/opencl/loopfilter.cl
Normal file
427
vp8/common/opencl/loopfilter.cl
Normal file
@@ -0,0 +1,427 @@
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
typedef unsigned char uc;
|
||||
typedef signed char sc;
|
||||
|
||||
__inline signed char vp8_filter_mask(sc, sc, uc, uc, uc, uc, uc, uc, uc, uc);
|
||||
__inline signed char vp8_simple_filter_mask(signed char, signed char, uc, uc, uc, uc);
|
||||
__inline signed char vp8_hevmask(signed char, uc, uc, uc, uc);
|
||||
__inline signed char vp8_signed_char_clamp(int);
|
||||
|
||||
__inline void vp8_mbfilter(signed char mask,signed char hev,global uc *op2,
|
||||
global uc *op1,global uc *op0,global uc *oq0,global uc *oq1,global uc *oq2);
|
||||
|
||||
void vp8_simple_filter(signed char mask,global uc *base, int op1_off,int op0_off,int oq0_off,int oq1_off);
|
||||
|
||||
|
||||
/*
 * Loop-filter limit/threshold tables: ten arrays of 16 signed bytes each.
 * NOTE(review): presumably mirrors the host-side loop_filter_info layout;
 * confirm before changing field order or sizes.
 */
typedef struct
{
    signed char lim[16];
    signed char flim[16];
    signed char thr[16];
    signed char mbflim[16];
    signed char mbthr[16];
    signed char uvlim[16];
    signed char uvflim[16];
    signed char uvthr[16];
    signed char uvmbflim[16];
    signed char uvmbthr[16];
} loop_filter_info;
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Normal loop filter for one position across an edge.
 *
 * Reads the four pixels base[op1_off], base[op0_off], base[oq0_off] and
 * base[oq1_off], and rewrites all four. mask and hev are byte masks
 * (all ones or zero): mask gates the whole adjustment, hev selects whether
 * the outer-tap difference feeds the filter value and suppresses the
 * outer-pixel adjustment.
 */
void vp8_filter(
    signed char mask,
    signed char hev,
    global uc *base,
    int op1_off,
    int op0_off,
    int oq0_off,
    int oq1_off
)
{
    /* Convert the unsigned pixels to signed values by flipping the top bit. */
    const signed char ps1 = (signed char) base[op1_off] ^ 0x80;
    const signed char ps0 = (signed char) base[op0_off] ^ 0x80;
    const signed char qs0 = (signed char) base[oq0_off] ^ 0x80;
    const signed char qs1 = (signed char) base[oq1_off] ^ 0x80;

    signed char filt;
    signed char step_q, step_p;
    signed char outer;

    /* Outer taps contribute only when hev is set. */
    filt = vp8_signed_char_clamp(ps1 - qs1) & hev;

    /* Inner taps, gated by the filter mask. */
    filt = vp8_signed_char_clamp(filt + 3 * (qs0 - ps0)) & mask;

    /* One side is rounded with +4 and the other with +3 before the >> 3. */
    step_q = vp8_signed_char_clamp(filt + 4) >> 3;
    step_p = vp8_signed_char_clamp(filt + 3) >> 3;

    base[oq0_off] = vp8_signed_char_clamp(qs0 - step_q) ^ 0x80;
    base[op0_off] = vp8_signed_char_clamp(ps0 + step_p) ^ 0x80;

    /* Outer tap adjustment: half of step_q, rounded up, skipped when hev is set. */
    outer = ((step_q + 1) >> 1) & ~hev;

    base[oq1_off] = vp8_signed_char_clamp(qs1 - outer) ^ 0x80;
    base[op1_off] = vp8_signed_char_clamp(ps1 + outer) ^ 0x80;
}
|
||||
|
||||
|
||||
/*
 * Normal loop filter across a horizontal edge. Work-item i handles the
 * pixel column at s_base[s_off + i]; the taps are the four pixels above
 * and the four at/below that position, p bytes apart. limit, flimit and
 * thresh are indexed by the work-item id. off_stride is not used.
 */
kernel void vp8_loop_filter_horizontal_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p, /* pitch */
    global signed char *flimit,
    global signed char *limit,
    global signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);

    if (!(i < get_global_size(0)))
        return;

    /* Offset of the first pixel on the far side of the edge for this column. */
    const int q0 = s_off + i;

    const signed char mask = vp8_filter_mask(limit[i], flimit[i],
            s_base[q0 - 4*p], s_base[q0 - 3*p], s_base[q0 - 2*p], s_base[q0 - p],
            s_base[q0], s_base[q0 + p], s_base[q0 + 2*p], s_base[q0 + 3*p]);

    /* high edge variance */
    const int hev = vp8_hevmask(thresh[i], s_base[q0 - 2*p], s_base[q0 - p],
            s_base[q0], s_base[q0 + p]);

    vp8_filter(mask, hev, s_base, q0 - 2 * p, q0 - p, q0, q0 + p);
}
|
||||
|
||||
|
||||
/*
 * Normal loop filter across a vertical edge. Work-item i handles the row
 * starting at s_base[s_off + p * i]; the taps are the four bytes before and
 * the four at/after that position. limit, flimit and thresh are indexed by
 * the work-item id. off_stride is not used.
 */
kernel void vp8_loop_filter_vertical_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p,
    global signed char *flimit,
    global signed char *limit,
    global signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);

    if (!(i < get_global_size(0)))
        return;

    /* Offset of the first pixel on the far side of the edge for this row. */
    const int q0 = s_off + p * i;

    const signed char mask = vp8_filter_mask(limit[i], flimit[i],
            s_base[q0 - 4], s_base[q0 - 3], s_base[q0 - 2], s_base[q0 - 1],
            s_base[q0], s_base[q0 + 1], s_base[q0 + 2], s_base[q0 + 3]);

    /* high edge variance */
    const int hev = vp8_hevmask(thresh[i], s_base[q0 - 2], s_base[q0 - 1],
            s_base[q0], s_base[q0 + 1]);

    vp8_filter(mask, hev, s_base, q0 - 2, q0 - 1, q0, q0 + 1);
}
|
||||
|
||||
|
||||
/*
 * Macroblock loop filter across a horizontal edge. Work-item i handles the
 * pixel column at s_base + s_off + i, with taps p bytes apart; three
 * pixels on each side of the edge are handed to vp8_mbfilter. limit,
 * flimit and thresh are indexed by the work-item id. off_stride is not used.
 */
kernel void vp8_mbloop_filter_horizontal_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p,
    global signed char *flimit,
    global signed char *limit,
    global signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);

    if (!(i < get_global_size(0)))
        return;

    /* First pixel on the far side of the edge for this column. */
    global uc *q0 = s_base + s_off + i;

    const signed char mask = vp8_filter_mask(limit[i], flimit[i],
            q0[-4*p], q0[-3*p], q0[-2*p], q0[-1*p],
            q0[0*p], q0[1*p], q0[2*p], q0[3*p]);

    /* high edge variance */
    const signed char hev = vp8_hevmask(thresh[i],
            q0[-2*p], q0[-1*p], q0[0*p], q0[1*p]);

    vp8_mbfilter(mask, hev,
                 q0 - 3 * p, q0 - 2 * p, q0 - 1 * p,
                 q0, q0 + 1 * p, q0 + 2 * p);
}
|
||||
|
||||
|
||||
/*
 * Macroblock loop filter across a vertical edge. Work-item i handles the
 * row at s_base + s_off + p * i; three adjacent bytes on each side of the
 * edge are handed to vp8_mbfilter. limit, flimit and thresh are indexed by
 * the work-item id. off_stride is not used.
 */
kernel void vp8_mbloop_filter_vertical_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p,
    global signed char *flimit,
    global signed char *limit,
    global signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);

    if (!(i < get_global_size(0)))
        return;

    /* First pixel on the far side of the edge for this row. */
    global uc *q0 = s_base + s_off + p * i;

    const signed char mask = vp8_filter_mask(limit[i], flimit[i],
            q0[-4], q0[-3], q0[-2], q0[-1], q0[0], q0[1], q0[2], q0[3]);

    /* high edge variance */
    const signed char hev = vp8_hevmask(thresh[i], q0[-2], q0[-1], q0[0], q0[1]);

    vp8_mbfilter(mask, hev, q0 - 3, q0 - 2, q0 - 1, q0, q0 + 1, q0 + 2);
}
|
||||
|
||||
|
||||
/*
 * Simple loop filter across a horizontal edge. Work-item i handles the
 * pixel column at s_base[s_off + i], using two pixels on each side of the
 * edge, p bytes apart. limit and flimit are indexed by the work-item id;
 * thresh and off_stride are not used.
 */
kernel void vp8_loop_filter_simple_horizontal_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p,
    global const signed char *flimit,
    global const signed char *limit,
    global const signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);
    (void) thresh;

    if (!(i < get_global_size(0)))
        return;

    /* Offset of the first pixel on the far side of the edge for this column. */
    const int q0 = s_off + i;

    const signed char mask = vp8_simple_filter_mask(limit[i], flimit[i],
            s_base[q0 - 2*p], s_base[q0 - p], s_base[q0], s_base[q0 + p]);

    vp8_simple_filter(mask, s_base, q0 - 2 * p, q0 - 1 * p, q0, q0 + 1 * p);
}
|
||||
|
||||
|
||||
/*
 * Simple loop filter across a vertical edge. Work-item i handles the row
 * at s_base[s_off + p * i], using two adjacent bytes on each side of the
 * edge. limit and flimit are indexed by the work-item id; thresh and
 * off_stride are not used.
 */
kernel void vp8_loop_filter_simple_vertical_edge_kernel
(
    global unsigned char *s_base,
    int s_off,
    int p,
    global signed char *flimit,
    global signed char *limit,
    global signed char *thresh,
    int off_stride
)
{
    const int i = get_global_id(0);
    (void) thresh;

    if (!(i < get_global_size(0)))
        return;

    /* Offset of the first pixel on the far side of the edge for this row. */
    const int q0 = s_off + p * i;

    const signed char mask = vp8_simple_filter_mask(limit[i], flimit[i],
            s_base[q0 - 2], s_base[q0 - 1], s_base[q0], s_base[q0 + 1]);

    vp8_simple_filter(mask, s_base, q0 - 2, q0 - 1, q0, q0 + 1);
}
|
||||
|
||||
|
||||
|
||||
//Inline and non-kernel functions follow.
|
||||
|
||||
/*
 * Macroblock-edge filter for one position: reads the six pixels
 * op2, op1, op0 | oq0, oq1, oq2 around the edge and rewrites all six.
 * mask and hev are byte masks (all ones or zero).
 */
__inline void vp8_mbfilter(
    signed char mask,
    signed char hev,
    global uc *op2,
    global uc *op1,
    global uc *op0,
    global uc *oq0,
    global uc *oq1,
    global uc *oq2
)
{
    /* Convert the unsigned pixels to signed values by flipping the top bit. */
    const signed char ps2 = (signed char) *op2 ^ 0x80;
    const signed char ps1 = (signed char) *op1 ^ 0x80;
    signed char ps0 = (signed char) *op0 ^ 0x80;
    signed char qs0 = (signed char) *oq0 ^ 0x80;
    const signed char qs1 = (signed char) *oq1 ^ 0x80;
    const signed char qs2 = (signed char) *oq2 ^ 0x80;

    signed char filt;
    signed char hev_part, wide;
    signed char step_q, step_p;
    signed char u;

    /* Combined outer and inner tap difference, gated by the filter mask. */
    filt = vp8_signed_char_clamp(ps1 - qs1);
    filt = vp8_signed_char_clamp(filt + 3 * (qs0 - ps0)) & mask;

    /* With high edge variance only the two pixels nearest the edge move;
     * one side is rounded with +4 and the other with +3 before the >> 3. */
    hev_part = filt & hev;
    step_q = vp8_signed_char_clamp(hev_part + 4) >> 3;
    step_p = vp8_signed_char_clamp(hev_part + 3) >> 3;
    qs0 = vp8_signed_char_clamp(qs0 - step_q);
    ps0 = vp8_signed_char_clamp(ps0 + step_p);

    /* The wider filter applies only without high edge variance. */
    wide = filt & ~hev;

    /* roughly 3/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + wide * 27) >> 7);
    *oq0 = vp8_signed_char_clamp(qs0 - u) ^ 0x80;
    *op0 = vp8_signed_char_clamp(ps0 + u) ^ 0x80;

    /* roughly 2/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + wide * 18) >> 7);
    *oq1 = vp8_signed_char_clamp(qs1 - u) ^ 0x80;
    *op1 = vp8_signed_char_clamp(ps1 + u) ^ 0x80;

    /* roughly 1/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + wide * 9) >> 7);
    *oq2 = vp8_signed_char_clamp(qs2 - u) ^ 0x80;
    *op2 = vp8_signed_char_clamp(ps2 + u) ^ 0x80;
}
|
||||
|
||||
|
||||
/* Saturate t to the signed 8-bit range [-128, 127]. */
__inline signed char vp8_signed_char_clamp(int t)
{
    if (t < -128)
        return (signed char) -128;

    if (t > 127)
        return (signed char) 127;

    return (signed char) t;
}
|
||||
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
{
    /* All ones when either near-edge step exceeds thresh, zero otherwise. */
    if (abs(p1 - p0) > thresh)
        return -1;

    if (abs(q1 - q0) > thresh)
        return -1;

    return 0;
}
|
||||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
__inline signed char vp8_filter_mask(
    signed char limit,
    signed char flimit,
    uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
{
    /* Filtering is vetoed when any neighbouring step on either side exceeds
     * limit, or when the difference across the edge exceeds the combined
     * flimit/limit bound. */
    const int too_rough =
           (abs(p3 - p2) > limit)
        || (abs(p2 - p1) > limit)
        || (abs(p1 - p0) > limit)
        || (abs(q1 - q0) > limit)
        || (abs(q2 - q1) > limit)
        || (abs(q3 - q2) > limit)
        || (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit);

    /* All ones = filter, zero = leave untouched. */
    return too_rough ? 0 : -1;
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
__inline signed char vp8_simple_filter_mask(
    signed char limit,
    signed char flimit,
    uc p1,
    uc p0,
    uc q0,
    uc q1
)
{
    /* All ones when the difference across the edge is within the combined
     * flimit/limit bound, zero otherwise. */
    if (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit)
        return -1;

    return 0;
}
|
||||
|
||||
/*
 * Simple loop filter for one position across an edge. Reads the four
 * pixels base[op1_off], base[op0_off], base[oq0_off], base[oq1_off] and
 * rewrites only the two nearest the edge (oq0 and op0). mask is a byte
 * mask (all ones or zero) that gates the adjustment.
 */
void vp8_simple_filter(
    signed char mask,
    global uc *base,
    int op1_off,
    int op0_off,
    int oq0_off,
    int oq1_off
)
{
    /* Convert the unsigned pixels to signed values by flipping the top bit. */
    const signed char p1 = (signed char) base[op1_off] ^ 0x80;
    const signed char p0 = (signed char) base[op0_off] ^ 0x80;
    const signed char q0 = (signed char) base[oq0_off] ^ 0x80;
    const signed char q1 = (signed char) base[oq1_off] ^ 0x80;

    signed char filt;
    signed char step;

    filt = vp8_signed_char_clamp(p1 - q1);
    filt = vp8_signed_char_clamp(filt + 3 * (q0 - p0)) & mask;

    /* One side is rounded with +4 and the other with +3 before the >> 3. */
    step = vp8_signed_char_clamp(filt + 4) >> 3;
    base[oq0_off] = vp8_signed_char_clamp(q0 - step) ^ 0x80;

    step = vp8_signed_char_clamp(filt + 3) >> 3;
    base[op0_off] = vp8_signed_char_clamp(p0 + step) ^ 0x80;
}
|
457
vp8/common/opencl/loopfilter_cl.c
Normal file
457
vp8/common/opencl/loopfilter_cl.c
Normal file
@@ -0,0 +1,457 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "../../../vpx_ports/config.h"
|
||||
#include "loopfilter_cl.h"
|
||||
#include "../onyxc_int.h"
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_opencl.h"
|
||||
#include "blockd_cl.h"
|
||||
|
||||
const char *loopFilterCompileOptions = "-Ivp8/common/opencl";
|
||||
const char *loop_filter_cl_file_name = "vp8/common/opencl/loopfilter.cl";
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
extern void vp8_loop_filter_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl
|
||||
);
|
||||
|
||||
prototype_loopfilter_cl(vp8_loop_filter_horizontal_edge_cl);
|
||||
prototype_loopfilter_cl(vp8_loop_filter_vertical_edge_cl);
|
||||
prototype_loopfilter_cl(vp8_mbloop_filter_horizontal_edge_cl);
|
||||
prototype_loopfilter_cl(vp8_mbloop_filter_vertical_edge_cl);
|
||||
prototype_loopfilter_cl(vp8_loop_filter_simple_horizontal_edge_cl);
|
||||
prototype_loopfilter_cl(vp8_loop_filter_simple_vertical_edge_cl);
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_cl(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem buf_base,
|
||||
int y_off,
|
||||
int u_off,
|
||||
int v_off,
|
||||
int y_stride,
|
||||
int uv_stride,
|
||||
loop_filter_info *lfi,
|
||||
int simpler_lpf
|
||||
)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
|
||||
vp8_mbloop_filter_horizontal_edge_cl(x, buf_base, y_off, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_cl(x, buf_base, u_off, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_cl(x, buf_base, v_off, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_cl(MACROBLOCKD *x, cl_mem buf_base,
                             int y_off, int u_off, int v_off,
                             int y_stride, int uv_stride,
                             loop_filter_info *lfi, int simpler_lpf)
{
    /* Simple horizontal-edge filter on the Y plane only; the U/V offsets
     * and uv_stride are not used. */
    (void) uv_stride;
    (void) simpler_lpf;

    vp8_loop_filter_simple_horizontal_edge_cl(x, buf_base, y_off, y_stride,
                                              lfi->mbflim, lfi->lim, lfi->thr, 2, 1);
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_cl(MACROBLOCKD *x, cl_mem buf_base,
                            int y_off, int u_off, int v_off,
                            int y_stride, int uv_stride,
                            loop_filter_info *lfi, int simpler_lpf)
{
    /*
     * Runs the macroblock vertical-edge filter on the Y, U and V planes of
     * buf_base, in that order, using the mbflim/lim/thr tables of lfi.
     */
    (void) simpler_lpf;

    /* Y plane */
    vp8_mbloop_filter_vertical_edge_cl(x, buf_base, y_off, y_stride,
                                       lfi->mbflim, lfi->lim, lfi->thr, 2, 1);

    /* U plane */
    vp8_mbloop_filter_vertical_edge_cl(x, buf_base, u_off, uv_stride,
                                       lfi->mbflim, lfi->lim, lfi->thr, 1, 1);

    /* V plane */
    vp8_mbloop_filter_vertical_edge_cl(x, buf_base, v_off, uv_stride,
                                       lfi->mbflim, lfi->lim, lfi->thr, 1, 1);
}
|
||||
|
||||
void vp8_loop_filter_mbvs_cl(MACROBLOCKD *x, cl_mem buf_base,
                             int y_off, int u_off, int v_off,
                             int y_stride, int uv_stride,
                             loop_filter_info *lfi, int simpler_lpf)
{
    /* Simple vertical-edge filter on the Y plane only; the U/V offsets
     * and uv_stride are not used. */
    (void) uv_stride;
    (void) simpler_lpf;

    vp8_loop_filter_simple_vertical_edge_cl(x, buf_base, y_off, y_stride,
                                            lfi->mbflim, lfi->lim, lfi->thr, 2, 1);
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_cl(MACROBLOCKD *x, cl_mem buf_base,
                           int y_off, int u_off, int v_off,
                           int y_stride, int uv_stride,
                           loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    (void) simpler_lpf;

    /* Y plane: horizontal edges 4, 8 and 12 rows below y_off. */
    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_horizontal_edge_cl(x, buf_base, y_off + row * y_stride, y_stride,
                                           lfi->flim, lfi->lim, lfi->thr, 2, 1);
    }

    /* U and V planes: the single horizontal edge 4 rows below each offset. */
    vp8_loop_filter_horizontal_edge_cl(x, buf_base, u_off + 4 * uv_stride, uv_stride,
                                       lfi->flim, lfi->lim, lfi->thr, 1, 1);
    vp8_loop_filter_horizontal_edge_cl(x, buf_base, v_off + 4 * uv_stride, uv_stride,
                                       lfi->flim, lfi->lim, lfi->thr, 1, 1);
}
|
||||
|
||||
void vp8_loop_filter_bhs_cl(MACROBLOCKD *x, cl_mem buf_base,
                            int y_off, int u_off, int v_off,
                            int y_stride, int uv_stride,
                            loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    (void) uv_stride;
    (void) simpler_lpf;

    /* Y plane only: simple filter on the horizontal edges 4, 8 and 12 rows
     * below y_off. */
    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_simple_horizontal_edge_cl(x, buf_base, y_off + row * y_stride, y_stride,
                                                  lfi->flim, lfi->lim, lfi->thr, 2, 1);
    }
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_cl(MACROBLOCKD *x, cl_mem buf_base,
                           int y_off, int u_off, int v_off,
                           int y_stride, int uv_stride,
                           loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    (void) simpler_lpf;

    /* Y plane: vertical edges 4, 8 and 12 bytes right of y_off. */
    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_vertical_edge_cl(x, buf_base, y_off + col, y_stride,
                                         lfi->flim, lfi->lim, lfi->thr, 2, 1);
    }

    /* U and V planes: the single vertical edge 4 bytes right of each offset. */
    vp8_loop_filter_vertical_edge_cl(x, buf_base, u_off + 4, uv_stride,
                                     lfi->flim, lfi->lim, lfi->thr, 1, 1);
    vp8_loop_filter_vertical_edge_cl(x, buf_base, v_off + 4, uv_stride,
                                     lfi->flim, lfi->lim, lfi->thr, 1, 1);
}
|
||||
|
||||
void vp8_loop_filter_bvs_cl(MACROBLOCKD *x, cl_mem buf_base,
                            int y_off, int u_off, int v_off,
                            int y_stride, int uv_stride,
                            loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    (void) uv_stride;
    (void) simpler_lpf;

    /* Y plane only: simple filter on the vertical edges 4, 8 and 12 bytes
     * right of y_off. */
    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_simple_vertical_edge_cl(x, buf_base, y_off + col, y_stride,
                                                lfi->flim, lfi->lim, lfi->thr, 2, 1);
    }
}
|
||||
|
||||
/*
 * Fills cm->lf_info[0..MAX_LOOP_FILTER]: for every filter level, all 16
 * entries of lim, mbflim, flim and thr are set from the level, the frame
 * type and the sharpness level of cm.
 */
void vp8_init_loop_filter_cl(VP8_COMMON *cm)
{
    loop_filter_info *lfi = cm->lf_info;
    const int sharpness = cm->sharpness_level;
    const int is_key_frame = (cm->frame_type == KEY_FRAME);
    const int yhedge_boost = 2;
    int lvl;

    /* One loop_filter_info entry per possible loop filter level. */
    for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
    {
        int hev_thresh;
        int inside_limit;
        int k;

        /* High-edge-variance threshold: key frames use 2/1/1/0 and other
         * frames 3/2/1/0 for the level bands >=40, >=20, >=15, below 15. */
        if (lvl >= 40)
            hev_thresh = is_key_frame ? 2 : 3;
        else if (lvl >= 20)
            hev_thresh = is_key_frame ? 1 : 2;
        else if (lvl >= 15)
            hev_thresh = 1;
        else
            hev_thresh = 0;

        /* Loop filter parameters that control sharpness. */
        inside_limit = lvl >> (sharpness > 0);
        inside_limit >>= (sharpness > 4);

        if (sharpness > 0 && inside_limit > (9 - sharpness))
            inside_limit = 9 - sharpness;

        if (inside_limit < 1)
            inside_limit = 1;

        for (k = 0; k < 16; k++)
        {
            lfi[lvl].lim[k] = inside_limit;
            lfi[lvl].mbflim[k] = lvl + yhedge_boost;
            lfi[lvl].flim[k] = lvl;
            lfi[lvl].thr[k] = hev_thresh;
        }
    }
}
|
||||
|
||||
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
||||
* each frame. Check last_frame_type to skip the function most of times.
|
||||
*/
|
||||
/*
 * Refresh only the frame-type dependent part of the loop filter table:
 * the 16-element thr array of every entry lfi[0..MAX_LOOP_FILTER].
 *
 * lfi        - table with MAX_LOOP_FILTER + 1 entries to update in place.
 * frame_type - compared against KEY_FRAME to select the threshold ladder.
 */
void vp8_frame_init_loop_filter_cl(loop_filter_info *lfi, int frame_type)
{
    const int is_key_frame = (frame_type == KEY_FRAME);
    int level;

    for (level = 0; level <= MAX_LOOP_FILTER; level++)
    {
        int hev_thresh;
        int k;

        /* Key frames use one step less than other frames from level 20 up. */
        if (level >= 40)
            hev_thresh = is_key_frame ? 2 : 3;
        else if (level >= 20)
            hev_thresh = is_key_frame ? 1 : 2;
        else if (level >= 15)
            hev_thresh = 1;
        else
            hev_thresh = 0;

        for (k = 0; k < 16; k++)
            lfi[level].thr[k] = hev_thresh;
    }
}
|
||||
|
||||
|
||||
//This might not need to be copied from loopfilter.c
|
||||
/*
 * Apply the per-macroblock loop filter deltas to *filter_level.
 *
 * Nothing is changed unless mbd->mode_ref_lf_delta_enabled is set. When it
 * is, the reference-frame delta and then one mode delta are added and the
 * result is clamped to 0..MAX_LOOP_FILTER.
 */
void vp8_adjust_mb_lf_value_cl(MACROBLOCKD *mbd, int *filter_level)
{
    const MB_MODE_INFO *info = &mbd->mode_info_context->mbmi;
    int level;

    if (!mbd->mode_ref_lf_delta_enabled)
        return;

    /* Apply delta for reference frame */
    level = *filter_level + mbd->ref_lf_deltas[info->ref_frame];

    /* Apply delta for mode */
    if (info->ref_frame == INTRA_FRAME)
    {
        /* Among intra modes only B_PRED has its own delta */
        if (info->mode == B_PRED)
            level += mbd->mode_lf_deltas[0];
    }
    else
    {
        switch (info->mode)
        {
        case ZEROMV:    /* Zero motion mode */
            level += mbd->mode_lf_deltas[1];
            break;

        case SPLITMV:   /* Split MB motion mode */
            level += mbd->mode_lf_deltas[3];
            break;

        default:        /* All other inter motion modes (Nearest, Near, New) */
            level += mbd->mode_lf_deltas[2];
            break;
        }
    }

    /* Range check */
    if (level > MAX_LOOP_FILTER)
        level = MAX_LOOP_FILTER;
    else if (level < 0)
        level = 0;

    *filter_level = level;
}
|
||||
|
||||
|
||||
//Start of externally callable functions.
|
||||
|
||||
int cl_init_loop_filter() {
|
||||
int err;
|
||||
|
||||
// Create the filter compute program from the file-defined source code
|
||||
if ( cl_load_program(&cl_data.loop_filter_program, loop_filter_cl_file_name,
|
||||
loopFilterCompileOptions) != CL_SUCCESS )
|
||||
return VP8_CL_TRIED_BUT_FAILED;
|
||||
|
||||
// Create the compute kernels in the program we wish to run
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_loop_filter_horizontal_edge_kernel,"vp8_loop_filter_horizontal_edge_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_loop_filter_vertical_edge_kernel,"vp8_loop_filter_vertical_edge_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_mbloop_filter_horizontal_edge_kernel,"vp8_mbloop_filter_horizontal_edge_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_mbloop_filter_vertical_edge_kernel,"vp8_mbloop_filter_vertical_edge_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_loop_filter_simple_horizontal_edge_kernel,"vp8_loop_filter_simple_horizontal_edge_kernel");
|
||||
VP8_CL_CREATE_KERNEL(cl_data,loop_filter_program,vp8_loop_filter_simple_vertical_edge_kernel,"vp8_loop_filter_simple_vertical_edge_kernel");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
void cl_destroy_loop_filter(){
|
||||
|
||||
if (cl_data.loop_filter_program)
|
||||
clReleaseProgram(cl_data.loop_filter_program);
|
||||
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_loop_filter_horizontal_edge_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_loop_filter_vertical_edge_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_mbloop_filter_horizontal_edge_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_mbloop_filter_vertical_edge_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_loop_filter_simple_horizontal_edge_kernel);
|
||||
VP8_CL_RELEASE_KERNEL(cl_data.vp8_loop_filter_simple_vertical_edge_kernel);
|
||||
|
||||
cl_data.loop_filter_program = NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Compute the baseline loop filter level for each of the MAX_MB_SEGMENTS
 * segments.
 *
 * mbd                   - supplies segmentation_enabled, mb_segement_abs_delta
 *                         and the MB_LVL_ALT_LF row of segment_feature_data.
 * default_filt_lvl      - frame-level filter level; used as-is when
 *                         segmentation is disabled, or as the base for
 *                         per-segment deltas.
 * baseline_filter_level - output array with MAX_MB_SEGMENTS entries.
 *
 * With segmentation enabled, each segment's level is either the absolute
 * value from segment_feature_data or default_filt_lvl plus that value.
 * Both forms are clamped to 0..MAX_LOOP_FILTER: the result is later used to
 * index the loop_filter_info table, so an out-of-range absolute value must
 * not be passed through unclamped.
 */
void vp8_loop_filter_set_baselines_cl(MACROBLOCKD *mbd, int default_filt_lvl, int *baseline_filter_level){
    int i;

    if (!mbd->segmentation_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
            baseline_filter_level[i] = default_filt_lvl;

        return;
    }

    for (i = 0; i < MAX_MB_SEGMENTS; i++)
    {
        int level = mbd->segment_feature_data[MB_LVL_ALT_LF][i];

        /* Delta value: offset from the frame default. Abs value: use as is. */
        if (mbd->mb_segement_abs_delta != SEGMENT_ABSDATA)
            level += default_filt_lvl;

        /* Clamp to valid range (applies to both abs and delta values) */
        if (level < 0)
            level = 0;
        else if (level > MAX_LOOP_FILTER)
            level = MAX_LOOP_FILTER;

        baseline_filter_level[i] = level;
    }
}
|
||||
|
||||
void vp8_loop_filter_frame_cl
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
LOOPFILTERTYPE filter_type = cm->filter_type;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
|
||||
int err;
|
||||
unsigned char *buf_base;
|
||||
int y_off, u_off, v_off;
|
||||
//unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
vp8_loop_filter_set_baselines_cl(mbd, default_filt_lvl, baseline_filter_level);
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter_cl(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter_cl(lfi, frame_type);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
|
||||
buf_base = post->buffer_alloc;
|
||||
y_off = post->y_buffer - buf_base;
|
||||
u_off = post->u_buffer - buf_base;
|
||||
v_off = post->v_buffer - buf_base;
|
||||
|
||||
VP8_CL_SET_BUF(mbd->cl_commands, post->buffer_mem, post->buffer_size, post->buffer_alloc,
|
||||
vp8_loop_filter_frame(cm,mbd,default_filt_lvl),);
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These specified to 8th pel as they are always compared to values
|
||||
* that are in 1/8th pel units. Apply any context driven MB level
|
||||
* adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0){
|
||||
if (filter_type == NORMAL_LOOPFILTER)
|
||||
vp8_loop_filter_mbv_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
else
|
||||
vp8_loop_filter_mbvs_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
}
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0){
|
||||
if (filter_type == NORMAL_LOOPFILTER)
|
||||
vp8_loop_filter_bv_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
else
|
||||
vp8_loop_filter_bvs_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
}
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0){
|
||||
if (filter_type == NORMAL_LOOPFILTER)
|
||||
vp8_loop_filter_mbh_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
else
|
||||
vp8_loop_filter_mbhs_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
}
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0){
|
||||
if (filter_type == NORMAL_LOOPFILTER)
|
||||
vp8_loop_filter_bh_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
else
|
||||
vp8_loop_filter_bhs_cl(mbd, post->buffer_mem, y_off, u_off, v_off, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
}
|
||||
}
|
||||
|
||||
y_off += 16;
|
||||
u_off += 8;
|
||||
v_off += 8;
|
||||
|
||||
mbd->mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_off += post->y_stride * 16 - post->y_width;
|
||||
u_off += post->uv_stride * 8 - post->uv_width;
|
||||
v_off += post->uv_stride * 8 - post->uv_width;
|
||||
|
||||
mbd->mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
|
||||
//Retrieve buffer contents
|
||||
err = clEnqueueReadBuffer(mbd->cl_commands, post->buffer_mem, CL_FALSE, 0, post->buffer_size, post->buffer_alloc, 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS(mbd->cl_commands, err != CL_SUCCESS,
|
||||
"Error: Failed to read loop filter output!\n",
|
||||
,
|
||||
);
|
||||
|
||||
VP8_CL_FINISH(mbd->cl_commands);
|
||||
}
|
48
vp8/common/opencl/loopfilter_cl.h
Normal file
48
vp8/common/opencl/loopfilter_cl.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef loopfilter_cl_h
#define loopfilter_cl_h

#include "../../../vpx_ports/mem.h"

#include "../onyxc_int.h"
#include "blockd_cl.h"
#include "../loopfilter.h"

/*
 * Declares an edge filter that works on a cl_mem buffer at a given offset.
 * flimit, limit and thresh are host-side threshold arrays; count and
 * block_cnt are forwarded to the kernel launch.
 */
#define prototype_loopfilter_cl(sym) \
    void sym(MACROBLOCKD*, cl_mem src_base, int src_offset, \
             int pitch, const signed char *flimit, \
             const signed char *limit, const signed char *thresh, \
             int count, int block_cnt)

/*
 * Declares a per-macroblock filter taking host plane pointers.
 * NOTE(review): vp8_loop_filter_frame_cl calls its block filters with a
 * buffer object and integer plane offsets rather than y/u/v pointers, and
 * under different names than the ones declared below - confirm whether
 * these declarations are still in use.
 */
#define prototype_loopfilter_block_cl(sym) \
    void sym(MACROBLOCKD*, unsigned char *y, unsigned char *u, unsigned char *v,\
             int ystride, int uv_stride, loop_filter_info *lfi, int simpler)

/* Filters a whole frame; see the definition for details. */
extern void vp8_loop_filter_frame_cl(VP8_COMMON *cm, MACROBLOCKD *mbd, int default_filt_lvl);

/* Normal loop filter entry points. */
extern prototype_loopfilter_block_cl(vp8_lf_normal_mb_v_cl);
extern prototype_loopfilter_block_cl(vp8_lf_normal_b_v_cl);
extern prototype_loopfilter_block_cl(vp8_lf_normal_mb_h_cl);
extern prototype_loopfilter_block_cl(vp8_lf_normal_b_h_cl);

/* Simple loop filter entry points. */
extern prototype_loopfilter_block_cl(vp8_lf_simple_mb_v_cl);
extern prototype_loopfilter_block_cl(vp8_lf_simple_b_v_cl);
extern prototype_loopfilter_block_cl(vp8_lf_simple_mb_h_cl);
extern prototype_loopfilter_block_cl(vp8_lf_simple_b_h_cl);

/* Pointer to a function with the block filter signature. */
typedef prototype_loopfilter_block_cl((*vp8_lf_block_cl_fn_t));

#endif
|
187
vp8/common/opencl/loopfilter_filters_cl.c
Normal file
187
vp8/common/opencl/loopfilter_filters_cl.c
Normal file
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vp8_opencl.h"
|
||||
#include "blockd_cl.h"
|
||||
|
||||
//#include "loopfilter_cl.h"
|
||||
//#include "../onyxc_int.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
static void vp8_loop_filter_cl_run(
|
||||
cl_command_queue cq,
|
||||
cl_kernel kernel,
|
||||
cl_mem buf_mem,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
){
|
||||
size_t global[] = {count,block_cnt};
|
||||
int err;
|
||||
|
||||
cl_mem flimit_mem;
|
||||
cl_mem limit_mem;
|
||||
cl_mem thresh_mem;
|
||||
|
||||
VP8_CL_CREATE_BUF(cq, flimit_mem, , sizeof(uc)*16, flimit,, );
|
||||
VP8_CL_CREATE_BUF(cq, limit_mem, , sizeof(uc)*16, limit,, );
|
||||
VP8_CL_CREATE_BUF(cq, thresh_mem, , sizeof(uc)*16, thresh,, );
|
||||
|
||||
err = 0;
|
||||
err = clSetKernelArg(kernel, 0, sizeof (cl_mem), &buf_mem);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof (cl_int), &s_off);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof (cl_int), &p);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof (cl_mem), &flimit_mem);
|
||||
err |= clSetKernelArg(kernel, 4, sizeof (cl_mem), &limit_mem);
|
||||
err |= clSetKernelArg(kernel, 5, sizeof (cl_mem), &thresh_mem);
|
||||
err |= clSetKernelArg(kernel, 6, sizeof (cl_int), &block_cnt);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to set kernel arguments!\n",,
|
||||
);
|
||||
|
||||
/* Execute the kernel */
|
||||
err = clEnqueueNDRangeKernel(cq, kernel, 2, NULL, global, NULL , 0, NULL, NULL);
|
||||
VP8_CL_CHECK_SUCCESS( cq, err != CL_SUCCESS,
|
||||
"Error: Failed to execute kernel!\n",
|
||||
printf("err = %d\n",err);,
|
||||
);
|
||||
|
||||
clReleaseMemObject(flimit_mem);
|
||||
clReleaseMemObject(limit_mem);
|
||||
clReleaseMemObject(thresh_mem);
|
||||
|
||||
VP8_CL_FINISH(cq);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_loop_filter_horizontal_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_loop_filter_vertical_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_mbloop_filter_horizontal_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_mbloop_filter_vertical_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_loop_filter_simple_horizontal_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_cl
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
cl_mem s_base,
|
||||
int s_off,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count,
|
||||
int block_cnt
|
||||
)
|
||||
{
|
||||
vp8_loop_filter_cl_run(x->cl_commands,
|
||||
cl_data.vp8_loop_filter_simple_vertical_edge_kernel, s_base, s_off,
|
||||
p, flimit, limit, thresh, count*8, block_cnt
|
||||
);
|
||||
}
|
41
vp8/common/opencl/opencl_systemdependent.c
Normal file
41
vp8/common/opencl/opencl_systemdependent.c
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "../subpixel.h"
|
||||
#include "subpixel_cl.h"
|
||||
#include "../onyxc_int.h"
|
||||
#include "vp8_opencl.h"
|
||||
|
||||
#if HAVE_DLOPEN
|
||||
#include "dynamic_cl.h"
|
||||
#endif
|
||||
|
||||
/*
 * Set cl_initialized from cl_common_init().
 *
 * When HAVE_DLOPEN is set the OpenCL library is loaded by name first, and
 * cl_initialized becomes VP8_CL_TRIED_BUT_FAILED if that load fails.
 * ctx is not used by this function.
 */
void vp8_arch_opencl_common_init(VP8_COMMON *ctx)
{
#if HAVE_DLOPEN

#if WIN32 /* Windows .dll has no lib prefix and no extension */
    cl_loaded = load_cl("OpenCL");
#else     /* But *nix needs full name */
    cl_loaded = load_cl("libOpenCL.so");
#endif

    if (cl_loaded != CL_SUCCESS)
        cl_initialized = VP8_CL_TRIED_BUT_FAILED;
    else
        cl_initialized = cl_common_init();

#else /* !HAVE_DLOPEN (e.g. Apple) */
    cl_initialized = cl_common_init();
#endif
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user