Compare commits
sandbox/Ji...sandbox/hk
159 Commits
SHA1
5504b39f5e
7b0f24fc21
c77b1f5acd
778845da05
2061359fcf
0ae1e4a95a
47767609fe
fd90ce2711
95cb130f32
d45870be8d
9e81112df2
486a73a9ce
d31698b0e0
3a7bc16156
ee3d42bf3f
4aec440682
0810a2d8bc
03f12e142b
2301d10f73
f58011ada5
f274c2199b
fbd3b89488
ccc9e1da8d
4902606bb6
2be50a1c9c
bfce02971e
afcb62b414
297b2b99de
6686e4411d
8d98aef055
abff8b24b9
b3e411e481
3e9263e1ca
4feae6791c
4659e3644f
4a29474c83
9ed0e071fe
a6e9ae9066
e07b141da0
5d8877a944
1c9af9833d
3b35e962e2
931c0a954f
66b9933b8d
8b17f7f4eb
ee87e20d53
924d06a075
3606b78108
ca90d4fd96
391ecffed9
02804821cc
e5eda53e3d
ef51c1ab5b
b423a6b212
305492c375
2e36149ccd
27d083c1b9
03829f2fea
3d2f037a44
f2cbee9a04
1777413a2a
3a7ebf9c36
9757c1aded
3f7f194304
73bce9ec7e
f76ccce5bc
645c70f852
c0b23ac20d
45f9ee263f
29b5cf6a9d
a53c495b3e
4aa9255efa
e8645ce9f9
278593c9a9
effd974b16
3701dcc5c1
e3bf76f9ec
1551d7c00b
14e7203e7b
63c5bf2b9c
3cba8dc304
d61c1ea69b
14ef4aeafb
b09ac154af
d011a8a45c
4009b63c42
004b9d83e3
af7484a332
2baa3debd5
3b2510374a
eb8c667570
2f693be8f8
2404332c1b
4565812032
93d9c50419
9244ac8592
481fb7640c
b815f3a977
8a75847497
7e8e507bfb
5668dcc7b9
aac3adb3c5
59681be0a0
2ed0cf06f9
c88ce84bb5
90517b5e85
60bd744c88
5eee2a88f8
e1ff83f4b0
927693a991
6de407b638
8de26efa66
866447a008
0bb897211d
5e300cac2c
7f629dfca4
b2baaa215b
5afa7d1f87
30205e14b7
acb219be25
0c5a140a02
04c53d2393
970acffa8f
7b4f727959
1a1114d21c
4e73e4bf93
44e3640923
0080aca235
c5f7842234
79bd071373
c74bf6d889
30e9c091c0
73fe337647
d72ed35374
12cb30d4bd
718feb0f69
73c8fe5deb
f85f79f630
327b138b2c
cae03a7ef5
d181a627f0
5ef2d1ddae
77ea408983
6eb05c9ed0
bc98e93b53
b8a1de86fd
b644384bb5
54210f706c
f3bea3de5b
4cf68be17a
2149f214d5
f26b8c84f8
f2cf3c06a0
1582ac851f
14e24a1297
a98f6c0254
8845334097
bb5a39c1a7
19b4dead25
.mailmap (20 changes)

@@ -1,18 +1,26 @@
Adrian Grange <agrange@google.com>
Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
Jim Bankoski <jimbankoski@google.com>
John Koleszar <jkoleszar@google.com>
Johann Koenig <johannkoenig@google.com>
Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
John Koleszar <jkoleszar@google.com>
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Sami Pietilä <samipietila@google.com>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Deb Mukherjee <debargha@google.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
AUTHORS (29 changes)

@@ -3,10 +3,11 @@

Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Ahmad Sharif <asharif@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alex Converse <alex.converse@gmail.com>
Alex Converse <aconverse@google.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -14,44 +15,58 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Russell <anrussell@google.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
chm <chm@rock-chips.com>
Christian Duvivier <cduvivier@google.com>
Daniel Kang <ddkang@google.com>
Deb Mukherjee <debargha@google.com>
Dim Temp <dimtemp0@gmail.com>
Dmitry Kovalev <dkovalev@google.com>
Dragan Mrdjan <dmrdjan@mips.com>
Erik Niemeyer <erik.a.niemeyer@gmail.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
Erik Niemeyer <erik.a.niemeyer@intel.com>
Fabio Pedretti <fabio.ped@libero.it>
Frank Galligan <fgalligan@google.com>
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Maltz <ivanmaltz@google.com>
Jacek Caban <cjacek@gmail.com>
JackyChen <jackychen@google.com>
James Berry <jamesberry@google.com>
James Yu <james.yu@linaro.org>
James Zern <jzern@google.com>
Jan Gerber <j@mailb.org>
Jan Kratochvil <jan.kratochvil@redhat.com>
Janne Salonen <jsalonen@google.com>
Jeff Faust <jfaust@google.com>
Jeff Muizelaar <jmuizelaar@mozilla.com>
Jeff Petkau <jpet@chromium.org>
Jia Jia <jia.jia@linaro.org>
Jim Bankoski <jimbankoski@google.com>
Jingning Han <jingning@google.com>
Joey Parrish <joeyparrish@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
John Stark <jhnstrk@gmail.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
Lawrence Velázquez <larryv@macports.org>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
Makoto Kato <makoto.kt@gmail.com>
@@ -65,6 +80,7 @@ Michael Kohler <michaelkohler@live.com>
Mike Frysinger <vapier@chromium.org>
Mike Hommey <mhommey@mozilla.com>
Mikhal Shemer <mikhal@google.com>
Minghai Shang <minghai@google.com>
Morton Jonuschat <yabawock@gmail.com>
Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
@@ -72,6 +88,8 @@ Patrik Westin <patrik.westin@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
@@ -79,22 +97,29 @@ Rafaël Carré <funman@videolan.org>
Ralph Giles <giles@xiph.org>
Rob Bradford <rob@linux.intel.com>
Ronald S. Bultje <rbultje@google.com>
Rui Ueyama <ruiu@google.com>
Sami Pietilä <samipietila@google.com>
Scott Graham <scottmg@chromium.org>
Scott LaVarnway <slavarnway@google.com>
Sean McGovern <gseanmcg@gmail.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
Tao Bai <michaelbai@chromium.org>
Tero Rintaluoma <teror@google.com>
Thijs Vermeir <thijsvermeir@gmail.com>
Tim Kopp <tkopp@google.com>
Timothy B. Terriberry <tterribe@xiph.org>
Tom Finegan <tomfinegan@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Yaowu Xu <yaowu@google.com>
Yongzhe Wang <yongzhe@google.com>
Yunqing Wang <yunqingwang@google.com>
Zoe Liu <zoeliu@google.com>
Google Inc.
The Mozilla Foundation
The Xiph.Org Foundation
CHANGELOG (23 changes)

@@ -1,3 +1,26 @@
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.

- Upgrading:
This release is ABI incompatible with 1.3.0. It drops the compatibility
layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
controls for VP9.

- Enhancements:
Faster VP9 encoding and decoding
Multithreaded VP9 decoding (tile and frame-based)
Multithreaded VP9 encoding - on by default
YUV 4:2:2 and 4:4:4 support in VP9
10 and 12bit support in VP9
64bit ARM support by replacing ARM assembly with intrinsics

- Bug Fixes:
Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
files.

- Known Issues:
Frame Parallel decoding fails for segmented and non-420 files.

2013-11-15 v1.3.0 "Forest"
This release introduces the VP9 codec in a backward-compatible way.
All existing users of VP8 can continue to use the library without
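The v1.4.0 "Upgrading" note above is, for most callers, a mechanical rename. A minimal sketch of the change, assuming a caller that allocates an I420 image (the 640x480 dimensions and 16-byte alignment are arbitrary illustration values, not something the release notes mandate):

```c
#include "vpx/vpx_image.h"

static void alloc_frame(void) {
  vpx_image_t img;
  /* Through v1.3.0 the compatibility layer accepted the old alias:
   *   vpx_img_alloc(&img, IMG_FMT_I420, 640, 480, 16);
   * From v1.4.0 on, only the VPX_-prefixed names are defined. */
  vpx_img_alloc(&img, VPX_IMG_FMT_I420, 640, 480, 16);
  vpx_img_free(&img);
}
```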
PATENTS (2 changes)

@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitutes direct or contributory patent infringement, or inducement of
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.
README (11 changes)

@@ -1,4 +1,4 @@
README - 30 May 2014
README - 23 March 2015

Welcome to the WebM VP8/VP9 Codec SDK!

@@ -62,12 +62,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv7s-darwin-gcc
mips32-linux-gcc
mips64-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
ppc32-linux-gcc
ppc64-darwin8-gcc
ppc64-darwin9-gcc
ppc64-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc
@@ -78,6 +72,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86-darwin11-gcc
x86-darwin12-gcc
x86-darwin13-gcc
x86-darwin14-gcc
x86-iphonesimulator-gcc
x86-linux-gcc
x86-linux-icc
@@ -95,6 +90,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-darwin11-gcc
x86_64-darwin12-gcc
x86_64-darwin13-gcc
x86_64-darwin14-gcc
x86_64-iphonesimulator-gcc
x86_64-linux-gcc
x86_64-linux-icc
@@ -111,6 +107,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
universal-darwin11-gcc
universal-darwin12-gcc
universal-darwin13-gcc
universal-darwin14-gcc
generic-gnu

The generic-gnu target, in conjunction with the CROSS environment variable,
@@ -158,8 +158,6 @@ LOCAL_CFLAGS += \

LOCAL_MODULE := libvpx

LOCAL_LDLIBS := -llog

ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
LOCAL_STATIC_LIBRARIES := cpufeatures
endif
@@ -184,7 +182,11 @@ clean:
@$(RM) -r $(ASM_CNV_PATH)
@$(RM) $(CLEAN-OBJS)

include $(BUILD_SHARED_LIBRARY)
ifeq ($(ENABLE_SHARED),1)
include $(BUILD_SHARED_LIBRARY)
else
include $(BUILD_STATIC_LIBRARY)
endif

ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
$(call import-module,cpufeatures)
@@ -383,8 +383,8 @@ LIBS=$(call enabled,LIBS)
.libs: $(LIBS)
@touch $@
$(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
$(foreach lib,$(filter %$(VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))

INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
ifeq ($(MAKECMDGOALS),dist)
@@ -640,12 +640,6 @@ process_common_toolchain() {
*i[3456]86*)
tgt_isa=x86
;;
*powerpc64*)
tgt_isa=ppc64
;;
*powerpc*)
tgt_isa=ppc32
;;
*sparc*)
tgt_isa=sparc
;;
@@ -1042,25 +1036,30 @@ EOF
disable_feature fast_unaligned
fi

if enabled runtime_cpu_detect; then
disable_feature runtime_cpu_detect
fi

if [ -n "${tune_cpu}" ]; then
case ${tune_cpu} in
p5600)
add_cflags -mips32r5 -funroll-loops -mload-store-pairs
add_cflags -msched-weight -mhard-float
add_asflags -mips32r5 -mhard-float
check_add_cflags -mips32r5 -funroll-loops -mload-store-pairs
check_add_cflags -msched-weight -mhard-float -mfp64
check_add_asflags -mips32r5 -mhard-float -mfp64
check_add_ldflags -mfp64
;;
i6400)
add_cflags -mips64r6 -mabi=64 -funroll-loops -mload-store-pairs
add_cflags -msched-weight -mhard-float
add_asflags -mips64r6 -mabi=64 -mhard-float
add_ldflags -mips64r6 -mabi=64
check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
check_add_cflags -mload-store-pairs -mhard-float -mfp64
check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
check_add_ldflags -mips64r6 -mabi=64 -mfp64
;;
esac

if enabled msa; then
add_cflags -mmsa -mfp64 -flax-vector-conversions
add_asflags -mmsa -mfp64 -flax-vector-conversions
add_ldflags -mmsa -mfp64 -flax-vector-conversions
add_cflags -mmsa
add_asflags -mmsa
add_ldflags -mmsa

disable_feature fast_unaligned
fi
@@ -1070,29 +1069,6 @@ EOF
check_add_asflags -march=${tgt_isa}
check_add_asflags -KPIC
;;
ppc*)
enable_feature ppc
bits=${tgt_isa##ppc}
link_with_cc=gcc
setup_gnu_toolchain
add_asflags -force_cpusubtype_ALL -I"\$(dir \$<)darwin"
soft_enable altivec
enabled altivec && add_cflags -maltivec

case "$tgt_os" in
linux*)
add_asflags -maltivec -mregnames -I"\$(dir \$<)linux"
;;
darwin*)
darwin_arch="-arch ppc"
enabled ppc64 && darwin_arch="${darwin_arch}64"
add_cflags ${darwin_arch} -m${bits} -fasm-blocks
add_asflags ${darwin_arch} -force_cpusubtype_ALL -I"\$(dir \$<)darwin"
add_ldflags ${darwin_arch} -m${bits}
enabled altivec && add_cflags -faltivec
;;
esac
;;
x86*)
case ${tgt_os} in
win*)
@@ -1329,11 +1305,15 @@ EOF
# only for MIPS platforms
case ${toolchain} in
mips*)
if enabled dspr2; then
if enabled big_endian; then
if enabled big_endian; then
if enabled dspr2; then
echo "dspr2 optimizations are available only for little endian platforms"
disable_feature dspr2
fi
if enabled msa; then
echo "msa optimizations are available only for little endian platforms"
disable_feature msa
fi
fi
;;
esac
configure (vendored, 23 changes)

@@ -40,7 +40,6 @@ Advanced options:
${toggle_vp8} VP8 codec support
${toggle_vp9} VP9 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_mem_tracker} track memory usage
${toggle_postproc} postprocessing
${toggle_vp9_postproc} vp9 specific postprocessing
${toggle_multithread} multithreaded encoding and decoding
@@ -112,12 +111,6 @@ all_platforms="${all_platforms} armv7-win32-vs12"
all_platforms="${all_platforms} armv7s-darwin-gcc"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} mips64-linux-gcc"
all_platforms="${all_platforms} ppc32-darwin8-gcc"
all_platforms="${all_platforms} ppc32-darwin9-gcc"
all_platforms="${all_platforms} ppc32-linux-gcc"
all_platforms="${all_platforms} ppc64-darwin8-gcc"
all_platforms="${all_platforms} ppc64-darwin9-gcc"
all_platforms="${all_platforms} ppc64-linux-gcc"
all_platforms="${all_platforms} sparc-solaris-gcc"
all_platforms="${all_platforms} x86-android-gcc"
all_platforms="${all_platforms} x86-darwin8-gcc"
@@ -247,8 +240,6 @@ ARCH_LIST="
mips
x86
x86_64
ppc32
ppc64
"
ARCH_EXT_LIST="
edsp
@@ -269,8 +260,6 @@ ARCH_EXT_LIST="
sse4_1
avx
avx2

altivec
"
HAVE_LIST="
${ARCH_EXT_LIST}
@@ -306,9 +295,6 @@ CONFIG_LIST="
codec_srcs
debug_libs
fast_unaligned
mem_manager
mem_tracker
mem_checks

dequant_tokens
dc_recon
@@ -383,7 +369,6 @@ CMDLINE_SELECT="
${CODECS}
${CODEC_FAMILIES}
static_msvcrt
mem_tracker
spatial_resampling
realtime_only
onthefly_bitpacking
@@ -621,12 +606,6 @@ process_toolchain() {
universal-darwin*)
darwin_ver=${tgt_os##darwin}

# Snow Leopard (10.6/darwin10) dropped support for PPC
# Include PPC support for all prior versions
if [ $darwin_ver -lt 10 ]; then
fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
fi

# Tiger (10.4/darwin8) brought support for x86
if [ $darwin_ver -ge 8 ]; then
fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
@@ -727,7 +706,7 @@ process_toolchain() {
esac

# Other toolchain specific defaults
case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
case $toolchain in x86*|universal*) soft_enable postproc;; esac

if enabled postproc_visualizer; then
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -674,14 +674,14 @@ int main(int argc, char **argv) {

if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
die_codec(&codec, "Failed to set SVC");
libs.mk (14 changes)

@@ -230,25 +230,27 @@ $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))

SO_VERSION_MAJOR := 2
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(VERSION_MAJOR).dylib
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.dylib )
else
LIBVPX_SO := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
EXPORT_FILE := libvpx.ver
SYM_LINK := libvpx.so
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so libvpx.so.$(VERSION_MAJOR) \
libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
endif

LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS))
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
$(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)

libvpx.ver: $(call enabled,CODEC_EXPORTS)
@@ -40,7 +40,13 @@ include $(CLEAR_VARS)
LOCAL_ARM_MODE := arm
LOCAL_MODULE := libvpx_test
LOCAL_STATIC_LIBRARIES := gtest libwebm
LOCAL_SHARED_LIBRARIES := vpx

ifeq ($(ENABLE_SHARED),1)
LOCAL_SHARED_LIBRARIES := vpx
else
LOCAL_STATIC_LIBRARIES += vpx
endif

include $(LOCAL_PATH)/test/test.mk
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
test/blockiness_test.cc (new file, 229 additions)

@@ -0,0 +1,229 @@
/*
 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include <string.h>
#include <limits.h>
#include <stdio.h>

#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif

#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "vpx_mem/vpx_mem.h"


extern "C"
double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
                          const unsigned char *img2, int img2_pitch,
                          int width, int height);

using libvpx_test::ACMRandom;

namespace {
class BlockinessTestBase : public ::testing::Test {
 public:
  BlockinessTestBase(int width, int height) : width_(width), height_(height) {}

  static void SetUpTestCase() {
    source_data_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    reference_data_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  static void TearDownTestCase() {
    vpx_free(source_data_);
    source_data_ = NULL;
    vpx_free(reference_data_);
    reference_data_ = NULL;
  }

  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
  // Handle frames up to 640x480
  static const int kDataAlignment = 16;
  static const int kDataBufferSize = 640*480;

  virtual void SetUp() {
    source_stride_ = (width_ + 31) & ~31;
    reference_stride_ = width_ * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }

  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant,
                    int width, int height) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data[h * stride + w] = fill_constant;
      }
    }
  }

  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
    FillConstant(data, stride, fill_constant, width_, height_);
  }

  void FillRandom(uint8_t *data, int stride, int width, int height) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data[h * stride + w] = rnd_.Rand8();
      }
    }
  }

  void FillRandom(uint8_t *data, int stride) {
    FillRandom(data, stride, width_, height_);
  }

  void FillRandomBlocky(uint8_t *data, int stride) {
    for (int h = 0; h < height_; h += 4) {
      for (int w = 0; w < width_; w += 4) {
        FillRandom(data + h * stride + w, stride, 4, 4);
      }
    }
  }

  void FillCheckerboard(uint8_t *data, int stride) {
    for (int h = 0; h < height_; h += 4) {
      for (int w = 0; w < width_; w += 4) {
        if (((h/4) ^ (w/4)) & 1)
          FillConstant(data + h * stride + w, stride, 255, 4, 4);
        else
          FillConstant(data + h * stride + w, stride, 0, 4, 4);
      }
    }
  }

  void Blur(uint8_t *data, int stride, int taps) {
    int sum = 0;
    int half_taps = taps / 2;
    for (int h = 0; h < height_; ++h) {
      for (int w = 0; w < taps; ++w) {
        sum += data[w + h * stride];
      }
      for (int w = taps; w < width_; ++w) {
        sum += data[w + h * stride] - data[w - taps + h * stride];
        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
      }
    }
    for (int w = 0; w < width_; ++w) {
      for (int h = 0; h < taps; ++h) {
        sum += data[h + w * stride];
      }
      for (int h = taps; h < height_; ++h) {
        sum += data[w + h * stride] - data[(h - taps) * stride + w];
        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
      }
    }
  }
  int width_, height_;
  static uint8_t* source_data_;
  int source_stride_;
  static uint8_t* reference_data_;
  int reference_stride_;

  ACMRandom rnd_;
};

#if CONFIG_VP9_ENCODER
typedef std::tr1::tuple<int, int> BlockinessParam;
class BlockinessVP9Test
    : public BlockinessTestBase,
      public ::testing::WithParamInterface<BlockinessParam> {
 public:
  BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {}

 protected:
  int CheckBlockiness() {
    return vp9_get_blockiness(source_data_, source_stride_,
                              reference_data_, reference_stride_,
                              width_, height_);
  }
};
#endif  // CONFIG_VP9_ENCODER

uint8_t* BlockinessTestBase::source_data_ = NULL;
uint8_t* BlockinessTestBase::reference_data_ = NULL;

#if CONFIG_VP9_ENCODER
TEST_P(BlockinessVP9Test, SourceBlockierThanReference) {
  // Source is blockier than reference.
  FillRandomBlocky(source_data_, source_stride_);
  FillConstant(reference_data_, reference_stride_, 128);
  int super_blocky = CheckBlockiness();

  EXPECT_EQ(0, super_blocky) << "Blocky source should produce 0 blockiness.";
}

TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) {
  // Source is blockier than reference.
  FillConstant(source_data_, source_stride_, 128);
  FillRandomBlocky(reference_data_, reference_stride_);
  int super_blocky = CheckBlockiness();

  EXPECT_GT(super_blocky, 0.0)
      << "Blocky reference should score high for blockiness.";
}

TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) {
  // Source is blockier than reference.
  FillConstant(source_data_, source_stride_, 128);
  FillRandomBlocky(reference_data_, reference_stride_);
  int super_blocky = CheckBlockiness();

  Blur(reference_data_, reference_stride_, 4);
  int less_blocky = CheckBlockiness();

  EXPECT_GT(super_blocky, less_blocky)
      << "A straight blur should decrease blockiness.";
}

TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
  // Source is blockier than reference.
  FillConstant(source_data_, source_stride_, 128);
  FillCheckerboard(reference_data_, reference_stride_);

  int super_blocky = CheckBlockiness();

  Blur(reference_data_, reference_stride_, 4);
  int less_blocky = CheckBlockiness();

  EXPECT_GT(super_blocky, less_blocky)
      << "A straight blur should decrease blockiness.";
}
#endif  // CONFIG_VP9_ENCODER


using std::tr1::make_tuple;

//------------------------------------------------------------------------------
// C functions

#if CONFIG_VP9_ENCODER
const BlockinessParam c_vp9_tests[] = {
  make_tuple(320, 240),
  make_tuple(318, 242),
  make_tuple(318, 238),
};
INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
#endif

}  // namespace
test/consistency_test.cc (new file, 224 additions)

@@ -0,0 +1,224 @@
/*
 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include <string.h>
#include <limits.h>
#include <stdio.h>

#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif

#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vp9/encoder/vp9_ssim.h"
#include "vpx_mem/vpx_mem.h"

extern "C"
double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
                            uint8_t *img2, int img2_pitch,
                            int width, int height,
                            Ssimv *sv2, Metrics *m,
                            int do_inconsistency);

using libvpx_test::ACMRandom;

namespace {
class ConsistencyTestBase : public ::testing::Test {
 public:
  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}

  static void SetUpTestCase() {
    source_data_[0] = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    reference_data_[0] = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    source_data_[1] = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    reference_data_[1] = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    ssim_array_ = new Ssimv[kDataBufferSize / 16];
  }

  static void ClearSsim() {
    memset(ssim_array_, 0, kDataBufferSize / 16);
  }
  static void TearDownTestCase() {
    vpx_free(source_data_[0]);
    source_data_[0] = NULL;
    vpx_free(reference_data_[0]);
    reference_data_[0] = NULL;
    vpx_free(source_data_[1]);
    source_data_[1] = NULL;
    vpx_free(reference_data_[1]);
    reference_data_[1] = NULL;

    delete ssim_array_;
  }

  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
  // Handle frames up to 640x480
  static const int kDataAlignment = 16;
  static const int kDataBufferSize = 640*480;

  virtual void SetUp() {
    source_stride_ = (width_ + 31) & ~31;
    reference_stride_ = width_ * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }

  void FillRandom(uint8_t *data, int stride, int width, int height) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        data[h * stride + w] = rnd_.Rand8();
      }
    }
  }

  void FillRandom(uint8_t *data, int stride) {
    FillRandom(data, stride, width_, height_);
  }

  void Copy(uint8_t *reference, uint8_t *source) {
    memcpy(reference, source, kDataBufferSize);
  }

  void Blur(uint8_t *data, int stride, int taps) {
    int sum = 0;
    int half_taps = taps / 2;
    for (int h = 0; h < height_; ++h) {
      for (int w = 0; w < taps; ++w) {
        sum += data[w + h * stride];
      }
      for (int w = taps; w < width_; ++w) {
        sum += data[w + h * stride] - data[w - taps + h * stride];
        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
      }
    }
    for (int w = 0; w < width_; ++w) {
      for (int h = 0; h < taps; ++h) {
        sum += data[h + w * stride];
      }
      for (int h = taps; h < height_; ++h) {
        sum += data[w + h * stride] - data[(h - taps) * stride + w];
        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
      }
    }
  }
  int width_, height_;
  static uint8_t* source_data_[2];
  int source_stride_;
  static uint8_t* reference_data_[2];
  int reference_stride_;
  static Ssimv *ssim_array_;
  Metrics metrics_;

  ACMRandom rnd_;
};

#if CONFIG_VP9_ENCODER
typedef std::tr1::tuple<int, int> ConsistencyParam;
class ConsistencyVP9Test
    : public ConsistencyTestBase,
      public ::testing::WithParamInterface<ConsistencyParam> {
 public:
  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}

 protected:
  double CheckConsistency(int frame) {
    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
    return
        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
                             reference_data_[frame], reference_stride_,
                             width_, height_, ssim_array_, &metrics_, 1);
  }
};
#endif  // CONFIG_VP9_ENCODER

uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
Ssimv* ConsistencyTestBase::ssim_array_ = NULL;

#if CONFIG_VP9_ENCODER
TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
  FillRandom(source_data_[0], source_stride_);
  Copy(source_data_[1], source_data_[0]);
  Copy(reference_data_[0], source_data_[0]);
  Blur(reference_data_[0], reference_stride_, 3);
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 3);

  double inconsistency = CheckConsistency(1);
  inconsistency = CheckConsistency(0);
  EXPECT_EQ(inconsistency, 0.0)
      << "Should have 0 inconsistency if they are exactly the same.";

  // If sources are not consistent reference frames inconsistency should
  // be less than if the source is consistent.
  FillRandom(source_data_[0], source_stride_);
  FillRandom(source_data_[1], source_stride_);
  FillRandom(reference_data_[0], reference_stride_);
  FillRandom(reference_data_[1], reference_stride_);
  CheckConsistency(0);
  inconsistency = CheckConsistency(1);

  Copy(source_data_[1], source_data_[0]);
  CheckConsistency(0);
  double inconsistency2 = CheckConsistency(1);
  EXPECT_LT(inconsistency, inconsistency2)
      << "Should have less inconsistency if source itself is inconsistent.";

  // Less of a blur should be less inconsistent than more blur coming off a
  // a frame with no blur.
  ClearSsim();
  FillRandom(source_data_[0], source_stride_);
  Copy(source_data_[1], source_data_[0]);
  Copy(reference_data_[0], source_data_[0]);
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 4);
  CheckConsistency(0);
  inconsistency = CheckConsistency(1);
  ClearSsim();
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 8);
  CheckConsistency(0);
  inconsistency2 = CheckConsistency(1);

  EXPECT_LT(inconsistency, inconsistency2)
      << "Stronger Blur should produce more inconsistency.";
}
#endif  // CONFIG_VP9_ENCODER


using std::tr1::make_tuple;

//------------------------------------------------------------------------------
// C functions

#if CONFIG_VP9_ENCODER
const ConsistencyParam c_vp9_tests[] = {
  make_tuple(320, 240),
  make_tuple(318, 242),
  make_tuple(318, 238),
};
INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
                        ::testing::ValuesIn(c_vp9_tests));
#endif

}  // namespace
@@ -398,9 +398,9 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
}

void CopyOutputToRef() {
vpx_memcpy(output_ref_, output_, kOutputBufferSize);
memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
vpx_memcpy(output16_ref_, output16_, kOutputBufferSize);
memcpy(output16_ref_, output16_, kOutputBufferSize);
#endif
}

@@ -1814,4 +1814,27 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
make_tuple(32, 64, &convolve8_dspr2),
make_tuple(64, 64, &convolve8_dspr2)));
#endif

#if HAVE_MSA
const ConvolveFunctions convolve8_msa(
vp9_convolve_copy_msa, vp9_convolve_avg_msa,
vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_c,
vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_c,
vp9_convolve8_msa, vp9_convolve8_avg_c, 0);

INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_msa),
make_tuple(8, 4, &convolve8_msa),
make_tuple(4, 8, &convolve8_msa),
make_tuple(8, 8, &convolve8_msa),
make_tuple(16, 8, &convolve8_msa),
make_tuple(8, 16, &convolve8_msa),
make_tuple(16, 16, &convolve8_msa),
make_tuple(32, 16, &convolve8_msa),
make_tuple(16, 32, &convolve8_msa),
make_tuple(32, 32, &convolve8_msa),
make_tuple(64, 32, &convolve8_msa),
make_tuple(32, 64, &convolve8_msa),
make_tuple(64, 64, &convolve8_msa)));
#endif  // HAVE_MSA
}  // namespace
@@ -502,11 +502,11 @@ class Trans16x16TestBase {
fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

// clear reconstructed pixel buffers
vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
#if CONFIG_VP9_HIGHBITDEPTH
vpx_memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
vpx_memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
#endif

// quantization with maximum allowed step sizes
@@ -933,12 +933,4 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&idct16x16_12,
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
VPX_BITS_8)));
#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace
@@ -29,8 +29,6 @@ void Encoder::InitEncoder(VideoSource *video) {
cfg_.g_timebase = video->timebase();
cfg_.rc_twopass_stats_in = stats_->buf();

// Default to 1 thread.
cfg_.g_threads = 1;
res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
init_flags_);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -183,7 +183,10 @@ class EncoderTest {
protected:
explicit EncoderTest(const CodecFactory *codec)
: codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
last_pts_(0) {}
last_pts_(0) {
// Default to 1 thread.
cfg_.g_threads = 1;
}

virtual ~EncoderTest() {}
@@ -52,7 +52,7 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *thresh1);
#endif  // CONFIG_VP9_HIGHBITDEPTH

typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;

#if HAVE_SSE2
@@ -144,6 +144,7 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
loopfilter_op_ = GET_PARAM(0);
ref_loopfilter_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
count_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
}

@@ -151,6 +152,7 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {

protected:
int bit_depth_;
int count_;
int mask_;
loop_op_t loopfilter_op_;
loop_op_t ref_loopfilter_op_;
@@ -206,7 +208,6 @@ TEST_P(Loop8Test6Param, OperationCheck) {
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
};
int32_t p = kNumCoeffs/32;
int count = 1;

uint16_t tmp_s[kNumCoeffs];
int j = 0;
@@ -238,13 +239,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
ASM_REGISTER_STATE_CHECK(
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
#else
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
ASM_REGISTER_STATE_CHECK(
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
#endif  // CONFIG_VP9_HIGHBITDEPTH

for (int j = 0; j < kNumCoeffs; ++j) {
@@ -279,8 +280,8 @@ TEST_P(Loop8Test6Param, ValueCheck) {
// function of sharpness_lvl and the loopfilter lvl as:
// block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
// ...
// vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
// SIMD_WIDTH);
// memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
// SIMD_WIDTH);
// This means that the largest value for mblim will occur when sharpness_lvl
// is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
// In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
@@ -305,19 +306,18 @@ TEST_P(Loop8Test6Param, ValueCheck) {
tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
};
int32_t p = kNumCoeffs / 32;
int count = 1;
for (int j = 0; j < kNumCoeffs; ++j) {
s[j] = rnd.Rand16() & mask_;
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
ASM_REGISTER_STATE_CHECK(
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
#else
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
ASM_REGISTER_STATE_CHECK(
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
for (int j = 0; j < kNumCoeffs; ++j) {
err_count += ref_s[j] != s[j];
@@ -521,55 +521,62 @@ INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 8),
&vp9_highbd_lpf_horizontal_4_c, 8, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 8),
&vp9_highbd_lpf_vertical_4_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 8),
&vp9_highbd_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 8),
&vp9_highbd_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 8),
&vp9_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 8),
&wrapper_vertical_16_c, 8, 1),
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 10),
&vp9_highbd_lpf_horizontal_4_c, 10, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 10),
&vp9_highbd_lpf_vertical_4_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 10),
&vp9_highbd_lpf_horizontal_8_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 10),
&vp9_highbd_lpf_horizontal_16_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 10, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 10),
&vp9_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 10),
&wrapper_vertical_16_c, 10, 1),
make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
&vp9_highbd_lpf_horizontal_4_c, 12),
&vp9_highbd_lpf_horizontal_4_c, 12, 1),
make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
&vp9_highbd_lpf_vertical_4_c, 12),
&vp9_highbd_lpf_vertical_4_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
&vp9_highbd_lpf_horizontal_8_c, 12),
&vp9_highbd_lpf_horizontal_8_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 12),
&vp9_highbd_lpf_horizontal_16_c, 12, 1),
make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
&vp9_highbd_lpf_horizontal_16_c, 12, 2),
make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
&vp9_highbd_lpf_vertical_8_c, 12),
&vp9_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 12),
&wrapper_vertical_16_c, 12, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
&wrapper_vertical_16_dual_c, 8),
&wrapper_vertical_16_dual_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
&wrapper_vertical_16_dual_c, 10),
&wrapper_vertical_16_dual_c, 10, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
&wrapper_vertical_16_dual_c, 12)));
&wrapper_vertical_16_dual_c, 12, 1)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif

@@ -577,7 +584,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8)));
make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
2)));
#endif

#if HAVE_SSE2
@@ -635,20 +644,22 @@ INSTANTIATE_TEST_CASE_P(
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
make_tuple(&vp9_lpf_horizontal_16_neon,
&vp9_lpf_horizontal_16_c, 8),
&vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_neon,
&vp9_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_vertical_16_neon,
&wrapper_vertical_16_c, 8),
&wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_neon,
&wrapper_vertical_16_dual_c, 8),
&wrapper_vertical_16_dual_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_8_neon,
&vp9_lpf_horizontal_8_c, 8),
&vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_vertical_8_neon,
&vp9_lpf_vertical_8_c, 8),
&vp9_lpf_vertical_8_c, 8, 1),
#endif  // HAVE_NEON_ASM
make_tuple(&vp9_lpf_horizontal_4_neon,
&vp9_lpf_horizontal_4_c, 8),
&vp9_lpf_horizontal_4_c, 8, 1),
make_tuple(&vp9_lpf_vertical_4_neon,
&vp9_lpf_vertical_4_c, 8)));
&vp9_lpf_vertical_4_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
@@ -230,7 +230,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct4x4_1_add_c,
TX_4X4, 1)));

#if HAVE_NEON
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, PartialIDctTest,
::testing::Values(
@@ -258,7 +258,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct4x4_16_add_c,
&vp9_idct4x4_1_add_neon,
TX_4X4, 1)));
#endif  // HAVE_NEON
#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
@@ -305,13 +305,4 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8, 12)));
#endif

#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, PartialIDctTest,
::testing::Values(
make_tuple(&vp9_fdct16x16_c,
&vp9_idct16x16_256_add_c,
&vp9_idct16x16_10_add_ssse3,
TX_16X16, 10)));
#endif
}  // namespace
@@ -63,12 +63,12 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
uint8_t *const dst_image_ptr = dst_image + 8;
uint8_t *const flimits =
reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
(void)vpx_memset(flimits, 255, block_width);
(void)memset(flimits, 255, block_width);

// Initialize pixels in the input:
// block pixels to value 1,
// border pixels to value 10.
(void)vpx_memset(src_image, 10, input_size);
(void)memset(src_image, 10, input_size);
uint8_t *pixel_ptr = src_image_ptr;
for (int i = 0; i < block_height; ++i) {
for (int j = 0; j < block_width; ++j) {
@@ -78,7 +78,7 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
}

// Initialize pixels in the output to 99.
(void)vpx_memset(dst_image, 99, output_size);
(void)memset(dst_image, 99, output_size);

ASM_REGISTER_STATE_CHECK(
GetParam()(src_image_ptr, dst_image_ptr, input_stride,
@@ -56,7 +56,7 @@ class QuantizeTestBase {

    // The full configuration is necessary to generate the quantization tables.
    VP8_CONFIG vp8_config;
-    vpx_memset(&vp8_config, 0, sizeof(vp8_config));
+    memset(&vp8_config, 0, sizeof(vp8_config));

    vp8_comp_ = vp8_create_compressor(&vp8_config);

@@ -69,8 +69,7 @@ class QuantizeTestBase {
    // Copy macroblockd from the reference to get pre-set-up dequant values.
    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
        vpx_memalign(32, sizeof(*macroblockd_dst_)));
-    vpx_memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd,
-               sizeof(*macroblockd_dst_));
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
    // Fix block pointers - currently they point to the blocks in the reference
    // structure.
    vp8_setup_block_dptrs(macroblockd_dst_);
@@ -79,8 +78,7 @@ class QuantizeTestBase {
  void UpdateQuantizer(int q) {
    vp8_set_quantizer(vp8_comp_, q);

-    vpx_memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd,
-               sizeof(*macroblockd_dst_));
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
    vp8_setup_block_dptrs(macroblockd_dst_);
  }
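The memcpy here is a shallow copy, which is why vp8_setup_block_dptrs() must run immediately afterwards: MACROBLOCKD holds pointers into its own block arrays, and a byte copy leaves them aimed at the source struct. A standalone sketch of the pattern, with hypothetical names (Holder, fixup_cursor) standing in for the real structures:

    #include <string.h>

    typedef struct {
      int data[4];
      int *cursor;  /* points into this struct's own data[] */
    } Holder;

    static void fixup_cursor(Holder *h) { h->cursor = &h->data[0]; }

    static void clone(Holder *dst, const Holder *src) {
      memcpy(dst, src, sizeof(*dst)); /* dst->cursor still points at src->data */
      fixup_cursor(dst);              /* re-aim, as vp8_setup_block_dptrs() does */
    }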
@@ -53,7 +53,7 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  cpi.common.mb_rows = 240 >> 4;
  cpi.common.mb_cols = 320 >> 4;
  const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
-  vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));
+  memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));

  // Segment map
  cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
@@ -61,9 +61,9 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  // Allocate memory for the source memory map.
  unsigned char *roi_map =
      reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
-  vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
-  vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
-  vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));
+  memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
+  memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
+  memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));

  // Do a test call with valid parameters.
  int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
@@ -63,6 +63,9 @@ class SvcTest : public ::testing::Test {
    vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
    VP9CodecFactory codec_factory;
    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
+
+    tile_columns_ = 0;
+    tile_rows_ = 0;
  }

  virtual void TearDown() {
@@ -75,6 +78,8 @@ class SvcTest : public ::testing::Test {
    vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
    EXPECT_EQ(VPX_CODEC_OK, res);
    vpx_codec_control(&codec_, VP8E_SET_CPUUSED, 4);  // Make the test faster
+    vpx_codec_control(&codec_, VP9E_SET_TILE_COLUMNS, tile_columns_);
+    vpx_codec_control(&codec_, VP9E_SET_TILE_ROWS, tile_rows_);
    codec_initialized_ = true;
  }

@@ -108,7 +113,8 @@ class SvcTest : public ::testing::Test {
    codec_enc_.g_pass = VPX_RC_FIRST_PASS;
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -176,7 +182,8 @@ class SvcTest : public ::testing::Test {
    }
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -310,6 +317,8 @@ class SvcTest : public ::testing::Test {
  std::string test_file_name_;
  bool codec_initialized_;
  Decoder *decoder_;
+  int tile_columns_;
+  int tile_rows_;
};

TEST_F(SvcTest, SvcInit) {
@@ -737,4 +746,51 @@ TEST_F(SvcTest,
  FreeBitstreamBuffers(&outputs[0], 10);
}

+TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest,
+       TwoPassEncode2TemporalLayersWithMultipleFrameContextsAndTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  codec_enc_.g_error_resilient = 0;
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 "
+                      "multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
}  // namespace
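A note on the values in the new tile tests: VP9E_SET_TILE_COLUMNS and VP9E_SET_TILE_ROWS take log2 counts, so tile_columns_ = 1 and tile_rows_ = 1 request a 2x2 tile split, and the 704x144 frame size appears chosen so the frame is wide enough for a two-tile-column split to be legal.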
@@ -150,6 +150,9 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc

ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
+
endif

ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
@@ -15,10 +15,11 @@
extern "C" {
#if CONFIG_VP8
extern void vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
#if CONFIG_VP9
extern void vp9_rtcd();
-#endif
+#endif  // CONFIG_VP9
+extern void vpx_scale_rtcd();
}
#include "third_party/googletest/src/include/gtest/gtest.h"

@@ -59,11 +60,12 @@ int main(int argc, char **argv) {

#if CONFIG_VP8
  vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
#if CONFIG_VP9
  vp9_rtcd();
-#endif
-#endif
+#endif  // CONFIG_VP9
+  vpx_scale_rtcd();
+#endif  // !CONFIG_SHARED

  return RUN_ALL_TESTS();
}
@@ -402,6 +402,7 @@ VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"

VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
+VP9_LT_50_FRAMES_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"

YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
YUV_RAW_INPUT_WIDTH=352
@@ -29,7 +29,7 @@ using std::string;

#if CONFIG_WEBM_IO

-struct FileList {
+struct PauseFileList {
  const char *name;
  // md5 sum for decoded frames which does not include skipped frames.
  const char *expected_md5;
@@ -39,7 +39,8 @@ struct FileList {
// Decodes |filename| with |num_threads|. Pause at the specified frame_num,
// seek to next key frame and then continue decoding until the end. Return
// the md5 of the decoded frames which does not include skipped frames.
-string DecodeFile(const string &filename, int num_threads, int pause_num) {
+string DecodeFileWithPause(const string &filename, int num_threads,
+                           int pause_num) {
  libvpx_test::WebMVideoSource video(filename);
  video.Init();
  int in_frames = 0;
@@ -92,12 +93,12 @@ string DecodeFile(const string &filename, int num_threads, int pause_num) {
  return string(md5.Get());
}

-void DecodeFiles(const FileList files[]) {
-  for (const FileList *iter = files; iter->name != NULL; ++iter) {
+void DecodeFilesWithPause(const PauseFileList files[]) {
+  for (const PauseFileList *iter = files; iter->name != NULL; ++iter) {
    SCOPED_TRACE(iter->name);
    for (int t = 2; t <= 8; ++t) {
      EXPECT_EQ(iter->expected_md5,
-                DecodeFile(iter->name, t, iter->pause_frame_num))
+                DecodeFileWithPause(iter->name, t, iter->pause_frame_num))
          << "threads = " << t;
    }
  }
@@ -106,19 +107,19 @@ void DecodeFiles(const FileList files[]) {
TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
  // one key frame for every ten frames.
-  static const FileList files[] = {
+  static const PauseFileList files[] = {
    { "vp90-2-07-frame_parallel-1.webm",
-      "6ea7c3875d67252e7caf2bc6e75b36b1", 6},
+      "6ea7c3875d67252e7caf2bc6e75b36b1", 6 },
    { "vp90-2-07-frame_parallel-1.webm",
-      "4bb634160c7356a8d7d4299b6dc83a45", 12},
+      "4bb634160c7356a8d7d4299b6dc83a45", 12 },
    { "vp90-2-07-frame_parallel-1.webm",
-      "89772591e6ef461f9fa754f916c78ed8", 26},
-    { NULL, NULL, 0},
+      "89772591e6ef461f9fa754f916c78ed8", 26 },
+    { NULL, NULL, 0 },
  };
-  DecodeFiles(files);
+  DecodeFilesWithPause(files);
}

-struct InvalidFileList {
+struct FileList {
  const char *name;
  // md5 sum for decoded frames which does not include corrupted frames.
  const char *expected_md5;
@@ -128,8 +129,8 @@ struct InvalidFileList {

// Decodes |filename| with |num_threads|. Return the md5 of the decoded
// frames which does not include corrupted frames.
-string DecodeInvalidFile(const string &filename, int num_threads,
-                         int expected_frame_count) {
+string DecodeFile(const string &filename, int num_threads,
+                  int expected_frame_count) {
  libvpx_test::WebMVideoSource video(filename);
  video.Init();

@@ -173,37 +174,47 @@ string DecodeInvalidFile(const string &filename, int num_threads,
  return string(md5.Get());
}

-void DecodeInvalidFiles(const InvalidFileList files[]) {
-  for (const InvalidFileList *iter = files; iter->name != NULL; ++iter) {
+void DecodeFiles(const FileList files[]) {
+  for (const FileList *iter = files; iter->name != NULL; ++iter) {
    SCOPED_TRACE(iter->name);
    for (int t = 2; t <= 8; ++t) {
      EXPECT_EQ(iter->expected_md5,
-                DecodeInvalidFile(iter->name, t, iter->expected_frame_count))
+                DecodeFile(iter->name, t, iter->expected_frame_count))
          << "threads = " << t;
    }
  }
}

TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
-  static const InvalidFileList files[] = {
+  static const FileList files[] = {
    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
    // one key frame for every ten frames. The 11th frame has corrupted data.
    { "invalid-vp90-2-07-frame_parallel-1.webm",
-      "0549d0f45f60deaef8eb708e6c0eb6cb", 30},
+      "0549d0f45f60deaef8eb708e6c0eb6cb", 30 },
    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
    // one key frame for every ten frames. The 1st and 31st frames have
    // corrupted data.
    { "invalid-vp90-2-07-frame_parallel-2.webm",
-      "6a1f3cf6f9e7a364212fadb9580d525e", 20},
+      "6a1f3cf6f9e7a364212fadb9580d525e", 20 },
    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
    // one key frame for every ten frames. The 5th and 13th frames have
    // corrupted data.
    { "invalid-vp90-2-07-frame_parallel-3.webm",
-      "8256544308de926b0681e04685b98677", 27},
-    { NULL, NULL, 0},
+      "8256544308de926b0681e04685b98677", 27 },
+    { NULL, NULL, 0 },
  };
-  DecodeInvalidFiles(files);
+  DecodeFiles(files);
}

+TEST(VP9MultiThreadedFrameParallel, ValidFileTest) {
+  static const FileList files[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+    { "vp92-2-20-10bit-yuv420.webm",
+      "a16b99df180c584e8db2ffeda987d293", 10 },
+#endif
+    { NULL, NULL, 0 },
+  };
+  DecodeFiles(files);
+}
#endif  // CONFIG_WEBM_IO
}  // namespace
@@ -33,10 +33,10 @@ class VpxScaleBase {
  void ResetImage(int width, int height) {
    width_ = width;
    height_ = height;
-    vpx_memset(&img_, 0, sizeof(img_));
+    memset(&img_, 0, sizeof(img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(img_.buffer_alloc, kBufFiller, img_.frame_size);
+    memset(img_.buffer_alloc, kBufFiller, img_.frame_size);
    FillPlane(img_.y_buffer, img_.y_crop_width, img_.y_crop_height,
              img_.y_stride);
    FillPlane(img_.u_buffer, img_.uv_crop_width, img_.uv_crop_height,
@@ -44,15 +44,15 @@ class VpxScaleBase {
    FillPlane(img_.v_buffer, img_.uv_crop_width, img_.uv_crop_height,
              img_.uv_stride);

-    vpx_memset(&ref_img_, 0, sizeof(ref_img_));
+    memset(&ref_img_, 0, sizeof(ref_img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&ref_img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(ref_img_.buffer_alloc, kBufFiller, ref_img_.frame_size);
+    memset(ref_img_.buffer_alloc, kBufFiller, ref_img_.frame_size);

-    vpx_memset(&cpy_img_, 0, sizeof(cpy_img_));
+    memset(&cpy_img_, 0, sizeof(cpy_img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&cpy_img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(cpy_img_.buffer_alloc, kBufFiller, cpy_img_.frame_size);
+    memset(cpy_img_.buffer_alloc, kBufFiller, cpy_img_.frame_size);
    ReferenceCopyFrame();
  }

@@ -87,8 +87,8 @@ class VpxScaleBase {

    // Fill the border pixels from the nearest image pixel.
    for (int y = 0; y < crop_height; ++y) {
-      vpx_memset(left, left[padding], padding);
-      vpx_memset(right, right[-1], right_extend);
+      memset(left, left[padding], padding);
+      memset(right, right[-1], right_extend);
      left += stride;
      right += stride;
    }
@@ -101,13 +101,13 @@ class VpxScaleBase {

    // The first row was already extended to the left and right. Copy it up.
    for (int y = 0; y < padding; ++y) {
-      vpx_memcpy(top, left, extend_width);
+      memcpy(top, left, extend_width);
      top += stride;
    }

    uint8_t *bottom = left + (crop_height * stride);
    for (int y = 0; y < bottom_extend; ++y) {
-      vpx_memcpy(bottom, left + (crop_height - 1) * stride, extend_width);
+      memcpy(bottom, left + (crop_height - 1) * stride, extend_width);
      bottom += stride;
    }
  }
@@ -17,7 +17,8 @@
# Environment check: Make sure input is available.
vpxdec_verify_environment() {
  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ] || \
-     [ ! -e "${VP9_FPM_WEBM_FILE}" ] ; then
+     [ ! -e "${VP9_FPM_WEBM_FILE}" ] || \
+     [ ! -e "${VP9_LT_50_FRAMES_WEBM_FILE}" ] ; then
    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
@@ -87,12 +88,29 @@ vpxdec_vp9_webm_frame_parallel() {
        --frame-parallel
    done
  fi
}

+vpxdec_vp9_webm_less_than_50_frames() {
+  # ensure that reaching eof in webm_guess_framerate doesn't result in invalid
+  # frames in actual webm_read_frame calls.
+  if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly decoder="$(vpx_tool_path vpxdec)"
+    local readonly expected=10
+    local readonly num_frames=$(${VPX_TEST_PREFIX} "${decoder}" \
+      "${VP9_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \
+      | awk '/^[0-9]+ decoded frames/ { print $1 }')
+    if [ "$num_frames" -ne "$expected" ]; then
+      elog "Output frames ($num_frames) != expected ($expected)"
+      return 1
+    fi
+  fi
+}
+
vpxdec_tests="vpxdec_vp8_ivf
              vpxdec_vp8_ivf_pipe_input
              vpxdec_vp9_webm
-              vpxdec_vp9_webm_frame_parallel"
+              vpxdec_vp9_webm_frame_parallel
+              vpxdec_vp9_webm_less_than_50_frames"

run_tests vpxdec_verify_environment "${vpxdec_tests}"
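The new check scrapes the frame count from vpxdec's --summary output: the awk pattern /^[0-9]+ decoded frames/ assumes a summary line that begins with the decoded-frame count (something like "10 decoded frames ..."), and prints its first field so the shell can compare it against the expected total.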
third_party/x86inc/README.libvpx
@@ -9,3 +9,4 @@ defines that help automatically allow assembly to work cross-platform.

Local Modifications:
Some modifications to allow PIC to work with x86inc.
+Conditionally define program_name to allow overriding.
third_party/x86inc/x86inc.asm
@@ -36,7 +36,9 @@

%include "vpx_config.asm"

+%ifndef program_name
%define program_name vp9
+%endif


%define UNIX64 0
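With the new %ifndef guard, a file that includes x86inc.asm can pick its own symbol prefix by defining program_name first (for instance, placing %define program_name vp8 ahead of the %include); the vp9 default now only applies when nothing was defined.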
@@ -103,9 +103,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
        goto allocation_fail;

    oci->post_proc_buffer_int_used = 0;
-    vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
-    vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
-               oci->post_proc_buffer.frame_size);
+    memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
+    memset(oci->post_proc_buffer.buffer_alloc, 128,
+           oci->post_proc_buffer.frame_size);

    /* Allocate buffer to store post-processing filter coefficients.
     *
@@ -176,7 +176,7 @@ void vp8_create_common(VP8_COMMON *oci)
    oci->clamp_type = RECON_CLAMP_REQUIRED;

    /* Initialize reference frame sign bias structure to defaults */
-    vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
+    memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));

    /* Default disable buffer to buffer copying */
    oci->copy_buffer_to_gf = 0;
@@ -165,7 +165,7 @@ vp8_dequant_idct_loop2_v6
    str     r1, [r2], r12       ; store output to dst
    bne     vp8_dequant_idct_loop2_v6

-; vpx_memset
+; memset
    sub     r0, r0, #32
    add     sp, sp, #4
@@ -29,19 +29,19 @@ extern "C" {

#define vp8_copy( Dest, Src) { \
    assert( sizeof( Dest) == sizeof( Src)); \
-    vpx_memcpy( Dest, Src, sizeof( Src)); \
+    memcpy( Dest, Src, sizeof( Src)); \
  }

/* Use this for variably-sized arrays. */

#define vp8_copy_array( Dest, Src, N) { \
    assert( sizeof( *Dest) == sizeof( *Src)); \
-    vpx_memcpy( Dest, Src, N * sizeof( *Src)); \
+    memcpy( Dest, Src, N * sizeof( *Src)); \
  }

-#define vp8_zero( Dest) vpx_memset( &Dest, 0, sizeof( Dest));
+#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));

-#define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest));
+#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));


#ifdef __cplusplus
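For context, these helpers wrap the raw libc calls with size checks. A self-contained usage sketch (the macro bodies are taken from the diff above; the arrays are illustrative):

    #include <assert.h>
    #include <string.h>

    #define vp8_copy( Dest, Src) { \
        assert( sizeof( Dest) == sizeof( Src)); \
        memcpy( Dest, Src, sizeof( Src)); \
      }

    #define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));

    int main(void) {
      int probs[4] = { 1, 2, 3, 4 };
      int fc[4];
      vp8_copy(fc, probs); /* the assert catches mismatched array sizes */
      vp8_zero(fc)         /* the macro already supplies the trailing ';' */
      return 0;
    }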
@@ -81,7 +81,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
    fprintf(mvs, "\n");

    /* print out the block modes */
-    mb_index = 0;
    fprintf(mvs, "Mbs for Frame %d\n", frame);
    {
        int b_row;
@@ -129,7 +128,6 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f


    /* print out the block modes */
-    mb_index = 0;
    fprintf(mvs, "MVs for Frame %d\n", frame);
    {
        int b_row;
@@ -38,6 +38,6 @@ void vp8_dequant_idct_add_c(short *input, short *dq,

    vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);

-    vpx_memset(input, 0, 32);
+    memset(input, 0, 32);

}
@@ -183,7 +183,6 @@ const vp8_extra_bit_struct vp8_extra_bits[12] =

void vp8_default_coef_probs(VP8_COMMON *pc)
{
-    vpx_memcpy(pc->fc.coef_probs, default_coef_probs,
-               sizeof(default_coef_probs));
+    memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
}
@@ -159,13 +159,13 @@ const vp8_tree_index vp8_small_mvtree [14] =

void vp8_init_mbmode_probs(VP8_COMMON *x)
{
-    vpx_memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
-    vpx_memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
-    vpx_memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
+    memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
+    memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
+    memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
}

void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
{
-    vpx_memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
+    memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
}
@@ -40,9 +40,9 @@ static void copy_and_extend_plane

    for (i = 0; i < h; i++)
    {
-        vpx_memset(dest_ptr1, src_ptr1[0], el);
-        vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
-        vpx_memset(dest_ptr2, src_ptr2[0], er);
+        memset(dest_ptr1, src_ptr1[0], el);
+        memcpy(dest_ptr1 + el, src_ptr1, w);
+        memset(dest_ptr2, src_ptr2[0], er);
        src_ptr1 += sp;
        src_ptr2 += sp;
        dest_ptr1 += dp;
@@ -60,13 +60,13 @@ static void copy_and_extend_plane

    for (i = 0; i < et; i++)
    {
-        vpx_memcpy(dest_ptr1, src_ptr1, linesize);
+        memcpy(dest_ptr1, src_ptr1, linesize);
        dest_ptr1 += dp;
    }

    for (i = 0; i < eb; i++)
    {
-        vpx_memcpy(dest_ptr2, src_ptr2, linesize);
+        memcpy(dest_ptr2, src_ptr2, linesize);
        dest_ptr2 += dp;
    }
}
@@ -33,7 +33,7 @@ void vp8_dequant_idct_add_y_block_c
        else
        {
            vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
-            vpx_memset(q, 0, 2 * sizeof(q[0]));
+            memset(q, 0, 2 * sizeof(q[0]));
        }

        q += 16;
@@ -59,7 +59,7 @@ void vp8_dequant_idct_add_uv_block_c
        else
        {
            vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
-            vpx_memset(q, 0, 2 * sizeof(q[0]));
+            memset(q, 0, 2 * sizeof(q[0]));
        }

        q += 16;
@@ -78,7 +78,7 @@ void vp8_dequant_idct_add_uv_block_c
        else
        {
            vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
-            vpx_memset(q, 0, 2 * sizeof(q[0]));
+            memset(q, 0, 2 * sizeof(q[0]));
        }

        q += 16;
@@ -82,11 +82,10 @@ void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
        if (block_inside_limit < 1)
            block_inside_limit = 1;

-        vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
-        vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
-                   SIMD_WIDTH);
-        vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
-                   SIMD_WIDTH);
+        memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
+        memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
+        memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
+               SIMD_WIDTH);
    }
}

@@ -105,7 +104,7 @@ void vp8_loop_filter_init(VP8_COMMON *cm)
    /* init hev threshold const vectors */
    for(i = 0; i < 4 ; i++)
    {
-        vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
+        memset(lfi->hev_thr[i], i, SIMD_WIDTH);
    }
}

@@ -151,7 +150,7 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
            /* we could get rid of this if we assume that deltas are set to
             * zero when not in use; encoder always uses deltas
             */
-            vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
+            memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
            continue;
        }
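To make the table contents concrete: with an illustrative filter level filt_lvl = 20 and block_inside_limit = 10, each entry of lim[i] holds 10, blim[i] holds 2 * 20 + 10 = 50, and mblim[i] holds 2 * (20 + 2) + 10 = 54, each value replicated across SIMD_WIDTH bytes so vectorized loop-filter kernels can load a full register of identical thresholds.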
@@ -153,11 +153,11 @@ static void multiframe_quality_enhance_block
        actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
        act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
-        sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
+        vp8_variance16x16(y, y_stride, yd, yd_stride, &sse);
        sad = (sse + 128)>>8;
-        usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
+        vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
        usad = (sse + 32)>>6;
-        vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
+        vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
        vsad = (sse + 32)>>6;
#else
        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
@@ -170,11 +170,11 @@ static void multiframe_quality_enhance_block
        actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
        act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
-        sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
+        vp8_variance8x8(y, y_stride, yd, yd_stride, &sse);
        sad = (sse + 32)>>6;
-        usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
+        vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
        usad = (sse + 8)>>4;
-        vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
+        vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
        vsad = (sse + 8)>>4;
#else
        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
@@ -231,9 +231,9 @@ static void multiframe_quality_enhance_block
        {
            vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
            for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
-                vpx_memcpy(udp, up, uvblksize);
+                memcpy(udp, up, uvblksize);
            for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
-                vpx_memcpy(vdp, vp, uvblksize);
+                memcpy(vdp, vp, uvblksize);
        }
    }
}
@@ -341,8 +341,8 @@ void vp8_multiframe_quality_enhance
            for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
                                    vp += show->uv_stride, vdp += dest->uv_stride)
            {
-                vpx_memcpy(udp, up, 4);
-                vpx_memcpy(vdp, vp, 4);
+                memcpy(udp, up, 4);
+                memcpy(vdp, vp, 4);
            }
        }
    }
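The USE_SSD hunks fix a dead store rather than change the metric: vp8_variance* returns the variance but also writes the sum of squared errors through its last parameter, and the SSD path only wants the latter, so the old assignment of the return value to sad/usad/vsad was overwritten on the very next line. A compilable sketch of the out-parameter pattern, with a simplified signature assumed for illustration:

    #include <stdio.h>

    /* Simplified stand-in for vp8_variance*: returns the variance, reports
     * the sum of squared errors (SSE) through the out-parameter. */
    static unsigned int variance4(const unsigned char *a, const unsigned char *b,
                                  unsigned int *sse) {
      int i, sum = 0;
      unsigned int sq = 0;
      for (i = 0; i < 4; ++i) {
        const int d = a[i] - b[i];
        sum += d;
        sq += (unsigned int)(d * d);
      }
      *sse = sq;
      return sq - (unsigned int)((sum * sum) / 4); /* variance */
    }

    int main(void) {
      const unsigned char a[4] = { 10, 10, 10, 10 };
      const unsigned char b[4] = { 8, 12, 8, 12 };
      unsigned int sse;
      variance4(a, b, &sse);   /* called only for the SSE side effect */
      printf("sse=%u\n", sse); /* the metric is then derived from sse */
      return 0;
    }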
@@ -26,7 +26,7 @@ void vp8_dequant_idct_add_dspr2(short *input, short *dq,

    vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);

-    vpx_memset(input, 0, 32);
+    memset(input, 0, 32);

}
@@ -355,8 +355,8 @@ void vp8_deblock(VP8_COMMON *cm,
                else
                    mb_ppl = (unsigned char)ppl;

-                vpx_memset(ylptr, mb_ppl, 16);
-                vpx_memset(uvlptr, mb_ppl, 8);
+                memset(ylptr, mb_ppl, 16);
+                memset(uvlptr, mb_ppl, 8);

                ylptr += 16;
                uvlptr += 8;
@@ -403,7 +403,7 @@ void vp8_de_noise(VP8_COMMON *cm,
    (void) low_var_thresh;
    (void) flag;

-    vpx_memset(limits, (unsigned char)ppl, 16 * mb_cols);
+    memset(limits, (unsigned char)ppl, 16 * mb_cols);

    /* TODO: The original code don't filter the 2 outer rows and columns. */
    for (mbr = 0; mbr < mb_rows; mbr++)
@@ -763,7 +763,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
            /* insure that postproc is set to all 0's so that post proc
             * doesn't pull random data in from edge
             */
-            vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);
+            memset((&oci->post_proc_buffer_int)->buffer_alloc,128,(&oci->post_proc_buffer)->frame_size);

        }
    }
@@ -1,47 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl copy_mem16x16_ppc

;# r3 unsigned char *src
;# r4 int src_stride
;# r5 unsigned char *dst
;# r6 int dst_stride

;# Make the assumption that input will not be aligned,
;#  but the output will be.  So two reads and a perm
;#  for the input, but only one store for the output.
copy_mem16x16_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xe000
    mtspr   256, r12            ;# set VRSAVE

    li      r10, 16
    mtctr   r10

cp_16x16_loop:
    lvsl    v0,  0, r3          ;# permutate value for alignment

    lvx     v1, 0, r3
    lvx     v2, r10, r3

    vperm   v1, v1, v2, v0

    stvx    v1,  0, r5

    add     r3, r3, r4          ;# increment source pointer
    add     r5, r5, r6          ;# increment destination pointer

    bdnz    cp_16x16_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr
File diff suppressed because it is too large
@@ -1,677 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl bilinear_predict4x4_ppc
    .globl bilinear_predict8x4_ppc
    .globl bilinear_predict8x8_ppc
    .globl bilinear_predict16x16_ppc

.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

.macro load_vfilter V0, V1
    load_c \V0, vfilter_b, r6, r9, r10

    addi    r6,  r6, 16
    lvx     \V1, r6, r10
.endm

.macro HProlog jump_label
    ;# load up horizontal filter
    slwi.   r5, r5, 4           ;# index into horizontal filter array

    ;# index to the next set of vectors in the row.
    li      r10, 16
    li      r12, 32

    ;# downshift by 7 ( divide by 128 ) at the end
    vspltish v19, 7

    ;# If there isn't any filtering to be done for the horizontal, then
    ;#  just skip to the second pass.
    beq     \jump_label

    load_c v20, hfilter_b, r5, r9, r0

    ;# setup constants
    ;# v14 permutation value for alignment
    load_c v28, b_hperm_b, 0, r9, r0

    ;# rounding added in on the multiply
    vspltisw v21, 8
    vspltisw v18, 3
    vslw    v18, v21, v18       ;# 0x00000040000000400000004000000040

    slwi.   r6, r6, 5           ;# index into vertical filter array
.endm

;# Filters a horizontal line
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm intput
;#  v18 rounding
;#  v19 shift
;#  v20 filter taps
;#  v21 tmp
;#  v22 tmp
;#  v23 tmp
;#  v24 tmp
;#  v25 tmp
;#  v26 tmp
;#  v27 tmp
;#  v28 perm output
;#
.macro HFilter V
    vperm   v24, v21, v21, v10  ;# v20 = 0123 1234 2345 3456
    vperm   v25, v21, v21, v11  ;# v21 = 4567 5678 6789 789A

    vmsummbm v24, v20, v24, v18
    vmsummbm v25, v20, v25, v18

    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)

    vsrh    v24, v24, v19       ;# divide v0, v1 by 128

    vpkuhus \V, v24, v24        ;# \V = scrambled 8-bit result
.endm

.macro hfilter_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# input to filter is 9 bytes wide, output is 8 bytes.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17

    HFilter \V
.endm


.macro load_and_align_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# input to filter is 21 bytes wide, output is 16 bytes.
    ;#  input will can span three vectors if not aligned correctly.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif

    vperm   \V, v21, v22, v17
.endm

.macro write_aligned_8 V, increment_counter
    stvx    \V,  0, r7

.if \increment_counter
    add     r7, r7, r8
.endif
.endm

.macro vfilter_16 P0 P1
    vmuleub v22, \P0, v20       ;# 64 + 4 positive taps
    vadduhm v22, v18, v22
    vmuloub v23, \P0, v20
    vadduhm v23, v18, v23

    vmuleub v24, \P1, v21
    vadduhm v22, v22, v24       ;# Re = evens, saturation unnecessary
    vmuloub v25, \P1, v21
    vadduhm v23, v23, v25       ;# Ro = odds

    vsrh    v22, v22, v19       ;# divide by 128
    vsrh    v23, v23, v19       ;# v16 v17 = evens, odds
    vmrghh  \P0, v22, v23       ;# v18 v19 = 16-bit result in order
    vmrglh  v23, v22, v23
    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
.endm


.macro w_8x8 V, D, R, P
    stvx    \V, 0, r1
    lwz     \R, 0(r1)
    stw     \R, 0(r7)
    lwz     \R, 4(r1)
    stw     \R, 4(r7)
    add     \D, \D, \P
.endm


    .align 2
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict4x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_4x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_4x4_b

    hfilter_8 v4, 0

    b   second_pass_4x4_b

second_pass_4x4_pre_copy_b:
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8 v0, 1
    load_and_align_8 v1, 1
    load_and_align_8 v2, 1
    load_and_align_8 v3, 1
    load_and_align_8 v4, 1

second_pass_4x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4

store_out_4x4_b:

    stvx    v0, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v1, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v2, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v3, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)

exit_4x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict8x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_8x4_b

    hfilter_8 v4, 0

    b   second_pass_8x4_b

second_pass_8x4_pre_copy_b:
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8 v0, 1
    load_and_align_8 v1, 1
    load_and_align_8 v2, 1
    load_and_align_8 v3, 1
    load_and_align_8 v4, 1

second_pass_8x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4

store_out_8x4_b:

    cmpi    cr0, r8, 8
    beq     cr0, store_aligned_8x4_b

    w_8x8   v0, r7, r0, r8
    w_8x8   v1, r7, r0, r8
    w_8x8   v2, r7, r0, r8
    w_8x8   v3, r7, r0, r8

    b       exit_8x4

store_aligned_8x4_b:
    load_c v10, b_hilo_b, 0, r9, r10

    vperm   v0, v0, v1, v10
    vperm   v2, v2, v3, v10

    stvx    v0, 0, r7
    addi    r7, r7, 16
    stvx    v2, 0, r7

exit_8x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict8x8_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff0
    ori     r12, r12, 0xffff
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x8_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1
    hfilter_8 v4, 1
    hfilter_8 v5, 1
    hfilter_8 v6, 1
    hfilter_8 v7, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_8x8_b

    hfilter_8 v8, 0

    b   second_pass_8x8_b

second_pass_8x8_pre_copy_b:
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8 v0, 1
    load_and_align_8 v1, 1
    load_and_align_8 v2, 1
    load_and_align_8 v3, 1
    load_and_align_8 v4, 1
    load_and_align_8 v5, 1
    load_and_align_8 v6, 1
    load_and_align_8 v7, 1
    load_and_align_8 v8, 0

second_pass_8x8_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8

store_out_8x8_b:

    cmpi    cr0, r8, 8
    beq     cr0, store_aligned_8x8_b

    w_8x8   v0, r7, r0, r8
    w_8x8   v1, r7, r0, r8
    w_8x8   v2, r7, r0, r8
    w_8x8   v3, r7, r0, r8
    w_8x8   v4, r7, r0, r8
    w_8x8   v5, r7, r0, r8
    w_8x8   v6, r7, r0, r8
    w_8x8   v7, r7, r0, r8

    b       exit_8x8

store_aligned_8x8_b:
    load_c v10, b_hilo_b, 0, r9, r10

    vperm   v0, v0, v1, v10
    vperm   v2, v2, v3, v10
    vperm   v4, v4, v5, v10
    vperm   v6, v6, v7, v10

    stvx    v0, 0, r7
    addi    r7, r7, 16
    stvx    v2, 0, r7
    addi    r7, r7, 16
    stvx    v4, 0, r7
    addi    r7, r7, 16
    stvx    v6, 0, r7

exit_8x8:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# Filters a horizontal line
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm intput
;#  v18 rounding
;#  v19 shift
;#  v20 filter taps
;#  v21 tmp
;#  v22 tmp
;#  v23 tmp
;#  v24 tmp
;#  v25 tmp
;#  v26 tmp
;#  v27 tmp
;#  v28 perm output
;#
.macro hfilter_16 V, increment_counter

    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# input to filter is 21 bytes wide, output is 16 bytes.
    ;#  input will can span three vectors if not aligned correctly.
    lvx     v21,   0, r3
    lvx     v22, r10, r3
    lvx     v23, r12, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17
    vperm   v22, v22, v23, v17  ;# v8 v9 = 21 input pixels left-justified

    ;# set 0
    vmsummbm v24, v20, v21, v18 ;# taps times elements

    ;# set 1
    vsldoi  v23, v21, v22, 1
    vmsummbm v25, v20, v23, v18

    ;# set 2
    vsldoi  v23, v21, v22, 2
    vmsummbm v26, v20, v23, v18

    ;# set 3
    vsldoi  v23, v21, v22, 3
    vmsummbm v27, v20, v23, v18

    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
    vpkswus v25, v26, v27       ;# v25 = 2 6 A E 3 7 B F

    vsrh    v24, v24, v19       ;# divide v0, v1 by 128
    vsrh    v25, v25, v19

    vpkuhus \V, v24, v25        ;# \V = scrambled 8-bit result
    vperm   \V, \V, v0, v28     ;# \V = correctly-ordered result
.endm

.macro load_and_align_16 V, increment_counter
    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# input to filter is 21 bytes wide, output is 16 bytes.
    ;#  input will can span three vectors if not aligned correctly.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif

    vperm   \V, v21, v22, v17
.endm

.macro write_16 V, increment_counter
    stvx    \V,  0, r7

.if \increment_counter
    add     r7, r7, r8
.endif
.endm

    .align 2
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict16x16_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    HProlog second_pass_16x16_pre_copy_b

    hfilter_16 v0,  1
    hfilter_16 v1,  1
    hfilter_16 v2,  1
    hfilter_16 v3,  1
    hfilter_16 v4,  1
    hfilter_16 v5,  1
    hfilter_16 v6,  1
    hfilter_16 v7,  1
    hfilter_16 v8,  1
    hfilter_16 v9,  1
    hfilter_16 v10, 1
    hfilter_16 v11, 1
    hfilter_16 v12, 1
    hfilter_16 v13, 1
    hfilter_16 v14, 1
    hfilter_16 v15, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_16x16_b

    hfilter_16 v16, 0

    b   second_pass_16x16_b

second_pass_16x16_pre_copy_b:
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0,  1
    load_and_align_16 v1,  1
    load_and_align_16 v2,  1
    load_and_align_16 v3,  1
    load_and_align_16 v4,  1
    load_and_align_16 v5,  1
    load_and_align_16 v6,  1
    load_and_align_16 v7,  1
    load_and_align_16 v8,  1
    load_and_align_16 v9,  1
    load_and_align_16 v10, 1
    load_and_align_16 v11, 1
    load_and_align_16 v12, 1
    load_and_align_16 v13, 1
    load_and_align_16 v14, 1
    load_and_align_16 v15, 1
    load_and_align_16 v16, 0

second_pass_16x16_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4
    vfilter_16 v4,  v5
    vfilter_16 v5,  v6
    vfilter_16 v6,  v7
    vfilter_16 v7,  v8
    vfilter_16 v8,  v9
    vfilter_16 v9,  v10
    vfilter_16 v10, v11
    vfilter_16 v11, v12
    vfilter_16 v12, v13
    vfilter_16 v13, v14
    vfilter_16 v14, v15
    vfilter_16 v15, v16

store_out_16x16_b:

    write_16 v0,  1
    write_16 v1,  1
    write_16 v2,  1
    write_16 v3,  1
    write_16 v4,  1
    write_16 v5,  1
    write_16 v6,  1
    write_16 v7,  1
    write_16 v8,  1
    write_16 v9,  1
    write_16 v10, 1
    write_16 v11, 1
    write_16 v12, 1
    write_16 v13, 1
    write_16 v14, 1
    write_16 v15, 0

    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .data

    .align 4
hfilter_b:
    .byte   128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0
    .byte   112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0
    .byte    96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0
    .byte    80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0
    .byte    64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0
    .byte    48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0
    .byte    32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0
    .byte    16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0

    .align 4
vfilter_b:
    .byte   128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
    .byte     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112

    .align 4
b_hperm_b:
    .byte     0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15

    .align 4
b_0123_b:
    .byte     0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6

    .align 4
b_4567_b:
    .byte     4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10

b_hilo_b:
    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23
@@ -1,189 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl short_idct4x4llm_ppc

.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# r3 short *input
;# r4 short *output
;# r5 int pitch
    .align 2
short_idct4x4llm_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    load_c v8, sinpi8sqrt2, 0, r9, r10
    load_c v9, cospi8sqrt2minus1, 0, r9, r10
    load_c v10, hi_hi, 0, r9, r10
    load_c v11, lo_lo, 0, r9, r10
    load_c v12, shift_16, 0, r9, r10

    li      r10, 16
    lvx     v0,   0, r3         ;# input ip[0], ip[ 4]
    lvx     v1, r10, r3         ;# input ip[8], ip[12]

    ;# first pass
    vupkhsh v2, v0
    vupkhsh v3, v1
    vaddsws v6, v2, v3          ;# a1 = ip[0]+ip[8]
    vsubsws v7, v2, v3          ;# b1 = ip[0]-ip[8]

    vupklsh v0, v0
    vmulosh v4, v0, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v0          ;# ip[ 4] * sin(pi/8) * sqrt(2)

    vupklsh v1, v1
    vmulosh v5, v1, v9
    vsraw   v5, v5, v12         ;# ip[12] * cos(pi/8) * sqrt(2)
    vaddsws v5, v5, v1

    vsubsws v4, v4, v5          ;# c1

    vmulosh v3, v1, v8
    vsraw   v3, v3, v12
    vaddsws v3, v3, v1          ;# ip[12] * sin(pi/8) * sqrt(2)

    vmulosh v5, v0, v9
    vsraw   v5, v5, v12         ;# ip[ 4] * cos(pi/8) * sqrt(2)
    vaddsws v5, v5, v0

    vaddsws v3, v3, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    ;# transpose input
    vmrghw  v4, v0, v1          ;# a0 b0 a1 b1
    vmrghw  v5, v2, v3          ;# c0 d0 c1 d1

    vmrglw  v6, v0, v1          ;# a2 b2 a3 b3
    vmrglw  v7, v2, v3          ;# c2 d2 c3 d3

    vperm   v0, v4, v5, v10     ;# a0 b0 c0 d0
    vperm   v1, v4, v5, v11     ;# a1 b1 c1 d1

    vperm   v2, v6, v7, v10     ;# a2 b2 c2 d2
    vperm   v3, v6, v7, v11     ;# a3 b3 c3 d3

    ;# second pass
    vaddsws v6, v0, v2          ;# a1 = ip[0]+ip[8]
    vsubsws v7, v0, v2          ;# b1 = ip[0]-ip[8]

    vmulosh v4, v1, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v1          ;# ip[ 4] * sin(pi/8) * sqrt(2)

    vmulosh v5, v3, v9
    vsraw   v5, v5, v12         ;# ip[12] * cos(pi/8) * sqrt(2)
    vaddsws v5, v5, v3

    vsubsws v4, v4, v5          ;# c1

    vmulosh v2, v3, v8
    vsraw   v2, v2, v12
    vaddsws v2, v2, v3          ;# ip[12] * sin(pi/8) * sqrt(2)

    vmulosh v5, v1, v9
    vsraw   v5, v5, v12         ;# ip[ 4] * cos(pi/8) * sqrt(2)
    vaddsws v5, v5, v1

    vaddsws v3, v2, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    vspltish v6, 4
    vspltish v7, 3

    vpkswss v0, v0, v1
    vpkswss v1, v2, v3

    vaddshs v0, v0, v6
    vaddshs v1, v1, v6

    vsrah   v0, v0, v7
    vsrah   v1, v1, v7

    ;# transpose output
    vmrghh  v2, v0, v1          ;# a0 c0 a1 c1 a2 c2 a3 c3
    vmrglh  v3, v0, v1          ;# b0 d0 b1 d1 b2 d2 b3 d3

    vmrghh  v0, v2, v3          ;# a0 b0 c0 d0 a1 b1 c1 d1
    vmrglh  v1, v2, v3          ;# a2 b2 c2 d2 a3 b3 c3 d3

    stwu    r1,-416(r1)         ;# create space on the stack

    stvx    v0, 0, r1
    lwz     r6, 0(r1)
    stw     r6, 0(r4)
    lwz     r6, 4(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5

    lwz     r6,  8(r1)
    stw     r6, 0(r4)
    lwz     r6, 12(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5

    stvx    v1, 0, r1
    lwz     r6, 0(r1)
    stw     r6, 0(r4)
    lwz     r6, 4(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5

    lwz     r6,  8(r1)
    stw     r6, 0(r4)
    lwz     r6, 12(r1)
    stw     r6, 4(r4)

    addi    r1, r1, 416         ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 4
sinpi8sqrt2:
    .short  35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468

    .align 4
cospi8sqrt2minus1:
    .short  20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091

    .align 4
shift_16:
    .long   16, 16, 16, 16

    .align 4
hi_hi:
    .byte   0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23

    .align 4
lo_lo:
    .byte   8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
@@ -1,135 +0,0 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "loopfilter.h"
#include "onyxc_int.h"

typedef void loop_filter_function_y_ppc
(
    unsigned char *s,   // source pointer
    int p,              // pitch
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh
);

typedef void loop_filter_function_uv_ppc
(
    unsigned char *u,   // source pointer
    unsigned char *v,   // source pointer
    int p,              // pitch
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh
);

typedef void loop_filter_function_s_ppc
(
    unsigned char *s,   // source pointer
    int p,              // pitch
    const signed char *flimit
);

loop_filter_function_y_ppc mbloop_filter_horizontal_edge_y_ppc;
loop_filter_function_y_ppc mbloop_filter_vertical_edge_y_ppc;
loop_filter_function_y_ppc loop_filter_horizontal_edge_y_ppc;
loop_filter_function_y_ppc loop_filter_vertical_edge_y_ppc;

loop_filter_function_uv_ppc mbloop_filter_horizontal_edge_uv_ppc;
loop_filter_function_uv_ppc mbloop_filter_vertical_edge_uv_ppc;
loop_filter_function_uv_ppc loop_filter_horizontal_edge_uv_ppc;
loop_filter_function_uv_ppc loop_filter_vertical_edge_uv_ppc;

loop_filter_function_s_ppc loop_filter_simple_horizontal_edge_ppc;
loop_filter_function_s_ppc loop_filter_simple_vertical_edge_ppc;

// Horizontal MB filtering
void loop_filter_mbh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                         int y_stride, int uv_stride, loop_filter_info *lfi)
{
    mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr);

    if (u_ptr)
        mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr);
}

void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                          int y_stride, int uv_stride, loop_filter_info *lfi)
{
    (void)u_ptr;
    (void)v_ptr;
    (void)uv_stride;
    loop_filter_simple_horizontal_edge_ppc(y_ptr, y_stride, lfi->mbflim);
}

// Vertical MB Filtering
void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                         int y_stride, int uv_stride, loop_filter_info *lfi)
{
    mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr);

    if (u_ptr)
        mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr);
}

void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                          int y_stride, int uv_stride, loop_filter_info *lfi)
{
    (void)u_ptr;
    (void)v_ptr;
    (void)uv_stride;
    loop_filter_simple_vertical_edge_ppc(y_ptr, y_stride, lfi->mbflim);
}

// Horizontal B Filtering
void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                        int y_stride, int uv_stride, loop_filter_info *lfi)
{
    // These should all be done at once with one call, instead of 3
    loop_filter_horizontal_edge_y_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr);
    loop_filter_horizontal_edge_y_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr);
    loop_filter_horizontal_edge_y_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr);

    if (u_ptr)
        loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr);
}

void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                         int y_stride, int uv_stride, loop_filter_info *lfi)
{
    (void)u_ptr;
    (void)v_ptr;
    (void)uv_stride;
    loop_filter_simple_horizontal_edge_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim);
    loop_filter_simple_horizontal_edge_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim);
    loop_filter_simple_horizontal_edge_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim);
}

// Vertical B Filtering
void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                        int y_stride, int uv_stride, loop_filter_info *lfi)
{
    loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr);

    if (u_ptr)
        loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr);
}

void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                         int y_stride, int uv_stride, loop_filter_info *lfi)
{
    (void)u_ptr;
    (void)v_ptr;
    (void)uv_stride;
    loop_filter_simple_vertical_edge_ppc(y_ptr + 4, y_stride, lfi->flim);
    loop_filter_simple_vertical_edge_ppc(y_ptr + 8, y_stride, lfi->flim);
    loop_filter_simple_vertical_edge_ppc(y_ptr + 12, y_stride, lfi->flim);
}
File diff suppressed because it is too large
@@ -1,59 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl save_platform_context
    .globl restore_platform_context

.macro W V P
    stvx    \V, 0, \P
    addi    \P, \P, 16
.endm

.macro R V P
    lvx     \V, 0, \P
    addi    \P, \P, 16
.endm

;# r3 context_ptr
    .align 2
save_platform_context:
    W v20, r3
    W v21, r3
    W v22, r3
    W v23, r3
    W v24, r3
    W v25, r3
    W v26, r3
    W v27, r3
    W v28, r3
    W v29, r3
    W v30, r3
    W v31, r3

    blr

;# r3 context_ptr
    .align 2
restore_platform_context:
    R v20, r3
    R v21, r3
    R v22, r3
    R v23, r3
    R v24, r3
    R v25, r3
    R v26, r3
    R v27, r3
    R v28, r3
    R v29, r3
    R v30, r3
    R v31, r3

    blr
@@ -1,175 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl recon4b_ppc
    .globl recon2b_ppc
    .globl recon_b_ppc

.macro row_of16 Diff Pred Dst Stride
    lvx     v1, 0, \Pred        ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16    ;# next pred
    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    lvx     v3, 0, \Diff        ;# v3 = d0..d7
    vaddshs v2, v2, v3          ;# v2 = r0..r7
    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff       ;# v3 = d8..d15
    addi    \Diff, \Diff, 32    ;# next diff
    vaddshs v3, v3, v1          ;# v3 = r8..r15
    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2, 0, \Dst         ;# to dst
    add     \Dst, \Dst, \Stride ;# next dst
.endm

    .text
    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon4b_ppc:
    mfspr   r0, 256             ;# get old VRSAVE
    stw     r0, -8(r1)          ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0             ;# set VRSAVE

    vxor    v0, v0, v0
    li      r8, 16

    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6

    lwz     r12, -8(r1)         ;# restore old VRSAVE from stack
    mtspr   256, r12            ;# reset old VRSAVE

    blr

.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1, 0, \Pred        ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    lvx     v3, 0, \Diff        ;# v3 = d0..d7
    vaddshs v2, v2, v3          ;# v2 = r0..r7
    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff       ;# v3 = d8..d15
    vaddshs v3, v3, v1          ;# v3 = r8..r15
    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2, 0, r10          ;# 2 rows to dst from buf
    lwz     r0, 0(r10)
.if \write_first_four_pels
    stw     r0, 0(\Dst)
.else
    stwux   r0, \Dst, \Stride
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride   ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)
.endm

    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride

recon2b_ppc:
    mfspr   r0, 256             ;# get old VRSAVE
    stw     r0, -8(r1)          ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0             ;# set VRSAVE

    vxor    v0, v0, v0
    li      r8, 16

    la      r10, -48(r1)        ;# buf

    two_rows_of8 r3, r4, r5, r6, 1

    addi    r4, r4, 16          ;# next pred
    addi    r3, r3, 32          ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0

    lwz     r12, -8(r1)         ;# restore old VRSAVE from stack
    mtspr   256, r12            ;# reset old VRSAVE

    blr

.macro get_two_diff_rows
    stw     r0, 0(r10)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)
    lwzu    r0, 32(r3)
    stw     r0, 8(r10)
    lwz     r0, 4(r3)
    stw     r0, 12(r10)
    lvx     v3, 0, r10
.endm

    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon_b_ppc:
    mfspr   r0, 256             ;# get old VRSAVE
    stw     r0, -8(r1)          ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0             ;# set VRSAVE

    vxor    v0, v0, v0

    la      r10, -48(r1)        ;# buf

    lwz     r0, 0(r4)
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1, 0, r10          ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)           ;# v3 = d0..d7

    get_two_diff_rows

    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3          ;# v2 = r0..r7

    lwzu    r0, 32(r3)          ;# v3 = d8..d15

    get_two_diff_rows

    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1          ;# v3 = r8..r15

    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2, 0, r10          ;# 16 pels to dst from buf

    lwz     r0, 0(r10)
    stw     r0, 0(r5)
    lwz     r0, 4(r10)
    stwux   r0, r5, r6
    lwz     r0, 8(r10)
    stwux   r0, r5, r6
    lwz     r0, 12(r10)
    stwx    r0, r5, r6

    lwz     r12, -8(r1)         ;# restore old VRSAVE from stack
    mtspr   256, r12            ;# reset old VRSAVE

    blr
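All three recon routines above compute dst = clamp(pred + diff) per pixel; the vaddshs/vpkshus pair is the saturating add and the pack back to unsigned 8-bit. For reference, a scalar C sketch of the same operation (the name recon_c and its parameter layout are illustrative, not symbols from this tree):

/* Scalar equivalent of the reconstruction kernels: add the 16-bit
 * residual to the 8-bit prediction and clamp to 0..255, which is
 * what vaddshs followed by vpkshus achieves one vector at a time. */
static void recon_c(const short *diff, const unsigned char *pred,
                    unsigned char *dst, int width, int height,
                    int pred_stride, int dst_stride)
{
    int r, c;

    for (r = 0; r < height; r++)
    {
        for (c = 0; c < width; c++)
        {
            int v = pred[c] + diff[c];

            if (v < 0) v = 0;
            else if (v > 255) v = 255;

            dst[c] = (unsigned char)v;
        }

        diff += pred_stride;
        pred += pred_stride;
        dst += dst_stride;
    }
}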
@@ -1,277 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_sad16x16_ppc
    .globl vp8_sad16x8_ppc
    .globl vp8_sad8x16_ppc
    .globl vp8_sad8x8_ppc
    .globl vp8_sad4x4_ppc

.macro load_aligned_16 V R O
    lvsl    v3, 0, \R           ;# permutate value for alignment

    lvx     v1, 0, \R
    lvx     v2, \O, \R

    vperm   \V, v1, v2, v3
.endm

.macro prologue
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffc0
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    li      r10, 16             ;# load offset and loop counter

    vspltisw v8, 0              ;# zero out total to start
.endm

.macro epilogue
    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE
.endm

.macro SAD_16
    ;# v6 = abs (v4 - v5)
    vsububs v6, v4, v5
    vsububs v7, v5, v4
    vor     v6, v6, v7

    ;# v8 += abs (v4 - v5)
    vsum4ubs v8, v6, v8
.endm

.macro sad_16_loop loop_label
    lvsl    v3, 0, r5           ;# only needs to be done once per block

    ;# preload a line of data before getting into the loop
    lvx     v4, 0, r3
    lvx     v1, 0, r5
    lvx     v2, r10, r5

    add     r5, r5, r6
    add     r3, r3, r4

    vperm   v5, v1, v2, v3

    .align 4
\loop_label:
    ;# compute difference on first row
    vsububs v6, v4, v5
    vsububs v7, v5, v4

    ;# load up next set of data
    lvx     v9, 0, r3
    lvx     v1, 0, r5
    lvx     v2, r10, r5

    ;# perform abs() of difference
    vor     v6, v6, v7
    add     r3, r3, r4

    ;# add to the running tally
    vsum4ubs v8, v6, v8

    ;# now onto the next line
    vperm   v5, v1, v2, v3
    add     r5, r5, r6
    lvx     v4, 0, r3

    ;# compute difference on second row
    vsububs v6, v9, v5
    lvx     v1, 0, r5
    vsububs v7, v5, v9
    lvx     v2, r10, r5
    vor     v6, v6, v7
    add     r3, r3, r4
    vsum4ubs v8, v6, v8
    vperm   v5, v1, v2, v3
    add     r5, r5, r6

    bdnz    \loop_label

    vspltisw v7, 0

    vsumsws v8, v8, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)
.endm

.macro sad_8_loop loop_label
    .align 4
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v6, r3, r10
    load_aligned_16 v7, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    vmrghb  v4, v4, v6
    vmrghb  v5, v5, v7

    SAD_16

    bdnz    \loop_label

    vspltisw v7, 0

    vsumsws v8, v8, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_stride
;# r5 unsigned char *ref_ptr
;# r6 int  ref_stride
;#
;# r3 return value
vp8_sad16x16_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    sad_16_loop sad16x16_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_stride
;# r5 unsigned char *ref_ptr
;# r6 int  ref_stride
;#
;# r3 return value
vp8_sad16x8_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    sad_16_loop sad16x8_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_stride
;# r5 unsigned char *ref_ptr
;# r6 int  ref_stride
;#
;# r3 return value
vp8_sad8x16_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    sad_8_loop sad8x16_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_stride
;# r5 unsigned char *ref_ptr
;# r6 int  ref_stride
;#
;# r3 return value
vp8_sad8x8_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    sad_8_loop sad8x8_loop

    epilogue

    blr

.macro transfer_4x4 I P
    lwz     r0, 0(\I)
    add     \I, \I, \P

    lwz     r7, 0(\I)
    add     \I, \I, \P

    lwz     r8, 0(\I)
    add     \I, \I, \P

    lwz     r9, 0(\I)

    stw     r0, 0(r1)
    stw     r7, 4(r1)
    stw     r8, 8(r1)
    stw     r9, 12(r1)
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_stride
;# r5 unsigned char *ref_ptr
;# r6 int  ref_stride
;#
;# r3 return value
vp8_sad4x4_ppc:

    prologue

    transfer_4x4 r3, r4
    lvx     v4, 0, r1

    transfer_4x4 r5, r6
    lvx     v5, 0, r1

    vspltisw v8, 0              ;# zero out total to start

    ;# v6 = abs (v4 - v5)
    vsububs v6, v4, v5
    vsububs v7, v5, v4
    vor     v6, v6, v7

    ;# v8 += abs (v4 - v5)
    vsum4ubs v7, v6, v8
    vsumsws v7, v7, v8

    stvx    v7, 0, r1
    lwz     r3, 12(r1)

    epilogue

    blr
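The SAD kernels above compute abs(a - b) with the unsigned-saturating subtract trick (vsububs both ways, then vor) and accumulate with vsum4ubs. For reference, this is a scalar C restatement of what every vp8_sadWxH_ppc entry point returns; the function name sad_c is an editor's illustration, not a symbol from this tree:

#include <stdlib.h>

/* Scalar equivalent of the vp8_sadWxH_ppc kernels: the sum of
 * absolute differences between a WxH source block and a
 * reference block. */
static unsigned int sad_c(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride,
                          int width, int height)
{
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < height; r++)
    {
        for (c = 0; c < width; c++)
            sad += (unsigned int)abs(src[c] - ref[c]);

        src += src_stride;  /* advance both pointers one row */
        ref += ref_stride;
    }

    return sad;
}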
@@ -1,165 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
#include "onyxc_int.h"

extern void (*vp8_post_proc_down_and_across_mb_row)(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    int src_pixels_per_line,
    int dst_pixels_per_line,
    int cols,
    unsigned char *f,
    int size
);

extern void (*vp8_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit);
extern void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit);
extern void (*vp8_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit);
extern void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit);

extern void vp8_post_proc_down_and_across_mb_row_c
(
    unsigned char *src_ptr,
    unsigned char *dst_ptr,
    int src_pixels_per_line,
    int dst_pixels_per_line,
    int cols,
    unsigned char *f,
    int size
);
void vp8_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a);

extern copy_mem_block_function *vp8_copy_mem16x16;
extern copy_mem_block_function *vp8_copy_mem8x8;
extern copy_mem_block_function *vp8_copy_mem8x4;

// PPC
extern subpixel_predict_function sixtap_predict_ppc;
extern subpixel_predict_function sixtap_predict8x4_ppc;
extern subpixel_predict_function sixtap_predict8x8_ppc;
extern subpixel_predict_function sixtap_predict16x16_ppc;
extern subpixel_predict_function bilinear_predict4x4_ppc;
extern subpixel_predict_function bilinear_predict8x4_ppc;
extern subpixel_predict_function bilinear_predict8x8_ppc;
extern subpixel_predict_function bilinear_predict16x16_ppc;

extern copy_mem_block_function copy_mem16x16_ppc;

void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);

extern void short_idct4x4llm_ppc(short *input, short *output, int pitch);

// Generic C
extern subpixel_predict_function vp8_sixtap_predict_c;
extern subpixel_predict_function vp8_sixtap_predict8x4_c;
extern subpixel_predict_function vp8_sixtap_predict8x8_c;
extern subpixel_predict_function vp8_sixtap_predict16x16_c;
extern subpixel_predict_function vp8_bilinear_predict4x4_c;
extern subpixel_predict_function vp8_bilinear_predict8x4_c;
extern subpixel_predict_function vp8_bilinear_predict8x8_c;
extern subpixel_predict_function vp8_bilinear_predict16x16_c;

extern copy_mem_block_function vp8_copy_mem16x16_c;
extern copy_mem_block_function vp8_copy_mem8x8_c;
extern copy_mem_block_function vp8_copy_mem8x4_c;

void vp8_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
void vp8_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
void vp8_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);

extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch);
extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch);

// PPC
extern loop_filter_block_function loop_filter_mbv_ppc;
extern loop_filter_block_function loop_filter_bv_ppc;
extern loop_filter_block_function loop_filter_mbh_ppc;
extern loop_filter_block_function loop_filter_bh_ppc;

extern loop_filter_block_function loop_filter_mbvs_ppc;
extern loop_filter_block_function loop_filter_bvs_ppc;
extern loop_filter_block_function loop_filter_mbhs_ppc;
extern loop_filter_block_function loop_filter_bhs_ppc;

// Generic C
extern loop_filter_block_function vp8_loop_filter_mbv_c;
extern loop_filter_block_function vp8_loop_filter_bv_c;
extern loop_filter_block_function vp8_loop_filter_mbh_c;
extern loop_filter_block_function vp8_loop_filter_bh_c;

extern loop_filter_block_function vp8_loop_filter_mbvs_c;
extern loop_filter_block_function vp8_loop_filter_bvs_c;
extern loop_filter_block_function vp8_loop_filter_mbhs_c;
extern loop_filter_block_function vp8_loop_filter_bhs_c;

extern loop_filter_block_function *vp8_lf_mbvfull;
extern loop_filter_block_function *vp8_lf_mbhfull;
extern loop_filter_block_function *vp8_lf_bvfull;
extern loop_filter_block_function *vp8_lf_bhfull;

extern loop_filter_block_function *vp8_lf_mbvsimple;
extern loop_filter_block_function *vp8_lf_mbhsimple;
extern loop_filter_block_function *vp8_lf_bvsimple;
extern loop_filter_block_function *vp8_lf_bhsimple;

void vp8_clear_c(void)
{
}

void vp8_machine_specific_config(void)
{
    // Pure C:
    vp8_clear_system_state = vp8_clear_c;
    vp8_recon_b = vp8_recon_b_c;
    vp8_recon4b = vp8_recon4b_c;
    vp8_recon2b = vp8_recon2b_c;

    vp8_bilinear_predict16x16 = bilinear_predict16x16_ppc;
    vp8_bilinear_predict8x8 = bilinear_predict8x8_ppc;
    vp8_bilinear_predict8x4 = bilinear_predict8x4_ppc;
    vp8_bilinear_predict = bilinear_predict4x4_ppc;

    vp8_sixtap_predict16x16 = sixtap_predict16x16_ppc;
    vp8_sixtap_predict8x8 = sixtap_predict8x8_ppc;
    vp8_sixtap_predict8x4 = sixtap_predict8x4_ppc;
    vp8_sixtap_predict = sixtap_predict_ppc;

    vp8_short_idct4x4_1 = vp8_short_idct4x4llm_1_c;
    vp8_short_idct4x4 = short_idct4x4llm_ppc;
    vp8_dc_only_idct = vp8_dc_only_idct_c;

    vp8_lf_mbvfull = loop_filter_mbv_ppc;
    vp8_lf_bvfull = loop_filter_bv_ppc;
    vp8_lf_mbhfull = loop_filter_mbh_ppc;
    vp8_lf_bhfull = loop_filter_bh_ppc;

    vp8_lf_mbvsimple = loop_filter_mbvs_ppc;
    vp8_lf_bvsimple = loop_filter_bvs_ppc;
    vp8_lf_mbhsimple = loop_filter_mbhs_ppc;
    vp8_lf_bhsimple = loop_filter_bhs_ppc;

    vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c;
    vp8_mbpost_proc_down = vp8_mbpost_proc_down_c;
    vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c;
    vp8_plane_add_noise = vp8_plane_add_noise_c;

    vp8_copy_mem16x16 = copy_mem16x16_ppc;
    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
}
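vp8_machine_specific_config() wires each function pointer either to the AltiVec routine or to the portable C fallback once at startup, and callers then go through the pointer. A minimal sketch of the pattern, with hypothetical names rather than this tree's actual symbols:

/* The run-time dispatch pattern in miniature: a function pointer
 * defaults to the portable C routine and may be rebound to a SIMD
 * version at config time.  All names here are illustrative. */
typedef void (*copy16x16_fn)(const unsigned char *src, int src_stride,
                             unsigned char *dst, int dst_stride);

static void copy16x16_c(const unsigned char *src, int src_stride,
                        unsigned char *dst, int dst_stride)
{
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
            dst[c] = src[c];

        src += src_stride;
        dst += dst_stride;
    }
}

copy16x16_fn copy16x16 = copy16x16_c;  /* rebound by the config routine */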
@@ -1,375 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_get8x8var_ppc
    .globl vp8_get16x16var_ppc
    .globl vp8_mse16x16_ppc
    .globl vp8_variance16x16_ppc
    .globl vp8_variance16x8_ppc
    .globl vp8_variance8x16_ppc
    .globl vp8_variance8x8_ppc
    .globl vp8_variance4x4_ppc

.macro load_aligned_16 V R O
    lvsl    v3, 0, \R           ;# permutate value for alignment

    lvx     v1, 0, \R
    lvx     v2, \O, \R

    vperm   \V, v1, v2, v3
.endm

.macro prologue
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffc0
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    li      r10, 16             ;# load offset and loop counter

    vspltisw v7, 0              ;# zero for merging
    vspltisw v8, 0              ;# zero out total to start
    vspltisw v9, 0              ;# zero out total for dif^2
.endm

.macro epilogue
    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE
.endm

.macro compute_sum_sse
    ;# Compute sum first.  Unpack so a signed subtract
    ;# can be used.  Only have a half word signed
    ;# subtract.  Do high, then low.
    vmrghb  v2, v7, v4
    vmrghb  v3, v7, v5
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    vmrglb  v2, v7, v4
    vmrglb  v3, v7, v5
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    ;# Now compute sse.
    vsububs v2, v4, v5
    vsububs v3, v5, v4
    vor     v2, v2, v3

    vmsumubm v9, v2, v2, v9
.endm

.macro variance_16 DS loop_label store_sum
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    compute_sum_sse

    bdnz    \loop_label

    vsumsws v8, v8, v7
    vsumsws v9, v9, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)

    stvx    v9, 0, r1
    lwz     r4, 12(r1)

.if \store_sum
    stw     r3, 0(r8)           ;# sum
.endif
    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    srlwi   r3, r3, \DS         ;# (sum*sum) >> DS
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm

.macro variance_8 DS loop_label store_sum
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v6, r3, r10
    load_aligned_16 v0, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    vmrghb  v4, v4, v6
    vmrghb  v5, v5, v0

    compute_sum_sse

    bdnz    \loop_label

    vsumsws v8, v8, v7
    vsumsws v9, v9, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)

    stvx    v9, 0, r1
    lwz     r4, 12(r1)

.if \store_sum
    stw     r3, 0(r8)           ;# sum
.endif
    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    srlwi   r3, r3, \DS         ;# (sum*sum) >> DS
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *SSE
;# r8 int *Sum
;#
;# r3 return value
vp8_get8x8var_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    variance_8 6, get8x8var_loop, 1

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *SSE
;# r8 int *Sum
;#
;# r3 return value
vp8_get16x16var_ppc:

    prologue

    mtctr   r10

    variance_16 8, get16x16var_loop, 1

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_mse16x16_ppc:
    prologue

    mtctr   r10

mse16x16_loop:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# Now compute sse.
    vsububs v2, v4, v5
    vsububs v3, v5, v4
    vor     v2, v2, v3

    vmsumubm v9, v2, v2, v9

    bdnz    mse16x16_loop

    vsumsws v9, v9, v7

    stvx    v9, 0, r1
    lwz     r3, 12(r1)

    stvx    v9, 0, r1
    lwz     r3, 12(r1)

    stw     r3, 0(r7)           ;# sse

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance16x16_ppc:

    prologue

    mtctr   r10

    variance_16 8, variance16x16_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance16x8_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    variance_16 7, variance16x8_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance8x16_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    variance_8 7, variance8x16_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance8x8_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    variance_8 6, variance8x8_loop, 0

    epilogue

    blr

.macro transfer_4x4 I P
    lwz     r0, 0(\I)
    add     \I, \I, \P

    lwz     r10, 0(\I)
    add     \I, \I, \P

    lwz     r8, 0(\I)
    add     \I, \I, \P

    lwz     r9, 0(\I)

    stw     r0, 0(r1)
    stw     r10, 4(r1)
    stw     r8, 8(r1)
    stw     r9, 12(r1)
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance4x4_ppc:

    prologue

    transfer_4x4 r3, r4
    lvx     v4, 0, r1

    transfer_4x4 r5, r6
    lvx     v5, 0, r1

    compute_sum_sse

    vsumsws v8, v8, v7
    vsumsws v9, v9, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)

    stvx    v9, 0, r1
    lwz     r4, 12(r1)

    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    srlwi   r3, r3, 4           ;# (sum*sum) >> 4
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> 4)

    epilogue

    blr
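The variance_16/variance_8 epilogues implement the standard shortcut Var = SSE - Sum*Sum/N, with the division by N folded into a right shift (DS = log2 of the pixel count, e.g. 8 for 16x16). A scalar C restatement of the same computation; the name variance_c is illustrative, not a symbol from this tree:

#include <stdint.h>

/* Scalar restatement of the variance kernels:
 * variance = SSE - (Sum * Sum) / N, where N = w * h is a power of
 * two, so the asm does the division as the shift by DS = log2(N). */
static unsigned int variance_c(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride,
                               int w, int h, unsigned int *sse)
{
    int sum = 0;
    unsigned int s = 0;
    int r, c;

    for (r = 0; r < h; r++)
    {
        for (c = 0; c < w; c++)
        {
            int diff = src[c] - ref[c];
            sum += diff;
            s += (unsigned int)(diff * diff);
        }

        src += src_stride;
        ref += ref_stride;
    }

    *sse = s;
    /* widen sum*sum before dividing: for 16x16 it can exceed 2^31 */
    return s - (unsigned int)(((int64_t)sum * sum) / (w * h));
}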
@@ -1,865 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_sub_pixel_variance4x4_ppc
    .globl vp8_sub_pixel_variance8x8_ppc
    .globl vp8_sub_pixel_variance8x16_ppc
    .globl vp8_sub_pixel_variance16x8_ppc
    .globl vp8_sub_pixel_variance16x16_ppc

.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

.macro load_vfilter V0, V1
    load_c \V0, vfilter_b, r6, r12, r10

    addi    r6, r6, 16
    lvx     \V1, r6, r10
.endm

.macro HProlog jump_label
    ;# load up horizontal filter
    slwi.   r5, r5, 4           ;# index into horizontal filter array

    ;# index to the next set of vectors in the row.
    li      r10, 16

    ;# downshift by 7 ( divide by 128 ) at the end
    vspltish v19, 7

    ;# If there isn't any filtering to be done for the horizontal, then
    ;# just skip to the second pass.
    beq     \jump_label

    load_c v20, hfilter_b, r5, r12, r0

    ;# setup constants
    ;# v14 permutation value for alignment
    load_c v28, b_hperm_b, 0, r12, r0

    ;# index to the next set of vectors in the row.
    li      r12, 32

    ;# rounding added in on the multiply
    vspltisw v21, 8
    vspltisw v18, 3
    vslw    v18, v21, v18       ;# 0x00000040000000400000004000000040

    slwi.   r6, r6, 5           ;# index into vertical filter array
.endm

;# Filters a horizontal line
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm input
;#  v18 rounding
;#  v19 shift
;#  v20 filter taps
;#  v21 tmp
;#  v22 tmp
;#  v23 tmp
;#  v24 tmp
;#  v25 tmp
;#  v26 tmp
;#  v27 tmp
;#  v28 perm output
;#

.macro hfilter_8 V, hp, lp, increment_counter
    lvsl    v17, 0, r3          ;# permutate value for alignment

    ;# input to filter is 9 bytes wide, output is 8 bytes.
    lvx     v21, 0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17

    vperm   v24, v21, v21, \hp  ;# v24 = 0123 1234 2345 3456
    vperm   v25, v21, v21, \lp  ;# v25 = 4567 5678 6789 789A

    vmsummbm v24, v20, v24, v18
    vmsummbm v25, v20, v25, v18

    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)

    vsrh    v24, v24, v19       ;# divide v0, v1 by 128

    vpkuhus \V, v24, v24        ;# \V = scrambled 8-bit result
.endm

.macro vfilter_16 P0 P1
    vmuleub v22, \P0, v20       ;# 64 + 4 positive taps
    vadduhm v22, v18, v22
    vmuloub v23, \P0, v20
    vadduhm v23, v18, v23

    vmuleub v24, \P1, v21
    vadduhm v22, v22, v24       ;# Re = evens, saturation unnecessary
    vmuloub v25, \P1, v21
    vadduhm v23, v23, v25       ;# Ro = odds

    vsrh    v22, v22, v19       ;# divide by 128
    vsrh    v23, v23, v19       ;# v16 v17 = evens, odds
    vmrghh  \P0, v22, v23       ;# v18 v19 = 16-bit result in order
    vmrglh  v23, v22, v23
    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
.endm

.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0
    ;# Compute sum first.  Unpack so a signed subtract
    ;# can be used.  Only have a half word signed
    ;# subtract.  Do high, then low.
    vmrghb  \t1, \z0, \src
    vmrghb  \t2, \z0, \ref
    vsubshs \t1, \t1, \t2
    vsum4shs \sum, \t1, \sum

    vmrglb  \t1, \z0, \src
    vmrglb  \t2, \z0, \ref
    vsubshs \t1, \t1, \t2
    vsum4shs \sum, \t1, \sum

    ;# Now compute sse.
    vsububs \t1, \src, \ref
    vsububs \t2, \ref, \src
    vor     \t1, \t1, \t2

    vmsumubm \sse, \t1, \t1, \sse
.endm

.macro variance_final sum, sse, z0, DS
    vsumsws \sum, \sum, \z0
    vsumsws \sse, \sse, \z0

    stvx    \sum, 0, r1
    lwz     r3, 12(r1)

    stvx    \sse, 0, r1
    lwz     r4, 12(r1)

    stw     r4, 0(r9)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    srlwi   r3, r3, \DS         ;# (sum*sum) >> DS
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm

.macro compute_sum_sse_16 V, increment_counter
    load_and_align_16 v16, r7, r8, \increment_counter
    compute_sum_sse \V, v16, v18, v19, v20, v21, v23
.endm

.macro load_and_align_16 V, R, P, increment_counter
    lvsl    v17, 0, \R          ;# permutate value for alignment

    ;# input to filter is 21 bytes wide, output is 16 bytes.
    ;# input can span three vectors if not aligned correctly.
    lvx     v21, 0, \R
    lvx     v22, r10, \R

.if \increment_counter
    add     \R, \R, \P
.endif

    vperm   \V, v21, v22, v17
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int  dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance4x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    HProlog second_pass_4x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r12, r0
    load_c v11, b_4567_b, 0, r12, r0

    hfilter_8 v0, v10, v11, 1
    hfilter_8 v1, v10, v11, 1
    hfilter_8 v2, v10, v11, 1
    hfilter_8 v3, v10, v11, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;# vertical filtering, jump to storing the data.  Otherwise
    ;# load up and filter the additional line that is needed
    ;# for the vertical filter.
    beq     compute_sum_sse_4x4_b

    hfilter_8 v4, v10, v11, 0

    b       second_pass_4x4_b

second_pass_4x4_pre_copy_b:
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 0

second_pass_4x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4

compute_sum_sse_4x4_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16

    load_and_align_16 v4, r7, r8, 1
    load_and_align_16 v5, r7, r8, 1
    load_and_align_16 v6, r7, r8, 1
    load_and_align_16 v7, r7, r8, 1

    vmrghb  v0, v0, v1
    vmrghb  v1, v2, v3

    vmrghb  v2, v4, v5
    vmrghb  v3, v6, v7

    load_c v10, b_hilo_b, 0, r12, r0

    vperm   v0, v0, v1, v10
    vperm   v1, v2, v3, v10

    compute_sum_sse v0, v1, v18, v19, v20, v21, v23

    variance_final v18, v19, v23, 4

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int  dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance8x8_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff0
    ori     r12, r12, 0xffff
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    HProlog second_pass_8x8_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r12, r0
    load_c v11, b_4567_b, 0, r12, r0

    hfilter_8 v0, v10, v11, 1
    hfilter_8 v1, v10, v11, 1
    hfilter_8 v2, v10, v11, 1
    hfilter_8 v3, v10, v11, 1
    hfilter_8 v4, v10, v11, 1
    hfilter_8 v5, v10, v11, 1
    hfilter_8 v6, v10, v11, 1
    hfilter_8 v7, v10, v11, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;# vertical filtering, jump to storing the data.  Otherwise
    ;# load up and filter the additional line that is needed
    ;# for the vertical filter.
    beq     compute_sum_sse_8x8_b

    hfilter_8 v8, v10, v11, 0

    b       second_pass_8x8_b

second_pass_8x8_pre_copy_b:
    slwi.   r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 1
    load_and_align_16 v5, r3, r4, 1
    load_and_align_16 v6, r3, r4, 1
    load_and_align_16 v7, r3, r4, 1
    load_and_align_16 v8, r3, r4, 0

    beq     compute_sum_sse_8x8_b

second_pass_8x8_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8

compute_sum_sse_8x8_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16

    vmrghb  v0, v0, v1
    vmrghb  v1, v2, v3
    vmrghb  v2, v4, v5
    vmrghb  v3, v6, v7

    load_and_align_16 v4, r7, r8, 1
    load_and_align_16 v5, r7, r8, 1
    load_and_align_16 v6, r7, r8, 1
    load_and_align_16 v7, r7, r8, 1
    load_and_align_16 v8, r7, r8, 1
    load_and_align_16 v9, r7, r8, 1
    load_and_align_16 v10, r7, r8, 1
    load_and_align_16 v11, r7, r8, 0

    vmrghb  v4, v4, v5
    vmrghb  v5, v6, v7
    vmrghb  v6, v8, v9
    vmrghb  v7, v10, v11

    compute_sum_sse v0, v4, v18, v19, v20, v21, v23
    compute_sum_sse v1, v5, v18, v19, v20, v21, v23
    compute_sum_sse v2, v6, v18, v19, v20, v21, v23
    compute_sum_sse v3, v7, v18, v19, v20, v21, v23

    variance_final v18, v19, v23, 6

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE
    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int  dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance8x16_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff
    ori     r12, r12, 0xfffc
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    HProlog second_pass_8x16_pre_copy_b

    ;# Load up permutation constants
    load_c v29, b_0123_b, 0, r12, r0
    load_c v30, b_4567_b, 0, r12, r0

    hfilter_8 v0, v29, v30, 1
    hfilter_8 v1, v29, v30, 1
    hfilter_8 v2, v29, v30, 1
    hfilter_8 v3, v29, v30, 1
    hfilter_8 v4, v29, v30, 1
    hfilter_8 v5, v29, v30, 1
    hfilter_8 v6, v29, v30, 1
    hfilter_8 v7, v29, v30, 1
    hfilter_8 v8, v29, v30, 1
    hfilter_8 v9, v29, v30, 1
    hfilter_8 v10, v29, v30, 1
    hfilter_8 v11, v29, v30, 1
    hfilter_8 v12, v29, v30, 1
    hfilter_8 v13, v29, v30, 1
    hfilter_8 v14, v29, v30, 1
    hfilter_8 v15, v29, v30, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;# vertical filtering, jump to storing the data.  Otherwise
    ;# load up and filter the additional line that is needed
    ;# for the vertical filter.
    beq     compute_sum_sse_8x16_b

    hfilter_8 v16, v29, v30, 0

    b       second_pass_8x16_b

second_pass_8x16_pre_copy_b:
    slwi.   r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 1
    load_and_align_16 v5, r3, r4, 1
    load_and_align_16 v6, r3, r4, 1
    load_and_align_16 v7, r3, r4, 1
    load_and_align_16 v8, r3, r4, 1
    load_and_align_16 v9, r3, r4, 1
    load_and_align_16 v10, r3, r4, 1
    load_and_align_16 v11, r3, r4, 1
    load_and_align_16 v12, r3, r4, 1
    load_and_align_16 v13, r3, r4, 1
    load_and_align_16 v14, r3, r4, 1
    load_and_align_16 v15, r3, r4, 1
    load_and_align_16 v16, r3, r4, 0

    beq     compute_sum_sse_8x16_b

second_pass_8x16_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8
    vfilter_16 v8, v9
    vfilter_16 v9, v10
    vfilter_16 v10, v11
    vfilter_16 v11, v12
    vfilter_16 v12, v13
    vfilter_16 v13, v14
    vfilter_16 v14, v15
    vfilter_16 v15, v16

compute_sum_sse_8x16_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16

    vmrghb  v0, v0, v1
    vmrghb  v1, v2, v3
    vmrghb  v2, v4, v5
    vmrghb  v3, v6, v7
    vmrghb  v4, v8, v9
    vmrghb  v5, v10, v11
    vmrghb  v6, v12, v13
    vmrghb  v7, v14, v15

    load_and_align_16 v8, r7, r8, 1
    load_and_align_16 v9, r7, r8, 1
    load_and_align_16 v10, r7, r8, 1
    load_and_align_16 v11, r7, r8, 1
    load_and_align_16 v12, r7, r8, 1
    load_and_align_16 v13, r7, r8, 1
    load_and_align_16 v14, r7, r8, 1
    load_and_align_16 v15, r7, r8, 1

    vmrghb  v8, v8, v9
    vmrghb  v9, v10, v11
    vmrghb  v10, v12, v13
    vmrghb  v11, v14, v15

    compute_sum_sse v0, v8, v18, v19, v20, v21, v23
    compute_sum_sse v1, v9, v18, v19, v20, v21, v23
    compute_sum_sse v2, v10, v18, v19, v20, v21, v23
    compute_sum_sse v3, v11, v18, v19, v20, v21, v23

    load_and_align_16 v8, r7, r8, 1
    load_and_align_16 v9, r7, r8, 1
    load_and_align_16 v10, r7, r8, 1
    load_and_align_16 v11, r7, r8, 1
    load_and_align_16 v12, r7, r8, 1
    load_and_align_16 v13, r7, r8, 1
    load_and_align_16 v14, r7, r8, 1
    load_and_align_16 v15, r7, r8, 0

    vmrghb  v8, v8, v9
    vmrghb  v9, v10, v11
    vmrghb  v10, v12, v13
    vmrghb  v11, v14, v15

    compute_sum_sse v4, v8, v18, v19, v20, v21, v23
    compute_sum_sse v5, v9, v18, v19, v20, v21, v23
    compute_sum_sse v6, v10, v18, v19, v20, v21, v23
    compute_sum_sse v7, v11, v18, v19, v20, v21, v23

    variance_final v18, v19, v23, 7

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE
    blr

;# Filters a horizontal line
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm input
;#  v18 rounding
;#  v19 shift
;#  v20 filter taps
;#  v21 tmp
;#  v22 tmp
;#  v23 tmp
;#  v24 tmp
;#  v25 tmp
;#  v26 tmp
;#  v27 tmp
;#  v28 perm output
;#
.macro hfilter_16 V, increment_counter

    lvsl    v17, 0, r3          ;# permutate value for alignment

    ;# input to filter is 21 bytes wide, output is 16 bytes.
    ;# input can span three vectors if not aligned correctly.
    lvx     v21, 0, r3
    lvx     v22, r10, r3
    lvx     v23, r12, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17
    vperm   v22, v22, v23, v17  ;# v8 v9 = 21 input pixels left-justified

    ;# set 0
    vmsummbm v24, v20, v21, v18 ;# taps times elements

    ;# set 1
    vsldoi  v23, v21, v22, 1
    vmsummbm v25, v20, v23, v18

    ;# set 2
    vsldoi  v23, v21, v22, 2
    vmsummbm v26, v20, v23, v18

    ;# set 3
    vsldoi  v23, v21, v22, 3
    vmsummbm v27, v20, v23, v18

    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
    vpkswus v25, v26, v27       ;# v25 = 2 6 A E 3 7 B F

    vsrh    v24, v24, v19       ;# divide v0, v1 by 128
    vsrh    v25, v25, v19

    vpkuhus \V, v24, v25        ;# \V = scrambled 8-bit result
    vperm   \V, \V, v0, v28     ;# \V = correctly-ordered result
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int  dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance16x8_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    HProlog second_pass_16x8_pre_copy_b

    hfilter_16 v0, 1
    hfilter_16 v1, 1
    hfilter_16 v2, 1
    hfilter_16 v3, 1
    hfilter_16 v4, 1
    hfilter_16 v5, 1
    hfilter_16 v6, 1
    hfilter_16 v7, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;# vertical filtering, jump to storing the data.  Otherwise
    ;# load up and filter the additional line that is needed
    ;# for the vertical filter.
    beq     compute_sum_sse_16x8_b

    hfilter_16 v8, 0

    b       second_pass_16x8_b

second_pass_16x8_pre_copy_b:
    slwi.   r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 1
    load_and_align_16 v5, r3, r4, 1
    load_and_align_16 v6, r3, r4, 1
    load_and_align_16 v7, r3, r4, 1
    load_and_align_16 v8, r3, r4, 1

    beq     compute_sum_sse_16x8_b

second_pass_16x8_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8

compute_sum_sse_16x8_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16

    compute_sum_sse_16 v0, 1
    compute_sum_sse_16 v1, 1
    compute_sum_sse_16 v2, 1
    compute_sum_sse_16 v3, 1
    compute_sum_sse_16 v4, 1
    compute_sum_sse_16 v5, 1
    compute_sum_sse_16 v6, 1
    compute_sum_sse_16 v7, 0

    variance_final v18, v19, v23, 7

    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int  dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance16x16_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff
    ori     r12, r12, 0xfff8
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    HProlog second_pass_16x16_pre_copy_b

    hfilter_16 v0, 1
    hfilter_16 v1, 1
    hfilter_16 v2, 1
    hfilter_16 v3, 1
    hfilter_16 v4, 1
    hfilter_16 v5, 1
    hfilter_16 v6, 1
    hfilter_16 v7, 1
    hfilter_16 v8, 1
    hfilter_16 v9, 1
    hfilter_16 v10, 1
    hfilter_16 v11, 1
    hfilter_16 v12, 1
    hfilter_16 v13, 1
    hfilter_16 v14, 1
    hfilter_16 v15, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;# vertical filtering, jump to storing the data.  Otherwise
    ;# load up and filter the additional line that is needed
    ;# for the vertical filter.
    beq     compute_sum_sse_16x16_b

    hfilter_16 v16, 0

    b       second_pass_16x16_b

second_pass_16x16_pre_copy_b:
    slwi.   r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 1
    load_and_align_16 v5, r3, r4, 1
    load_and_align_16 v6, r3, r4, 1
    load_and_align_16 v7, r3, r4, 1
    load_and_align_16 v8, r3, r4, 1
    load_and_align_16 v9, r3, r4, 1
    load_and_align_16 v10, r3, r4, 1
    load_and_align_16 v11, r3, r4, 1
    load_and_align_16 v12, r3, r4, 1
    load_and_align_16 v13, r3, r4, 1
    load_and_align_16 v14, r3, r4, 1
    load_and_align_16 v15, r3, r4, 1
    load_and_align_16 v16, r3, r4, 0

    beq     compute_sum_sse_16x16_b

second_pass_16x16_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8
    vfilter_16 v8, v9
    vfilter_16 v9, v10
    vfilter_16 v10, v11
    vfilter_16 v11, v12
    vfilter_16 v12, v13
    vfilter_16 v13, v14
    vfilter_16 v14, v15
    vfilter_16 v15, v16

compute_sum_sse_16x16_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16

    compute_sum_sse_16 v0, 1
    compute_sum_sse_16 v1, 1
    compute_sum_sse_16 v2, 1
    compute_sum_sse_16 v3, 1
    compute_sum_sse_16 v4, 1
    compute_sum_sse_16 v5, 1
    compute_sum_sse_16 v6, 1
    compute_sum_sse_16 v7, 1
    compute_sum_sse_16 v8, 1
    compute_sum_sse_16 v9, 1
    compute_sum_sse_16 v10, 1
    compute_sum_sse_16 v11, 1
    compute_sum_sse_16 v12, 1
    compute_sum_sse_16 v13, 1
    compute_sum_sse_16 v14, 1
    compute_sum_sse_16 v15, 0

    variance_final v18, v19, v23, 8

    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .data

    .align 4
hfilter_b:
    .byte   128,  0,  0,  0, 128,  0,  0,  0, 128,  0,  0,  0, 128,  0,  0,  0
    .byte   112, 16,  0,  0, 112, 16,  0,  0, 112, 16,  0,  0, 112, 16,  0,  0
    .byte    96, 32,  0,  0,  96, 32,  0,  0,  96, 32,  0,  0,  96, 32,  0,  0
    .byte    80, 48,  0,  0,  80, 48,  0,  0,  80, 48,  0,  0,  80, 48,  0,  0
    .byte    64, 64,  0,  0,  64, 64,  0,  0,  64, 64,  0,  0,  64, 64,  0,  0
    .byte    48, 80,  0,  0,  48, 80,  0,  0,  48, 80,  0,  0,  48, 80,  0,  0
    .byte    32, 96,  0,  0,  32, 96,  0,  0,  32, 96,  0,  0,  32, 96,  0,  0
    .byte    16, 112, 0,  0,  16, 112, 0,  0,  16, 112, 0,  0,  16, 112, 0,  0

    .align 4
vfilter_b:
    .byte   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
    .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    .byte   112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112
    .byte   16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte   96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte   80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte   48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte   48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte   80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte   96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte   16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte   112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112

    .align 4
b_hperm_b:
    .byte   0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15

    .align 4
b_0123_b:
    .byte   0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6

    .align 4
b_4567_b:
    .byte   4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10

b_hilo_b:
    .byte   0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
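The hfilter_b/vfilter_b tables above are 2-tap bilinear kernels whose taps sum to 128; each pass adds the rounding constant 64 and shifts right by 7, exactly as the "downshift by 7 ( divide by 128 )" comment says. A scalar C sketch of one such pass (bilinear_pass_c is an illustrative name, not a symbol from this tree); step is 1 for the horizontal pass and src_stride for the vertical pass:

/* Scalar sketch of one bilinear filter pass: two taps summing to
 * 128, rounded by 64, then >> 7 back to 8-bit range. */
static void bilinear_pass_c(const unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride,
                            int w, int h, int step, int tap0, int tap1)
{
    int r, c;

    for (r = 0; r < h; r++)
    {
        for (c = 0; c < w; c++)
            dst[c] = (unsigned char)((src[c] * tap0 +
                                      src[c + step] * tap1 + 64) >> 7);

        src += src_stride;
        dst += dst_stride;
    }
}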
@@ -70,10 +70,10 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
         expected_dc = 128;
     }

-    /*vpx_memset(ypred_ptr, expected_dc, 256);*/
+    /*memset(ypred_ptr, expected_dc, 256);*/
     for (r = 0; r < 16; r++)
     {
-        vpx_memset(ypred_ptr, expected_dc, 16);
+        memset(ypred_ptr, expected_dc, 16);
         ypred_ptr += y_stride;
     }
 }
@@ -98,7 +98,7 @@ void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
     for (r = 0; r < 16; r++)
     {

-        vpx_memset(ypred_ptr, yleft_col[r], 16);
+        memset(ypred_ptr, yleft_col[r], 16);
         ypred_ptr += y_stride;
     }

@@ -202,12 +202,12 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
     }


-    /*vpx_memset(upred_ptr,expected_udc,64);*/
-    /*vpx_memset(vpred_ptr,expected_vdc,64);*/
+    /*memset(upred_ptr,expected_udc,64);*/
+    /*memset(vpred_ptr,expected_vdc,64);*/
     for (i = 0; i < 8; i++)
     {
-        vpx_memset(upred_ptr, expected_udc, 8);
-        vpx_memset(vpred_ptr, expected_vdc, 8);
+        memset(upred_ptr, expected_udc, 8);
+        memset(vpred_ptr, expected_vdc, 8);
         upred_ptr += pred_stride;
         vpred_ptr += pred_stride;
     }
@@ -217,8 +217,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
vpx_memcpy(upred_ptr, uabove_row, 8);
|
||||
vpx_memcpy(vpred_ptr, vabove_row, 8);
|
||||
memcpy(upred_ptr, uabove_row, 8);
|
||||
memcpy(vpred_ptr, vabove_row, 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
@@ -229,8 +229,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
vpx_memset(upred_ptr, uleft_col[i], 8);
|
||||
vpx_memset(vpred_ptr, vleft_col[i], 8);
|
||||
memset(upred_ptr, uleft_col[i], 8);
|
||||
memset(vpred_ptr, vleft_col[i], 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
|
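
The pattern in these hunks repeats through the rest of the series: libvpx's vpx_memset/vpx_memcpy/vpx_memmove wrappers are replaced by the <string.h> functions at every call site. The wrappers were thin shims, so the change is mechanical; the macro bodies below are an assumed reconstruction for illustration, not quoted from vpx_mem.h:

    #include <string.h>

    /* assumed shape of the retired shims */
    #define vpx_memset(dest, val, n)   memset((dest), (val), (n))
    #define vpx_memcpy(dest, src, n)   memcpy((dest), (src), (n))
    #define vpx_memmove(dest, src, n)  memmove((dest), (src), (n))

    /* after the sweep, call sites read directly:
     *     memset(ypred_ptr, expected_dc, 16);
     */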
@@ -7,15 +7,13 @@
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_config.h"
#include "./vpx_config.h"
#define RTCD_C
#include "vp8_rtcd.h"
#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"

extern void vpx_scale_rtcd(void);

void vp8_rtcd()
{
    vpx_scale_rtcd();
    once(setup_rtcd_internal);
}
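
once() guards the run-time CPU-detection setup so it executes only on the first call to vp8_rtcd(), however many times the decoder is initialized. A simplified single-threaded stand-in for the idiom (the real vpx_once.h version is thread-safe; this sketch is illustrative only):

    static void once_sketch(void (*func)(void))
    {
        static int done;   /* not thread-safe; vpx_once.h handles that */
        if (!done)
        {
            func();
            done = 1;
        }
    }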
@@ -17,15 +17,15 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
    int i;

    /* set up frame new frame for intra coded blocks */
    vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
    memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
    for (i = 0; i < ybf->y_height; i++)
        ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;

    vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    for (i = 0; i < ybf->uv_height; i++)
        ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;

    vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    for (i = 0; i < ybf->uv_height; i++)
        ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;

@@ -33,7 +33,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)

void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
{
    vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
    vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
    memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
    memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
}
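
The constants here follow VP8's convention for prediction edges that fall outside the frame: the row above a block is treated as 127 and the column to its left as 129, which is exactly what the memset/loop pairs above establish. A one-plane sketch of the same setup (illustrative helper, not a libvpx function):

    static void init_plane_borders_sketch(unsigned char *buf, int stride,
                                          int width, int height)
    {
        int i;
        memset(buf - 1 - stride, 127, width + 5);  /* row above, incl. corner */
        for (i = 0; i < height; i++)
            buf[stride * i - 1] = 129;             /* column to the left */
    }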
@@ -36,7 +36,7 @@ void vp8_dequant_idct_add_y_block_mmx
    else if (eobs[0] == 1)
    {
        vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
        vpx_memset(q, 0, 2 * sizeof(q[0]));
        memset(q, 0, 2 * sizeof(q[0]));
    }

    if (eobs[1] > 1)
@@ -45,7 +45,7 @@ void vp8_dequant_idct_add_y_block_mmx
    {
        vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
                                  dst+4, stride);
        vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
        memset(q + 16, 0, 2 * sizeof(q[0]));
    }

    if (eobs[2] > 1)
@@ -54,7 +54,7 @@ void vp8_dequant_idct_add_y_block_mmx
    {
        vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
                                  dst+8, stride);
        vpx_memset(q + 32, 0, 2 * sizeof(q[0]));
        memset(q + 32, 0, 2 * sizeof(q[0]));
    }

    if (eobs[3] > 1)
@@ -63,7 +63,7 @@ void vp8_dequant_idct_add_y_block_mmx
    {
        vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
                                  dst+12, stride);
        vpx_memset(q + 48, 0, 2 * sizeof(q[0]));
        memset(q + 48, 0, 2 * sizeof(q[0]));
    }

    q += 64;
@@ -85,7 +85,7 @@ void vp8_dequant_idct_add_uv_block_mmx
    else if (eobs[0] == 1)
    {
        vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
        vpx_memset(q, 0, 2 * sizeof(q[0]));
        memset(q, 0, 2 * sizeof(q[0]));
    }

    if (eobs[1] > 1)
@@ -94,7 +94,7 @@ void vp8_dequant_idct_add_uv_block_mmx
    {
        vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
                                  dstu+4, stride);
        vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
        memset(q + 16, 0, 2 * sizeof(q[0]));
    }

    q += 32;
@@ -109,7 +109,7 @@ void vp8_dequant_idct_add_uv_block_mmx
    else if (eobs[0] == 1)
    {
        vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
        vpx_memset(q, 0, 2 * sizeof(q[0]));
        memset(q, 0, 2 * sizeof(q[0]));
    }

    if (eobs[1] > 1)
@@ -118,7 +118,7 @@ void vp8_dequant_idct_add_uv_block_mmx
    {
        vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
                                  dstv+4, stride);
        vpx_memset(q + 16, 0, 2 * sizeof(q[0]));
        memset(q + 16, 0, 2 * sizeof(q[0]));
    }

    q += 32;
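
The eobs test in these hunks encodes a common IDCT shortcut: an end-of-block value of 1 means only the DC coefficient survived quantization, so the full 4x4 inverse transform reduces to adding one constant to the block, and only the first two coefficients need clearing afterwards. A condensed sketch of the per-block decision (assumes the vp8 MMX prototypes; the wrapper name is illustrative):

    static void dequant_idct_block_sketch(short *q, short *dq,
                                          unsigned char *dst, int stride,
                                          char eob)
    {
        if (eob > 1)
        {
            vp8_dequant_idct_add_mmx(q, dq, dst, stride);  /* full 4x4 IDCT */
        }
        else if (eob == 1)
        {
            /* DC only: one constant is added to all 16 pixels */
            vp8_dc_only_idct_add_mmx(q[0] * dq[0], dst, stride, dst, stride);
            memset(q, 0, 2 * sizeof(q[0]));
        }
    }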
@@ -142,7 +142,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
             * Better to use the predictor as reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            vp8_conceal_corrupt_mb(xd);

@@ -151,7 +151,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
            /* force idct to be skipped for B_PRED and use the
             * prediction only for reconstruction
             * */
            vpx_memset(xd->eobs, 0, 25);
            memset(xd->eobs, 0, 25);
        }
    }
#endif
@@ -184,7 +184,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

    /* clear out residual eob info */
    if(xd->mode_info_context->mbmi.mb_skip_coeff)
        vpx_memset(xd->eobs, 0, 25);
        memset(xd->eobs, 0, 25);

    intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

@@ -214,7 +214,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                        (b->qcoeff[0] * DQC[0],
                         dst, dst_stride,
                         dst, dst_stride);
                    vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                    memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }
            }
        }
@@ -251,14 +251,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

            vp8_short_inv_walsh4x4(&b->dqcoeff[0],
                                   xd->qcoeff);
            vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
            memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
        }
        else
        {
            b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
            vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
                                     xd->qcoeff);
            vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
            memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
        }

        /* override the dc dequant constant in order to preserve the
@@ -323,7 +323,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)Border; i++)
    {
        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
        memcpy(dest_ptr1, src_ptr1, plane_stride);
        dest_ptr1 += plane_stride;
    }

@@ -338,7 +338,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)(Border); i++)
    {
        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
        memcpy(dest_ptr1, src_ptr1, plane_stride);
        dest_ptr1 += plane_stride;
    }

@@ -351,7 +351,7 @@ static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)(Border); i++)
    {
        vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
        memcpy(dest_ptr1, src_ptr1, plane_stride);
        dest_ptr1 += plane_stride;
    }
}
@@ -379,7 +379,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)Border; i++)
    {
        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
        memcpy(dest_ptr2, src_ptr2, plane_stride);
        dest_ptr2 += plane_stride;
    }

@@ -397,7 +397,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)(Border); i++)
    {
        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
        memcpy(dest_ptr2, src_ptr2, plane_stride);
        dest_ptr2 += plane_stride;
    }

@@ -411,7 +411,7 @@ static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf)

    for (i = 0; i < (int)(Border); i++)
    {
        vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
        memcpy(dest_ptr2, src_ptr2, plane_stride);
        dest_ptr2 += plane_stride;
    }
}
@@ -446,8 +446,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,

    for (i = 0; i < plane_height; i++)
    {
        vpx_memset(dest_ptr1, src_ptr1[0], Border);
        vpx_memset(dest_ptr2, src_ptr2[0], Border);
        memset(dest_ptr1, src_ptr1[0], Border);
        memset(dest_ptr2, src_ptr2[0], Border);
        src_ptr1 += plane_stride;
        src_ptr2 += plane_stride;
        dest_ptr1 += plane_stride;
@@ -470,8 +470,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,

    for (i = 0; i < plane_height; i++)
    {
        vpx_memset(dest_ptr1, src_ptr1[0], Border);
        vpx_memset(dest_ptr2, src_ptr2[0], Border);
        memset(dest_ptr1, src_ptr1[0], Border);
        memset(dest_ptr2, src_ptr2[0], Border);
        src_ptr1 += plane_stride;
        src_ptr2 += plane_stride;
        dest_ptr1 += plane_stride;
@@ -490,8 +490,8 @@ static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf,

    for (i = 0; i < plane_height; i++)
    {
        vpx_memset(dest_ptr1, src_ptr1[0], Border);
        vpx_memset(dest_ptr2, src_ptr2[0], Border);
        memset(dest_ptr1, src_ptr1[0], Border);
        memset(dest_ptr2, src_ptr2[0], Border);
        src_ptr1 += plane_stride;
        src_ptr2 += plane_stride;
        dest_ptr1 += plane_stride;
@@ -568,7 +568,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)

        /* reset contexts */
        xd->above_context = pc->above_context;
        vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
        memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

        xd->left_available = 0;
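
The extension functions above pad a decoded frame out to its bordered allocation: top and bottom borders are whole-row copies, while left and right borders replicate a single edge pixel per row, which is why a memset suffices there. A self-contained sketch of the same idea for one plane (helper name and layout assumptions are illustrative):

    static void extend_borders_sketch(unsigned char *plane, int stride,
                                      int width, int height, int border)
    {
        int i;
        /* left/right: replicate the edge pixel of each row */
        for (i = 0; i < height; i++)
        {
            unsigned char *row = plane + i * stride;
            memset(row - border, row[0], border);
            memset(row + width, row[width - 1], border);
        }
        /* top/bottom: copy the first/last extended row outward */
        for (i = 1; i <= border; i++)
        {
            memcpy(plane - i * stride - border,
                   plane - border, width + 2 * border);
            memcpy(plane + (height - 1 + i) * stride - border,
                   plane + (height - 1) * stride - border,
                   width + 2 * border);
        }
    }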
@@ -918,19 +918,19 @@ static void init_frame(VP8D_COMP *pbi)
    if (pc->frame_type == KEY_FRAME)
    {
        /* Various keyframe initializations */
        vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
        memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));

        vp8_init_mbmode_probs(pc);

        vp8_default_coef_probs(pc);

        /* reset the segment feature data to 0 with delta coding (Default state). */
        vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
        memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
        xd->mb_segement_abs_delta = SEGMENT_DELTADATA;

        /* reset the mode ref deltasa for loop filter */
        vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
        vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
        memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
        memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));

        /* All buffers are implicitly updated on key frames. */
        pc->refresh_golden_frame = 1;
@@ -1069,12 +1069,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
                pc->vert_scale = clear[6] >> 6;
            }
            data += 7;
            clear += 7;
        }
        else
        {
            vpx_memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
            vpx_memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
            memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
            memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG));
        }
    }
    if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME))
@@ -1106,7 +1105,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
        {
            xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc);

            vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
            memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));

            /* For each segmentation feature (Quant and loop filter level) */
            for (i = 0; i < MB_LVL_MAX; i++)
@@ -1130,7 +1129,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
        if (xd->update_mb_segmentation_map)
        {
            /* Which macro block level features are enabled */
            vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
            memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));

            /* Read the probs used to decode the segment id for each macro block. */
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
@@ -1279,7 +1278,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
#endif
    if (pc->refresh_entropy_probs == 0)
    {
        vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
        memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
    }

    pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc);
@@ -1328,7 +1327,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
    }

    /* clear out the coeff buffer */
    vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
    memset(xd->qcoeff, 0, sizeof(xd->qcoeff));

    vp8_decode_mode_mvs(pbi);

@@ -1342,7 +1341,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
    }
#endif

    vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
    memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
    pbi->frame_corrupt_residual = 0;

#if CONFIG_MULTITHREAD
@@ -1381,7 +1380,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)

    if (pc->refresh_entropy_probs == 0)
    {
        vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
        memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
        pbi->independent_partitions = prev_independent_partitions;
    }
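
Two of the hunks above form a matched pair around the whole frame decode: when refresh_entropy_probs is 0, the persistent coding context fc is snapshotted into lfc before parsing and copied back afterwards, so the frame's in-place probability updates do not leak into later frames. Condensed from the code above:

    if (pc->refresh_entropy_probs == 0)
        memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));   /* save before decode */

    /* ... decode the frame; pc->fc may be updated in place ... */

    if (pc->refresh_entropy_probs == 0)
        memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));   /* restore afterwards */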
@@ -20,8 +20,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
    ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
    ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);

    vpx_memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
    vpx_memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
    memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
    memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);

    /* Clear entropy contexts for Y2 blocks */
    if (!x->mode_info_context->mbmi.is_4x4)
@@ -350,7 +350,7 @@ static void estimate_missing_mvs(MB_OVERLAP *overlaps,
                                 unsigned int first_corrupt)
{
    int mb_row, mb_col;
    vpx_memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
    memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols);
    /* First calculate the overlaps for all blocks */
    for (mb_row = 0; mb_row < mb_rows; ++mb_row)
    {
@@ -58,7 +58,7 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
    if (!pbi)
        return NULL;

    vpx_memset(pbi, 0, sizeof(VP8D_COMP));
    memset(pbi, 0, sizeof(VP8D_COMP));

    if (setjmp(pbi->common.error.jmp))
    {
@@ -60,12 +60,12 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D

    mbd->segmentation_enabled = xd->segmentation_enabled;
    mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
    vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
    memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

    /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
    vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
    memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
    /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
    vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
    memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
    /*unsigned char mode_ref_lf_delta_enabled;
    unsigned char mode_ref_lf_delta_update;*/
    mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
@@ -73,10 +73,10 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D

    mbd->current_bc = &pbi->mbc[0];

    vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
    vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
    vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
    vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
    memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
    memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
    memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
    memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

    mbd->fullpixel_mask = 0xffffffff;

@@ -137,7 +137,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
             * Better to use the predictor as reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            vp8_conceal_corrupt_mb(xd);

@@ -146,7 +146,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
            /* force idct to be skipped for B_PRED and use the
             * prediction only for reconstruction
             * */
            vpx_memset(xd->eobs, 0, 25);
            memset(xd->eobs, 0, 25);
        }
    }
#endif
@@ -179,7 +179,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

    /* clear out residual eob info */
    if(xd->mode_info_context->mbmi.mb_skip_coeff)
        vpx_memset(xd->eobs, 0, 25);
        memset(xd->eobs, 0, 25);

    intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

@@ -229,7 +229,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                {
                    vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
                                         dst, dst_stride, dst, dst_stride);
                    vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                    memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }
            }
        }
@@ -266,14 +266,14 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,

            vp8_short_inv_walsh4x4(&b->dqcoeff[0],
                                   xd->qcoeff);
            vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
            memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
        }
        else
        {
            b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
            vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
                                     xd->qcoeff);
            vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
            memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
        }

        /* override the dc dequant constant in order to preserve the
@@ -360,7 +360,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)

        /* reset contexts */
        xd->above_context = pc->above_context;
        vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
        memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

        xd->left_available = 0;

@@ -499,9 +499,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
            if( mb_row != pc->mb_rows-1 )
            {
                /* Save decoded MB last row data for next-row decoding */
                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
            }

            /* save left_col for next MB decoding */
@@ -876,23 +876,23 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
    if (filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
        memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
        memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
        memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);

        for (j=1; j<pc->mb_rows; j++)
        {
            vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
            memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (j=0; j<pc->mb_rows; j++)
        {
            vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
            memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
            memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
            memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
        }

        /* Initialize the loop filter for this frame. */
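
The mt_yabove_row/mt_uabove_row/mt_vabove_row copies above are the row-to-row handoff of the multithreaded decoder: each worker owns one macroblock row, so the bottom pixel line of every finished macroblock is stashed in a shared per-row buffer that the next row's worker reads as its "above" context. Condensed into a helper (the function name is illustrative):

    static void save_above_row_sketch(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                      int mb_row, int mb_col,
                                      int recon_y_stride, int recon_uv_stride)
    {
        memcpy(pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16,
               xd->dst.y_buffer + 15 * recon_y_stride, 16);   /* last Y line */
        memcpy(pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8,
               xd->dst.u_buffer + 7 * recon_uv_stride, 8);    /* last U line */
        memcpy(pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8,
               xd->dst.v_buffer + 7 * recon_uv_stride, 8);    /* last V line */
    }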
@@ -1543,7 +1543,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
    if (pc->refresh_entropy_probs == 0)
    {
        /* save a copy for later refresh */
        vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
        memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
    }

    vp8_update_coef_probs(cpi);

@@ -1620,7 +1620,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
    /* concatenate partition buffers */
    for(i = 0; i < num_part; i++)
    {
        vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
        memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
        cpi->partition_d[i+1] = dp;
        dp += cpi->partition_sz[i+1];
    }
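
Note that the partition concatenation keeps memmove rather than memcpy: the partitions are compacted toward the front of a single output buffer, so source and destination ranges can overlap, which memcpy does not permit. A tiny self-contained illustration of why the distinction matters:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[16] = "..ABCDEF";
        memmove(buf, buf + 2, 7);  /* overlapping shift-left, incl. '\0' */
        printf("%s\n", buf);       /* prints ABCDEF */
        return 0;
    }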
@@ -415,8 +415,8 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
            vp8_denoiser_free(denoiser);
            return 1;
        }
        vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
                   denoiser->yv12_running_avg[i].frame_size);
        memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
               denoiser->yv12_running_avg[i].frame_size);

    }
    denoiser->yv12_mc_running_avg.flags = 0;
@@ -428,19 +428,19 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
        return 1;
    }

    vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
               denoiser->yv12_mc_running_avg.frame_size);
    memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
           denoiser->yv12_mc_running_avg.frame_size);

    if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width,
                                    height, VP8BORDERINPIXELS) < 0) {
        vp8_denoiser_free(denoiser);
        return 1;
    }
    vpx_memset(denoiser->yv12_last_source.buffer_alloc, 0,
               denoiser->yv12_last_source.frame_size);
    memset(denoiser->yv12_last_source.buffer_alloc, 0,
           denoiser->yv12_last_source.frame_size);

    denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
    vpx_memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
    memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
    vp8_denoiser_set_parameters(denoiser, mode);
    denoiser->nmse_source_diff = 0;
    denoiser->nmse_source_diff_count = 0;
@@ -155,8 +155,8 @@ static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
                                     cpi->common.MBs));

        /* Copy map to sort list */
        vpx_memcpy( sortlist, cpi->mb_activity_map,
                    sizeof(unsigned int) * cpi->common.MBs );
        memcpy( sortlist, cpi->mb_activity_map,
                sizeof(unsigned int) * cpi->common.MBs );


        /* Ripple each value down to its correct position */
@@ -665,8 +665,7 @@ static void init_encode_frame_mb_context(VP8_COMP *cpi)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0,
               sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
    memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

    /* Special case treatment when GF and ARF are not sensible options
     * for reference
@@ -744,7 +743,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
    const int num_part = (1 << cm->multi_token_partition);
#endif

    vpx_memset(segment_counts, 0, sizeof(segment_counts));
    memset(segment_counts, 0, sizeof(segment_counts));
    totalrate = 0;

    if (cpi->compressor_speed == 2)
@@ -974,7 +973,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
        int i;

        /* Set to defaults */
        vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
        memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
@@ -506,8 +506,8 @@ static void optimize_mb(MACROBLOCK *x)
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
@@ -555,8 +555,8 @@ void vp8_optimize_mby(MACROBLOCK *x)
    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
@@ -595,8 +595,8 @@ void vp8_optimize_mbuv(MACROBLOCK *x)
    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
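
All three optimize routines above start by copying the shared above/left entropy contexts into locals: trellis coefficient optimization must advance the token contexts as it walks the blocks, and working on private copies keeps the frame-level context used by real tokenization untouched. In condensed form:

    ENTROPY_CONTEXT_PLANES t_above, t_left;   /* private scratch copies */
    memcpy(&t_above, x->e_mbd.above_context, sizeof(t_above));
    memcpy(&t_left,  x->e_mbd.left_context,  sizeof(t_left));
    /* ... optimize each block, updating only t_above / t_left ... */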
@@ -416,14 +416,13 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
        zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
        zd->segmentation_enabled = xd->segmentation_enabled;
        zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
                   sizeof(xd->segment_feature_data));
        memcpy(zd->segment_feature_data, xd->segment_feature_data,
               sizeof(xd->segment_feature_data));

        vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
                   sizeof(xd->dequant_y1_dc));
        vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
        vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
        vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
        memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
        memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
        memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
        memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

#if 1
        /*TODO: Remove dequant from BLOCKD. This is a temporary solution until
@@ -438,15 +437,14 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
#endif


        vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
        vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult,
                   sizeof(x->rd_thresh_mult));
        memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
        memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult));

        z->zbin_over_quant = x->zbin_over_quant;
        z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
        z->zbin_mode_boost = x->zbin_mode_boost;

        vpx_memset(z->error_bins, 0, sizeof(z->error_bins));
        memset(z->error_bins, 0, sizeof(z->error_bins));
    }
}

@@ -472,7 +470,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
        mb->gf_active_ptr = x->gf_active_ptr;

        vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
        memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
        mbr_ei[i].totalrate = 0;

        mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
@@ -547,7 +545,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
                        vpx_malloc(sizeof(sem_t) * th_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei,
                        vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        CHECK_MEM_ERROR(cpi->en_thread_data,
                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
@@ -573,7 +573,7 @@ void vp8_first_pass(VP8_COMP *cpi)
    {
        int flag[2] = {1, 1};
        vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
        vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
        memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
        vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }

@@ -1779,7 +1779,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)

    start_pos = cpi->twopass.stats_in;

    vpx_memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */
    memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */

    /* Load stats for the current frame. */
    mod_frame_err = calculate_modified_err(cpi, this_frame);
@@ -1875,7 +1875,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
            break;
        }

        vpx_memcpy(this_frame, &next_frame, sizeof(*this_frame));
        memcpy(this_frame, &next_frame, sizeof(*this_frame));

        old_boost_score = boost_score;
    }
@@ -2445,7 +2445,7 @@ void vp8_second_pass(VP8_COMP *cpi)
    if (cpi->twopass.frames_to_key == 0)
    {
        /* Define next KF group and assign bits to it */
        vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
        memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
        find_next_key_frame(cpi, &this_frame_copy);

        /* Special case: Error error_resilient_mode mode does not make much
@@ -2471,7 +2471,7 @@ void vp8_second_pass(VP8_COMP *cpi)
    if (cpi->frames_till_gf_update_due == 0)
    {
        /* Define next gf group and assign bits to it */
        vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
        memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
        define_gf_group(cpi, &this_frame_copy);

        /* If we are going to code an altref frame at the end of the group
@@ -2487,7 +2487,7 @@ void vp8_second_pass(VP8_COMP *cpi)
             * to the GF group
             */
            int bak = cpi->per_frame_bandwidth;
            vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
            memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
            assign_std_frame_bits(cpi, &this_frame_copy);
            cpi->per_frame_bandwidth = bak;
        }
@@ -2510,14 +2510,14 @@ void vp8_second_pass(VP8_COMP *cpi)
            if (cpi->common.frame_type != KEY_FRAME)
            {
                /* Assign bits from those allocated to the GF group */
                vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
                memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
                assign_std_frame_bits(cpi, &this_frame_copy);
            }
        }
        else
        {
            /* Assign bits from those allocated to the GF group */
            vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
            memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
            assign_std_frame_bits(cpi, &this_frame_copy);
        }
    }
@@ -2658,7 +2658,7 @@ static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTP
    double decay_accumulator = 1.0;
    double next_iiratio;

    vpx_memcpy(&local_next_frame, next_frame, sizeof(*next_frame));
    memcpy(&local_next_frame, next_frame, sizeof(*next_frame));

    /* Note the starting file position so we can reset to it */
    start_pos = cpi->twopass.stats_in;
@@ -2735,7 +2735,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
    double kf_group_coded_err = 0.0;
    double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};

    vpx_memset(&next_frame, 0, sizeof(next_frame));
    memset(&next_frame, 0, sizeof(next_frame));

    vp8_clear_system_state();
    start_position = cpi->twopass.stats_in;
@@ -2756,7 +2756,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
    cpi->twopass.frames_to_key = 1;

    /* Take a copy of the initial frame details */
    vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
    memcpy(&first_frame, this_frame, sizeof(*this_frame));

    cpi->twopass.kf_group_bits = 0;
    cpi->twopass.kf_group_error_left = 0;
@@ -2779,7 +2779,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        kf_group_coded_err += this_frame->coded_error;

        /* Load the next frame's stats. */
        vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
        memcpy(&last_frame, this_frame, sizeof(*this_frame));
        input_stats(cpi, this_frame);

        /* Provided that we are not at the end of the file... */
@@ -2847,7 +2847,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        cpi->twopass.frames_to_key /= 2;

        /* Copy first frame details */
        vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
        memcpy(&tmp_frame, &first_frame, sizeof(first_frame));

        /* Reset to the start of the group */
        reset_fpf_position(cpi, start_position);
@@ -2969,7 +2969,6 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         */
        decay_accumulator = 1.0;
        boost_score = 0.0;
        loop_decay_rate = 1.00; /* Starting decay rate */

        for (i = 0 ; i < cpi->twopass.frames_to_key ; i++)
        {
@@ -3213,7 +3212,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        int new_width = cpi->oxcf.Width;
        int new_height = cpi->oxcf.Height;

        int projected_buffer_level = (int)cpi->buffer_level;
        int projected_buffer_level;
        int tmp_q;

        double projected_bits_perframe;
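
A note on the recurring this_frame_copy idiom in the two-pass code above: FIRSTPASS_STATS is a plain struct, so each memcpy is just a struct copy, taken so that find_next_key_frame()/define_gf_group() can consume stats (they read ahead in the stats file) without mutating the caller's frame. Plain assignment would be equivalent:

    FIRSTPASS_STATS this_frame_copy;

    this_frame_copy = this_frame;            /* same effect as the memcpy */
    find_next_key_frame(cpi, &this_frame_copy);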
@@ -1978,8 +1978,8 @@ void print_mode_context(void)
#ifdef VP8_ENTROPY_STATS
void init_mv_ref_counts()
{
    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
    memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
    memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
}

void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
@@ -428,10 +428,10 @@ static void setup_features(VP8_COMP *cpi)

    cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0;
    cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
    vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
    vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
    memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
    memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));

    set_default_lf_deltas(cpi);

@@ -508,7 +508,7 @@ static void disable_segmentation(VP8_COMP *cpi)
static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
{
    /* Copy in the new segmentation map */
    vpx_memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));
    memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols));

    /* Signal that the map should be updated. */
    cpi->mb.e_mbd.update_mb_segmentation_map = 1;
@@ -530,7 +530,7 @@ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map)
static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta)
{
    cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta;
    vpx_memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
    memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data));
}


@@ -602,7 +602,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)

    // Set every macroblock to be eligible for update.
    // For key frame this will reset seg map to 0.
    vpx_memset(cpi->segmentation_map, 0, mbs_in_frame);
    memset(cpi->segmentation_map, 0, mbs_in_frame);

    if (cpi->common.frame_type != KEY_FRAME && block_count > 0)
    {
@@ -686,8 +686,8 @@ static void set_default_lf_deltas(VP8_COMP *cpi)
    cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
    cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;

    vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
    memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
    memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));

    /* Test of ref frame deltas */
    cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2;
@@ -1087,7 +1087,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
        if (Speed >= 15)
            sf->half_pixel_search = 0;

        vpx_memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));
        memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins));

    }; /* switch */

@@ -1298,7 +1298,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
    CHECK_MEM_ERROR(cpi->active_map,
                    vpx_calloc(cm->mb_rows * cm->mb_cols,
                               sizeof(*cpi->active_map)));
    vpx_memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));
    memset(cpi->active_map , 1, (cm->mb_rows * cm->mb_cols));

#if CONFIG_MULTITHREAD
    if (width < 640)
@@ -1891,7 +1891,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)

    cm = &cpi->common;

    vpx_memset(cpi, 0, sizeof(VP8_COMP));
    memset(cpi, 0, sizeof(VP8_COMP));

    if (setjmp(cm->error.jmp))
    {
@@ -2867,7 +2867,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
    }

    /* Update data structure that monitors level of reference to last GF */
    vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
    memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;

    /* this frame refreshes means next frames don't unless specified by user */
@@ -2916,7 +2916,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
    }

    /* Update data structure that monitors level of reference to last GF */
    vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
    memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;

    /* this frame refreshes means next frames don't unless specified by
@@ -3830,9 +3830,9 @@ static void encode_frame_to_data_rate
        }

        // Reset the zero_last counter to 0 on key frame.
        vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
        vpx_memset(cpi->consec_zero_last_mvbias, 0,
                   (cpi->common.mb_rows * cpi->common.mb_cols));
        memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
        memset(cpi->consec_zero_last_mvbias, 0,
               (cpi->common.mb_rows * cpi->common.mb_cols));
    }

#if 0
@@ -4362,9 +4362,9 @@ static void encode_frame_to_data_rate
            disable_segmentation(cpi);
        }
        // Reset the zero_last counter to 0 on key frame.
        vpx_memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
        vpx_memset(cpi->consec_zero_last_mvbias, 0,
                   (cpi->common.mb_rows * cpi->common.mb_cols));
        memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols);
        memset(cpi->consec_zero_last_mvbias, 0,
               (cpi->common.mb_rows * cpi->common.mb_cols));
        vp8_set_quantizer(cpi, Q);
    }

@@ -4387,7 +4387,7 @@ static void encode_frame_to_data_rate
    if (cm->refresh_entropy_probs == 0)
    {
        /* save a copy for later refresh */
        vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
        memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
    }

    vp8_update_coef_context(cpi);
@@ -5613,19 +5613,19 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l

    if (cm->refresh_entropy_probs == 0)
    {
        vpx_memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
        memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc));
    }

    /* Save the contexts separately for alt ref, gold and last. */
    /* (TODO jbb -> Optimize this with pointers to avoid extra copies. ) */
    if(cm->refresh_alt_ref_frame)
        vpx_memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));
        memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc));

    if(cm->refresh_golden_frame)
        vpx_memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));
        memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc));

    if(cm->refresh_last_frame)
        vpx_memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));
        memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc));

    /* if its a dropped frame honor the requests on subsequent frames */
    if (*size > 0)
@@ -5934,7 +5934,7 @@ int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, uns
{
    if (map)
    {
        vpx_memcpy(cpi->active_map, map, rows * cols);
        memcpy(cpi->active_map, map, rows * cols);
        cpi->active_map_enabled = 1;
    }
    else
@@ -862,8 +862,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,

    mode_mv = mode_mv_sb[sign_bias];
    best_ref_mv.as_int = 0;
    vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
    memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
    memset(&best_mbmode, 0, sizeof(best_mbmode));

    /* Setup search priorities */
#if CONFIG_MULTI_RES_ENCODING
@@ -1348,8 +1348,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
            *returndistortion = distortion2;
            best_rd_sse = sse;
            best_rd = this_rd;
            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
                       sizeof(MB_MODE_INFO));
            memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
                   sizeof(MB_MODE_INFO));

            /* Testing this mode gave rise to an improvement in best error
             * score. Lower threshold a bit for next time
@@ -1487,8 +1487,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,

        if (this_rd < best_rd)
        {
            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
                       sizeof(MB_MODE_INFO));
            memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
                   sizeof(MB_MODE_INFO));
        }
    }

@@ -1512,8 +1512,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
    /* set to the best mb mode, this copy can be skip if x->skip since it
     * already has the right content */
    if (!x->skip)
        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
                   sizeof(MB_MODE_INFO));
        memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode,
               sizeof(MB_MODE_INFO));

    if (best_mbmode.mode <= B_PRED)
    {
@@ -49,7 +49,7 @@ static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
    src_y = src_ybc->y_buffer + yoffset;
    dst_y = dst_ybc->y_buffer + yoffset;

    vpx_memcpy(dst_y, src_y, ystride * linestocopy);
    memcpy(dst_y, src_y, ystride * linestocopy);
}

static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
@@ -142,7 +142,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
    int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
    int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
    int filt_val;
    int best_filt_val = cm->filter_level;
    int best_filt_val;
    YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;

    /* Replace unfiltered frame buffer with a new one */
@@ -274,8 +274,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)

    int filter_step;
    int filt_high = 0;
    /* Start search at previous frame filter level */
    int filt_mid = cm->filter_level;
    int filt_mid;
    int filt_low = 0;
    int filt_best;
    int filt_direction = 0;
@@ -287,7 +286,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)

    YV12_BUFFER_CONFIG * saved_frame = cm->frame_to_show;

    vpx_memset(ss_err, 0, sizeof(ss_err));
    memset(ss_err, 0, sizeof(ss_err));

    /* Replace unfiltered frame buffer with a new one */
    cm->frame_to_show = &cpi->pick_lf_lvl_frame;
@@ -1,160 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "vp8/encoder/variance.h"
#include "vp8/encoder/onyx_int.h"

SADFunction *vp8_sad16x16;
SADFunction *vp8_sad16x8;
SADFunction *vp8_sad8x16;
SADFunction *vp8_sad8x8;
SADFunction *vp8_sad4x4;

variance_function *vp8_variance4x4;
variance_function *vp8_variance8x8;
variance_function *vp8_variance8x16;
variance_function *vp8_variance16x8;
variance_function *vp8_variance16x16;

variance_function *vp8_mse16x16;

sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
sub_pixel_variance_function *vp8_sub_pixel_variance16x16;

int (*vp8_block_error)(short *coeff, short *dqcoeff);
int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);

int (*vp8_mbuverror)(MACROBLOCK *mb);
unsigned int (*vp8_get_mb_ss)(short *);
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);

void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);

unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);

// c imports
extern int block_error_c(short *coeff, short *dqcoeff);
extern int vp8_mbblock_error_c(MACROBLOCK *mb, int dc);

extern int vp8_mbuverror_c(MACROBLOCK *mb);
extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
extern void short_fdct4x4_c(short *input, short *output, int pitch);
extern void short_fdct8x4_c(short *input, short *output, int pitch);
extern void vp8_short_walsh4x4_c(short *input, short *output, int pitch);

extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
extern void subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
extern void subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);

extern SADFunction sad16x16_c;
extern SADFunction sad16x8_c;
extern SADFunction sad8x16_c;
extern SADFunction sad8x8_c;
extern SADFunction sad4x4_c;

extern variance_function variance16x16_c;
extern variance_function variance8x16_c;
extern variance_function variance16x8_c;
extern variance_function variance8x8_c;
extern variance_function variance4x4_c;
extern variance_function mse16x16_c;

extern sub_pixel_variance_function sub_pixel_variance4x4_c;
extern sub_pixel_variance_function sub_pixel_variance8x8_c;
extern sub_pixel_variance_function sub_pixel_variance8x16_c;
extern sub_pixel_variance_function sub_pixel_variance16x8_c;
extern sub_pixel_variance_function sub_pixel_variance16x16_c;

extern unsigned int vp8_get_mb_ss_c(short *);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);

// ppc
extern int vp8_block_error_ppc(short *coeff, short *dqcoeff);

extern void vp8_short_fdct4x4_ppc(short *input, short *output, int pitch);
extern void vp8_short_fdct8x4_ppc(short *input, short *output, int pitch);

extern void vp8_subtract_mby_ppc(short *diff, unsigned char *src, unsigned char *pred, int stride);
extern void vp8_subtract_mbuv_ppc(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);

extern SADFunction vp8_sad16x16_ppc;
extern SADFunction vp8_sad16x8_ppc;
extern SADFunction vp8_sad8x16_ppc;
extern SADFunction vp8_sad8x8_ppc;
extern SADFunction vp8_sad4x4_ppc;

extern variance_function vp8_variance16x16_ppc;
extern variance_function vp8_variance8x16_ppc;
extern variance_function vp8_variance16x8_ppc;
extern variance_function vp8_variance8x8_ppc;
extern variance_function vp8_variance4x4_ppc;
extern variance_function vp8_mse16x16_ppc;

extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_ppc;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_ppc;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_ppc;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_ppc;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_ppc;

extern unsigned int vp8_get8x8var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_ppc(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);

void vp8_cmachine_specific_config(void)
{
    // Pure C:
    vp8_mbuverror = vp8_mbuverror_c;
    vp8_fast_quantize_b = vp8_fast_quantize_b_c;
    vp8_short_fdct4x4 = vp8_short_fdct4x4_ppc;
    vp8_short_fdct8x4 = vp8_short_fdct8x4_ppc;
    vp8_fast_fdct4x4 = vp8_short_fdct4x4_ppc;
    vp8_fast_fdct8x4 = vp8_short_fdct8x4_ppc;
    short_walsh4x4 = vp8_short_walsh4x4_c;

    vp8_variance4x4 = vp8_variance4x4_ppc;
    vp8_variance8x8 = vp8_variance8x8_ppc;
    vp8_variance8x16 = vp8_variance8x16_ppc;
    vp8_variance16x8 = vp8_variance16x8_ppc;
    vp8_variance16x16 = vp8_variance16x16_ppc;
    vp8_mse16x16 = vp8_mse16x16_ppc;

    vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_ppc;
    vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_ppc;
    vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_ppc;
    vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ppc;
    vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ppc;

    vp8_get_mb_ss = vp8_get_mb_ss_c;
    vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;

    vp8_sad16x16 = vp8_sad16x16_ppc;
    vp8_sad16x8 = vp8_sad16x8_ppc;
    vp8_sad8x16 = vp8_sad8x16_ppc;
    vp8_sad8x8 = vp8_sad8x8_ppc;
    vp8_sad4x4 = vp8_sad4x4_ppc;

    vp8_block_error = vp8_block_error_ppc;
    vp8_mbblock_error = vp8_mbblock_error_c;

    vp8_subtract_b = vp8_subtract_b_c;
    vp8_subtract_mby = vp8_subtract_mby_ppc;
    vp8_subtract_mbuv = vp8_subtract_mbuv_ppc;
}
@@ -1,153 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_subtract_mbuv_ppc
    .globl vp8_subtract_mby_ppc

;# r3 short *diff
;# r4 unsigned char *usrc
;# r5 unsigned char *vsrc
;# r6 unsigned char *pred
;# r7 int stride
vp8_subtract_mbuv_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf000
    mtspr   256, r12            ;# set VRSAVE

    li      r9, 256
    add     r3, r3, r9
    add     r3, r3, r9
    add     r6, r6, r9

    li      r10, 16
    li      r9, 4
    mtctr   r9

    vspltisw v0, 0

mbu_loop:
    lvsl    v5, 0, r4           ;# permutate value for alignment
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r6           ;# pred

    add     r4, r4, r7
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r4           ;# permutate value for alignment
    lvx     v1, 0, r4           ;# src

    add     r4, r4, r7

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbu_loop

    mtctr   r9

mbv_loop:
    lvsl    v5, 0, r5           ;# permutate value for alignment
    lvx     v1, 0, r5           ;# src
    lvx     v2, 0, r6           ;# pred

    add     r5, r5, r7
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r5           ;# permutate value for alignment
    lvx     v1, 0, r5           ;# src

    add     r5, r5, r7

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbv_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# r3 short *diff
;# r4 unsigned char *src
;# r5 unsigned char *pred
;# r6 int stride
vp8_subtract_mby_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf800
    mtspr   256, r12            ;# set VRSAVE

    li      r10, 16
    mtctr   r10

    vspltisw v0, 0

mby_loop:
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r5           ;# pred

    add     r4, r4, r6
    addi    r5, r5, 16

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vmrglb  v3, v0, v1          ;# unpack low src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mby_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr
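
The deleted AltiVec routine above subtracts a predicted macroblock from the source, widening bytes to 16-bit differences (the prediction buffer is packed 16 bytes per row, while the source advances by its stride). As a reference for what the vector code computes, here is a minimal scalar C sketch; the function name is hypothetical and the body is a plausible reconstruction, not the project's own C fallback:

/* Scalar model of vp8_subtract_mby_ppc: 16x16 luma block.
 * pred is contiguous at 16 bytes per row; src advances by stride. */
void subtract_mby_ref(short *diff, const unsigned char *src,
                      const unsigned char *pred, int stride)
{
    int r, c;
    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
            diff[c] = (short)(src[c] - pred[c]);
        diff += 16;
        pred += 16;
        src += stride;
    }
}

The chroma variant works the same way, running the loop once over the U plane and once over the V plane with 8-wide rows.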
@@ -1,205 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_short_fdct4x4_ppc
    .globl vp8_short_fdct8x4_ppc

.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# Forward and inverse DCTs are nearly identical; only differences are
;# in normalization (fwd is twice unitary, inv is half unitary)
;# and that they are of course transposes of each other.
;#
;# The following three macros accomplish most of the implementation and
;# are used only by ppc_idct.c and ppc_fdct.c.
.macro prologue
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfffc
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    li      r6, 16

    load_c v0, dct_tab, 0, r9, r10
    lvx     v1, r6, r10
    addi    r10, r10, 32
    lvx     v2, 0, r10
    lvx     v3, r6, r10

    load_c v4, ppc_dctperm_tab, 0, r9, r10
    load_c v5, ppc_dctperm_tab, r6, r9, r10

    load_c v6, round_tab, 0, r10, r9
.endm

.macro epilogue
    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE
.endm

;# Do horiz xf on two rows of coeffs  v8 = a0 a1 a2 a3  b0 b1 b2 b3.
;# a/A are the even rows 0,2   b/B are the odd rows 1,3
;# For fwd transform, indices are horizontal positions, then frequencies.
;# For inverse transform, frequencies then positions.
;# The two resulting  A0..A3  B0..B3  are later combined
;# and vertically transformed.

.macro two_rows_horiz Dst
    vperm   v9, v8, v8, v4      ;# v9 = a2 a3 a0 a1  b2 b3 b0 b1

    vmsumshm v10, v0, v8, v6
    vmsumshm v10, v1, v9, v10
    vsraw   v10, v10, v7        ;# v10 = A0 A1  B0 B1

    vmsumshm v11, v2, v8, v6
    vmsumshm v11, v3, v9, v11
    vsraw   v11, v11, v7        ;# v11 = A2 A3  B2 B3

    vpkuwum v10, v10, v11       ;# v10 = A0 A1  B0 B1  A2 A3  B2 B3
    vperm   \Dst, v10, v10, v5  ;# Dest = A0 B0  A1 B1  A2 B2  A3 B3
.endm

;# Vertical xf on two rows. DCT values in comments are for inverse transform;
;# forward transform uses transpose.

.macro two_rows_vert Ceven, Codd
    vspltw  v8, \Ceven, 0       ;# v8 = c00 c10  or  c02 c12 four times
    vspltw  v9, \Codd, 0        ;# v9 = c20 c30  or  c22 c32 ""
    vmsumshm v8, v8, v12, v6
    vmsumshm v8, v9, v13, v8
    vsraw   v10, v8, v7

    vspltw  v8, \Codd, 1        ;# v8 = c01 c11  or  c03 c13
    vspltw  v9, \Ceven, 1       ;# v9 = c21 c31  or  c23 c33
    vmsumshm v8, v8, v12, v6
    vmsumshm v8, v9, v13, v8
    vsraw   v8, v8, v7

    vpkuwum v8, v10, v8         ;# v8 = rows 0,1  or  2,3
.endm

.macro two_rows_h Dest
    stw     r0, 0(r8)
    lwz     r0, 4(r3)
    stw     r0, 4(r8)
    lwzux   r0, r3, r5
    stw     r0, 8(r8)
    lwz     r0, 4(r3)
    stw     r0, 12(r8)
    lvx     v8, 0, r8
    two_rows_horiz \Dest
.endm

    .align 2
;# r3 short *input
;# r4 short *output
;# r5 int pitch
vp8_short_fdct4x4_ppc:

    prologue

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits
    addi    r8, r1, 0


    lwz     r0, 0(r3)
    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    lwzux   r0, r3, r5
    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    two_rows_vert v0, v1
    stvx    v8, 0, r4
    two_rows_vert v2, v3
    stvx    v8, r6, r4

    epilogue

    blr

    .align 2
;# r3 short *input
;# r4 short *output
;# r5 int pitch
vp8_short_fdct8x4_ppc:
    prologue

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits
    addi    r8, r1, 0
    addi    r10, r3, 0

    lwz     r0, 0(r3)
    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    lwzux   r0, r3, r5
    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    two_rows_vert v0, v1
    stvx    v8, 0, r4
    two_rows_vert v2, v3
    stvx    v8, r6, r4

    ;# Next block
    addi    r3, r10, 8
    addi    r4, r4, 32
    lvx     v6, 0, r9           ;# v6 = Hround

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits
    addi    r8, r1, 0

    lwz     r0, 0(r3)
    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    lwzux   r0, r3, r5
    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    two_rows_vert v0, v1
    stvx    v8, 0, r4
    two_rows_vert v2, v3
    stvx    v8, r6, r4

    epilogue

    blr

    .data
    .align 4
ppc_dctperm_tab:
    .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11
    .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15

    .align 4
dct_tab:
    .short  23170,  23170, -12540, -30274,  23170,  23170, -12540, -30274
    .short  23170,  23170,  30274,  12540,  23170,  23170,  30274,  12540

    .short  23170, -23170,  30274, -12540,  23170, -23170,  30274, -12540
    .short -23170,  23170,  12540, -30274, -23170,  23170,  12540, -30274

    .align 4
round_tab:
    .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1))
    .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1))
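
The dct_tab constants appear to be Q15 fixed-point cosines: 23170 ~= cos(pi/4) * 2^15, 30274 ~= cos(pi/8) * 2^15 and 12540 ~= sin(pi/8) * 2^15, and round_tab holds the half-ulp bias (1 << (shift - 1)) that the vmsumshm accumulator picks up before each vsraw down-shift of 14 or 16 bits. A minimal C sketch of that rounding step (helper name hypothetical):

/* Round-to-nearest fixed-point scaling as performed by the
 * vmsumshm/vsraw pairs: accumulate products in 32 bits, add the
 * half-ulp bias, then arithmetic-shift back down. */
static int round_shift(int acc, int shift)
{
    return (acc + (1 << (shift - 1))) >> shift;
}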
@@ -1,51 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_block_error_ppc

    .align 2
;# r3 short *Coeff
;# r4 short *dqcoeff
vp8_block_error_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf800
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    stw     r5, 12(r1)          ;# transfer dc to vector register

    lvx     v0, 0, r3           ;# Coeff
    lvx     v1, 0, r4           ;# dqcoeff

    li      r10, 16

    vspltisw v3, 0

    vsubshs v0, v0, v1

    vmsumshm v2, v0, v0, v3     ;# multiply differences

    lvx     v0, r10, r3         ;# Coeff
    lvx     v1, r10, r4         ;# dqcoeff

    vsubshs v0, v0, v1

    vmsumshm v1, v0, v0, v2     ;# multiply differences
    vsumsws v1, v1, v3          ;# sum up

    stvx    v1, 0, r1
    lwz     r3, 12(r1)          ;# return value

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr
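
vp8_block_error_ppc above returns the sum of squared differences between a block's 16 transform coefficients and their dequantized values, accumulated with vmsumshm and folded with vsumsws. A scalar C sketch of the same computation (function name hypothetical):

/* Scalar model of vp8_block_error_ppc: SSD over one 4x4 block's
 * 16 coefficients (the vector code consumes two 8-short vectors). */
int block_error_ref(const short *coeff, const short *dqcoeff)
{
    int i, error = 0;
    for (i = 0; i < 16; i++)
    {
        int diff = coeff[i] - dqcoeff[i];
        error += diff * diff;
    }
    return error;
}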
@@ -65,8 +65,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
     short *dequant_ptr = d->dequant;
     short zbin_oq_value = b->zbin_extra;
 
-    vpx_memset(qcoeff_ptr, 0, 32);
-    vpx_memset(dqcoeff_ptr, 0, 32);
+    memset(qcoeff_ptr, 0, 32);
+    memset(dqcoeff_ptr, 0, 32);
 
     eob = -1;
 
@@ -296,7 +296,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
 
     vp8_default_coef_probs(& cpi->common);
 
-    vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
+    memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
     {
         int flag[2] = {1, 1};
         vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
@@ -305,9 +305,9 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
     /* Make sure we initialize separate contexts for altref,gold, and normal.
      * TODO shouldn't need 3 different copies of structure to do this!
      */
-    vpx_memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
-    vpx_memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
-    vpx_memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
+    memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
+    memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc));
+    memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc));
 
     cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
 
@@ -555,8 +555,8 @@ static int vp8_rdcost_mby(MACROBLOCK *mb)
     ENTROPY_CONTEXT *ta;
     ENTROPY_CONTEXT *tl;
 
-    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
     ta = (ENTROPY_CONTEXT *)&t_above;
     tl = (ENTROPY_CONTEXT *)&t_left;
@@ -691,7 +691,7 @@ static int rd_pick_intra4x4block(
             *a = tempa;
             *l = templ;
             copy_predictor(best_predictor, b->predictor);
-            vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+            memcpy(best_dqcoeff, b->dqcoeff, 32);
         }
     }
     b->bmi.as_mode = *best_mode;
@@ -715,8 +715,8 @@ static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
     ENTROPY_CONTEXT *tl;
     const int *bmode_costs;
 
-    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
     ta = (ENTROPY_CONTEXT *)&t_above;
     tl = (ENTROPY_CONTEXT *)&t_left;
@@ -820,8 +820,8 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
     ENTROPY_CONTEXT *ta;
     ENTROPY_CONTEXT *tl;
 
-    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
     ta = (ENTROPY_CONTEXT *)&t_above;
     tl = (ENTROPY_CONTEXT *)&t_left;
@@ -1128,8 +1128,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
     ENTROPY_CONTEXT *ta_b;
     ENTROPY_CONTEXT *tl_b;
 
-    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
     ta = (ENTROPY_CONTEXT *)&t_above;
     tl = (ENTROPY_CONTEXT *)&t_left;
@@ -1172,8 +1172,8 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
             ENTROPY_CONTEXT *ta_s;
             ENTROPY_CONTEXT *tl_s;
 
-            vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
-            vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
+            memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
+            memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
 
             ta_s = (ENTROPY_CONTEXT *)&t_above_s;
             tl_s = (ENTROPY_CONTEXT *)&t_left_s;
@@ -1329,14 +1329,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
                     mode_selected = this_mode;
                     best_label_rd = this_rd;
 
-                    vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
-                    vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
+                    memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
+                    memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
 
                 }
             } /*for each 4x4 mode*/
 
-            vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
-            vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
+            memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
+            memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
 
             labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
                         bsi->ref_mv, x->mvcost);
@@ -1392,7 +1392,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
     int i;
     BEST_SEG_INFO bsi;
 
-    vpx_memset(&bsi, 0, sizeof(bsi));
+    memset(&bsi, 0, sizeof(bsi));
 
     bsi.segment_rd = best_rd;
     bsi.ref_mv = best_ref_mv;
@@ -1661,7 +1661,6 @@ void vp8_mv_pred
         mv.as_mv.row = mvx[vcnt/2];
         mv.as_mv.col = mvy[vcnt/2];
 
-        find = 1;
         /* sr is set to 0 to allow calling function to decide the search
          * range.
          */
@@ -1926,8 +1925,8 @@ static void update_best_mode(BEST_MODE* best_mode, int this_rd,
                        (rd->distortion2-rd->distortion_uv));
 
     best_mode->rd = this_rd;
-    vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
-    vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
+    memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+    memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
 
     if ((this_mode == B_PRED) || (this_mode == SPLITMV))
     {
@@ -1989,9 +1988,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
     best_mode.rd = INT_MAX;
     best_mode.yrd = INT_MAX;
     best_mode.intra_rd = INT_MAX;
-    vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
-    vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
-    vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
+    memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
+    memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
+    memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
 
     /* Setup search priorities */
     get_reference_search_order(cpi, ref_frame_map);
@@ -2293,7 +2292,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
 
             /* Further step/diamond searches as necessary */
-            n = 0;
            further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
             n = num00;
@@ -2560,8 +2558,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
                                      intra_rd_penalty, cpi, x);
         if (this_rd < best_mode.rd || x->skip)
         {
             /* Note index of best mode so far */
             best_mode_index = mode_index;
-            *returnrate = rd.rate2;
-            *returndistortion = rd.distortion2;
             update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
@@ -2586,7 +2582,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
 
 
     /* macroblock modes */
-    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
+    memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
 
     if (best_mode.mbmode.mode == B_PRED)
     {
@@ -2599,7 +2595,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
         for (i = 0; i < 16; i++)
             xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
 
-        vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
+        memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
 
         x->e_mbd.mode_info_context->mbmi.mv.as_int =
             x->partition_info->bmi[15].mv.as_int;
@@ -23,7 +23,7 @@ void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
     if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
     {
         /* Reset Gf useage monitors */
-        vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
         cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
     }
     else
@@ -274,8 +274,8 @@ static void vp8_temporal_filter_iterate_c
     int i, j, k;
     int stride;
 
-    vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
-    vpx_memset(count, 0, 384*sizeof(unsigned short));
+    memset(accumulator, 0, 384*sizeof(unsigned int));
+    memset(count, 0, 384*sizeof(unsigned short));
 
 #if ALT_REF_MC_ENABLED
     cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
@@ -502,7 +502,7 @@ void vp8_temporal_filter_prepare_c
     start_frame = distance + frames_to_blur_forward;
 
     /* Setup frame pointers, NULL indicates frame not included in filter */
-    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
+    memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
     for (frame = 0; frame < frames_to_blur; frame++)
     {
         int which_buffer = start_frame - frame;
@@ -421,7 +421,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 
 void init_context_counters(void)
 {
-    vpx_memset(context_counters, 0, sizeof(context_counters));
+    memset(context_counters, 0, sizeof(context_counters));
 }
 
 void print_context_counters()
@@ -596,13 +596,13 @@ void vp8_fix_contexts(MACROBLOCKD *x)
     /* Clear entropy contexts for Y2 blocks */
     if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
-        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
     }
     else
     {
-        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
-        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
     }
 
 }
@@ -35,7 +35,7 @@
 void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
 {
     char eob = 0;
-    short *zbin_boost_ptr = b->zrun_zbin_boost;
+    short *zbin_boost_ptr;
     short *qcoeff_ptr = d->qcoeff;
     DECLARE_ALIGNED_ARRAY(16, short, x, 16);
     DECLARE_ALIGNED_ARRAY(16, short, y, 16);
@@ -55,7 +55,7 @@ void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
     __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
     __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
 
-    vpx_memset(qcoeff_ptr, 0, 32);
+    memset(qcoeff_ptr, 0, 32);
 
     /* Duplicate to all lanes. */
     zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
@@ -10,7 +10,8 @@
 
 
 #include "./vpx_config.h"
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
@@ -365,9 +366,9 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
     if (oxcf->number_of_layers > 1)
     {
         memcpy (oxcf->target_bitrate, cfg.ts_target_bitrate,
-               sizeof(cfg.ts_target_bitrate));
+                sizeof(cfg.ts_target_bitrate));
         memcpy (oxcf->rate_decimator, cfg.ts_rate_decimator,
-               sizeof(cfg.ts_rate_decimator));
+                sizeof(cfg.ts_rate_decimator));
         memcpy (oxcf->layer_id, cfg.ts_layer_id, sizeof(cfg.ts_layer_id));
     }
 
@@ -649,6 +650,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
 
 
     vp8_rtcd();
+    vpx_scale_rtcd();
 
     if (!ctx->priv)
     {
@@ -11,7 +11,8 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vp8dx.h"
 #include "vpx/internal/vpx_codec_internal.h"
@@ -106,6 +107,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
     (void) data;
 
     vp8_rtcd();
+    vpx_scale_rtcd();
 
     /* This function only allocates space for the vpx_codec_alg_priv_t
      * structure. More memory may be required at the time the stream
@@ -286,8 +288,8 @@ update_fragments(vpx_codec_alg_priv_t *ctx,
     if (ctx->fragments.count == 0)
     {
         /* New frame, reset fragment pointers and sizes */
-        vpx_memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs));
-        vpx_memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes));
+        memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs));
+        memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes));
     }
     if (ctx->fragments.enabled && !(data == NULL && data_sz == 0))
     {
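
The hunks above mechanically replace vpx_memset/vpx_memcpy with the standard C library calls. To the best of my knowledge the vpx_* variants were thin pass-through wrappers of roughly this shape (a hedged reconstruction, not copied from the tree):

#include <string.h>

/* Presumed shape of the wrapper being retired in favor of libc. */
void *vpx_memset(void *dest, int val, size_t length)
{
    return memset(dest, val, length);
}

Since the wrapper adds nothing over memset/memcpy, calling libc directly removes a layer of indirection without changing behavior.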
vp9/common/mips/msa/vp9_convolve8_horiz_msa.c (new file, 1045 lines)
File diff suppressed because it is too large.
vp9/common/mips/msa/vp9_convolve8_msa.c (new file, 880 lines)
@@ -0,0 +1,880 @@
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "vp9/common/mips/msa/vp9_convolve_msa.h"

const uint8_t mc_filt_mask_arr[16 * 3] = {
  /* 8 width cases */
  0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
  /* 4 width cases */
  0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
  /* 4 width cases */
  8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
};

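mc_filt_mask_arr feeds __msa_vshf_b, which gathers bytes from two 16-byte source vectors so that the following dot-product instruction sees adjacent-pixel pairs (p[0],p[1], p[1],p[2], ...); as these routines appear to use the intrinsic, mask entries below 16 index one source and entries 16-31 the other, which is how the 4-width masks interleave two rows at once. A scalar model of one such gather (helper name hypothetical, semantics as inferred from the calls below):

/* Scalar model of one __msa_vshf_b gather as used here: entries < 16
 * select from src_lo, entries >= 16 select from src_hi. */
static void shuffle_bytes(unsigned char *out, const unsigned char *mask,
                          const unsigned char *src_hi,
                          const unsigned char *src_lo) {
  int i;
  for (i = 0; i < 16; i++)
    out[i] = (mask[i] < 16) ? src_lo[mask[i]] : src_hi[mask[i] - 16];
}
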
static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter_horiz, int8_t *filter_vert,
                                     int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
  v16u8 mask0, mask1, mask2, mask3;
  v8i16 filt_horiz;
  v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;
  v8i16 horiz_out5, horiz_out6, horiz_out7, horiz_out8, horiz_out9;
  v8i16 tmp0, tmp1, out0, out1, out2, out3, out4;
  v8i16 filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;

  mask0 = LOAD_UB(&mc_filt_mask_arr[16]);

  src -= (3 + 3 * src_stride);

  /* rearranging filter */
  filt_horiz = LOAD_SH(filter_horiz);
  filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
  filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
  filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
  filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);

  mask1 = mask0 + 2;
  mask2 = mask0 + 4;
  mask3 = mask0 + 6;

  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
                  src0, src1, src2, src3, src4, src5, src6, 128);

  horiz_out0 = HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3,
                                     filt_horiz0, filt_horiz1, filt_horiz2,
                                     filt_horiz3);
  horiz_out2 = HORIZ_8TAP_FILT_2VECS(src2, src3, mask0, mask1, mask2, mask3,
                                     filt_horiz0, filt_horiz1, filt_horiz2,
                                     filt_horiz3);
  horiz_out4 = HORIZ_8TAP_FILT_2VECS(src4, src5, mask0, mask1, mask2, mask3,
                                     filt_horiz0, filt_horiz1, filt_horiz2,
                                     filt_horiz3);
  horiz_out5 = HORIZ_8TAP_FILT_2VECS(src5, src6, mask0, mask1, mask2, mask3,
                                     filt_horiz0, filt_horiz1, filt_horiz2,
                                     filt_horiz3);
  horiz_out1 = (v8i16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
  horiz_out3 = (v8i16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);

  filt = LOAD_SH(filter_vert);
  filt_vert0 = __msa_splati_h(filt, 0);
  filt_vert1 = __msa_splati_h(filt, 1);
  filt_vert2 = __msa_splati_h(filt, 2);
  filt_vert3 = __msa_splati_h(filt, 3);

  out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
  out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
    src += (4 * src_stride);

    XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);

    horiz_out7 = HORIZ_8TAP_FILT_2VECS(src7, src8, mask0, mask1, mask2, mask3,
                                       filt_horiz0, filt_horiz1, filt_horiz2,
                                       filt_horiz3);
    horiz_out6 = (v8i16)__msa_sldi_b((v16i8)horiz_out7, (v16i8)horiz_out5, 8);

    out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);

    tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);

    horiz_out9 = HORIZ_8TAP_FILT_2VECS(src9, src10, mask0, mask1, mask2, mask3,
                                       filt_horiz0, filt_horiz1, filt_horiz2,
                                       filt_horiz3);
    horiz_out8 = (v8i16)__msa_sldi_b((v16i8)horiz_out9, (v16i8)horiz_out7, 8);

    out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);

    tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);
    tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_2B_XORI128_STORE_4_BYTES_4(tmp0, tmp1, dst, dst_stride);
    dst += (4 * dst_stride);

    horiz_out5 = horiz_out9;

    out0 = out2;
    out1 = out3;
    out2 = out4;
  }
}

static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter_horiz, int8_t *filter_vert,
                                     int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
  v8i16 filt_horiz, filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;
  v16u8 mask0, mask1, mask2, mask3;
  v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
  v8i16 horiz_out4, horiz_out5, horiz_out6, horiz_out7;
  v8i16 horiz_out8, horiz_out9, horiz_out10;
  v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
  v8i16 tmp0, tmp1, tmp2, tmp3;

  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);

  src -= (3 + 3 * src_stride);

  /* rearranging filter */
  filt_horiz = LOAD_SH(filter_horiz);
  filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
  filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
  filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
  filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);

  mask1 = mask0 + 2;
  mask2 = mask0 + 4;
  mask3 = mask0 + 6;

  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
                  src0, src1, src2, src3, src4, src5, src6, 128);

  horiz_out0 = HORIZ_8TAP_FILT(src0, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out1 = HORIZ_8TAP_FILT(src1, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out2 = HORIZ_8TAP_FILT(src2, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out3 = HORIZ_8TAP_FILT(src3, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out4 = HORIZ_8TAP_FILT(src4, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out5 = HORIZ_8TAP_FILT(src5, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);
  horiz_out6 = HORIZ_8TAP_FILT(src6, mask0, mask1, mask2, mask3, filt_horiz0,
                               filt_horiz1, filt_horiz2, filt_horiz3);

  filt = LOAD_SH(filter_vert);
  filt_vert0 = __msa_splati_h(filt, 0);
  filt_vert1 = __msa_splati_h(filt, 1);
  filt_vert2 = __msa_splati_h(filt, 2);
  filt_vert3 = __msa_splati_h(filt, 3);

  out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
  out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
  out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out1);
  out5 = (v8i16)__msa_ilvev_b((v16i8)horiz_out4, (v16i8)horiz_out3);
  out6 = (v8i16)__msa_ilvev_b((v16i8)horiz_out6, (v16i8)horiz_out5);

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
    src += (4 * src_stride);

    XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);

    horiz_out7 = HORIZ_8TAP_FILT(src7, mask0, mask1, mask2, mask3, filt_horiz0,
                                 filt_horiz1, filt_horiz2, filt_horiz3);

    out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
    tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);
    tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);

    horiz_out8 = HORIZ_8TAP_FILT(src8, mask0, mask1, mask2, mask3, filt_horiz0,
                                 filt_horiz1, filt_horiz2, filt_horiz3);

    out7 = (v8i16)__msa_ilvev_b((v16i8)horiz_out8, (v16i8)horiz_out7);
    tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);
    tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);

    horiz_out9 = HORIZ_8TAP_FILT(src9, mask0, mask1, mask2, mask3, filt_horiz0,
                                 filt_horiz1, filt_horiz2, filt_horiz3);

    out8 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);
    tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);
    tmp2 = SRARI_SATURATE_SIGNED_H(tmp2, FILTER_BITS, 7);

    horiz_out10 = HORIZ_8TAP_FILT(src10, mask0, mask1, mask2, mask3,
                                  filt_horiz0, filt_horiz1, filt_horiz2,
                                  filt_horiz3);

    out9 = (v8i16)__msa_ilvev_b((v16i8)horiz_out10, (v16i8)horiz_out9);
    tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vert0, filt_vert1,
                               filt_vert2, filt_vert3);
    tmp3 = SRARI_SATURATE_SIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_4_XORI128_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
    dst += (4 * dst_stride);

    horiz_out6 = horiz_out10;

    out0 = out2;
    out1 = out3;
    out2 = out8;
    out4 = out6;
    out5 = out7;
    out6 = out9;
  }
}

static void common_hv_8ht_8vt_16w_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz, int8_t *filter_vert,
                                      int32_t height) {
  int32_t multiple8_cnt;
  for (multiple8_cnt = 2; multiple8_cnt--;) {
    common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz,
                             filter_vert, height);
    src += 8;
    dst += 8;
  }
}

static void common_hv_8ht_8vt_32w_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz, int8_t *filter_vert,
                                      int32_t height) {
  int32_t multiple8_cnt;
  for (multiple8_cnt = 4; multiple8_cnt--;) {
    common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz,
                             filter_vert, height);
    src += 8;
    dst += 8;
  }
}

static void common_hv_8ht_8vt_64w_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz, int8_t *filter_vert,
                                      int32_t height) {
  int32_t multiple8_cnt;
  for (multiple8_cnt = 8; multiple8_cnt--;) {
    common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz,
                             filter_vert, height);
    src += 8;
    dst += 8;
  }
}

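The 16-, 32- and 64-wide variants above simply retile the 8-wide kernel across the row, advancing both pointers by 8 columns per pass. A generic sketch of that tiling pattern (helper and kernel names hypothetical):

/* Column tiling as used by the _16w/_32w/_64w wrappers: run an 8-wide
 * kernel width/8 times, offsetting src and dst by 8 pixels each pass. */
static void tile_8w(void (*kernel8)(const unsigned char *, int,
                                    unsigned char *, int),
                    const unsigned char *src, int src_stride,
                    unsigned char *dst, int dst_stride, int width) {
  int x;
  for (x = 0; x < width; x += 8)
    kernel8(src + x, src_stride, dst + x, dst_stride);
}
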
static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz,
                                      int8_t *filter_vert) {
  uint32_t out0, out1, out2, out3;
  v16i8 src0, src1, src2, src3, src4, mask;
  v16u8 res0, res1, horiz_vec;
  v16u8 filt_vert, filt_horiz, vec0, vec1;
  v8u16 filt, tmp0, tmp1;
  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;

  mask = LOAD_SB(&mc_filt_mask_arr[16]);

  /* rearranging filter */
  filt = LOAD_UH(filter_horiz);
  filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);

  filt = LOAD_UH(filter_vert);
  filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);

  LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
  horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
  horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);

  horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
  horiz_out3 = (v8u16)__msa_pckod_d((v2i64)horiz_out4, (v2i64)horiz_out2);

  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);

  tmp0 = __msa_dotp_u_h(vec0, filt_vert);
  tmp1 = __msa_dotp_u_h(vec1, filt_vert);
  tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
  tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

  res0 = (v16u8)__msa_pckev_b((v16i8)tmp0, (v16i8)tmp0);
  res1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp1);

  out0 = __msa_copy_u_w((v4i32)res0, 0);
  out1 = __msa_copy_u_w((v4i32)res0, 1);
  out2 = __msa_copy_u_w((v4i32)res1, 0);
  out3 = __msa_copy_u_w((v4i32)res1, 1);

  STORE_WORD(dst, out0);
  dst += dst_stride;
  STORE_WORD(dst, out1);
  dst += dst_stride;
  STORE_WORD(dst, out2);
  dst += dst_stride;
  STORE_WORD(dst, out3);
}

static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz,
                                      int8_t *filter_vert) {
  uint32_t out0, out1, out2, out3;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
  v16u8 filt_horiz, filt_vert, horiz_vec;
  v16u8 vec0, vec1, vec2, vec3;
  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
  v8u16 vec4, vec5, vec6, vec7, filt;
  v8u16 horiz_out4, horiz_out5, horiz_out6, horiz_out7, horiz_out8;
  v16i8 res0, res1, res2, res3;

  mask = LOAD_SB(&mc_filt_mask_arr[16]);

  /* rearranging filter */
  filt = LOAD_UH(filter_horiz);
  filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);

  filt = LOAD_UH(filter_vert);
  filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);

  LOAD_8VECS_SB(src, src_stride,
                src0, src1, src2, src3, src4, src5, src6, src7);
  src += (8 * src_stride);
  src8 = LOAD_SB(src);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
  horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src4);
  horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src6);
  horiz_out6 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out6 = SRARI_SATURATE_UNSIGNED_H(horiz_out6, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src8, src8);
  horiz_out8 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out8 = SRARI_SATURATE_UNSIGNED_H(horiz_out8, FILTER_BITS, 7);

  horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
  horiz_out3 = (v8u16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);
  horiz_out5 = (v8u16)__msa_sldi_b((v16i8)horiz_out6, (v16i8)horiz_out4, 8);
  horiz_out7 = (v8u16)__msa_pckod_d((v2i64)horiz_out8, (v2i64)horiz_out6);

  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
  vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
  vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);

  vec4 = __msa_dotp_u_h(vec0, filt_vert);
  vec5 = __msa_dotp_u_h(vec1, filt_vert);
  vec6 = __msa_dotp_u_h(vec2, filt_vert);
  vec7 = __msa_dotp_u_h(vec3, filt_vert);

  vec4 = SRARI_SATURATE_UNSIGNED_H(vec4, FILTER_BITS, 7);
  vec5 = SRARI_SATURATE_UNSIGNED_H(vec5, FILTER_BITS, 7);
  vec6 = SRARI_SATURATE_UNSIGNED_H(vec6, FILTER_BITS, 7);
  vec7 = SRARI_SATURATE_UNSIGNED_H(vec7, FILTER_BITS, 7);

  res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4);
  res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5);
  res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6);
  res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7);

  out0 = __msa_copy_u_w((v4i32)res0, 0);
  out1 = __msa_copy_u_w((v4i32)res0, 1);
  out2 = __msa_copy_u_w((v4i32)res1, 0);
  out3 = __msa_copy_u_w((v4i32)res1, 1);

  STORE_WORD(dst, out0);
  dst += dst_stride;
  STORE_WORD(dst, out1);
  dst += dst_stride;
  STORE_WORD(dst, out2);
  dst += dst_stride;
  STORE_WORD(dst, out3);
  dst += dst_stride;

  out0 = __msa_copy_u_w((v4i32)res2, 0);
  out1 = __msa_copy_u_w((v4i32)res2, 1);
  out2 = __msa_copy_u_w((v4i32)res3, 0);
  out3 = __msa_copy_u_w((v4i32)res3, 1);

  STORE_WORD(dst, out0);
  dst += dst_stride;
  STORE_WORD(dst, out1);
  dst += dst_stride;
  STORE_WORD(dst, out2);
  dst += dst_stride;
  STORE_WORD(dst, out3);
}

static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter_horiz,
                                     int8_t *filter_vert,
                                     int32_t height) {
  if (4 == height) {
    common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
                              filter_horiz, filter_vert);
  } else if (8 == height) {
    common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
                              filter_horiz, filter_vert);
  }
}

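When both filters reduce to two taps, the routines above and below compute a separable bilinear filter: a horizontal 2-tap pass rounded by FILTER_BITS, then a vertical 2-tap pass over those intermediates, rounded again. A scalar sketch of one output pixel (helper name hypothetical, assuming FILTER_BITS is 7 as in vp9):

/* One output pixel of the separable 2-tap path: the horizontal pass is
 * rounded back to 8-bit range before the vertical pass, matching the
 * SRARI_SATURATE_UNSIGNED_H placement in the vector code. */
static unsigned char bilinear_2x2(const unsigned char *src, int stride,
                                  const signed char *fh,
                                  const signed char *fv) {
  enum { FB = 7 };  /* FILTER_BITS */
  int h0 = src[0] * fh[0] + src[1] * fh[1];
  int h1 = src[stride] * fh[0] + src[stride + 1] * fh[1];
  int r0 = (h0 + (1 << (FB - 1))) >> FB;
  int r1 = (h1 + (1 << (FB - 1))) >> FB;
  int v = r0 * fv[0] + r1 * fv[1];
  return (unsigned char)((v + (1 << (FB - 1))) >> FB);
}
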
static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz,
                                      int8_t *filter_vert) {
  v16i8 src0, src1, src2, src3, src4, mask;
  v16u8 filt_horiz, filt_vert, horiz_vec;
  v16u8 vec0, vec1, vec2, vec3;
  v8u16 horiz_out0, horiz_out1;
  v8u16 tmp0, tmp1, tmp2, tmp3;
  v8i16 filt;

  mask = LOAD_SB(&mc_filt_mask_arr[0]);

  /* rearranging filter */
  filt = LOAD_SH(filter_horiz);
  filt_horiz = (v16u8)__msa_splati_h(filt, 0);

  filt = LOAD_SH(filter_vert);
  filt_vert = (v16u8)__msa_splati_h(filt, 0);

  LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
  src += (5 * src_stride);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
  horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  tmp0 = __msa_dotp_u_h(vec0, filt_vert);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
  tmp1 = __msa_dotp_u_h(vec1, filt_vert);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
  horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

  vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
  tmp2 = __msa_dotp_u_h(vec2, filt_vert);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
  tmp3 = __msa_dotp_u_h(vec3, filt_vert);

  tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
  tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
  tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
  tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

  PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
}

static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src,
                                          int32_t src_stride,
                                          uint8_t *dst,
                                          int32_t dst_stride,
                                          int8_t *filter_horiz,
                                          int8_t *filter_vert,
                                          int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, mask;
  v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
  v8u16 horiz_out0, horiz_out1;
  v8u16 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
  v8i16 filt;

  mask = LOAD_SB(&mc_filt_mask_arr[0]);

  /* rearranging filter */
  filt = LOAD_SH(filter_horiz);
  filt_horiz = (v16u8)__msa_splati_h(filt, 0);

  filt = LOAD_SH(filter_vert);
  filt_vert = (v16u8)__msa_splati_h(filt, 0);

  src0 = LOAD_SB(src);
  src += src_stride;

  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

  for (loop_cnt = (height >> 3); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
    src += (4 * src_stride);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp1 = __msa_dotp_u_h(vec0, filt_vert);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp2 = (v8u16)__msa_dotp_u_h(vec0, filt_vert);

    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp3 = __msa_dotp_u_h(vec0, filt_vert);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

    LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
    src += (4 * src_stride);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp4 = __msa_dotp_u_h(vec0, filt_vert);

    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
    tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);

    PCKEV_B_STORE_8_BYTES_4(tmp1, tmp2, tmp3, tmp4, dst, dst_stride);
    dst += (4 * dst_stride);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp5 = __msa_dotp_u_h(vec0, filt_vert);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp6 = __msa_dotp_u_h(vec0, filt_vert);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp7 = __msa_dotp_u_h(vec0, filt_vert);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp8 = __msa_dotp_u_h(vec0, filt_vert);

    tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);
    tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
    tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);
    tmp8 = SRARI_SATURATE_UNSIGNED_H(tmp8, FILTER_BITS, 7);

    PCKEV_B_STORE_8_BYTES_4(tmp5, tmp6, tmp7, tmp8, dst, dst_stride);
    dst += (4 * dst_stride);
  }
}

static void common_hv_2ht_2vt_8w_msa(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter_horiz, int8_t *filter_vert,
                                     int32_t height) {
  if (4 == height) {
    common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, filter_horiz,
                              filter_vert);
  } else {
    common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
                                  filter_horiz, filter_vert, height);
  }
}

static void common_hv_2ht_2vt_16w_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter_horiz, int8_t *filter_vert,
                                      int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
  v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
  v8u16 horiz_vec0, horiz_vec1, tmp1, tmp2;
  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
  v8i16 filt;

  mask = LOAD_SB(&mc_filt_mask_arr[0]);

  /* rearranging filter */
  filt = LOAD_SH(filter_horiz);
  filt_horiz = (v16u8)__msa_splati_h(filt, 0);

  filt = LOAD_SH(filter_vert);
  filt_vert = (v16u8)__msa_splati_h(filt, 0);

  src0 = LOAD_SB(src);
  src1 = LOAD_SB(src + 8);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
  horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);

  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
  horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);

  src += src_stride;

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src0, src2, src4, src6);
    LOAD_4VECS_SB(src + 8, src_stride, src1, src3, src5, src7);
    src += (4 * src_stride);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
    dst += dst_stride;

    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
    dst += dst_stride;

    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src5);
    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
    dst += dst_stride;

    horiz_vec = (v16u8)__msa_vshf_b(mask, src6, src6);
    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);

    horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src7);
    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
    horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);

    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
    dst += dst_stride;
  }
}

static void common_hv_2ht_2vt_32w_msa(const uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
int8_t *filter_horiz, int8_t *filter_vert,
|
||||
int32_t height) {
|
||||
int32_t multiple8_cnt;
|
||||
for (multiple8_cnt = 2; multiple8_cnt--;) {
|
||||
common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz,
|
||||
filter_vert, height);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
int8_t *filter_horiz, int8_t *filter_vert,
|
||||
int32_t height) {
|
||||
int32_t multiple8_cnt;
|
||||
for (multiple8_cnt = 4; multiple8_cnt--;) {
|
||||
common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz,
|
||||
filter_vert, height);
|
||||
src += 16;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int32_t x_step_q4,
|
||||
const int16_t *filter_y, int32_t y_step_q4,
|
||||
int32_t w, int32_t h) {
|
||||
int8_t cnt, filt_hor[8], filt_ver[8];
|
||||
|
||||
if (16 != x_step_q4 || 16 != y_step_q4) {
|
||||
vp9_convolve8_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
return;
|
||||
}
|
||||
|
||||
if (((const int32_t *)filter_x)[1] == 0x800000 &&
|
||||
((const int32_t *)filter_y)[1] == 0x800000) {
|
||||
vp9_convolve_copy(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
return;
|
||||
}
|
||||
|
||||
for (cnt = 0; cnt < 8; ++cnt) {
|
||||
filt_hor[cnt] = filter_x[cnt];
|
||||
filt_ver[cnt] = filter_y[cnt];
|
||||
}
|
||||
|
||||
if (((const int32_t *)filter_x)[0] == 0 &&
|
||||
((const int32_t *)filter_y)[0] == 0) {
|
||||
switch (w) {
|
||||
case 4:
|
||||
common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
&filt_hor[3], &filt_ver[3], (int32_t)h);
|
||||
break;
|
||||
case 8:
|
||||
common_hv_2ht_2vt_8w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
&filt_hor[3], &filt_ver[3], (int32_t)h);
|
||||
break;
|
||||
case 16:
|
||||
common_hv_2ht_2vt_16w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
&filt_hor[3], &filt_ver[3], (int32_t)h);
|
||||
break;
|
||||
case 32:
|
||||
common_hv_2ht_2vt_32w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
&filt_hor[3], &filt_ver[3], (int32_t)h);
|
||||
break;
|
||||
case 64:
|
||||
common_hv_2ht_2vt_64w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
&filt_hor[3], &filt_ver[3], (int32_t)h);
|
||||
break;
|
||||
default:
|
||||
vp9_convolve8_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
break;
|
||||
}
|
||||
} else if (((const int32_t *)filter_x)[0] == 0 ||
|
||||
((const int32_t *)filter_y)[0] == 0) {
|
||||
vp9_convolve8_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
} else {
|
||||
switch (w) {
|
||||
case 4:
|
||||
common_hv_8ht_8vt_4w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
filt_hor, filt_ver, (int32_t)h);
|
||||
break;
|
||||
case 8:
|
||||
common_hv_8ht_8vt_8w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
filt_hor, filt_ver, (int32_t)h);
|
||||
break;
|
||||
case 16:
|
||||
common_hv_8ht_8vt_16w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
filt_hor, filt_ver, (int32_t)h);
|
||||
break;
|
||||
case 32:
|
||||
common_hv_8ht_8vt_32w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
filt_hor, filt_ver, (int32_t)h);
|
||||
break;
|
||||
case 64:
|
||||
common_hv_8ht_8vt_64w_msa(src, (int32_t)src_stride,
|
||||
dst, (int32_t)dst_stride,
|
||||
filt_hor, filt_ver, (int32_t)h);
|
||||
break;
|
||||
default:
|
||||
vp9_convolve8_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
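A note on the word-wise tap checks in vp9_convolve8_msa above: the filters are arrays of eight int16_t taps, and the dispatcher inspects them two taps at a time through a const int32_t pointer. The sketch below is not part of the diff; it is a plain-C reading of the two comparisons, assuming little-endian int16 packing as on the MIPS targets this code builds for.

#include <stdint.h>

/* Sketch only: what the dispatcher's filter checks amount to. */
static int is_copy_filter(const int16_t *filter) {
  /* The identity sub-pel filter is {0, 0, 0, 128, 0, 0, 0, 0}; word [1]
   * packs taps 2 and 3, and little-endian {0, 128} reads back as
   * 0x00800000, which the 0x800000 comparison matches. */
  return ((const int32_t *)filter)[1] == 0x800000;
}

static int is_short_filter(const int16_t *filter) {
  /* Word [0] packs taps 0 and 1; both zero is taken to mean the outer
   * taps are unused, so the 2-tap kernels can run on &filter[3]. */
  return ((const int32_t *)filter)[0] == 0;
}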
856  vp9/common/mips/msa/vp9_convolve8_vert_msa.c  Normal file
@@ -0,0 +1,856 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "vp9/common/mips/msa/vp9_convolve_msa.h"

static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
  v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
  v16i8 src2110, src4332, src6554, src8776, src10998;
  v8i16 filt, out10, out32;
  v16i8 filt0, filt1, filt2, filt3;

  src -= (3 * src_stride);

  filt = LOAD_SH(filter);
  filt0 = (v16i8)__msa_splati_h(filt, 0);
  filt1 = (v16i8)__msa_splati_h(filt, 1);
  filt2 = (v16i8)__msa_splati_h(filt, 2);
  filt3 = (v16i8)__msa_splati_h(filt, 3);

  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
                  src1, src3, src5, src2, src4, src6,
                  src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);

  ILVR_D_3VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r,
                  src6554, src65_r, src54_r);

  XORI_B_3VECS_SB(src2110, src4332, src6554, src2110, src4332, src6554, 128);

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
    src += (4 * src_stride);

    ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
                    src76_r, src87_r, src98_r, src109_r);

    ILVR_D_2VECS_SB(src8776, src87_r, src76_r, src10998, src109_r, src98_r);

    XORI_B_2VECS_SB(src8776, src10998, src8776, src10998, 128);

    out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776,
                                filt0, filt1, filt2, filt3);
    out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998,
                                filt0, filt1, filt2, filt3);

    out10 = SRARI_SATURATE_SIGNED_H(out10, FILTER_BITS, 7);
    out32 = SRARI_SATURATE_SIGNED_H(out32, FILTER_BITS, 7);

    PCKEV_2B_XORI128_STORE_4_BYTES_4(out10, out32, dst, dst_stride);
    dst += (4 * dst_stride);

    src2110 = src6554;
    src4332 = src8776;
    src6554 = src10998;

    src6 = src10;
  }
}

static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
  v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
  v16i8 filt0, filt1, filt2, filt3;
  v8i16 filt, out0_r, out1_r, out2_r, out3_r;

  src -= (3 * src_stride);

  filt = LOAD_SH(filter);
  filt0 = (v16i8)__msa_splati_h(filt, 0);
  filt1 = (v16i8)__msa_splati_h(filt, 1);
  filt2 = (v16i8)__msa_splati_h(filt, 2);
  filt3 = (v16i8)__msa_splati_h(filt, 3);

  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
                  src0, src1, src2, src3, src4, src5, src6, 128);

  ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
                  src1, src3, src5, src2, src4, src6,
                  src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
    src += (4 * src_stride);

    XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);

    ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
                    src76_r, src87_r, src98_r, src109_r);

    out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
                                 filt0, filt1, filt2, filt3);
    out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
                                 filt0, filt1, filt2, filt3);
    out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
                                 filt0, filt1, filt2, filt3);
    out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
                                 filt0, filt1, filt2, filt3);

    out0_r = SRARI_SATURATE_SIGNED_H(out0_r, FILTER_BITS, 7);
    out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7);
    out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7);
    out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7);

    PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0_r, out1_r, out2_r, out3_r,
                                      dst, dst_stride);
    dst += (4 * dst_stride);

    src10_r = src54_r;
    src32_r = src76_r;
    src54_r = src98_r;
    src21_r = src65_r;
    src43_r = src87_r;
    src65_r = src109_r;

    src6 = src10;
  }
}

static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
                                      uint8_t *dst, int32_t dst_stride,
                                      int8_t *filter, int32_t height,
                                      int32_t width) {
  const uint8_t *src_tmp;
  uint8_t *dst_tmp;
  uint32_t loop_cnt, cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16i8 filt0, filt1, filt2, filt3;
  v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
  v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
  v16i8 src10_l, src32_l, src54_l, src76_l, src98_l;
  v16i8 src21_l, src43_l, src65_l, src87_l, src109_l;
  v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
  v8i16 filt;
  v16u8 tmp0, tmp1, tmp2, tmp3;

  src -= (3 * src_stride);

  filt = LOAD_SH(filter);
  filt0 = (v16i8)__msa_splati_h(filt, 0);
  filt1 = (v16i8)__msa_splati_h(filt, 1);
  filt2 = (v16i8)__msa_splati_h(filt, 2);
  filt3 = (v16i8)__msa_splati_h(filt, 3);

  for (cnt = (width >> 4); cnt--;) {
    src_tmp = src;
    dst_tmp = dst;

    LOAD_7VECS_SB(src_tmp, src_stride,
                  src0, src1, src2, src3, src4, src5, src6);
    src_tmp += (7 * src_stride);

    XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
                    src0, src1, src2, src3, src4, src5, src6, 128);

    ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
                    src1, src3, src5, src2, src4, src6,
                    src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);

    ILVL_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
                    src1, src3, src5, src2, src4, src6,
                    src10_l, src32_l, src54_l, src21_l, src43_l, src65_l);

    for (loop_cnt = (height >> 2); loop_cnt--;) {
      LOAD_4VECS_SB(src_tmp, src_stride, src7, src8, src9, src10);
      src_tmp += (4 * src_stride);

      XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);

      ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
                      src76_r, src87_r, src98_r, src109_r);

      ILVL_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
                      src76_l, src87_l, src98_l, src109_l);

      out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
                                   filt0, filt1, filt2, filt3);
      out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
                                   filt0, filt1, filt2, filt3);
      out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
                                   filt0, filt1, filt2, filt3);
      out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
                                   filt0, filt1, filt2, filt3);

      out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l,
                                   filt0, filt1, filt2, filt3);
      out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l,
                                   filt0, filt1, filt2, filt3);
      out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l,
                                   filt0, filt1, filt2, filt3);
      out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
                                   filt0, filt1, filt2, filt3);

      out0_r = SRARI_SATURATE_SIGNED_H(out0_r, FILTER_BITS, 7);
      out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7);
      out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7);
      out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7);
      out0_l = SRARI_SATURATE_SIGNED_H(out0_l, FILTER_BITS, 7);
      out1_l = SRARI_SATURATE_SIGNED_H(out1_l, FILTER_BITS, 7);
      out2_l = SRARI_SATURATE_SIGNED_H(out2_l, FILTER_BITS, 7);
      out3_l = SRARI_SATURATE_SIGNED_H(out3_l, FILTER_BITS, 7);

      out0_r = (v8i16)__msa_pckev_b((v16i8)out0_l, (v16i8)out0_r);
      out1_r = (v8i16)__msa_pckev_b((v16i8)out1_l, (v16i8)out1_r);
      out2_r = (v8i16)__msa_pckev_b((v16i8)out2_l, (v16i8)out2_r);
      out3_r = (v8i16)__msa_pckev_b((v16i8)out3_l, (v16i8)out3_r);

      XORI_B_4VECS_UB(out0_r, out1_r, out2_r, out3_r,
                      tmp0, tmp1, tmp2, tmp3, 128);

      STORE_4VECS_UB(dst_tmp, dst_stride, tmp0, tmp1, tmp2, tmp3);
      dst_tmp += (4 * dst_stride);

      src10_r = src54_r;
      src32_r = src76_r;
      src54_r = src98_r;
      src21_r = src65_r;
      src43_r = src87_r;
      src65_r = src109_r;

      src10_l = src54_l;
      src32_l = src76_l;
      src54_l = src98_l;
      src21_l = src65_l;
      src43_l = src87_l;
      src65_l = src109_l;

      src6 = src10;
    }

    src += 16;
    dst += 16;
  }
}

static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
                            filter, height, 16);
}

static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
                            filter, height, 32);
}

static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
                            filter, height, 64);
}

static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  uint32_t out0, out1, out2, out3;
  v16i8 src0, src1, src2, src3, src4;
  v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
  v16i8 filt0;
  v8u16 filt;

  filt = LOAD_UH(filter);
  filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0);

  LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
  src += (5 * src_stride);

  ILVR_B_4VECS_SB(src0, src1, src2, src3, src1, src2, src3, src4,
                  src10_r, src21_r, src32_r, src43_r);

  ILVR_D_2VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r);

  src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0);
  src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0);

  src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7);
  src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7);

  src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110);

  out0 = __msa_copy_u_w((v4i32)src2110, 0);
  out1 = __msa_copy_u_w((v4i32)src2110, 1);
  out2 = __msa_copy_u_w((v4i32)src2110, 2);
  out3 = __msa_copy_u_w((v4i32)src2110, 3);

  STORE_WORD(dst, out0);
  dst += dst_stride;
  STORE_WORD(dst, out1);
  dst += dst_stride;
  STORE_WORD(dst, out2);
  dst += dst_stride;
  STORE_WORD(dst, out3);
}

static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  uint32_t out0, out1, out2, out3, out4, out5, out6, out7;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
  v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
  v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
  v16i8 filt0;
  v8u16 filt;

  filt = LOAD_UH(filter);
  filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0);

  LOAD_8VECS_SB(src, src_stride,
                src0, src1, src2, src3, src4, src5, src6, src7);
  src += (8 * src_stride);

  src8 = LOAD_SB(src);
  src += src_stride;

  ILVR_B_8VECS_SB(src0, src1, src2, src3, src4, src5, src6, src7,
                  src1, src2, src3, src4, src5, src6, src7, src8,
                  src10_r, src21_r, src32_r, src43_r,
                  src54_r, src65_r, src76_r, src87_r);

  ILVR_D_4VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r,
                  src6554, src65_r, src54_r, src8776, src87_r, src76_r);

  src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0);
  src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0);
  src6554 = (v16i8)__msa_dotp_u_h((v16u8)src6554, (v16u8)filt0);
  src8776 = (v16i8)__msa_dotp_u_h((v16u8)src8776, (v16u8)filt0);

  src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7);
  src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7);
  src6554 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src6554, FILTER_BITS, 7);
  src8776 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src8776, FILTER_BITS, 7);

  src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110);
  src4332 = (v16i8)__msa_pckev_b((v16i8)src8776, (v16i8)src6554);

  out0 = __msa_copy_u_w((v4i32)src2110, 0);
  out1 = __msa_copy_u_w((v4i32)src2110, 1);
  out2 = __msa_copy_u_w((v4i32)src2110, 2);
  out3 = __msa_copy_u_w((v4i32)src2110, 3);
  out4 = __msa_copy_u_w((v4i32)src4332, 0);
  out5 = __msa_copy_u_w((v4i32)src4332, 1);
  out6 = __msa_copy_u_w((v4i32)src4332, 2);
  out7 = __msa_copy_u_w((v4i32)src4332, 3);

  STORE_WORD(dst, out0);
  dst += dst_stride;
  STORE_WORD(dst, out1);
  dst += dst_stride;
  STORE_WORD(dst, out2);
  dst += dst_stride;
  STORE_WORD(dst, out3);
  dst += dst_stride;
  STORE_WORD(dst, out4);
  dst += dst_stride;
  STORE_WORD(dst, out5);
  dst += dst_stride;
  STORE_WORD(dst, out6);
  dst += dst_stride;
  STORE_WORD(dst, out7);
}

static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  if (4 == height) {
    common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
  } else if (8 == height) {
    common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
  }
}

static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter) {
  v16u8 src0, src1, src2, src3, src4;
  v16u8 vec0, vec1, vec2, vec3, filt0;
  v8u16 tmp0, tmp1, tmp2, tmp3;
  v8u16 filt;

  /* rearranging filter_y */
  filt = LOAD_UH(filter);
  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

  LOAD_5VECS_UB(src, src_stride, src0, src1, src2, src3, src4);

  ILVR_B_2VECS_UB(src0, src1, src1, src2, vec0, vec1);
  ILVR_B_2VECS_UB(src2, src3, src3, src4, vec2, vec3);

  /* filter calc */
  tmp0 = __msa_dotp_u_h(vec0, filt0);
  tmp1 = __msa_dotp_u_h(vec1, filt0);
  tmp2 = __msa_dotp_u_h(vec2, filt0);
  tmp3 = __msa_dotp_u_h(vec3, filt0);

  tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
  tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
  tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
  tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

  PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
}

static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
                                     uint8_t *dst, int32_t dst_stride,
                                     int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  v8u16 tmp0, tmp1, tmp2, tmp3;
  v8u16 filt;

  /* rearranging filter_y */
  filt = LOAD_UH(filter);
  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

  src0 = LOAD_UB(src);
  src += src_stride;

  for (loop_cnt = (height >> 3); loop_cnt--;) {
    LOAD_8VECS_UB(src, src_stride,
                  src1, src2, src3, src4, src5, src6, src7, src8);
    src += (8 * src_stride);

    ILVR_B_4VECS_UB(src0, src1, src2, src3, src1, src2, src3, src4,
                    vec0, vec1, vec2, vec3);

    ILVR_B_4VECS_UB(src4, src5, src6, src7, src5, src6, src7, src8,
                    vec4, vec5, vec6, vec7);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);
    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
    dst += (4 * dst_stride);

    tmp0 = __msa_dotp_u_h(vec4, filt0);
    tmp1 = __msa_dotp_u_h(vec5, filt0);
    tmp2 = __msa_dotp_u_h(vec6, filt0);
    tmp3 = __msa_dotp_u_h(vec7, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
    dst += (4 * dst_stride);

    src0 = src8;
  }
}

static void common_vt_2t_8w_msa(const uint8_t *src, int32_t src_stride,
                                uint8_t *dst, int32_t dst_stride,
                                int8_t *filter, int32_t height) {
  if (4 == height) {
    common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
  } else {
    common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
  }
}

static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16u8 src0, src1, src2, src3, src4;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  v8u16 tmp0, tmp1, tmp2, tmp3;
  v8u16 filt;

  /* rearranging filter_y */
  filt = LOAD_UH(filter);
  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

  src0 = LOAD_UB(src);
  src += src_stride;

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4);
    src += (4 * src_stride);

    ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
    dst += dst_stride;

    ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6);

    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst);
    dst += dst_stride;

    tmp0 = __msa_dotp_u_h(vec4, filt0);
    tmp1 = __msa_dotp_u_h(vec5, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
    dst += dst_stride;

    tmp2 = __msa_dotp_u_h(vec6, filt0);
    tmp3 = __msa_dotp_u_h(vec7, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst);
    dst += dst_stride;

    src0 = src4;
  }
}

static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  v8u16 tmp0, tmp1, tmp2, tmp3;
  v8u16 filt;

  /* rearranging filter_y */
  filt = LOAD_UH(filter);
  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

  src0 = LOAD_UB(src);
  src5 = LOAD_UB(src + 16);
  src += src_stride;

  for (loop_cnt = (height >> 2); loop_cnt--;) {
    LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4);

    ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);

    LOAD_4VECS_UB(src + 16, src_stride, src6, src7, src8, src9);
    src += (4 * src_stride);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst);

    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride);

    ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6);

    tmp0 = __msa_dotp_u_h(vec4, filt0);
    tmp1 = __msa_dotp_u_h(vec5, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 2 * dst_stride);

    tmp2 = __msa_dotp_u_h(vec6, filt0);
    tmp3 = __msa_dotp_u_h(vec7, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 3 * dst_stride);

    ILV_B_LRLR_UB(src5, src6, src6, src7, vec1, vec0, vec3, vec2);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16);

    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + dst_stride);

    ILV_B_LRLR_UB(src7, src8, src8, src9, vec5, vec4, vec7, vec6);

    tmp0 = __msa_dotp_u_h(vec4, filt0);
    tmp1 = __msa_dotp_u_h(vec5, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16 + 2 * dst_stride);

    tmp2 = __msa_dotp_u_h(vec6, filt0);
    tmp3 = __msa_dotp_u_h(vec7, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + 3 * dst_stride);
    dst += (4 * dst_stride);

    src0 = src4;
    src5 = src9;
  }
}

static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
                                 uint8_t *dst, int32_t dst_stride,
                                 int8_t *filter, int32_t height) {
  uint32_t loop_cnt;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src8, src9, src10, src11;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
  v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  v8u16 filt;

  /* rearranging filter_y */
  filt = LOAD_UH(filter);
  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

  LOAD_4VECS_UB(src, 16, src0, src3, src6, src9);
  src += src_stride;

  for (loop_cnt = (height >> 1); loop_cnt--;) {
    LOAD_2VECS_UB(src, src_stride, src1, src2);
    LOAD_2VECS_UB(src + 16, src_stride, src4, src5);
    LOAD_2VECS_UB(src + 32, src_stride, src7, src8);
    LOAD_2VECS_UB(src + 48, src_stride, src10, src11);
    src += (2 * src_stride);

    ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst);

    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride);

    ILV_B_LRLR_UB(src3, src4, src4, src5, vec5, vec4, vec7, vec6);

    tmp4 = __msa_dotp_u_h(vec4, filt0);
    tmp5 = __msa_dotp_u_h(vec5, filt0);

    tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
    tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 16);

    tmp6 = __msa_dotp_u_h(vec6, filt0);
    tmp7 = __msa_dotp_u_h(vec7, filt0);

    tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
    tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 16 + dst_stride);

    ILV_B_LRLR_UB(src6, src7, src7, src8, vec1, vec0, vec3, vec2);

    tmp0 = __msa_dotp_u_h(vec0, filt0);
    tmp1 = __msa_dotp_u_h(vec1, filt0);

    tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 32);

    tmp2 = __msa_dotp_u_h(vec2, filt0);
    tmp3 = __msa_dotp_u_h(vec3, filt0);

    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 32 + dst_stride);

    ILV_B_LRLR_UB(src9, src10, src10, src11, vec5, vec4, vec7, vec6);

    tmp4 = __msa_dotp_u_h(vec4, filt0);
    tmp5 = __msa_dotp_u_h(vec5, filt0);

    tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
    tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 48);

    tmp6 = __msa_dotp_u_h(vec6, filt0);
    tmp7 = __msa_dotp_u_h(vec7, filt0);

    tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
    tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);

    PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 48 + dst_stride);
    dst += (2 * dst_stride);

    src0 = src2;
    src3 = src5;
    src6 = src8;
    src9 = src11;
  }
}

void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
                            int w, int h) {
  int8_t cnt, filt_ver[8];

  if (16 != y_step_q4) {
    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                         filter_x, x_step_q4, filter_y, y_step_q4,
                         w, h);
    return;
  }

  if (((const int32_t *)filter_y)[1] == 0x800000) {
    vp9_convolve_copy(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
    return;
  }

  for (cnt = 8; cnt--;) {
    filt_ver[cnt] = filter_y[cnt];
  }

  if (((const int32_t *)filter_y)[0] == 0) {
    switch (w) {
      case 4:
        common_vt_2t_4w_msa(src, (int32_t)src_stride,
                            dst, (int32_t)dst_stride,
                            &filt_ver[3], h);
        break;
      case 8:
        common_vt_2t_8w_msa(src, (int32_t)src_stride,
                            dst, (int32_t)dst_stride,
                            &filt_ver[3], h);
        break;
      case 16:
        common_vt_2t_16w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             &filt_ver[3], h);
        break;
      case 32:
        common_vt_2t_32w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             &filt_ver[3], h);
        break;
      case 64:
        common_vt_2t_64w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             &filt_ver[3], h);
        break;
      default:
        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
        break;
    }
  } else {
    switch (w) {
      case 4:
        common_vt_8t_4w_msa(src, (int32_t)src_stride,
                            dst, (int32_t)dst_stride,
                            filt_ver, h);
        break;
      case 8:
        common_vt_8t_8w_msa(src, (int32_t)src_stride,
                            dst, (int32_t)dst_stride,
                            filt_ver, h);
        break;
      case 16:
        common_vt_8t_16w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             filt_ver, h);
        break;
      case 32:
        common_vt_8t_32w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             filt_ver, h);
        break;
      case 64:
        common_vt_8t_64w_msa(src, (int32_t)src_stride,
                             dst, (int32_t)dst_stride,
                             filt_ver, h);
        break;
      default:
        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, h);
        break;
    }
  }
}
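For reference, every 2-tap vertical kernel above computes, per output pixel, one weighted average followed by biased rounding and unsigned 8-bit saturation. A minimal scalar model follows; it is not from the file, and it assumes FILTER_BITS == 7 from vp9_filter.h (so the rounding bias is 64) and that the two bilinear taps fit in a byte, which holds for the {128-x, x} tap pairs this path is given.

#include <stdint.h>

/* Scalar model of __msa_dotp_u_h + SRARI_SATURATE_UNSIGNED_H, one pixel. */
static uint8_t vert_2t_pixel(uint8_t above, uint8_t below,
                             uint8_t f0, uint8_t f1) {
  uint32_t sum = (uint32_t)above * f0 + (uint32_t)below * f1;
  sum = (sum + 64) >> 7;                  /* round to nearest, then shift */
  return sum > 255 ? 255 : (uint8_t)sum;  /* saturate to 8 bits */
}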
335  vp9/common/mips/msa/vp9_convolve_avg_msa.c  Normal file
@@ -0,0 +1,335 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "vp9/common/mips/msa/vp9_macros_msa.h"

static void avg_width4_msa(const uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t cnt;
  uint32_t out0, out1, out2, out3;
  v16u8 src0, src1, src2, src3;
  v16u8 dst0, dst1, dst2, dst3;

  if (0 == (height % 4)) {
    for (cnt = (height / 4); cnt--;) {
      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      src += (4 * src_stride);

      LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3);

      dst0 = __msa_aver_u_b(src0, dst0);
      dst1 = __msa_aver_u_b(src1, dst1);
      dst2 = __msa_aver_u_b(src2, dst2);
      dst3 = __msa_aver_u_b(src3, dst3);

      out0 = __msa_copy_u_w((v4i32)dst0, 0);
      out1 = __msa_copy_u_w((v4i32)dst1, 0);
      out2 = __msa_copy_u_w((v4i32)dst2, 0);
      out3 = __msa_copy_u_w((v4i32)dst3, 0);

      STORE_WORD(dst, out0);
      dst += dst_stride;
      STORE_WORD(dst, out1);
      dst += dst_stride;
      STORE_WORD(dst, out2);
      dst += dst_stride;
      STORE_WORD(dst, out3);
      dst += dst_stride;
    }
  } else if (0 == (height % 2)) {
    for (cnt = (height / 2); cnt--;) {
      LOAD_2VECS_UB(src, src_stride, src0, src1);
      src += (2 * src_stride);

      LOAD_2VECS_UB(dst, dst_stride, dst0, dst1);

      dst0 = __msa_aver_u_b(src0, dst0);
      dst1 = __msa_aver_u_b(src1, dst1);

      out0 = __msa_copy_u_w((v4i32)dst0, 0);
      out1 = __msa_copy_u_w((v4i32)dst1, 0);

      STORE_WORD(dst, out0);
      dst += dst_stride;
      STORE_WORD(dst, out1);
      dst += dst_stride;
    }
  }
}

static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t cnt;
  uint64_t out0, out1, out2, out3;
  v16u8 src0, src1, src2, src3;
  v16u8 dst0, dst1, dst2, dst3;

  for (cnt = (height / 4); cnt--;) {
    LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
    src += (4 * src_stride);

    LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3);

    dst0 = __msa_aver_u_b(src0, dst0);
    dst1 = __msa_aver_u_b(src1, dst1);
    dst2 = __msa_aver_u_b(src2, dst2);
    dst3 = __msa_aver_u_b(src3, dst3);

    out0 = __msa_copy_u_d((v2i64)dst0, 0);
    out1 = __msa_copy_u_d((v2i64)dst1, 0);
    out2 = __msa_copy_u_d((v2i64)dst2, 0);
    out3 = __msa_copy_u_d((v2i64)dst3, 0);

    STORE_DWORD(dst, out0);
    dst += dst_stride;
    STORE_DWORD(dst, out1);
    dst += dst_stride;
    STORE_DWORD(dst, out2);
    dst += dst_stride;
    STORE_DWORD(dst, out3);
    dst += dst_stride;
  }
}

static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
                            uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t cnt;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;

  for (cnt = (height / 8); cnt--;) {
    LOAD_8VECS_UB(src, src_stride,
                  src0, src1, src2, src3, src4, src5, src6, src7);
    src += (8 * src_stride);

    LOAD_8VECS_UB(dst, dst_stride,
                  dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);

    dst0 = __msa_aver_u_b(src0, dst0);
    dst1 = __msa_aver_u_b(src1, dst1);
    dst2 = __msa_aver_u_b(src2, dst2);
    dst3 = __msa_aver_u_b(src3, dst3);
    dst4 = __msa_aver_u_b(src4, dst4);
    dst5 = __msa_aver_u_b(src5, dst5);
    dst6 = __msa_aver_u_b(src6, dst6);
    dst7 = __msa_aver_u_b(src7, dst7);

    STORE_8VECS_UB(dst, dst_stride,
                   dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
    dst += (8 * dst_stride);
  }
}

static void avg_width32_msa(const uint8_t *src, int32_t src_stride,
                            uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t cnt;
  uint8_t *dst_dup = dst;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
  v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
  v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;

  for (cnt = (height / 8); cnt--;) {
    src0 = LOAD_UB(src);
    src1 = LOAD_UB(src + 16);
    src += src_stride;
    src2 = LOAD_UB(src);
    src3 = LOAD_UB(src + 16);
    src += src_stride;
    src4 = LOAD_UB(src);
    src5 = LOAD_UB(src + 16);
    src += src_stride;
    src6 = LOAD_UB(src);
    src7 = LOAD_UB(src + 16);
    src += src_stride;

    dst0 = LOAD_UB(dst_dup);
    dst1 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst2 = LOAD_UB(dst_dup);
    dst3 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst4 = LOAD_UB(dst_dup);
    dst5 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst6 = LOAD_UB(dst_dup);
    dst7 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;

    src8 = LOAD_UB(src);
    src9 = LOAD_UB(src + 16);
    src += src_stride;
    src10 = LOAD_UB(src);
    src11 = LOAD_UB(src + 16);
    src += src_stride;
    src12 = LOAD_UB(src);
    src13 = LOAD_UB(src + 16);
    src += src_stride;
    src14 = LOAD_UB(src);
    src15 = LOAD_UB(src + 16);
    src += src_stride;

    dst8 = LOAD_UB(dst_dup);
    dst9 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst10 = LOAD_UB(dst_dup);
    dst11 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst12 = LOAD_UB(dst_dup);
    dst13 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;
    dst14 = LOAD_UB(dst_dup);
    dst15 = LOAD_UB(dst_dup + 16);
    dst_dup += dst_stride;

    dst0 = __msa_aver_u_b(src0, dst0);
    dst1 = __msa_aver_u_b(src1, dst1);
    dst2 = __msa_aver_u_b(src2, dst2);
    dst3 = __msa_aver_u_b(src3, dst3);
    dst4 = __msa_aver_u_b(src4, dst4);
    dst5 = __msa_aver_u_b(src5, dst5);
    dst6 = __msa_aver_u_b(src6, dst6);
    dst7 = __msa_aver_u_b(src7, dst7);
    dst8 = __msa_aver_u_b(src8, dst8);
    dst9 = __msa_aver_u_b(src9, dst9);
    dst10 = __msa_aver_u_b(src10, dst10);
    dst11 = __msa_aver_u_b(src11, dst11);
    dst12 = __msa_aver_u_b(src12, dst12);
    dst13 = __msa_aver_u_b(src13, dst13);
    dst14 = __msa_aver_u_b(src14, dst14);
    dst15 = __msa_aver_u_b(src15, dst15);

    STORE_UB(dst0, dst);
    STORE_UB(dst1, dst + 16);
    dst += dst_stride;
    STORE_UB(dst2, dst);
    STORE_UB(dst3, dst + 16);
    dst += dst_stride;
    STORE_UB(dst4, dst);
    STORE_UB(dst5, dst + 16);
    dst += dst_stride;
    STORE_UB(dst6, dst);
    STORE_UB(dst7, dst + 16);
    dst += dst_stride;
    STORE_UB(dst8, dst);
    STORE_UB(dst9, dst + 16);
    dst += dst_stride;
    STORE_UB(dst10, dst);
    STORE_UB(dst11, dst + 16);
    dst += dst_stride;
    STORE_UB(dst12, dst);
    STORE_UB(dst13, dst + 16);
    dst += dst_stride;
    STORE_UB(dst14, dst);
    STORE_UB(dst15, dst + 16);
    dst += dst_stride;
  }
}

static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
                            uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t cnt;
  uint8_t *dst_dup = dst;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
  v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
  v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;

  for (cnt = (height / 4); cnt--;) {
    LOAD_4VECS_UB(src, 16, src0, src1, src2, src3);
    src += src_stride;
    LOAD_4VECS_UB(src, 16, src4, src5, src6, src7);
    src += src_stride;
    LOAD_4VECS_UB(src, 16, src8, src9, src10, src11);
    src += src_stride;
    LOAD_4VECS_UB(src, 16, src12, src13, src14, src15);
    src += src_stride;

    LOAD_4VECS_UB(dst_dup, 16, dst0, dst1, dst2, dst3);
    dst_dup += dst_stride;
    LOAD_4VECS_UB(dst_dup, 16, dst4, dst5, dst6, dst7);
    dst_dup += dst_stride;
    LOAD_4VECS_UB(dst_dup, 16, dst8, dst9, dst10, dst11);
    dst_dup += dst_stride;
    LOAD_4VECS_UB(dst_dup, 16, dst12, dst13, dst14, dst15);
    dst_dup += dst_stride;

    dst0 = __msa_aver_u_b(src0, dst0);
    dst1 = __msa_aver_u_b(src1, dst1);
    dst2 = __msa_aver_u_b(src2, dst2);
    dst3 = __msa_aver_u_b(src3, dst3);
    dst4 = __msa_aver_u_b(src4, dst4);
    dst5 = __msa_aver_u_b(src5, dst5);
    dst6 = __msa_aver_u_b(src6, dst6);
    dst7 = __msa_aver_u_b(src7, dst7);
    dst8 = __msa_aver_u_b(src8, dst8);
    dst9 = __msa_aver_u_b(src9, dst9);
    dst10 = __msa_aver_u_b(src10, dst10);
    dst11 = __msa_aver_u_b(src11, dst11);
    dst12 = __msa_aver_u_b(src12, dst12);
    dst13 = __msa_aver_u_b(src13, dst13);
    dst14 = __msa_aver_u_b(src14, dst14);
    dst15 = __msa_aver_u_b(src15, dst15);

    STORE_4VECS_UB(dst, 16, dst0, dst1, dst2, dst3);
    dst += dst_stride;
    STORE_4VECS_UB(dst, 16, dst4, dst5, dst6, dst7);
    dst += dst_stride;
    STORE_4VECS_UB(dst, 16, dst8, dst9, dst10, dst11);
    dst += dst_stride;
    STORE_4VECS_UB(dst, 16, dst12, dst13, dst14, dst15);
    dst += dst_stride;
  }
}

void vp9_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int32_t filter_x_stride,
                          const int16_t *filter_y, int32_t filter_y_stride,
                          int32_t w, int32_t h) {
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;

  switch (w) {
    case 4: {
      avg_width4_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 8: {
      avg_width8_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 16: {
      avg_width16_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 32: {
      avg_width32_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 64: {
      avg_width64_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    default: {
      int32_t lp, cnt;
      for (cnt = h; cnt--;) {
        for (lp = 0; lp < w; ++lp) {
          dst[lp] = (((dst[lp] + src[lp]) + 1) >> 1);
        }
        src += src_stride;
        dst += dst_stride;
      }
      break;
    }
  }
}
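The scalar fallback in the default case above also documents what the vector paths do: __msa_aver_u_b is a per-byte rounding average. A one-line equivalent (a sketch, not from the file):

#include <stdint.h>

/* Per-byte equivalent of __msa_aver_u_b, matching the scalar default. */
static uint8_t avg_pixel(uint8_t a, uint8_t b) {
  return (uint8_t)((a + b + 1) >> 1);
}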
300  vp9/common/mips/msa/vp9_convolve_copy_msa.c  Normal file
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "vp9/common/mips/msa/vp9_macros_msa.h"
|
||||
|
||||
static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
int32_t height) {
|
||||
int32_t cnt;
|
||||
uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
|
||||
if (0 == height % 12) {
|
||||
for (cnt = (height / 12); cnt--;) {
|
||||
LOAD_8VECS_UB(src, src_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)src0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)src1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)src2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)src3, 0);
|
||||
out4 = __msa_copy_u_d((v2i64)src4, 0);
|
||||
out5 = __msa_copy_u_d((v2i64)src5, 0);
|
||||
out6 = __msa_copy_u_d((v2i64)src6, 0);
|
||||
out7 = __msa_copy_u_d((v2i64)src7, 0);
|
||||
|
||||
STORE_DWORD(dst, out0);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out1);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out2);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out3);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out4);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out5);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out6);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out7);
|
||||
dst += dst_stride;
|
||||
|
||||
LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)src0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)src1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)src2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)src3, 0);
|
||||
|
||||
STORE_DWORD(dst, out0);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out1);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out2);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out3);
|
||||
dst += dst_stride;
|
||||
}
|
||||
} else if (0 == height % 8) {
|
||||
for (cnt = height >> 3; cnt--;) {
|
||||
LOAD_8VECS_UB(src, src_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)src0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)src1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)src2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)src3, 0);
|
||||
out4 = __msa_copy_u_d((v2i64)src4, 0);
|
||||
out5 = __msa_copy_u_d((v2i64)src5, 0);
|
||||
out6 = __msa_copy_u_d((v2i64)src6, 0);
|
||||
out7 = __msa_copy_u_d((v2i64)src7, 0);
|
||||
|
||||
STORE_DWORD(dst, out0);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out1);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out2);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out3);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out4);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out5);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out6);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out7);
|
||||
dst += dst_stride;
|
||||
}
|
||||
} else if (0 == height % 4) {
|
||||
for (cnt = (height / 4); cnt--;) {
|
||||
LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)src0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)src1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)src2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)src3, 0);
|
||||
|
||||
STORE_DWORD(dst, out0);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out1);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out2);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out3);
|
||||
dst += dst_stride;
|
||||
}
|
||||
} else if (0 == height % 2) {
|
||||
for (cnt = (height / 2); cnt--;) {
|
||||
LOAD_2VECS_UB(src, src_stride, src0, src1);
|
||||
src += (2 * src_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)src0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)src1, 0);
|
||||
|
||||
STORE_DWORD(dst, out0);
|
||||
dst += dst_stride;
|
||||
STORE_DWORD(dst, out1);
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
int32_t height, int32_t width) {
|
||||
int32_t cnt, loop_cnt;
|
||||
const uint8_t *src_tmp;
|
||||
uint8_t *dst_tmp;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
|
||||
for (cnt = (width >> 4); cnt--;) {
|
||||
src_tmp = src;
|
||||
dst_tmp = dst;
|
||||
|
||||
for (loop_cnt = (height >> 3); loop_cnt--;) {
|
||||
LOAD_8VECS_UB(src_tmp, src_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src_tmp += (8 * src_stride);
|
||||
|
||||
STORE_8VECS_UB(dst_tmp, dst_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
dst_tmp += (8 * dst_stride);
|
||||
}
|
||||
|
||||
src += 16;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
int32_t height) {
|
||||
int32_t cnt;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
|
||||
if (0 == height % 12) {
|
||||
for (cnt = (height / 12); cnt--;) {
|
||||
LOAD_8VECS_UB(src, src_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
|
||||
STORE_8VECS_UB(dst, dst_stride,
|
||||
src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
dst += (8 * dst_stride);
|
||||
|
||||
LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
|
||||
dst += (4 * dst_stride);
|
||||
}
|
||||
} else if (0 == height % 8) {
|
    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
  } else if (0 == height % 4) {
    for (cnt = (height >> 2); cnt--;) {
      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      src += (4 * src_stride);

      STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
      dst += (4 * dst_stride);
    }
  }
}

static void copy_width32_msa(const uint8_t *src, int32_t src_stride,
                             uint8_t *dst, int32_t dst_stride,
                             int32_t height) {
  int32_t cnt;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;

  if (0 == height % 12) {
    for (cnt = (height / 12); cnt--;) {
      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
      src += (4 * src_stride);

      STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
      STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
      dst += (4 * dst_stride);

      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
      src += (4 * src_stride);

      STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
      STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
      dst += (4 * dst_stride);

      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
      src += (4 * src_stride);

      STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
      STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
      dst += (4 * dst_stride);
    }
  } else if (0 == height % 8) {
    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
  } else if (0 == height % 4) {
    for (cnt = (height >> 2); cnt--;) {
      LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
      LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
      src += (4 * src_stride);

      STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
      STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
      dst += (4 * dst_stride);
    }
  }
}

static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
                             uint8_t *dst, int32_t dst_stride,
                             int32_t height) {
  copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
}

void vp9_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int32_t filter_x_stride,
                           const int16_t *filter_y, int32_t filter_y_stride,
                           int32_t w, int32_t h) {
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;

  switch (w) {
    case 4: {
      uint32_t cnt, tmp;
      /* 1 word storage */
      for (cnt = h; cnt--;) {
        tmp = LOAD_WORD(src);
        STORE_WORD(dst, tmp);
        src += src_stride;
        dst += dst_stride;
      }
      break;
    }
    case 8: {
      copy_width8_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 16: {
      copy_width16_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 32: {
      copy_width32_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    case 64: {
      copy_width64_msa(src, src_stride, dst, dst_stride, h);
      break;
    }
    default: {
      uint32_t cnt;
      for (cnt = h; cnt--;) {
        memcpy(dst, src, w);
        src += src_stride;
        dst += dst_stride;
      }
      break;
    }
  }
}
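
For orientation (an editor's aside, not part of the patch): every branch of the dispatch above reduces to the same operation, copying h rows of w bytes; the MSA paths only vectorize it. A minimal scalar sketch, with the helper name copy_block_c chosen here purely for illustration:

#include <stdint.h>
#include <string.h>

/* Hypothetical scalar reference for the copy kernel above. */
static void copy_block_c(const uint8_t *src, int src_stride,
                         uint8_t *dst, int dst_stride, int w, int h) {
  int r;
  for (r = 0; r < h; ++r) {
    memcpy(dst, src, w);  /* copy one row of w bytes */
    src += src_stride;    /* step to the next source row */
    dst += dst_stride;    /* step to the next destination row */
  }
}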
157 vp9/common/mips/msa/vp9_convolve_msa.h Normal file
@@ -0,0 +1,157 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_
#define VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_

#include "vp9/common/vp9_filter.h"
#include "vp9/common/mips/msa/vp9_macros_msa.h"

extern const uint8_t mc_filt_mask_arr[16 * 3];

#define HORIZ_8TAP_FILT(src, mask0, mask1, mask2, mask3,                     \
                        filt_h0, filt_h1, filt_h2, filt_h3) ({               \
  v8i16 vec0, vec1, vec2, vec3, horiz_out;                                   \
                                                                             \
  vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src), (v16i8)(src));    \
  vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0));                      \
  vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src), (v16i8)(src));    \
  vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1);               \
  vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src), (v16i8)(src));    \
  vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2));                      \
  vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src), (v16i8)(src));    \
  vec2 = __msa_dpadd_s_h(vec2, (v16i8)(filt_h3), (v16i8)vec3);               \
  vec0 = __msa_adds_s_h(vec0, vec2);                                         \
  horiz_out = SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7);                 \
                                                                             \
  horiz_out;                                                                 \
})
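
As a reading aid (an assumption-level sketch, not code from the patch): per output sample, HORIZ_8TAP_FILT amounts to an 8-tap dot product on sign-adjusted bytes, a rounding right shift by FILTER_BITS (__msa_srari_h), and signed saturation (__msa_sat_s_h with sat bits 7):

#include <stdint.h>

/* Hypothetical scalar model of one lane of HORIZ_8TAP_FILT. */
static int16_t horiz_8tap_ref(const int8_t *src, const int8_t *filt,
                              int filter_bits) {
  int k;
  int32_t sum = 0;
  for (k = 0; k < 8; ++k)
    sum += src[k] * filt[k];  /* the dotp_s_h/dpadd_s_h chain */
  sum = (sum + (1 << (filter_bits - 1))) >> filter_bits;  /* srari */
  if (sum > 127) sum = 127;   /* sat_s_h(..., 7): clamp to [-128, 127] */
  if (sum < -128) sum = -128;
  return (int16_t)sum;
}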

#define HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3,        \
                              filt_h0, filt_h1, filt_h2, filt_h3) ({         \
  v8i16 vec0, vec1, vec2, vec3, horiz_out;                                   \
                                                                             \
  vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0));  \
  vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0));                      \
  vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0));  \
  vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1);               \
  vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0));  \
  vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2));                      \
  vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0));  \
  vec2 = __msa_dpadd_s_h(vec2, (v16i8)(filt_h3), (v16i8)vec3);               \
  vec0 = __msa_adds_s_h(vec0, vec2);                                         \
  horiz_out = (v8i16)SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7);          \
                                                                             \
  horiz_out;                                                                 \
})

#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3,                          \
                            filt0, filt1, filt2, filt3) ({                   \
  v8i16 tmp0, tmp1;                                                          \
                                                                             \
  tmp0 = __msa_dotp_s_h((v16i8)(vec0), (v16i8)(filt0));                      \
  tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)(vec1), (v16i8)(filt1));               \
  tmp1 = __msa_dotp_s_h((v16i8)(vec2), (v16i8)(filt2));                      \
  tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)(vec3), (v16i8)(filt3));               \
  tmp0 = __msa_adds_s_h(tmp0, tmp1);                                         \
                                                                             \
  tmp0;                                                                      \
})
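
FILT_8TAP_DPADD_S_H is the accumulation step used by the vertical paths: four vectors of pre-interleaved sample pairs are multiplied against four filter-pair vectors and summed with saturating adds. Per output lane this is, roughly (a sketch that ignores the intermediate 16-bit wraparound the vector form can incur):

#include <stdint.h>

/* Illustrative per-lane model; s holds 8 interleaved samples, f the
 * 8 filter taps paired the same way. */
static int16_t filt_8tap_lane_ref(const int8_t s[8], const int8_t f[8]) {
  int t0 = s[0] * f[0] + s[1] * f[1]    /* dotp_s_h(vec0, filt0)       */
         + s[2] * f[2] + s[3] * f[3];   /* dpadd_s_h(..., vec1, filt1) */
  int t1 = s[4] * f[4] + s[5] * f[5]    /* dotp_s_h(vec2, filt2)       */
         + s[6] * f[6] + s[7] * f[7];   /* dpadd_s_h(..., vec3, filt3) */
  int sum = t0 + t1;                    /* adds_s_h: saturating add    */
  if (sum > 32767) sum = 32767;
  if (sum < -32768) sum = -32768;
  return (int16_t)sum;
}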

#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3,                    \
                                   mask0, mask1, mask2, mask3,                \
                                   filt0, filt1, filt2, filt3,                \
                                   out0, out1) {                              \
  v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;       \
  v8i16 res0_m, res1_m, res2_m, res3_m;                                       \
                                                                              \
  vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0)); \
  vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src2)); \
                                                                              \
  res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0));                     \
  res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0));                     \
                                                                              \
  vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0)); \
  vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src2)); \
                                                                              \
  res0_m = __msa_dpadd_s_h(res0_m, (v16i8)(filt1), (v16i8)vec2_m);            \
  res1_m = __msa_dpadd_s_h(res1_m, (v16i8)(filt1), (v16i8)vec3_m);            \
                                                                              \
  vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0)); \
  vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src2)); \
                                                                              \
  res2_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec4_m);                     \
  res3_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec5_m);                     \
                                                                              \
  vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0)); \
  vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src2)); \
                                                                              \
  res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt3), (v16i8)vec6_m);            \
  res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt3), (v16i8)vec7_m);            \
                                                                              \
  out0 = __msa_adds_s_h(res0_m, res2_m);                                      \
  out1 = __msa_adds_s_h(res1_m, res3_m);                                      \
}

#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
                                   mask0, mask1, mask2, mask3,                \
                                   filt0, filt1, filt2, filt3,                \
                                   out0, out1, out2, out3) {                  \
  v8i16 vec0_m, vec1_m, vec2_m, vec3_m;                                       \
  v8i16 vec4_m, vec5_m, vec6_m, vec7_m;                                       \
  v8i16 res0_m, res1_m, res2_m, res3_m;                                       \
  v8i16 res4_m, res5_m, res6_m, res7_m;                                       \
                                                                              \
  vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src0), (v16i8)(src0)); \
  vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src1)); \
  vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src2), (v16i8)(src2)); \
  vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src3)); \
                                                                              \
  res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0));                     \
  res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0));                     \
  res2_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt0));                     \
  res3_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt0));                     \
                                                                              \
  vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src0), (v16i8)(src0)); \
  vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src1)); \
  vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src2), (v16i8)(src2)); \
  vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src3)); \
                                                                              \
  res4_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt2));                     \
  res5_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt2));                     \
  res6_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt2));                     \
  res7_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt2));                     \
                                                                              \
  vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src0), (v16i8)(src0)); \
  vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src1)); \
  vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src2), (v16i8)(src2)); \
  vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src3)); \
                                                                              \
  res0_m = __msa_dpadd_s_h(res0_m, (v16i8)(filt1), (v16i8)vec4_m);            \
  res1_m = __msa_dpadd_s_h(res1_m, (v16i8)(filt1), (v16i8)vec5_m);            \
  res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt1), (v16i8)vec6_m);            \
  res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt1), (v16i8)vec7_m);            \
                                                                              \
  vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src0), (v16i8)(src0)); \
  vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src1)); \
  vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src2), (v16i8)(src2)); \
  vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src3)); \
                                                                              \
  res4_m = __msa_dpadd_s_h(res4_m, (v16i8)(filt3), (v16i8)vec4_m);            \
  res5_m = __msa_dpadd_s_h(res5_m, (v16i8)(filt3), (v16i8)vec5_m);            \
  res6_m = __msa_dpadd_s_h(res6_m, (v16i8)(filt3), (v16i8)vec6_m);            \
  res7_m = __msa_dpadd_s_h(res7_m, (v16i8)(filt3), (v16i8)vec7_m);            \
                                                                              \
  out0 = __msa_adds_s_h(res0_m, res4_m);                                      \
  out1 = __msa_adds_s_h(res1_m, res5_m);                                      \
  out2 = __msa_adds_s_h(res2_m, res6_m);                                      \
  out3 = __msa_adds_s_h(res3_m, res7_m);                                      \
}

#endif  /* VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_ */
692 vp9/common/mips/msa/vp9_macros_msa.h Normal file
@@ -0,0 +1,692 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_
#define VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_

#include <msa.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"

#if HAVE_MSA
/* load macros */
#define LOAD_UB(psrc) *((const v16u8 *)(psrc))
#define LOAD_SB(psrc) *((const v16i8 *)(psrc))
#define LOAD_UH(psrc) *((const v8u16 *)(psrc))
#define LOAD_SH(psrc) *((const v8i16 *)(psrc))
#define LOAD_UW(psrc) *((const v4u32 *)(psrc))
#define LOAD_SW(psrc) *((const v4i32 *)(psrc))
#define LOAD_UD(psrc) *((const v2u64 *)(psrc))
#define LOAD_SD(psrc) *((const v2i64 *)(psrc))

/* store macros */
#define STORE_UB(vec, pdest) *((v16u8 *)(pdest)) = (vec)
#define STORE_SB(vec, pdest) *((v16i8 *)(pdest)) = (vec)
#define STORE_UH(vec, pdest) *((v8u16 *)(pdest)) = (vec)
#define STORE_SH(vec, pdest) *((v8i16 *)(pdest)) = (vec)
#define STORE_UW(vec, pdest) *((v4u32 *)(pdest)) = (vec)
#define STORE_SW(vec, pdest) *((v4i32 *)(pdest)) = (vec)
#define STORE_UD(vec, pdest) *((v2u64 *)(pdest)) = (vec)
#define STORE_SD(vec, pdest) *((v2i64 *)(pdest)) = (vec)

#if (__mips_isa_rev >= 6)
#define LOAD_WORD(psrc) ({                         \
  const uint8_t *src_m = (const uint8_t *)(psrc);  \
  uint32_t val_m;                                  \
                                                   \
  __asm__ __volatile__ (                           \
      "lw  %[val_m],  %[src_m]  \n\t"              \
                                                   \
      : [val_m] "=r" (val_m)                       \
      : [src_m] "m" (*src_m)                       \
  );                                               \
                                                   \
  val_m;                                           \
})

#if (__mips == 64)
#define LOAD_DWORD(psrc) ({                        \
  const uint8_t *src_m = (const uint8_t *)(psrc);  \
  uint64_t val_m = 0;                              \
                                                   \
  __asm__ __volatile__ (                           \
      "ld  %[val_m],  %[src_m]  \n\t"              \
                                                   \
      : [val_m] "=r" (val_m)                       \
      : [src_m] "m" (*src_m)                       \
  );                                               \
                                                   \
  val_m;                                           \
})
#else  // !(__mips == 64)
#define LOAD_DWORD(psrc) ({                                      \
  const uint8_t *src1_m = (const uint8_t *)(psrc);               \
  const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4;         \
  uint32_t val0_m, val1_m;                                       \
  uint64_t genval_m = 0;                                         \
                                                                 \
  __asm__ __volatile__ (                                         \
      "lw  %[val0_m],  %[src1_m]  \n\t"                          \
                                                                 \
      : [val0_m] "=r" (val0_m)                                   \
      : [src1_m] "m" (*src1_m)                                   \
  );                                                             \
                                                                 \
  __asm__ __volatile__ (                                         \
      "lw  %[val1_m],  %[src2_m]  \n\t"                          \
                                                                 \
      : [val1_m] "=r" (val1_m)                                   \
      : [src2_m] "m" (*src2_m)                                   \
  );                                                             \
                                                                 \
  genval_m = (uint64_t)(val1_m);                                 \
  genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000);  \
  genval_m = (uint64_t)(genval_m | (uint64_t)val0_m);            \
                                                                 \
  genval_m;                                                      \
})
#endif  // (__mips == 64)

#define STORE_WORD_WITH_OFFSET_1(pdst, val) {    \
  uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1;  \
  const uint32_t val_m = (val);                  \
                                                 \
  __asm__ __volatile__ (                         \
      "sw  %[val_m],  %[dst_ptr_m]  \n\t"        \
                                                 \
      : [dst_ptr_m] "=m" (*dst_ptr_m)            \
      : [val_m] "r" (val_m)                      \
  );                                             \
}

#define STORE_WORD(pdst, val) {            \
  uint8_t *dst_ptr_m = (uint8_t *)(pdst);  \
  const uint32_t val_m = (val);            \
                                           \
  __asm__ __volatile__ (                   \
      "sw  %[val_m],  %[dst_ptr_m]  \n\t"  \
                                           \
      : [dst_ptr_m] "=m" (*dst_ptr_m)      \
      : [val_m] "r" (val_m)                \
  );                                       \
}

#define STORE_DWORD(pdst, val) {           \
  uint8_t *dst_ptr_m = (uint8_t *)(pdst);  \
  const uint64_t val_m = (val);            \
                                           \
  __asm__ __volatile__ (                   \
      "sd  %[val_m],  %[dst_ptr_m]  \n\t"  \
                                           \
      : [dst_ptr_m] "=m" (*dst_ptr_m)      \
      : [val_m] "r" (val_m)                \
  );                                       \
}
#else  // !(__mips_isa_rev >= 6)
#define LOAD_WORD(psrc) ({                         \
  const uint8_t *src_m = (const uint8_t *)(psrc);  \
  uint32_t val_m;                                  \
                                                   \
  __asm__ __volatile__ (                           \
      "ulw  %[val_m],  %[src_m]  \n\t"             \
                                                   \
      : [val_m] "=r" (val_m)                       \
      : [src_m] "m" (*src_m)                       \
  );                                               \
                                                   \
  val_m;                                           \
})

#if (__mips == 64)
#define LOAD_DWORD(psrc) ({                        \
  const uint8_t *src_m = (const uint8_t *)(psrc);  \
  uint64_t val_m = 0;                              \
                                                   \
  __asm__ __volatile__ (                           \
      "uld  %[val_m],  %[src_m]  \n\t"             \
                                                   \
      : [val_m] "=r" (val_m)                       \
      : [src_m] "m" (*src_m)                       \
  );                                               \
                                                   \
  val_m;                                           \
})
#else  // !(__mips == 64)
#define LOAD_DWORD(psrc) ({                                      \
  const uint8_t *src1_m = (const uint8_t *)(psrc);               \
  const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4;         \
  uint32_t val0_m, val1_m;                                       \
  uint64_t genval_m = 0;                                         \
                                                                 \
  __asm__ __volatile__ (                                         \
      "ulw  %[val0_m],  %[src1_m]  \n\t"                         \
                                                                 \
      : [val0_m] "=r" (val0_m)                                   \
      : [src1_m] "m" (*src1_m)                                   \
  );                                                             \
                                                                 \
  __asm__ __volatile__ (                                         \
      "ulw  %[val1_m],  %[src2_m]  \n\t"                         \
                                                                 \
      : [val1_m] "=r" (val1_m)                                   \
      : [src2_m] "m" (*src2_m)                                   \
  );                                                             \
                                                                 \
  genval_m = (uint64_t)(val1_m);                                 \
  genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000);  \
  genval_m = (uint64_t)(genval_m | (uint64_t)val0_m);            \
                                                                 \
  genval_m;                                                      \
})
#endif  // (__mips == 64)
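
A portable sketch (editor's assumption, little-endian) of what the 32-bit LOAD_DWORD above composes: two unaligned word loads merged into one 64-bit value, val1_m in the high half and val0_m in the low half, exactly as the genval_m lines do:

#include <stdint.h>
#include <string.h>

static uint64_t load_dword_ref(const uint8_t *p) {
  uint32_t lo, hi;
  memcpy(&lo, p, 4);      /* plays the role of "ulw val0_m, (psrc)"     */
  memcpy(&hi, p + 4, 4);  /* plays the role of "ulw val1_m, (psrc + 4)" */
  return ((uint64_t)hi << 32) | lo;
}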

#define STORE_WORD_WITH_OFFSET_1(pdst, val) {    \
  uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1;  \
  const uint32_t val_m = (val);                  \
                                                 \
  __asm__ __volatile__ (                         \
      "usw  %[val_m],  %[dst_ptr_m]  \n\t"       \
                                                 \
      : [dst_ptr_m] "=m" (*dst_ptr_m)            \
      : [val_m] "r" (val_m)                      \
  );                                             \
}

#define STORE_WORD(pdst, val) {             \
  uint8_t *dst_ptr_m = (uint8_t *)(pdst);   \
  const uint32_t val_m = (val);             \
                                            \
  __asm__ __volatile__ (                    \
      "usw  %[val_m],  %[dst_ptr_m]  \n\t"  \
                                            \
      : [dst_ptr_m] "=m" (*dst_ptr_m)       \
      : [val_m] "r" (val_m)                 \
  );                                        \
}

#define STORE_DWORD(pdst, val) {                            \
  uint8_t *dst1_m = (uint8_t *)(pdst);                      \
  uint8_t *dst2_m = ((uint8_t *)(pdst)) + 4;                \
  uint32_t val0_m, val1_m;                                  \
                                                            \
  val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF);          \
  val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF);  \
                                                            \
  __asm__ __volatile__ (                                    \
      "usw  %[val0_m],  %[dst1_m]  \n\t"                    \
      "usw  %[val1_m],  %[dst2_m]  \n\t"                    \
                                                            \
      : [dst1_m] "=m" (*dst1_m), [dst2_m] "=m" (*dst2_m)    \
      : [val0_m] "r" (val0_m), [val1_m] "r" (val1_m)        \
  );                                                        \
}
#endif  // (__mips_isa_rev >= 6)
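
A note on the split above, to the extent it can be inferred from the mnemonics: MIPS release 6 removed the lwl/lwr-family instructions behind the assembler's unaligned ulw/uld/usw macros, and its plain lw/ld/sw/sd tolerate the addressing used here, so the r6 branch uses the aligned mnemonics while the pre-r6 branch keeps the u-prefixed unaligned ones.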

#define LOAD_2VECS_UB(psrc, stride, val0, val1) {  \
  val0 = LOAD_UB(psrc + 0 * stride);               \
  val1 = LOAD_UB(psrc + 1 * stride);               \
}

#define LOAD_4VECS_UB(psrc, stride, val0, val1, val2, val3) {  \
  val0 = LOAD_UB(psrc + 0 * stride);                           \
  val1 = LOAD_UB(psrc + 1 * stride);                           \
  val2 = LOAD_UB(psrc + 2 * stride);                           \
  val3 = LOAD_UB(psrc + 3 * stride);                           \
}

#define LOAD_4VECS_SB(psrc, stride, val0, val1, val2, val3) {  \
  val0 = LOAD_SB(psrc + 0 * stride);                           \
  val1 = LOAD_SB(psrc + 1 * stride);                           \
  val2 = LOAD_SB(psrc + 2 * stride);                           \
  val3 = LOAD_SB(psrc + 3 * stride);                           \
}

#define LOAD_5VECS_UB(psrc, stride, out0, out1, out2, out3, out4) {  \
  LOAD_4VECS_UB((psrc), (stride), (out0), (out1), (out2), (out3));   \
  out4 = LOAD_UB(psrc + 4 * stride);                                 \
}

#define LOAD_5VECS_SB(psrc, stride, out0, out1, out2, out3, out4) {  \
  LOAD_4VECS_SB((psrc), (stride), (out0), (out1), (out2), (out3));   \
  out4 = LOAD_SB(psrc + 4 * stride);                                 \
}

#define LOAD_7VECS_SB(psrc, stride,            \
                      val0, val1, val2, val3,  \
                      val4, val5, val6) {      \
  val0 = LOAD_SB((psrc) + 0 * (stride));       \
  val1 = LOAD_SB((psrc) + 1 * (stride));       \
  val2 = LOAD_SB((psrc) + 2 * (stride));       \
  val3 = LOAD_SB((psrc) + 3 * (stride));       \
  val4 = LOAD_SB((psrc) + 4 * (stride));       \
  val5 = LOAD_SB((psrc) + 5 * (stride));       \
  val6 = LOAD_SB((psrc) + 6 * (stride));       \
}

#define LOAD_8VECS_UB(psrc, stride,                \
                      out0, out1, out2, out3,      \
                      out4, out5, out6, out7) {    \
  LOAD_4VECS_UB((psrc), (stride),                  \
                (out0), (out1), (out2), (out3));   \
  LOAD_4VECS_UB((psrc + 4 * stride), (stride),     \
                (out4), (out5), (out6), (out7));   \
}

#define LOAD_8VECS_SB(psrc, stride,                \
                      out0, out1, out2, out3,      \
                      out4, out5, out6, out7) {    \
  LOAD_4VECS_SB((psrc), (stride),                  \
                (out0), (out1), (out2), (out3));   \
  LOAD_4VECS_SB((psrc + 4 * stride), (stride),     \
                (out4), (out5), (out6), (out7));   \
}

#define STORE_4VECS_UB(dst_out, pitch, in0, in1, in2, in3) {  \
  STORE_UB((in0), (dst_out));                                 \
  STORE_UB((in1), ((dst_out) + (pitch)));                     \
  STORE_UB((in2), ((dst_out) + 2 * (pitch)));                 \
  STORE_UB((in3), ((dst_out) + 3 * (pitch)));                 \
}

#define STORE_8VECS_UB(dst_out, pitch_in,                 \
                       in0, in1, in2, in3,                \
                       in4, in5, in6, in7) {              \
  STORE_4VECS_UB(dst_out, pitch_in, in0, in1, in2, in3);  \
  STORE_4VECS_UB((dst_out + 4 * (pitch_in)), pitch_in,    \
                 in4, in5, in6, in7);                     \
}

#define VEC_INSERT_4W_UB(src, src0, src1, src2, src3) {  \
  src = (v16u8)__msa_insert_w((v4i32)(src), 0, (src0));  \
  src = (v16u8)__msa_insert_w((v4i32)(src), 1, (src1));  \
  src = (v16u8)__msa_insert_w((v4i32)(src), 2, (src2));  \
  src = (v16u8)__msa_insert_w((v4i32)(src), 3, (src3));  \
}

#define VEC_INSERT_2DW_UB(src, src0, src1) {             \
  src = (v16u8)__msa_insert_d((v2i64)(src), 0, (src0));  \
  src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1));  \
}
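
One typical use of VEC_INSERT_4W_UB (a sketch under the assumption, matching the width-4 paths, that callers gather one 4-byte row per word): four word loads packed into a single vector so a 4-wide block can be processed at full vector width. The helper name is illustrative, not from the patch:

static v16u8 load_4x4_rows(const uint8_t *p, int32_t stride) {
  v16u8 v = (v16u8)__msa_fill_w(0);  /* start from a zeroed vector */
  VEC_INSERT_4W_UB(v, LOAD_WORD(p),
                   LOAD_WORD(p + stride),
                   LOAD_WORD(p + 2 * stride),
                   LOAD_WORD(p + 3 * stride));
  return v;
}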

/* interleave macros */
/* no in-place support */
#define ILV_B_LRLR_UB(in0, in1, in2, in3, out0, out1, out2, out3) {  \
  out0 = (v16u8)__msa_ilvl_b((v16i8)(in1), (v16i8)(in0));            \
  out1 = (v16u8)__msa_ilvr_b((v16i8)(in1), (v16i8)(in0));            \
  out2 = (v16u8)__msa_ilvl_b((v16i8)(in3), (v16i8)(in2));            \
  out3 = (v16u8)__msa_ilvr_b((v16i8)(in3), (v16i8)(in2));            \
}
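
For orientation (editor's gloss of the MSA definitions): __msa_ilvr_b interleaves the low ("right") halves of its two operands and __msa_ilvl_b the high ("left") halves, with the second operand's element taking the even position:

/* Byte-lane picture of the interleaves used here (illustrative):
 *   a = {a0..a15}, b = {b0..b15}
 *   __msa_ilvr_b(b, a) -> {a0, b0, a1, b1, ..., a7,  b7}
 *   __msa_ilvl_b(b, a) -> {a8, b8, a9, b9, ..., a15, b15} */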

#define ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, out0, out1) {  \
  out0 = (v16u8)__msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r));      \
  out1 = (v16u8)__msa_ilvr_b((v16i8)(in1_l), (v16i8)(in1_r));      \
}

#define ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1) {  \
  out0 = __msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r));             \
  out1 = __msa_ilvr_b((v16i8)(in1_l), (v16i8)(in1_r));             \
}

#define ILVR_B_4VECS_UB(in0_r, in1_r, in2_r, in3_r,         \
                        in0_l, in1_l, in2_l, in3_l,         \
                        out0, out1, out2, out3) {           \
  ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVR_B_2VECS_UB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
}

#define ILVR_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r,         \
                        in0_l, in1_l, in2_l, in3_l,         \
                        out0, out1, out2, out3) {           \
  ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
}

#define ILVR_B_6VECS_SB(in0_r, in1_r, in2_r,                \
                        in3_r, in4_r, in5_r,                \
                        in0_l, in1_l, in2_l,                \
                        in3_l, in4_l, in5_l,                \
                        out0, out1, out2,                   \
                        out3, out4, out5) {                 \
  ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
  ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, out4, out5);  \
}

#define ILVR_B_8VECS_SB(in0_r, in1_r, in2_r, in3_r,         \
                        in4_r, in5_r, in6_r, in7_r,         \
                        in0_l, in1_l, in2_l, in3_l,         \
                        in4_l, in5_l, in6_l, in7_l,         \
                        out0, out1, out2, out3,             \
                        out4, out5, out6, out7) {           \
  ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
  ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, out4, out5);  \
  ILVR_B_2VECS_SB(in6_r, in7_r, in6_l, in7_l, out6, out7);  \
}

#define ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1) {  \
  out0 = __msa_ilvl_b((v16i8)(in0_l), (v16i8)(in0_r));             \
  out1 = __msa_ilvl_b((v16i8)(in1_l), (v16i8)(in1_r));             \
}

#define ILVL_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r,         \
                        in0_l, in1_l, in2_l, in3_l,         \
                        out0, out1, out2, out3) {           \
  ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
}

#define ILVL_B_6VECS_SB(in0_r, in1_r, in2_r,                \
                        in3_r, in4_r, in5_r,                \
                        in0_l, in1_l, in2_l,                \
                        in3_l, in4_l, in5_l,                \
                        out0, out1, out2,                   \
                        out3, out4, out5) {                 \
  ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, out0, out1);  \
  ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, out2, out3);  \
  ILVL_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, out4, out5);  \
}

#define ILVR_D_2VECS_SB(out0, in0_l, in0_r,                    \
                        out1, in1_l, in1_r) {                  \
  out0 = (v16i8)__msa_ilvr_d((v2i64)(in0_l), (v2i64)(in0_r));  \
  out1 = (v16i8)__msa_ilvr_d((v2i64)(in1_l), (v2i64)(in1_r));  \
}

#define ILVR_D_3VECS_SB(out0, in0_l, in0_r,                    \
                        out1, in1_l, in1_r,                    \
                        out2, in2_l, in2_r) {                  \
  ILVR_D_2VECS_SB(out0, in0_l, in0_r, out1, in1_l, in1_r);     \
  out2 = (v16i8)__msa_ilvr_d((v2i64)(in2_l), (v2i64)(in2_r));  \
}

#define ILVR_D_4VECS_SB(out0, in0_l, in0_r,                 \
                        out1, in1_l, in1_r,                 \
                        out2, in2_l, in2_r,                 \
                        out3, in3_l, in3_r) {               \
  ILVR_D_2VECS_SB(out0, in0_l, in0_r, out1, in1_l, in1_r);  \
  ILVR_D_2VECS_SB(out2, in2_l, in2_r, out3, in3_l, in3_r);  \
}

#define XORI_B_2VECS_UB(val0, val1, out0, out1, xor_val) {  \
  out0 = __msa_xori_b((v16u8)(val0), (xor_val));            \
  out1 = __msa_xori_b((v16u8)(val1), (xor_val));            \
}

#define XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val) {  \
  out0 = (v16i8)__msa_xori_b((v16u8)(val0), (xor_val));     \
  out1 = (v16i8)__msa_xori_b((v16u8)(val1), (xor_val));     \
}

#define XORI_B_3VECS_SB(val0, val1, val2,                \
                        out0, out1, out2, xor_val) {     \
  XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val);      \
  out2 = (v16i8)__msa_xori_b((v16u8)(val2), (xor_val));  \
}

#define XORI_B_4VECS_UB(val0, val1, val2, val3,      \
                        out0, out1, out2, out3,      \
                        xor_val) {                   \
  XORI_B_2VECS_UB(val0, val1, out0, out1, xor_val);  \
  XORI_B_2VECS_UB(val2, val3, out2, out3, xor_val);  \
}

#define XORI_B_4VECS_SB(val0, val1, val2, val3,      \
                        out0, out1, out2, out3,      \
                        xor_val) {                   \
  XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val);  \
  XORI_B_2VECS_SB(val2, val3, out2, out3, xor_val);  \
}

#define XORI_B_7VECS_SB(val0, val1, val2, val3,      \
                        val4, val5, val6,            \
                        out0, out1, out2, out3,      \
                        out4, out5, out6,            \
                        xor_val) {                   \
  XORI_B_4VECS_SB(val0, val1, val2, val3,            \
                  out0, out1, out2, out3, xor_val);  \
  XORI_B_3VECS_SB(val4, val5, val6,                  \
                  out4, out5, out6, xor_val);        \
}
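
The recurring xori with 128 converts between unsigned pixels and the signed bytes the dotp_s_h/dpadd_s_h instructions expect, since flipping the top bit is the same as subtracting 128; a one-line scalar model (illustrative, not patch code):

#include <stdint.h>

/* pixel p in 0..255 maps to p - 128 once reinterpreted as signed;
 * applying the same xor maps it back. */
static int8_t pixel_to_signed(uint8_t p) {
  return (int8_t)(p ^ 0x80);
}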

#define SRARI_H_4VECS_UH(val0, val1, val2, val3,                  \
                         out0, out1, out2, out3,                  \
                         shift_right_val) {                       \
  out0 = (v8u16)__msa_srari_h((v8i16)(val0), (shift_right_val));  \
  out1 = (v8u16)__msa_srari_h((v8i16)(val1), (shift_right_val));  \
  out2 = (v8u16)__msa_srari_h((v8i16)(val2), (shift_right_val));  \
  out3 = (v8u16)__msa_srari_h((v8i16)(val3), (shift_right_val));  \
}

#define SRARI_SATURATE_UNSIGNED_H(input, right_shift_val, sat_val) ({  \
  v8u16 out_m;                                                         \
                                                                       \
  out_m = (v8u16)__msa_srari_h((v8i16)(input), (right_shift_val));     \
  out_m = __msa_sat_u_h(out_m, (sat_val));                             \
  out_m;                                                               \
})

#define SRARI_SATURATE_SIGNED_H(input, right_shift_val, sat_val) ({  \
  v8i16 out_m;                                                       \
                                                                     \
  out_m = __msa_srari_h((v8i16)(input), (right_shift_val));          \
  out_m = __msa_sat_s_h(out_m, (sat_val));                           \
  out_m;                                                             \
})
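
A scalar model of the SRARI_SATURATE_* pair (a sketch assuming arithmetic right shift; the sat bits select the clamp range, e.g. 7 gives [-128, 127] in the signed case):

#include <stdint.h>

static int16_t srari_sat_s_ref(int32_t v, int shift, int sat_bits) {
  int32_t r = (v + (1 << (shift - 1))) >> shift;      /* srari: round  */
  const int32_t hi = (1 << sat_bits) - 1;             /* upper clamp   */
  const int32_t lo = -(1 << sat_bits);                /* lower clamp   */
  return (int16_t)(r > hi ? hi : (r < lo ? lo : r));  /* sat_s_h       */
}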

#define PCKEV_2B_XORI128_STORE_4_BYTES_4(in1, in2,        \
                                         pdst, stride) {  \
  uint32_t out0_m, out1_m, out2_m, out3_m;                \
  v16i8 tmp0_m;                                           \
  uint8_t *dst_m = (uint8_t *)(pdst);                     \
                                                          \
  tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1));     \
  tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128);       \
                                                          \
  out0_m = __msa_copy_u_w((v4i32)tmp0_m, 0);              \
  out1_m = __msa_copy_u_w((v4i32)tmp0_m, 1);              \
  out2_m = __msa_copy_u_w((v4i32)tmp0_m, 2);              \
  out3_m = __msa_copy_u_w((v4i32)tmp0_m, 3);              \
                                                          \
  STORE_WORD(dst_m, out0_m);                              \
  dst_m += stride;                                        \
  STORE_WORD(dst_m, out1_m);                              \
  dst_m += stride;                                        \
  STORE_WORD(dst_m, out2_m);                              \
  dst_m += stride;                                        \
  STORE_WORD(dst_m, out3_m);                              \
}

#define PCKEV_B_4_XORI128_STORE_8_BYTES_4(in1, in2,        \
                                          in3, in4,        \
                                          pdst, stride) {  \
  uint64_t out0_m, out1_m, out2_m, out3_m;                 \
  v16i8 tmp0_m, tmp1_m;                                    \
  uint8_t *dst_m = (uint8_t *)(pdst);                      \
                                                           \
  tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1));      \
  tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3));      \
                                                           \
  tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128);        \
  tmp1_m = (v16i8)__msa_xori_b((v16u8)tmp1_m, 128);        \
                                                           \
  out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0);               \
  out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1);               \
  out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0);               \
  out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1);               \
                                                           \
  STORE_DWORD(dst_m, out0_m);                              \
  dst_m += stride;                                         \
  STORE_DWORD(dst_m, out1_m);                              \
  dst_m += stride;                                         \
  STORE_DWORD(dst_m, out2_m);                              \
  dst_m += stride;                                         \
  STORE_DWORD(dst_m, out3_m);                              \
}
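
__msa_pckev_b keeps the even-indexed bytes of the concatenated pair, i.e. the low byte of each 16-bit lane, which is how the already-saturated filter results are narrowed back to bytes; the xori then returns them to unsigned pixels. A per-lane sketch (illustrative):

#include <stdint.h>

static uint8_t narrow_to_pixel(int16_t filtered) {
  return (uint8_t)(((uint8_t)filtered) ^ 0x80);  /* pckev_b byte + xori 128 */
}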

/* Only for signed vecs */
#define PCKEV_B_XORI128_STORE_VEC(in1, in2, pdest) {  \
  v16i8 tmp_m;                                        \
                                                      \
  tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2));  \
  tmp_m = (v16i8)__msa_xori_b((v16u8)tmp_m, 128);     \
  STORE_SB(tmp_m, (pdest));                           \
}

/* Only for signed vecs */
#define PCKEV_B_4_XORI128_AVG_STORE_8_BYTES_4(in1, dst0,       \
                                              in2, dst1,       \
                                              in3, dst2,       \
                                              in4, dst3,       \
                                              pdst, stride) {  \
  uint64_t out0_m, out1_m, out2_m, out3_m;                     \
  v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                        \
  uint8_t *dst_m = (uint8_t *)(pdst);                          \
                                                               \
  tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1));   \
  tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3));   \
                                                               \
  tmp2_m = (v16u8)__msa_ilvr_d((v2i64)(dst1), (v2i64)(dst0));  \
  tmp3_m = (v16u8)__msa_ilvr_d((v2i64)(dst3), (v2i64)(dst2));  \
                                                               \
  tmp0_m = __msa_xori_b(tmp0_m, 128);                          \
  tmp1_m = __msa_xori_b(tmp1_m, 128);                          \
                                                               \
  tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m);                     \
  tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m);                     \
                                                               \
  out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0);                   \
  out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1);                   \
  out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0);                   \
  out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1);                   \
                                                               \
  STORE_DWORD(dst_m, out0_m);                                  \
  dst_m += stride;                                             \
  STORE_DWORD(dst_m, out1_m);                                  \
  dst_m += stride;                                             \
  STORE_DWORD(dst_m, out2_m);                                  \
  dst_m += stride;                                             \
  STORE_DWORD(dst_m, out3_m);                                  \
}

/* Only for signed vecs */
#define PCKEV_B_XORI128_AVG_STORE_VEC(in1, in2, dst, pdest) {  \
  v16u8 tmp_m;                                                 \
                                                               \
  tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2));    \
  tmp_m = __msa_xori_b(tmp_m, 128);                            \
  tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst));                 \
  STORE_UB(tmp_m, (pdest));                                    \
}

#define PCKEV_B_STORE_8_BYTES_4(in1, in2, in3, in4,    \
                                pdst, stride) {        \
  uint64_t out0_m, out1_m, out2_m, out3_m;             \
  v16i8 tmp0_m, tmp1_m;                                \
  uint8_t *dst_m = (uint8_t *)(pdst);                  \
                                                       \
  tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1));  \
  tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3));  \
                                                       \
  out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0);           \
  out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1);           \
  out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0);           \
  out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1);           \
                                                       \
  STORE_DWORD(dst_m, out0_m);                          \
  dst_m += stride;                                     \
  STORE_DWORD(dst_m, out1_m);                          \
  dst_m += stride;                                     \
  STORE_DWORD(dst_m, out2_m);                          \
  dst_m += stride;                                     \
  STORE_DWORD(dst_m, out3_m);                          \
}

/* Only for unsigned vecs */
#define PCKEV_B_STORE_VEC(in1, in2, pdest) {          \
  v16i8 tmp_m;                                        \
                                                      \
  tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2));  \
  STORE_SB(tmp_m, (pdest));                           \
}

#define PCKEV_B_AVG_STORE_8_BYTES_4(in1, dst0, in2, dst1,       \
                                    in3, dst2, in4, dst3,       \
                                    pdst, stride) {             \
  uint64_t out0_m, out1_m, out2_m, out3_m;                      \
  v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                         \
  uint8_t *dst_m = (uint8_t *)(pdst);                           \
                                                                \
  tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1));    \
  tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3));    \
                                                                \
  tmp2_m = (v16u8)__msa_pckev_d((v2i64)(dst1), (v2i64)(dst0));  \
  tmp3_m = (v16u8)__msa_pckev_d((v2i64)(dst3), (v2i64)(dst2));  \
                                                                \
  tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m);                      \
  tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m);                      \
                                                                \
  out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0);                    \
  out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1);                    \
  out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0);                    \
  out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1);                    \
                                                                \
  STORE_DWORD(dst_m, out0_m);                                   \
  dst_m += stride;                                              \
  STORE_DWORD(dst_m, out1_m);                                   \
  dst_m += stride;                                              \
  STORE_DWORD(dst_m, out2_m);                                   \
  dst_m += stride;                                              \
  STORE_DWORD(dst_m, out3_m);                                   \
}

#define PCKEV_B_AVG_STORE_VEC(in1, in2, dst, pdest) {        \
  v16u8 tmp_m;                                               \
                                                             \
  tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2));  \
  tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst));               \
  STORE_UB(tmp_m, (pdest));                                  \
}
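
__msa_aver_u_b, used by the _AVG_ stores above, is a per-byte rounding average, which is what the averaging convolve variants need when blending with the existing destination; scalar model of one lane (illustrative):

#include <stdint.h>

static uint8_t aver_u8(uint8_t a, uint8_t b) {
  return (uint8_t)((a + b + 1) >> 1);  /* the +1 rounds the half up */
}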
#endif  /* HAVE_MSA */
#endif  /* VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_ */
@@ -156,7 +156,7 @@ void vp9_remove_common(VP9_COMMON *cm) {
 void vp9_init_context_buffers(VP9_COMMON *cm) {
   cm->setup_mi(cm);
   if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
-    vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
+    memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
 }

 void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) {

@@ -40,7 +40,7 @@ void vp9_foreach_transformed_block_in_plane(
     const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
     foreach_transformed_block_visitor visit, void *arg) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MB_MODE_INFO* mbmi = &xd->mi[0].src_mi->mbmi;
+  const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
   // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
   // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
   // transform size varies per plane, look it up in a common way.
@@ -103,7 +103,7 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
     for (i = above_contexts; i < tx_size_in_blocks; ++i)
       a[i] = 0;
   } else {
-    vpx_memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
   }

   // left
@@ -120,7 +120,7 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
     for (i = left_contexts; i < tx_size_in_blocks; ++i)
       l[i] = 0;
   } else {
-    vpx_memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
   }
 }