Compare commits
1 Commits
sandbox/jz
...
sandbox/hk
Author | SHA1 | Date | |
---|---|---|---|
![]() |
5504b39f5e |
@@ -1,8 +1,3 @@
|
||||
xxxx-yy-zz v1.4.0 "Changes for next release"
|
||||
vpxenc is changed to use VP9 by default.
|
||||
Encoder controls added for 1 pass SVC.
|
||||
Decoder control to toggle on/off loopfilter.
|
||||
|
||||
2015-04-03 v1.4.0 "Indian Runner Duck"
|
||||
This release includes significant improvements to the VP9 codec.
|
||||
|
||||
|
7
README
7
README
@@ -101,6 +101,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
x86_64-win64-vs10
|
||||
x86_64-win64-vs11
|
||||
x86_64-win64-vs12
|
||||
universal-darwin8-gcc
|
||||
universal-darwin9-gcc
|
||||
universal-darwin10-gcc
|
||||
universal-darwin11-gcc
|
||||
universal-darwin12-gcc
|
||||
universal-darwin13-gcc
|
||||
universal-darwin14-gcc
|
||||
generic-gnu
|
||||
|
||||
The generic-gnu target, in conjunction with the CROSS environment variable,
|
||||
|
4
args.c
4
args.c
@@ -14,7 +14,9 @@
|
||||
#include <limits.h>
|
||||
#include "args.h"
|
||||
|
||||
#include "vpx_ports/msvc.h"
|
||||
#ifdef _MSC_VER
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
extern void die(const char *fmt, ...) __attribute__((noreturn));
|
||||
|
@@ -163,7 +163,6 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
|
||||
endif
|
||||
|
||||
# Add a dependency to force generation of the RTCD files.
|
||||
define rtcd_dep_template
|
||||
ifeq ($(CONFIG_VP8), yes)
|
||||
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
|
||||
endif
|
||||
@@ -171,14 +170,10 @@ ifeq ($(CONFIG_VP9), yes)
|
||||
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
|
||||
endif
|
||||
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
|
||||
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),x86)
|
||||
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
|
||||
endif
|
||||
endef
|
||||
|
||||
$(eval $(call rtcd_dep_template))
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
|
@@ -22,10 +22,8 @@ clean:: .DEFAULT
|
||||
exampletest: .DEFAULT
|
||||
install:: .DEFAULT
|
||||
test:: .DEFAULT
|
||||
test-no-data-check:: .DEFAULT
|
||||
testdata:: .DEFAULT
|
||||
utiltest: .DEFAULT
|
||||
exampletest-no-data-check utiltest-no-data-check: .DEFAULT
|
||||
|
||||
|
||||
# Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
|
||||
@@ -58,10 +56,13 @@ dist:
|
||||
fi
|
||||
endif
|
||||
|
||||
# Since we invoke make recursively for multiple targets we need to include the
|
||||
# .mk file for the correct target, but only when $(target) is non-empty.
|
||||
ifneq ($(target),)
|
||||
include $(target)-$(TOOLCHAIN).mk
|
||||
# Normally, we want to build the filename from the target and the toolchain.
|
||||
# This disambiguates from the $(target).mk file that exists in the source tree.
|
||||
# However, the toolchain is part of the target in universal builds, so we
|
||||
# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
|
||||
# if we're in the universal case.
|
||||
include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
|
||||
endif
|
||||
BUILD_ROOT?=.
|
||||
VPATH=$(SRC_PATH_BARE)
|
||||
@@ -115,9 +116,6 @@ test::
|
||||
testdata::
|
||||
.PHONY: utiltest
|
||||
utiltest:
|
||||
.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
|
||||
test-no-data-check::
|
||||
exampletest-no-data-check utiltest-no-data-check:
|
||||
|
||||
# Add compiler flags for intrinsic files
|
||||
ifeq ($(TOOLCHAIN), x86-os2-gcc)
|
||||
@@ -315,15 +313,18 @@ $(1):
|
||||
$$(filter %.o,$$^) $$(extralibs)
|
||||
endef
|
||||
|
||||
define dll_template
|
||||
# Not using a pattern rule here because we don't want to generate empty
|
||||
# archives when they are listed as a dependency in files not responsible
|
||||
# for creating them.
|
||||
$(1):
|
||||
$(if $(quiet),@echo " [LD] $$@")
|
||||
$(qexec)$$(LD) -Zdll $$(LDFLAGS) \
|
||||
-o $$@ \
|
||||
$$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
|
||||
|
||||
|
||||
define lipo_lib_template
|
||||
$(1): $(addsuffix /$(1),$(FAT_ARCHS))
|
||||
$(if $(quiet),@echo " [LIPO] $$@")
|
||||
$(qexec)libtool -static -o $$@ $$?
|
||||
endef
|
||||
|
||||
define lipo_bin_template
|
||||
$(1): $(addsuffix /$(1),$(FAT_ARCHS))
|
||||
$(if $(quiet),@echo " [LIPO] $$@")
|
||||
$(qexec)lipo -output $$@ -create $$?
|
||||
endef
|
||||
|
||||
|
||||
@@ -384,7 +385,6 @@ LIBS=$(call enabled,LIBS)
|
||||
$(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
|
||||
$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
|
||||
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
|
||||
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))
|
||||
|
||||
INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
|
||||
ifeq ($(MAKECMDGOALS),dist)
|
||||
|
@@ -390,7 +390,7 @@ write_common_config_banner() {
|
||||
write_common_config_targets() {
|
||||
for t in ${all_targets}; do
|
||||
if enabled ${t}; then
|
||||
if enabled child; then
|
||||
if enabled universal || enabled child; then
|
||||
fwrite config.mk "ALL_TARGETS += ${t}-${toolchain}"
|
||||
else
|
||||
fwrite config.mk "ALL_TARGETS += ${t}"
|
||||
@@ -647,6 +647,14 @@ process_common_toolchain() {
|
||||
|
||||
# detect tgt_os
|
||||
case "$gcctarget" in
|
||||
*darwin8*)
|
||||
tgt_isa=universal
|
||||
tgt_os=darwin8
|
||||
;;
|
||||
*darwin9*)
|
||||
tgt_isa=universal
|
||||
tgt_os=darwin9
|
||||
;;
|
||||
*darwin10*)
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin10
|
||||
@@ -728,13 +736,6 @@ process_common_toolchain() {
|
||||
# Handle darwin variants. Newer SDKs allow targeting older
|
||||
# platforms, so use the newest one available.
|
||||
case ${toolchain} in
|
||||
arm*-darwin*)
|
||||
ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
|
||||
if [ -d "${ios_sdk_dir}" ]; then
|
||||
add_cflags "-isysroot ${ios_sdk_dir}"
|
||||
add_ldflags "-isysroot ${ios_sdk_dir}"
|
||||
fi
|
||||
;;
|
||||
*-darwin*)
|
||||
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
|
||||
if [ -d "${osx_sdk_dir}" ]; then
|
||||
@@ -788,6 +789,7 @@ process_common_toolchain() {
|
||||
case ${toolchain} in
|
||||
sparc-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
*-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
@@ -810,17 +812,12 @@ process_common_toolchain() {
|
||||
if disabled neon && enabled neon_asm; then
|
||||
die "Disabling neon while keeping neon-asm is not supported"
|
||||
fi
|
||||
case ${toolchain} in
|
||||
*-darwin*)
|
||||
# Neon is guaranteed on iOS 6+ devices, while old media extensions
|
||||
# no longer assemble with iOS 9 SDK
|
||||
;;
|
||||
*)
|
||||
soft_enable media
|
||||
esac
|
||||
soft_enable media
|
||||
soft_enable fast_unaligned
|
||||
;;
|
||||
armv6)
|
||||
soft_enable media
|
||||
soft_enable fast_unaligned
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -1036,6 +1033,7 @@ EOF
|
||||
tune_cflags="-mtune="
|
||||
if enabled dspr2; then
|
||||
check_add_cflags -mips32r2 -mdspr2
|
||||
disable_feature fast_unaligned
|
||||
fi
|
||||
|
||||
if enabled runtime_cpu_detect; then
|
||||
@@ -1062,6 +1060,8 @@ EOF
|
||||
add_cflags -mmsa
|
||||
add_asflags -mmsa
|
||||
add_ldflags -mmsa
|
||||
|
||||
disable_feature fast_unaligned
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -1221,7 +1221,7 @@ EOF
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*-gcc|generic-gnu)
|
||||
universal*|*-gcc|generic-gnu)
|
||||
link_with_cc=gcc
|
||||
enable_feature gcc
|
||||
setup_gnu_toolchain
|
||||
|
@@ -263,8 +263,8 @@ case "$target" in
|
||||
;;
|
||||
arm*)
|
||||
platforms[0]="ARM"
|
||||
asm_Debug_cmdline="armasm -nologo -oldit "%(FullPath)""
|
||||
asm_Release_cmdline="armasm -nologo -oldit "%(FullPath)""
|
||||
asm_Debug_cmdline="armasm -nologo "%(FullPath)""
|
||||
asm_Release_cmdline="armasm -nologo "%(FullPath)""
|
||||
;;
|
||||
*) die "Unsupported target $target!"
|
||||
;;
|
||||
|
93
configure
vendored
93
configure
vendored
@@ -31,6 +31,8 @@ Advanced options:
|
||||
--size-limit=WxH max size to allow in the decoder
|
||||
--as={yasm|nasm|auto} use specified assembler [auto, yasm preferred]
|
||||
--sdk-path=PATH path to root of sdk (android builds only)
|
||||
${toggle_fast_unaligned} don't use unaligned accesses, even when
|
||||
supported by hardware [auto]
|
||||
${toggle_codec_srcs} in/exclude codec library source code
|
||||
${toggle_debug_libs} in/exclude debug version of libraries
|
||||
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
|
||||
@@ -148,6 +150,13 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
|
||||
all_platforms="${all_platforms} x86_64-win64-vs10"
|
||||
all_platforms="${all_platforms} x86_64-win64-vs11"
|
||||
all_platforms="${all_platforms} x86_64-win64-vs12"
|
||||
all_platforms="${all_platforms} universal-darwin8-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin9-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin10-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin11-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin12-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin13-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin14-gcc"
|
||||
all_platforms="${all_platforms} generic-gnu"
|
||||
|
||||
# all_targets is a list of all targets that can be configured
|
||||
@@ -184,10 +193,6 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
|
||||
fi
|
||||
|
||||
# disable codecs when their source directory does not exist
|
||||
[ -d "${source_path}/vp8" ] || disable_feature vp8
|
||||
[ -d "${source_path}/vp9" ] || disable_feature vp9
|
||||
|
||||
# install everything except the sources, by default. sources will have
|
||||
# to be enabled when doing dist builds, since that's no longer a common
|
||||
# case.
|
||||
@@ -198,21 +203,37 @@ enable_feature install_libs
|
||||
enable_feature static
|
||||
enable_feature optimizations
|
||||
enable_feature dependency_tracking
|
||||
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable_feature spatial_resampling
|
||||
enable_feature multithread
|
||||
enable_feature os_support
|
||||
enable_feature temporal_denoising
|
||||
|
||||
CODECS="
|
||||
vp8_encoder
|
||||
vp8_decoder
|
||||
vp9_encoder
|
||||
vp9_decoder
|
||||
"
|
||||
CODEC_FAMILIES="
|
||||
vp8
|
||||
vp9
|
||||
"
|
||||
[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
|
||||
for d in vp8 vp9; do
|
||||
[ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
|
||||
done
|
||||
|
||||
if ! enabled alt_tree_layout; then
|
||||
# development environment
|
||||
[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
|
||||
[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
|
||||
else
|
||||
# customer environment
|
||||
[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
|
||||
[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
|
||||
[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
|
||||
[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
|
||||
[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
|
||||
[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
|
||||
|
||||
[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
|
||||
fi
|
||||
|
||||
CODECS="$(echo ${CODECS} | tr ' ' '\n')"
|
||||
CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"
|
||||
|
||||
ARCH_LIST="
|
||||
arm
|
||||
@@ -244,6 +265,7 @@ HAVE_LIST="
|
||||
${ARCH_EXT_LIST}
|
||||
vpx_ports
|
||||
stdint_h
|
||||
alt_tree_layout
|
||||
pthread_h
|
||||
sys_mman_h
|
||||
unistd_h
|
||||
@@ -272,6 +294,7 @@ CONFIG_LIST="
|
||||
|
||||
codec_srcs
|
||||
debug_libs
|
||||
fast_unaligned
|
||||
|
||||
dequant_tokens
|
||||
dc_recon
|
||||
@@ -333,6 +356,7 @@ CMDLINE_SELECT="
|
||||
libc
|
||||
as
|
||||
size_limit
|
||||
fast_unaligned
|
||||
codec_srcs
|
||||
debug_libs
|
||||
|
||||
@@ -417,8 +441,22 @@ post_process_cmdline() {
|
||||
|
||||
process_targets() {
|
||||
enabled child || write_common_config_banner
|
||||
write_common_target_config_h ${BUILD_PFX}vpx_config.h
|
||||
enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h
|
||||
|
||||
# For fat binaries, call configure recursively to configure for each
|
||||
# binary architecture to be included.
|
||||
if enabled universal; then
|
||||
# Call configure (ourselves) for each subarchitecture
|
||||
for arch in $fat_bin_archs; do
|
||||
BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
|
||||
done
|
||||
fi
|
||||
|
||||
# The write_common_config (config.mk) logic is deferred until after the
|
||||
# recursive calls to configure complete, because we want our universal
|
||||
# targets to be executed last.
|
||||
write_common_config_targets
|
||||
enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk
|
||||
|
||||
# Calculate the default distribution name, based on the enabled features
|
||||
cf=""
|
||||
@@ -494,11 +532,11 @@ process_detect() {
|
||||
# Can only build shared libs on a subset of platforms. Doing this check
|
||||
# here rather than at option parse time because the target auto-detect
|
||||
# magic happens after the command line has been parsed.
|
||||
if ! enabled linux && ! enabled os2; then
|
||||
if ! enabled linux; then
|
||||
if enabled gnu; then
|
||||
echo "--enable-shared is only supported on ELF; assuming this is OK"
|
||||
else
|
||||
die "--enable-shared only supported on ELF and OS/2 for now"
|
||||
die "--enable-shared only supported on ELF for now"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
@@ -563,6 +601,24 @@ EOF
|
||||
process_toolchain() {
|
||||
process_common_toolchain
|
||||
|
||||
# Handle universal binaries for this architecture
|
||||
case $toolchain in
|
||||
universal-darwin*)
|
||||
darwin_ver=${tgt_os##darwin}
|
||||
|
||||
# Tiger (10.4/darwin8) brought support for x86
|
||||
if [ $darwin_ver -ge 8 ]; then
|
||||
fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
|
||||
fi
|
||||
|
||||
# Leopard (10.5/darwin9) brought 64 bit support
|
||||
if [ $darwin_ver -ge 9 ]; then
|
||||
fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
# Enable some useful compiler flags
|
||||
if enabled gcc; then
|
||||
enabled werror && check_add_cflags -Werror
|
||||
@@ -650,7 +706,7 @@ process_toolchain() {
|
||||
esac
|
||||
|
||||
# Other toolchain specific defaults
|
||||
case $toolchain in x86*) soft_enable postproc;; esac
|
||||
case $toolchain in x86*|universal*) soft_enable postproc;; esac
|
||||
|
||||
if enabled postproc_visualizer; then
|
||||
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
|
||||
@@ -714,7 +770,6 @@ CONFIGURE_ARGS="$@"
|
||||
process "$@"
|
||||
print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
|
||||
cat <<EOF >> ${BUILD_PFX}vpx_config.c
|
||||
#include "vpx/vpx_codec.h"
|
||||
static const char* const cfg = "$CONFIGURE_ARGS";
|
||||
const char *vpx_codec_build_config(void) {return cfg;}
|
||||
EOF
|
||||
|
44
examples.mk
44
examples.mk
@@ -56,7 +56,6 @@ UTILS-$(CONFIG_DECODERS) += vpxdec.c
|
||||
vpxdec.SRCS += md5_utils.c md5_utils.h
|
||||
vpxdec.SRCS += vpx_ports/mem_ops.h
|
||||
vpxdec.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxdec.SRCS += vpx_ports/msvc.h
|
||||
vpxdec.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxdec.SRCS += vpx/vpx_integer.h
|
||||
vpxdec.SRCS += args.c args.h
|
||||
@@ -81,7 +80,6 @@ vpxenc.SRCS += tools_common.c tools_common.h
|
||||
vpxenc.SRCS += warnings.c warnings.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxenc.SRCS += vpx_ports/msvc.h
|
||||
vpxenc.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxenc.SRCS += vpxstats.c vpxstats.h
|
||||
ifeq ($(CONFIG_LIBYUV),yes)
|
||||
@@ -100,7 +98,6 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes)
|
||||
vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h
|
||||
vp9_spatial_svc_encoder.SRCS += video_common.h
|
||||
vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c
|
||||
vp9_spatial_svc_encoder.SRCS += vpx_ports/msvc.h
|
||||
vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h
|
||||
vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
|
||||
@@ -115,7 +112,6 @@ vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h
|
||||
vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h
|
||||
vpx_temporal_svc_encoder.SRCS += video_common.h
|
||||
vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c
|
||||
vpx_temporal_svc_encoder.SRCS += vpx_ports/msvc.h
|
||||
vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947
|
||||
vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
|
||||
EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
|
||||
@@ -126,7 +122,6 @@ simple_decoder.SRCS += video_common.h
|
||||
simple_decoder.SRCS += video_reader.h video_reader.c
|
||||
simple_decoder.SRCS += vpx_ports/mem_ops.h
|
||||
simple_decoder.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
simple_decoder.SRCS += vpx_ports/msvc.h
|
||||
simple_decoder.DESCRIPTION = Simplified decoder loop
|
||||
EXAMPLES-$(CONFIG_DECODERS) += postproc.c
|
||||
postproc.SRCS += ivfdec.h ivfdec.c
|
||||
@@ -135,7 +130,6 @@ postproc.SRCS += video_common.h
|
||||
postproc.SRCS += video_reader.h video_reader.c
|
||||
postproc.SRCS += vpx_ports/mem_ops.h
|
||||
postproc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
postproc.SRCS += vpx_ports/msvc.h
|
||||
postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7
|
||||
postproc.DESCRIPTION = Decoder postprocessor control
|
||||
EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c
|
||||
@@ -146,7 +140,6 @@ decode_to_md5.SRCS += video_common.h
|
||||
decode_to_md5.SRCS += video_reader.h video_reader.c
|
||||
decode_to_md5.SRCS += vpx_ports/mem_ops.h
|
||||
decode_to_md5.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
decode_to_md5.SRCS += vpx_ports/msvc.h
|
||||
decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42
|
||||
decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum
|
||||
EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c
|
||||
@@ -154,7 +147,6 @@ simple_encoder.SRCS += ivfenc.h ivfenc.c
|
||||
simple_encoder.SRCS += tools_common.h tools_common.c
|
||||
simple_encoder.SRCS += video_common.h
|
||||
simple_encoder.SRCS += video_writer.h video_writer.c
|
||||
simple_encoder.SRCS += vpx_ports/msvc.h
|
||||
simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
|
||||
simple_encoder.DESCRIPTION = Simplified encoder loop
|
||||
EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c
|
||||
@@ -162,7 +154,6 @@ vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c
|
||||
vp9_lossless_encoder.SRCS += tools_common.h tools_common.c
|
||||
vp9_lossless_encoder.SRCS += video_common.h
|
||||
vp9_lossless_encoder.SRCS += video_writer.h video_writer.c
|
||||
vp9_lossless_encoder.SRCS += vpx_ports/msvc.h
|
||||
vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366
|
||||
vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
|
||||
EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c
|
||||
@@ -170,7 +161,6 @@ twopass_encoder.SRCS += ivfenc.h ivfenc.c
|
||||
twopass_encoder.SRCS += tools_common.h tools_common.c
|
||||
twopass_encoder.SRCS += video_common.h
|
||||
twopass_encoder.SRCS += video_writer.h video_writer.c
|
||||
twopass_encoder.SRCS += vpx_ports/msvc.h
|
||||
twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8
|
||||
twopass_encoder.DESCRIPTION = Two-pass encoder loop
|
||||
EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c
|
||||
@@ -180,7 +170,6 @@ decode_with_drops.SRCS += video_common.h
|
||||
decode_with_drops.SRCS += video_reader.h video_reader.c
|
||||
decode_with_drops.SRCS += vpx_ports/mem_ops.h
|
||||
decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
decode_with_drops.SRCS += vpx_ports/msvc.h
|
||||
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
|
||||
decode_with_drops.DESCRIPTION = Drops frames while decoding
|
||||
EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c
|
||||
@@ -188,7 +177,6 @@ set_maps.SRCS += ivfenc.h ivfenc.c
|
||||
set_maps.SRCS += tools_common.h tools_common.c
|
||||
set_maps.SRCS += video_common.h
|
||||
set_maps.SRCS += video_writer.h video_writer.c
|
||||
set_maps.SRCS += vpx_ports/msvc.h
|
||||
set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
|
||||
set_maps.DESCRIPTION = Set active and ROI maps
|
||||
EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c
|
||||
@@ -196,7 +184,6 @@ vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c
|
||||
vp8cx_set_ref.SRCS += tools_common.h tools_common.c
|
||||
vp8cx_set_ref.SRCS += video_common.h
|
||||
vp8cx_set_ref.SRCS += video_writer.h video_writer.c
|
||||
vp8cx_set_ref.SRCS += vpx_ports/msvc.h
|
||||
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
|
||||
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
|
||||
|
||||
@@ -207,7 +194,6 @@ EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c
|
||||
vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c
|
||||
vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c
|
||||
vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c
|
||||
vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h
|
||||
vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
|
||||
vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
|
||||
vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
|
||||
@@ -268,6 +254,14 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
|
||||
$(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))
|
||||
|
||||
|
||||
# If this is a universal (fat) binary, then all the subarchitectures have
|
||||
# already been built and our job is to stitch them together. The
|
||||
# BUILD_OBJS variable indicates whether we should be building
|
||||
# (compiling, linking) the library. The LIPO_OBJS variable indicates
|
||||
# that we're stitching.
|
||||
$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
|
||||
|
||||
|
||||
# Create build/install dependencies for all examples. The common case
|
||||
# is handled here. The MSVS case is handled below.
|
||||
NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
|
||||
@@ -275,28 +269,24 @@ DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
|
||||
INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
|
||||
DIST-SRCS-yes += $(ALL_SRCS)
|
||||
INSTALL-SRCS-yes += $(UTIL_SRCS)
|
||||
OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS))
|
||||
OBJS-$(NOT_MSVS) += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
|
||||
BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))
|
||||
|
||||
|
||||
# Instantiate linker template for all examples.
|
||||
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
|
||||
ifneq ($(filter darwin%,$(TGT_OS)),)
|
||||
SHARED_LIB_SUF=.dylib
|
||||
else
|
||||
ifneq ($(filter os2%,$(TGT_OS)),)
|
||||
SHARED_LIB_SUF=_dll.a
|
||||
else
|
||||
SHARED_LIB_SUF=.so
|
||||
endif
|
||||
endif
|
||||
SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
|
||||
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
|
||||
$(foreach bin,$(BINS-yes),\
|
||||
$(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
|
||||
$(eval $(call linker_template,$(bin),\
|
||||
$(if $(BUILD_OBJS),$(eval $(bin):\
|
||||
$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
|
||||
$(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
|
||||
$(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
|
||||
-l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
|
||||
)))
|
||||
)))\
|
||||
$(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
|
||||
)
|
||||
|
||||
|
||||
# The following pairs define a mapping of locations in the distribution
|
||||
# tree to locations in the source/build trees.
|
||||
|
@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@@ -65,7 +65,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@@ -52,7 +52,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@@ -15,7 +15,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../tools_common.h"
|
||||
#include "../vp9/encoder/vp9_resize.h"
|
||||
|
||||
static const char *exec_name = NULL;
|
||||
@@ -27,7 +26,7 @@ static void usage() {
|
||||
printf("<output_yuv> [<frames>]\n");
|
||||
}
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
usage();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@@ -55,7 +55,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
|
||||
exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
|
@@ -88,7 +88,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@@ -106,7 +106,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr,
|
||||
"Usage: %s <codec> <width> <height> <infile> <outfile> "
|
||||
"<keyframe-interval> [<error-resilient>]\nSee comments in "
|
||||
|
@@ -58,7 +58,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
|
||||
exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
|
@@ -37,14 +37,15 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include "vpx_ports/vpx_timer.h"
|
||||
#define VPX_CODEC_DISABLE_COMPAT 1
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_ports/mem_ops.h"
|
||||
#include "../tools_common.h"
|
||||
#include "./tools_common.h"
|
||||
#define interface (vpx_codec_vp8_cx())
|
||||
#define fourcc 0x30385056
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
@@ -58,7 +58,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
|
||||
exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
|
@@ -20,7 +20,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
|
||||
"encoding feature. Supports raw input only.\n");
|
||||
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
|
||||
|
@@ -14,13 +14,11 @@
|
||||
* that benefit from a scalable bitstream.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#include "../args.h"
|
||||
#include "../tools_common.h"
|
||||
#include "../video_writer.h"
|
||||
@@ -29,18 +27,11 @@
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "../vpxstats.h"
|
||||
#define OUTPUT_RC_STATS 1
|
||||
|
||||
static const arg_def_t skip_frames_arg =
|
||||
ARG_DEF("s", "skip-frames", 1, "input frames to skip");
|
||||
static const arg_def_t frames_arg =
|
||||
ARG_DEF("f", "frames", 1, "number of frames to encode");
|
||||
static const arg_def_t threads_arg =
|
||||
ARG_DEF("th", "threads", 1, "number of threads to use");
|
||||
#if OUTPUT_RC_STATS
|
||||
static const arg_def_t output_rc_stats_arg =
|
||||
ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
|
||||
#endif
|
||||
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
|
||||
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
|
||||
static const arg_def_t timebase_arg =
|
||||
@@ -51,9 +42,6 @@ static const arg_def_t spatial_layers_arg =
|
||||
ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
|
||||
static const arg_def_t temporal_layers_arg =
|
||||
ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
|
||||
static const arg_def_t temporal_layering_mode_arg =
|
||||
ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
|
||||
"VP9E_TEMPORAL_LAYERING_MODE");
|
||||
static const arg_def_t kf_dist_arg =
|
||||
ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
|
||||
static const arg_def_t scale_factors_arg =
|
||||
@@ -77,8 +65,6 @@ static const arg_def_t lag_in_frame_arg =
|
||||
"generating any outputs");
|
||||
static const arg_def_t rc_end_usage_arg =
|
||||
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
|
||||
static const arg_def_t speed_arg =
|
||||
ARG_DEF("sp", "speed", 1, "speed configuration");
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static const struct arg_enum_list bitdepth_enum[] = {
|
||||
@@ -99,16 +85,10 @@ static const arg_def_t *svc_args[] = {
|
||||
&timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
|
||||
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
|
||||
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
|
||||
&max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
|
||||
&lag_in_frame_arg, &threads_arg,
|
||||
#if OUTPUT_RC_STATS
|
||||
&output_rc_stats_arg,
|
||||
#endif
|
||||
|
||||
&max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
&bitdepth_arg,
|
||||
#endif
|
||||
&speed_arg,
|
||||
&rc_end_usage_arg, NULL
|
||||
};
|
||||
|
||||
@@ -122,10 +102,6 @@ static const uint32_t default_bitrate = 1000;
|
||||
static const uint32_t default_spatial_layers = 5;
|
||||
static const uint32_t default_temporal_layers = 1;
|
||||
static const uint32_t default_kf_dist = 100;
|
||||
static const uint32_t default_temporal_layering_mode = 0;
|
||||
static const uint32_t default_output_rc_stats = 0;
|
||||
static const int32_t default_speed = -1; // -1 means use library default.
|
||||
static const uint32_t default_threads = 0; // zero means use library default.
|
||||
|
||||
typedef struct {
|
||||
const char *input_filename;
|
||||
@@ -140,7 +116,7 @@ typedef struct {
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
|
||||
exec_name);
|
||||
fprintf(stderr, "Options:\n");
|
||||
@@ -167,12 +143,6 @@ static void parse_command_line(int argc, const char **argv_,
|
||||
svc_ctx->log_level = SVC_LOG_DEBUG;
|
||||
svc_ctx->spatial_layers = default_spatial_layers;
|
||||
svc_ctx->temporal_layers = default_temporal_layers;
|
||||
svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
|
||||
#if OUTPUT_RC_STATS
|
||||
svc_ctx->output_rc_stat = default_output_rc_stats;
|
||||
#endif
|
||||
svc_ctx->speed = default_speed;
|
||||
svc_ctx->threads = default_threads;
|
||||
|
||||
// start with default encoder configuration
|
||||
res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
|
||||
@@ -214,20 +184,6 @@ static void parse_command_line(int argc, const char **argv_,
|
||||
svc_ctx->spatial_layers = arg_parse_uint(&arg);
|
||||
} else if (arg_match(&arg, &temporal_layers_arg, argi)) {
|
||||
svc_ctx->temporal_layers = arg_parse_uint(&arg);
|
||||
#if OUTPUT_RC_STATS
|
||||
} else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
|
||||
svc_ctx->output_rc_stat = arg_parse_uint(&arg);
|
||||
#endif
|
||||
} else if (arg_match(&arg, &speed_arg, argi)) {
|
||||
svc_ctx->speed = arg_parse_uint(&arg);
|
||||
} else if (arg_match(&arg, &threads_arg, argi)) {
|
||||
svc_ctx->threads = arg_parse_uint(&arg);
|
||||
} else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
|
||||
svc_ctx->temporal_layering_mode =
|
||||
enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
|
||||
if (svc_ctx->temporal_layering_mode) {
|
||||
enc_cfg->g_error_resilient = 1;
|
||||
}
|
||||
} else if (arg_match(&arg, &kf_dist_arg, argi)) {
|
||||
enc_cfg->kf_min_dist = arg_parse_uint(&arg);
|
||||
enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
|
||||
@@ -360,185 +316,6 @@ static void parse_command_line(int argc, const char **argv_,
|
||||
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
|
||||
}
|
||||
|
||||
#if OUTPUT_RC_STATS
|
||||
// For rate control encoding stats.
|
||||
struct RateControlStats {
|
||||
// Number of input frames per layer.
|
||||
int layer_input_frames[VPX_MAX_LAYERS];
|
||||
// Total (cumulative) number of encoded frames per layer.
|
||||
int layer_tot_enc_frames[VPX_MAX_LAYERS];
|
||||
// Number of encoded non-key frames per layer.
|
||||
int layer_enc_frames[VPX_MAX_LAYERS];
|
||||
// Framerate per layer (cumulative).
|
||||
double layer_framerate[VPX_MAX_LAYERS];
|
||||
// Target average frame size per layer (per-frame-bandwidth per layer).
|
||||
double layer_pfb[VPX_MAX_LAYERS];
|
||||
// Actual average frame size per layer.
|
||||
double layer_avg_frame_size[VPX_MAX_LAYERS];
|
||||
// Average rate mismatch per layer (|target - actual| / target).
|
||||
double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
|
||||
// Actual encoding bitrate per layer (cumulative).
|
||||
double layer_encoding_bitrate[VPX_MAX_LAYERS];
|
||||
// Average of the short-time encoder actual bitrate.
|
||||
// TODO(marpan): Should we add these short-time stats for each layer?
|
||||
double avg_st_encoding_bitrate;
|
||||
// Variance of the short-time encoder actual bitrate.
|
||||
double variance_st_encoding_bitrate;
|
||||
// Window (number of frames) for computing short-time encoding bitrate.
|
||||
int window_size;
|
||||
// Number of window measurements.
|
||||
int window_count;
|
||||
};
|
||||
|
||||
// Note: these rate control stats assume only 1 key frame in the
|
||||
// sequence (i.e., first frame only).
|
||||
static void set_rate_control_stats(struct RateControlStats *rc,
|
||||
vpx_codec_enc_cfg_t *cfg) {
|
||||
unsigned int sl, tl;
|
||||
// Set the layer (cumulative) framerate and the target layer (non-cumulative)
|
||||
// per-frame-bandwidth, for the rate control encoding stats below.
|
||||
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
|
||||
|
||||
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
|
||||
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
|
||||
const int layer = sl * cfg->ts_number_layers + tl;
|
||||
const int tlayer0 = sl * cfg->ts_number_layers;
|
||||
rc->layer_framerate[layer] =
|
||||
framerate / cfg->ts_rate_decimator[tl];
|
||||
if (tl > 0) {
|
||||
rc->layer_pfb[layer] = 1000.0 *
|
||||
(cfg->layer_target_bitrate[layer] -
|
||||
cfg->layer_target_bitrate[layer - 1]) /
|
||||
(rc->layer_framerate[layer] -
|
||||
rc->layer_framerate[layer - 1]);
|
||||
} else {
|
||||
rc->layer_pfb[tlayer0] = 1000.0 *
|
||||
cfg->layer_target_bitrate[tlayer0] /
|
||||
rc->layer_framerate[tlayer0];
|
||||
}
|
||||
rc->layer_input_frames[layer] = 0;
|
||||
rc->layer_enc_frames[layer] = 0;
|
||||
rc->layer_tot_enc_frames[layer] = 0;
|
||||
rc->layer_encoding_bitrate[layer] = 0.0;
|
||||
rc->layer_avg_frame_size[layer] = 0.0;
|
||||
rc->layer_avg_rate_mismatch[layer] = 0.0;
|
||||
}
|
||||
}
|
||||
rc->window_count = 0;
|
||||
rc->window_size = 15;
|
||||
rc->avg_st_encoding_bitrate = 0.0;
|
||||
rc->variance_st_encoding_bitrate = 0.0;
|
||||
}
|
||||
|
||||
static void printout_rate_control_summary(struct RateControlStats *rc,
|
||||
vpx_codec_enc_cfg_t *cfg,
|
||||
int frame_cnt) {
|
||||
unsigned int sl, tl;
|
||||
int tot_num_frames = 0;
|
||||
double perc_fluctuation = 0.0;
|
||||
printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
|
||||
printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
|
||||
cfg->ss_number_layers, cfg->ts_number_layers);
|
||||
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
|
||||
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
|
||||
const int layer = sl * cfg->ts_number_layers + tl;
|
||||
const int num_dropped = (tl > 0) ?
|
||||
(rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
|
||||
(rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
|
||||
if (!sl)
|
||||
tot_num_frames += rc->layer_input_frames[layer];
|
||||
rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
|
||||
rc->layer_encoding_bitrate[layer] / tot_num_frames;
|
||||
rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
|
||||
rc->layer_enc_frames[layer];
|
||||
rc->layer_avg_rate_mismatch[layer] =
|
||||
100.0 * rc->layer_avg_rate_mismatch[layer] /
|
||||
rc->layer_enc_frames[layer];
|
||||
printf("For layer#: sl%d tl%d \n", sl, tl);
|
||||
printf("Bitrate (target vs actual): %d %f.0 kbps\n",
|
||||
cfg->layer_target_bitrate[layer],
|
||||
rc->layer_encoding_bitrate[layer]);
|
||||
printf("Average frame size (target vs actual): %f %f bits\n",
|
||||
rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
|
||||
printf("Average rate_mismatch: %f\n",
|
||||
rc->layer_avg_rate_mismatch[layer]);
|
||||
printf("Number of input frames, encoded (non-key) frames, "
|
||||
"and percent dropped frames: %d %d %f.0 \n",
|
||||
rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
|
||||
100.0 * num_dropped / rc->layer_input_frames[layer]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
|
||||
rc->variance_st_encoding_bitrate =
|
||||
rc->variance_st_encoding_bitrate / rc->window_count -
|
||||
(rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
|
||||
perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
|
||||
rc->avg_st_encoding_bitrate;
|
||||
printf("Short-time stats, for window of %d frames: \n", rc->window_size);
|
||||
printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
|
||||
rc->avg_st_encoding_bitrate,
|
||||
sqrt(rc->variance_st_encoding_bitrate),
|
||||
perc_fluctuation);
|
||||
if (frame_cnt != tot_num_frames)
|
||||
die("Error: Number of input frames not equal to output encoded frames != "
|
||||
"%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
|
||||
}
|
||||
|
||||
vpx_codec_err_t parse_superframe_index(const uint8_t *data,
|
||||
size_t data_sz,
|
||||
uint32_t sizes[8], int *count) {
|
||||
// A chunk ending with a byte matching 0xc0 is an invalid chunk unless
|
||||
// it is a super frame index. If the last byte of real video compression
|
||||
// data is 0xc0 the encoder must add a 0 byte. If we have the marker but
|
||||
// not the associated matching marker byte at the front of the index we have
|
||||
// an invalid bitstream and need to return an error.
|
||||
|
||||
uint8_t marker;
|
||||
|
||||
marker = *(data + data_sz - 1);
|
||||
*count = 0;
|
||||
|
||||
|
||||
if ((marker & 0xe0) == 0xc0) {
|
||||
const uint32_t frames = (marker & 0x7) + 1;
|
||||
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
|
||||
const size_t index_sz = 2 + mag * frames;
|
||||
|
||||
// This chunk is marked as having a superframe index but doesn't have
|
||||
// enough data for it, thus it's an invalid superframe index.
|
||||
if (data_sz < index_sz)
|
||||
return VPX_CODEC_CORRUPT_FRAME;
|
||||
|
||||
{
|
||||
const uint8_t marker2 = *(data + data_sz - index_sz);
|
||||
|
||||
// This chunk is marked as having a superframe index but doesn't have
|
||||
// the matching marker byte at the front of the index therefore it's an
|
||||
// invalid chunk.
|
||||
if (marker != marker2)
|
||||
return VPX_CODEC_CORRUPT_FRAME;
|
||||
}
|
||||
|
||||
{
|
||||
// Found a valid superframe index.
|
||||
uint32_t i, j;
|
||||
const uint8_t *x = &data[data_sz - index_sz + 1];
|
||||
|
||||
for (i = 0; i < frames; ++i) {
|
||||
uint32_t this_sz = 0;
|
||||
|
||||
for (j = 0; j < mag; ++j)
|
||||
this_sz |= (*x++) << (j * 8);
|
||||
sizes[i] = this_sz;
|
||||
}
|
||||
*count = frames;
|
||||
}
|
||||
}
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
AppInput app_input = {0};
|
||||
VpxVideoWriter *writer = NULL;
|
||||
@@ -555,15 +332,7 @@ int main(int argc, const char **argv) {
|
||||
FILE *infile = NULL;
|
||||
int end_of_stream = 0;
|
||||
int frames_received = 0;
|
||||
#if OUTPUT_RC_STATS
|
||||
VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
|
||||
struct RateControlStats rc;
|
||||
vpx_svc_layer_id_t layer_id;
|
||||
int sl, tl;
|
||||
double sum_bitrate = 0.0;
|
||||
double sum_bitrate2 = 0.0;
|
||||
double framerate = 30.0;
|
||||
#endif
|
||||
|
||||
memset(&svc_ctx, 0, sizeof(svc_ctx));
|
||||
svc_ctx.log_print = 1;
|
||||
exec_name = argv[0];
|
||||
@@ -590,13 +359,6 @@ int main(int argc, const char **argv) {
|
||||
VPX_CODEC_OK)
|
||||
die("Failed to initialize encoder\n");
|
||||
|
||||
#if OUTPUT_RC_STATS
|
||||
if (svc_ctx.output_rc_stat) {
|
||||
set_rate_control_stats(&rc, &enc_cfg);
|
||||
framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
|
||||
}
|
||||
#endif
|
||||
|
||||
info.codec_fourcc = VP9_FOURCC;
|
||||
info.time_base.numerator = enc_cfg.g_timebase.num;
|
||||
info.time_base.denominator = enc_cfg.g_timebase.den;
|
||||
@@ -608,31 +370,11 @@ int main(int argc, const char **argv) {
|
||||
if (!writer)
|
||||
die("Failed to open %s for writing\n", app_input.output_filename);
|
||||
}
|
||||
#if OUTPUT_RC_STATS
|
||||
// For now, just write temporal layer streams.
|
||||
// TODO(wonkap): do spatial by re-writing superframe.
|
||||
if (svc_ctx.output_rc_stat) {
|
||||
for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
|
||||
char file_name[PATH_MAX];
|
||||
|
||||
snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
|
||||
app_input.output_filename, tl);
|
||||
outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
|
||||
if (!outfile[tl])
|
||||
die("Failed to open %s for writing", file_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// skip initial frames
|
||||
for (i = 0; i < app_input.frames_to_skip; ++i)
|
||||
vpx_img_read(&raw, infile);
|
||||
|
||||
if (svc_ctx.speed != -1)
|
||||
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
|
||||
if (svc_ctx.threads)
|
||||
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
|
||||
|
||||
// Encode frames
|
||||
while (!end_of_stream) {
|
||||
vpx_codec_iter_t iter = NULL;
|
||||
@@ -644,9 +386,7 @@ int main(int argc, const char **argv) {
|
||||
}
|
||||
|
||||
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
|
||||
pts, frame_duration, svc_ctx.speed >= 5 ?
|
||||
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
|
||||
|
||||
pts, frame_duration, VPX_DL_GOOD_QUALITY);
|
||||
printf("%s", vpx_svc_get_message(&svc_ctx));
|
||||
if (res != VPX_CODEC_OK) {
|
||||
die_codec(&codec, "Failed to encode frame");
|
||||
@@ -655,90 +395,11 @@ int main(int argc, const char **argv) {
|
||||
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
|
||||
switch (cx_pkt->kind) {
|
||||
case VPX_CODEC_CX_FRAME_PKT: {
|
||||
if (cx_pkt->data.frame.sz > 0) {
|
||||
#if OUTPUT_RC_STATS
|
||||
uint32_t sizes[8];
|
||||
int count = 0;
|
||||
#endif
|
||||
if (cx_pkt->data.frame.sz > 0)
|
||||
vpx_video_writer_write_frame(writer,
|
||||
cx_pkt->data.frame.buf,
|
||||
cx_pkt->data.frame.sz,
|
||||
cx_pkt->data.frame.pts);
|
||||
#if OUTPUT_RC_STATS
|
||||
// TODO(marpan/wonkap): Put this (to line728) in separate function.
|
||||
if (svc_ctx.output_rc_stat) {
|
||||
vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
|
||||
parse_superframe_index(cx_pkt->data.frame.buf,
|
||||
cx_pkt->data.frame.sz, sizes, &count);
|
||||
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
|
||||
++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
|
||||
layer_id.temporal_layer_id];
|
||||
}
|
||||
for (tl = layer_id.temporal_layer_id;
|
||||
tl < enc_cfg.ts_number_layers; ++tl) {
|
||||
vpx_video_writer_write_frame(outfile[tl],
|
||||
cx_pkt->data.frame.buf,
|
||||
cx_pkt->data.frame.sz,
|
||||
cx_pkt->data.frame.pts);
|
||||
}
|
||||
|
||||
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
|
||||
for (tl = layer_id.temporal_layer_id;
|
||||
tl < enc_cfg.ts_number_layers; ++tl) {
|
||||
const int layer = sl * enc_cfg.ts_number_layers + tl;
|
||||
++rc.layer_tot_enc_frames[layer];
|
||||
rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
|
||||
// Keep count of rate control stats per layer, for non-key
|
||||
// frames.
|
||||
if (tl == layer_id.temporal_layer_id &&
|
||||
!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
|
||||
rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
|
||||
rc.layer_avg_rate_mismatch[layer] +=
|
||||
fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
|
||||
rc.layer_pfb[layer];
|
||||
++rc.layer_enc_frames[layer];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update for short-time encoding bitrate states, for moving
|
||||
// window of size rc->window, shifted by rc->window / 2.
|
||||
// Ignore first window segment, due to key frame.
|
||||
if (frame_cnt > rc.window_size) {
|
||||
tl = layer_id.temporal_layer_id;
|
||||
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
|
||||
sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
|
||||
}
|
||||
if (frame_cnt % rc.window_size == 0) {
|
||||
rc.window_count += 1;
|
||||
rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
|
||||
rc.variance_st_encoding_bitrate +=
|
||||
(sum_bitrate / rc.window_size) *
|
||||
(sum_bitrate / rc.window_size);
|
||||
sum_bitrate = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Second shifted window.
|
||||
if (frame_cnt > rc.window_size + rc.window_size / 2) {
|
||||
tl = layer_id.temporal_layer_id;
|
||||
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
|
||||
sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
|
||||
}
|
||||
|
||||
if (frame_cnt > 2 * rc.window_size &&
|
||||
frame_cnt % rc.window_size == 0) {
|
||||
rc.window_count += 1;
|
||||
rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
|
||||
rc.variance_st_encoding_bitrate +=
|
||||
(sum_bitrate2 / rc.window_size) *
|
||||
(sum_bitrate2 / rc.window_size);
|
||||
sum_bitrate2 = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
|
||||
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
|
||||
@@ -763,30 +424,25 @@ int main(int argc, const char **argv) {
|
||||
pts += frame_duration;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Processed %d frames\n", frame_cnt);
|
||||
|
||||
fclose(infile);
|
||||
#if OUTPUT_RC_STATS
|
||||
if (svc_ctx.output_rc_stat) {
|
||||
printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
|
||||
|
||||
if (app_input.passes == 2)
|
||||
stats_close(&app_input.rc_stats, 1);
|
||||
|
||||
if (writer) {
|
||||
vpx_video_writer_close(writer);
|
||||
}
|
||||
#if OUTPUT_RC_STATS
|
||||
if (svc_ctx.output_rc_stat) {
|
||||
for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
|
||||
vpx_video_writer_close(outfile[tl]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
vpx_img_free(&raw);
|
||||
|
||||
// display average size, psnr
|
||||
printf("%s", vpx_svc_dump_statistics(&svc_ctx));
|
||||
|
||||
vpx_svc_release(&svc_ctx);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@@ -28,7 +28,7 @@
|
||||
|
||||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
void usage_exit() {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
@@ -70,7 +70,6 @@ struct RateControlMetrics {
|
||||
int window_size;
|
||||
// Number of window measurements.
|
||||
int window_count;
|
||||
int layer_target_bitrate[VPX_MAX_LAYERS];
|
||||
};
|
||||
|
||||
// Note: these rate control metrics assume only 1 key frame in the
|
||||
@@ -86,13 +85,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
|
||||
// per-frame-bandwidth, for the rate control encoding stats below.
|
||||
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
|
||||
rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
|
||||
rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
|
||||
rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
|
||||
rc->layer_framerate[0];
|
||||
for (i = 0; i < cfg->ts_number_layers; ++i) {
|
||||
if (i > 0) {
|
||||
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
|
||||
rc->layer_pfb[i] = 1000.0 *
|
||||
(rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
|
||||
(cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
|
||||
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
|
||||
}
|
||||
rc->layer_input_frames[i] = 0;
|
||||
@@ -129,7 +128,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
|
||||
rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
|
||||
rc->layer_enc_frames[i];
|
||||
printf("For layer#: %d \n", i);
|
||||
printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
|
||||
printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
|
||||
rc->layer_encoding_bitrate[i]);
|
||||
printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
|
||||
rc->layer_avg_frame_size[i]);
|
||||
@@ -598,16 +597,13 @@ int main(int argc, char **argv) {
|
||||
for (i = min_args_base;
|
||||
(int)i < min_args_base + mode_to_num_layers[layering_mode];
|
||||
++i) {
|
||||
rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
|
||||
if (strncmp(encoder->name, "vp8", 3) == 0)
|
||||
cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
|
||||
else if (strncmp(encoder->name, "vp9", 3) == 0)
|
||||
cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
|
||||
cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
|
||||
}
|
||||
|
||||
// Real time parameters.
|
||||
cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
|
||||
cfg.rc_end_usage = VPX_CBR;
|
||||
cfg.rc_resize_allowed = 0;
|
||||
cfg.rc_min_quantizer = 2;
|
||||
cfg.rc_max_quantizer = 56;
|
||||
if (strncmp(encoder->name, "vp9", 3) == 0)
|
||||
@@ -618,9 +614,6 @@ int main(int argc, char **argv) {
|
||||
cfg.rc_buf_optimal_sz = 600;
|
||||
cfg.rc_buf_sz = 1000;
|
||||
|
||||
// Disable dynamic resizing by default.
|
||||
cfg.rc_resize_allowed = 0;
|
||||
|
||||
// Use 1 thread as default.
|
||||
cfg.g_threads = 1;
|
||||
|
||||
@@ -632,8 +625,6 @@ int main(int argc, char **argv) {
|
||||
// Disable automatic keyframe placement.
|
||||
cfg.kf_min_dist = cfg.kf_max_dist = 3000;
|
||||
|
||||
cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
|
||||
set_temporal_layer_pattern(layering_mode,
|
||||
&cfg,
|
||||
layer_flags,
|
||||
@@ -642,8 +633,8 @@ int main(int argc, char **argv) {
|
||||
set_rate_control_metrics(&rc, &cfg);
|
||||
|
||||
// Target bandwidth for the whole stream.
|
||||
// Set to layer_target_bitrate for highest layer (total bitrate).
|
||||
cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
|
||||
// Set to ts_target_bitrate for highest layer (total bitrate).
|
||||
cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
|
||||
|
||||
// Open input file.
|
||||
if (!(infile = fopen(argv[1], "rb"))) {
|
||||
@@ -686,22 +677,15 @@ int main(int argc, char **argv) {
|
||||
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
|
||||
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
|
||||
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
|
||||
vpx_svc_extra_cfg_t svc_params;
|
||||
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
|
||||
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
|
||||
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
|
||||
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
|
||||
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
|
||||
die_codec(&codec, "Failed to set SVC");
|
||||
for (i = 0; i < cfg.ts_number_layers; ++i) {
|
||||
svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
|
||||
svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
|
||||
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
|
||||
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
|
||||
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
|
||||
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
|
||||
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
|
||||
die_codec(&codec, "Failed to set SVC");
|
||||
}
|
||||
svc_params.scaling_factor_num[0] = cfg.g_h;
|
||||
svc_params.scaling_factor_den[0] = cfg.g_h;
|
||||
vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
|
||||
}
|
||||
if (strncmp(encoder->name, "vp8", 3) == 0) {
|
||||
vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
|
||||
|
168
libs.mk
168
libs.mk
@@ -25,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
|
||||
@echo " [CREATE] $$@"
|
||||
$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
|
||||
--sym=$(1) \
|
||||
--config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
|
||||
--config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
|
||||
$$(RTCD_OPTIONS) $$^ > $$@
|
||||
CLEAN-OBJS += $$(BUILD_PFX)$(1).h
|
||||
RTCD += $$(BUILD_PFX)$(1).h
|
||||
@@ -34,6 +34,13 @@ endef
|
||||
CODEC_SRCS-yes += CHANGELOG
|
||||
CODEC_SRCS-yes += libs.mk
|
||||
|
||||
# If this is a universal (fat) binary, then all the subarchitectures have
|
||||
# already been built and our job is to stitch them together. The
|
||||
# BUILD_LIBVPX variable indicates whether we should be building
|
||||
# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
|
||||
# that we're stitching.
|
||||
$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
|
||||
|
||||
include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
|
||||
CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
|
||||
CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
|
||||
@@ -47,9 +54,6 @@ CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
|
||||
include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
|
||||
CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))
|
||||
|
||||
include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
|
||||
CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
|
||||
|
||||
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
|
||||
VP8_PREFIX=vp8/
|
||||
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
|
||||
@@ -133,18 +137,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%)
|
||||
INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%)
|
||||
endif
|
||||
|
||||
CODEC_SRCS-yes += build/make/version.sh
|
||||
CODEC_SRCS-yes += build/make/rtcd.pl
|
||||
CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
|
||||
CODEC_SRCS-yes += vpx_ports/mem_ops.h
|
||||
CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
|
||||
CODEC_SRCS-yes += vpx_ports/vpx_once.h
|
||||
CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
|
||||
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
|
||||
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
|
||||
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
|
||||
endif
|
||||
CODEC_EXPORTS-yes += vpx/exports_com
|
||||
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
|
||||
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
|
||||
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
|
||||
|
||||
@@ -211,7 +215,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
|
||||
$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
|
||||
--src-path-bare="$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-yes += vpx.$(VCPROJ_SFX)
|
||||
PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
|
||||
|
||||
vpx.$(VCPROJ_SFX): vpx_config.asm
|
||||
vpx.$(VCPROJ_SFX): $(RTCD)
|
||||
@@ -219,39 +223,31 @@ vpx.$(VCPROJ_SFX): $(RTCD)
|
||||
endif
|
||||
else
|
||||
LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
|
||||
OBJS-yes += $(LIBVPX_OBJS)
|
||||
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
|
||||
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
|
||||
LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
|
||||
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
|
||||
|
||||
|
||||
BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
|
||||
|
||||
SO_VERSION_MAJOR := 2
|
||||
SO_VERSION_MINOR := 0
|
||||
SO_VERSION_PATCH := 0
|
||||
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
|
||||
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
|
||||
SHARED_LIB_SUF := .dylib
|
||||
EXPORT_FILE := libvpx.syms
|
||||
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
|
||||
libvpx.dylib )
|
||||
else
|
||||
ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
|
||||
LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
|
||||
SHARED_LIB_SUF := _dll.a
|
||||
EXPORT_FILE := libvpx.def
|
||||
LIBVPX_SO_SYMLINKS :=
|
||||
LIBVPX_SO_IMPLIB := libvpx_dll.a
|
||||
else
|
||||
LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
|
||||
SHARED_LIB_SUF := .so
|
||||
EXPORT_FILE := libvpx.ver
|
||||
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
|
||||
libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
|
||||
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
|
||||
endif
|
||||
endif
|
||||
|
||||
LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
|
||||
$(notdir $(LIBVPX_SO_SYMLINKS)) \
|
||||
$(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
|
||||
LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
|
||||
$(notdir $(LIBVPX_SO_SYMLINKS))
|
||||
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
|
||||
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
|
||||
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
|
||||
@@ -269,19 +265,6 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
|
||||
$(qexec)awk '{print "_"$$2}' $^ >$@
|
||||
CLEAN-OBJS += libvpx.syms
|
||||
|
||||
libvpx.def: $(call enabled,CODEC_EXPORTS)
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
|
||||
$(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
|
||||
$(qexec)echo "EXPORTS" >> $@
|
||||
$(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
|
||||
CLEAN-OBJS += libvpx.def
|
||||
|
||||
libvpx_dll.a: $(LIBVPX_SO)
|
||||
@echo " [IMPLIB] $@"
|
||||
$(qexec)emximp -o $@ $<
|
||||
CLEAN-OBJS += libvpx_dll.a
|
||||
|
||||
define libvpx_symlink_template
|
||||
$(1): $(2)
|
||||
@echo " [LN] $(2) $$@"
|
||||
@@ -297,12 +280,11 @@ $(eval $(call libvpx_symlink_template,\
|
||||
$(LIBVPX_SO)))
|
||||
|
||||
|
||||
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
|
||||
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
|
||||
INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))
|
||||
INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
|
||||
INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
|
||||
|
||||
|
||||
LIBS-yes += vpx.pc
|
||||
LIBS-$(BUILD_LIBVPX) += vpx.pc
|
||||
vpx.pc: config.mk libs.mk
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
|
||||
@@ -328,6 +310,9 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
|
||||
CLEAN-OBJS += vpx.pc
|
||||
endif
|
||||
|
||||
LIBS-$(LIPO_LIBVPX) += libvpx.a
|
||||
$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
|
||||
|
||||
#
|
||||
# Rule to make assembler configuration file from C configuration file
|
||||
#
|
||||
@@ -366,15 +351,11 @@ LIBVPX_TEST_DATA_PATH ?= .
|
||||
|
||||
include $(SRC_PATH_BARE)/test/test.mk
|
||||
LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
|
||||
LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
|
||||
LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
|
||||
LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
|
||||
$(call enabled,LIBVPX_TEST_DATA))
|
||||
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
|
||||
|
||||
TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
|
||||
TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
|
||||
TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
|
||||
|
||||
libvpx_test_srcs.txt:
|
||||
@echo " [CREATE] $@"
|
||||
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
|
||||
@@ -438,25 +419,7 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_
|
||||
|
||||
PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)
|
||||
|
||||
LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
|
||||
|
||||
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
|
||||
PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
|
||||
test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)$(GEN_VCPROJ) \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=test_intra_pred_speed \
|
||||
-D_VARIADIC_MAX=10 \
|
||||
--proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--src-path-bare="$(SRC_PATH_BARE)" \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
|
||||
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
|
||||
-L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
|
||||
endif # TEST_INTRA_PRED_SPEED
|
||||
LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
|
||||
endif
|
||||
else
|
||||
|
||||
@@ -467,54 +430,45 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
|
||||
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
|
||||
endif
|
||||
GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
|
||||
OBJS-yes += $(GTEST_OBJS)
|
||||
LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
|
||||
LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
|
||||
$(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)
|
||||
|
||||
LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
|
||||
OBJS-yes += $(LIBVPX_TEST_OBJS)
|
||||
BINS-yes += $(LIBVPX_TEST_BIN)
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
|
||||
BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
|
||||
|
||||
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
|
||||
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
|
||||
TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
|
||||
$(LIBVPX_TEST_BIN): $(TEST_LIBS)
|
||||
$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
|
||||
$(LIBVPX_TEST_OBJS) \
|
||||
-L. -lvpx -lgtest $(extralibs) -lm))
|
||||
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
|
||||
$(foreach bin,$(LIBVPX_TEST_BINS),\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(bin): \
|
||||
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
|
||||
$(LIBVPX_TEST_OBJS) \
|
||||
-L. -lvpx -lgtest $(extralibs) -lm)\
|
||||
)))\
|
||||
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
|
||||
|
||||
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
|
||||
$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
|
||||
OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
|
||||
BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
|
||||
|
||||
$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
|
||||
$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
|
||||
$(TEST_INTRA_PRED_SPEED_OBJS) \
|
||||
-L. -lvpx -lgtest $(extralibs) -lm))
|
||||
endif # TEST_INTRA_PRED_SPEED
|
||||
|
||||
endif # CONFIG_UNIT_TESTS
|
||||
endif
|
||||
|
||||
# Install test sources only if codec source is included
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
|
||||
$(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)
|
||||
|
||||
define test_shard_template
|
||||
test:: test_shard.$(1)
|
||||
test-no-data-check:: test_shard_ndc.$(1)
|
||||
test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
|
||||
test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
|
||||
@set -e; \
|
||||
export GTEST_SHARD_INDEX=$(1); \
|
||||
export GTEST_TOTAL_SHARDS=$(2); \
|
||||
$(LIBVPX_TEST_BIN)
|
||||
test_shard.$(1): testdata
|
||||
for t in $(LIBVPX_TEST_BINS); do \
|
||||
export GTEST_SHARD_INDEX=$(1); \
|
||||
export GTEST_TOTAL_SHARDS=$(2); \
|
||||
$$$$t; \
|
||||
done
|
||||
.PHONY: test_shard.$(1)
|
||||
endef
|
||||
|
||||
@@ -559,16 +513,15 @@ ifeq ($(CONFIG_MSVS),yes)
|
||||
# TODO(tomfinegan): Support running the debug versions of tools?
|
||||
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
|
||||
endif
|
||||
utiltest utiltest-no-data-check:
|
||||
utiltest: testdata
|
||||
$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
|
||||
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
|
||||
--bin-path $(TEST_BIN_PATH)
|
||||
$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
|
||||
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
|
||||
--bin-path $(TEST_BIN_PATH)
|
||||
utiltest: testdata
|
||||
else
|
||||
utiltest utiltest-no-data-check:
|
||||
utiltest:
|
||||
@echo Unit tests must be enabled to make the utiltest target.
|
||||
endif
|
||||
|
||||
@@ -586,12 +539,11 @@ ifeq ($(CONFIG_MSVS),yes)
|
||||
# TODO(tomfinegan): Support running the debug versions of tools?
|
||||
EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
|
||||
endif
|
||||
exampletest exampletest-no-data-check: examples
|
||||
exampletest: examples testdata
|
||||
$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
|
||||
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
|
||||
--bin-path $(EXAMPLES_BIN_PATH)
|
||||
exampletest: testdata
|
||||
else
|
||||
exampletest exampletest-no-data-check:
|
||||
exampletest:
|
||||
@echo Unit tests must be enabled to make the exampletest target.
|
||||
endif
|
||||
|
@@ -24,7 +24,7 @@
|
||||
|
||||
#include "md5_utils.h"
|
||||
|
||||
static void
|
||||
void
|
||||
byteSwap(UWORD32 *buf, unsigned words) {
|
||||
md5byte *p;
|
||||
|
||||
|
@@ -88,9 +88,6 @@ void update_rate_histogram(struct rate_hist *hist,
|
||||
if (now < cfg->rc_buf_initial_sz)
|
||||
return;
|
||||
|
||||
if (!cfg->rc_target_bitrate)
|
||||
return;
|
||||
|
||||
then = now;
|
||||
|
||||
/* Sum the size over the past rc_buf_sz ms */
|
||||
|
@@ -51,6 +51,4 @@ include $(LOCAL_PATH)/test/test.mk
|
||||
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
|
||||
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
|
||||
LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
|
||||
# some test files depend on *_rtcd.h, ensure they're generated first.
|
||||
$(eval $(call rtcd_dep_template))
|
||||
include $(BUILD_EXECUTABLE)
|
||||
|
@@ -1818,9 +1818,9 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
|
||||
#if HAVE_MSA
|
||||
const ConvolveFunctions convolve8_msa(
|
||||
vp9_convolve_copy_msa, vp9_convolve_avg_msa,
|
||||
vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
|
||||
vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
|
||||
vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
|
||||
vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_c,
|
||||
vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_c,
|
||||
vp9_convolve8_msa, vp9_convolve8_avg_c, 0);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
|
||||
make_tuple(4, 4, &convolve8_msa),
|
||||
|
@@ -14,7 +14,6 @@
|
||||
#include "test/i420_video_source.h"
|
||||
#include "test/util.h"
|
||||
#include "test/y4m_video_source.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -372,7 +371,9 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
|
||||
encoder->Control(VP9E_SET_SVC, 1);
|
||||
}
|
||||
vpx_svc_layer_id_t layer_id;
|
||||
#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
|
||||
layer_id.spatial_layer_id = 0;
|
||||
#endif
|
||||
frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
|
||||
layer_id.temporal_layer_id = SetLayerId(video->frame(),
|
||||
cfg_.ts_number_layers);
|
||||
@@ -564,8 +565,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
|
||||
cfg_.ts_rate_decimator[0] = 2;
|
||||
cfg_.ts_rate_decimator[1] = 1;
|
||||
|
||||
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
|
||||
if (deadline_ == VPX_DL_REALTIME)
|
||||
cfg_.g_error_resilient = 1;
|
||||
|
||||
@@ -575,14 +574,14 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
// 60-40 bitrate allocation for 2 temporal layers.
|
||||
cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
|
||||
cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
|
||||
<< " The datarate for the file is lower than target by too much, "
|
||||
"for layer: " << j;
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
|
||||
<< " The datarate for the file is greater than target by too much, "
|
||||
"for layer: " << j;
|
||||
}
|
||||
@@ -607,27 +606,25 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
|
||||
cfg_.ts_rate_decimator[1] = 2;
|
||||
cfg_.ts_rate_decimator[2] = 1;
|
||||
|
||||
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 200);
|
||||
for (int i = 200; i <= 800; i += 200) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
// 40-20-40 bitrate allocation for 3 temporal layers.
|
||||
cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
|
||||
cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
|
||||
// TODO(yaowu): Work out more stable rc control strategy and
|
||||
// Adjust the thresholds to be tighter than .75.
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
|
||||
<< " The datarate for the file is lower than target by too much, "
|
||||
"for layer: " << j;
|
||||
// TODO(yaowu): Work out more stable rc control strategy and
|
||||
// Adjust the thresholds to be tighter than 1.25.
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
|
||||
<< " The datarate for the file is greater than target by too much, "
|
||||
"for layer: " << j;
|
||||
}
|
||||
@@ -655,22 +652,20 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
|
||||
cfg_.ts_rate_decimator[1] = 2;
|
||||
cfg_.ts_rate_decimator[2] = 1;
|
||||
|
||||
cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 200);
|
||||
cfg_.rc_target_bitrate = 200;
|
||||
ResetModel();
|
||||
// 40-20-40 bitrate allocation for 3 temporal layers.
|
||||
cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
|
||||
cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
|
||||
cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
|
||||
ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
|
||||
<< " The datarate for the file is lower than target by too much, "
|
||||
"for layer: " << j;
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
|
||||
ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
|
||||
<< " The datarate for the file is greater than target by too much, "
|
||||
"for layer: " << j;
|
||||
// Expect some frame drops in this test: for this 200 frames test,
|
||||
@@ -742,178 +737,9 @@ TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
|
||||
}
|
||||
#endif // CONFIG_VP9_TEMPORAL_DENOISING
|
||||
|
||||
class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
|
||||
public:
|
||||
DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
|
||||
virtual ~DatarateOnePassCbrSvc() {}
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GET_PARAM(1));
|
||||
speed_setting_ = GET_PARAM(2);
|
||||
ResetModel();
|
||||
}
|
||||
virtual void ResetModel() {
|
||||
last_pts_ = 0;
|
||||
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
|
||||
frame_number_ = 0;
|
||||
first_drop_ = 0;
|
||||
bits_total_ = 0;
|
||||
duration_ = 0.0;
|
||||
}
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
}
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 0) {
|
||||
int i;
|
||||
for (i = 0; i < 2; ++i) {
|
||||
svc_params_.max_quantizers[i] = 63;
|
||||
svc_params_.min_quantizers[i] = 0;
|
||||
}
|
||||
svc_params_.scaling_factor_num[0] = 144;
|
||||
svc_params_.scaling_factor_den[0] = 288;
|
||||
svc_params_.scaling_factor_num[1] = 288;
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
encoder->Control(VP9E_SET_SVC, 1);
|
||||
encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
|
||||
encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
|
||||
encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
|
||||
encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
|
||||
}
|
||||
const vpx_rational_t tb = video->timebase();
|
||||
timebase_ = static_cast<double>(tb.num) / tb.den;
|
||||
duration_ = 0;
|
||||
}
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
|
||||
if (last_pts_ == 0)
|
||||
duration = 1;
|
||||
bits_in_buffer_model_ += static_cast<int64_t>(
|
||||
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
|
||||
const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
|
||||
? true: false;
|
||||
if (!key_frame) {
|
||||
ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
|
||||
<< pkt->data.frame.pts;
|
||||
}
|
||||
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
bits_total_ += frame_size_in_bits;
|
||||
if (!first_drop_ && duration > 1)
|
||||
first_drop_ = last_pts_ + 1;
|
||||
last_pts_ = pkt->data.frame.pts;
|
||||
bits_in_last_frame_ = frame_size_in_bits;
|
||||
++frame_number_;
|
||||
}
|
||||
virtual void EndPassHook(void) {
|
||||
if (bits_total_) {
|
||||
const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
|
||||
duration_ = (last_pts_ + 1) * timebase_;
|
||||
effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
|
||||
/ (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
|
||||
file_datarate_ = file_size_in_kb / duration_;
|
||||
}
|
||||
}
|
||||
vpx_codec_pts_t last_pts_;
|
||||
int64_t bits_in_buffer_model_;
|
||||
double timebase_;
|
||||
int frame_number_;
|
||||
vpx_codec_pts_t first_drop_;
|
||||
int64_t bits_total_;
|
||||
double duration_;
|
||||
double file_datarate_;
|
||||
double effective_datarate_;
|
||||
size_t bits_in_last_frame_;
|
||||
vpx_svc_extra_cfg_t svc_params_;
|
||||
int speed_setting_;
|
||||
};
|
||||
static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
|
||||
const vpx_svc_extra_cfg_t *svc_params,
|
||||
int spatial_layers,
|
||||
int temporal_layers,
|
||||
int temporal_layering_mode,
|
||||
unsigned int total_rate) {
|
||||
int sl, spatial_layer_target;
|
||||
float total = 0;
|
||||
float alloc_ratio[VPX_MAX_LAYERS] = {0};
|
||||
for (sl = 0; sl < spatial_layers; ++sl) {
|
||||
if (svc_params->scaling_factor_den[sl] > 0) {
|
||||
alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] *
|
||||
1.0 / svc_params->scaling_factor_den[sl]);
|
||||
total += alloc_ratio[sl];
|
||||
}
|
||||
}
|
||||
for (sl = 0; sl < spatial_layers; ++sl) {
|
||||
enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
|
||||
(unsigned int)(enc_cfg->rc_target_bitrate *
|
||||
alloc_ratio[sl] / total);
|
||||
const int index = sl * temporal_layers;
|
||||
if (temporal_layering_mode == 3) {
|
||||
enc_cfg->layer_target_bitrate[index] =
|
||||
spatial_layer_target >> 1;
|
||||
enc_cfg->layer_target_bitrate[index + 1] =
|
||||
(spatial_layer_target >> 1) + (spatial_layer_target >> 2);
|
||||
enc_cfg->layer_target_bitrate[index + 2] =
|
||||
spatial_layer_target;
|
||||
} else if (temporal_layering_mode == 2) {
|
||||
enc_cfg->layer_target_bitrate[index] =
|
||||
spatial_layer_target * 2 / 3;
|
||||
enc_cfg->layer_target_bitrate[index + 1] =
|
||||
spatial_layer_target;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
|
||||
// 3 temporal layers.
|
||||
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
cfg_.rc_buf_sz = 1000;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.rc_end_usage = VPX_CBR;
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
cfg_.ss_number_layers = 2;
|
||||
cfg_.ts_number_layers = 3;
|
||||
cfg_.ts_rate_decimator[0] = 4;
|
||||
cfg_.ts_rate_decimator[1] = 2;
|
||||
cfg_.ts_rate_decimator[2] = 1;
|
||||
cfg_.g_error_resilient = 1;
|
||||
cfg_.temporal_layering_mode = 3;
|
||||
svc_params_.scaling_factor_num[0] = 144;
|
||||
svc_params_.scaling_factor_den[0] = 288;
|
||||
svc_params_.scaling_factor_num[1] = 288;
|
||||
svc_params_.scaling_factor_den[1] = 288;
|
||||
// TODO(wonkap/marpan): No frame drop for now, we need to implement correct
|
||||
// frame dropping for SVC.
|
||||
cfg_.rc_dropframe_thresh = 0;
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 200);
|
||||
// TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
|
||||
// layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
|
||||
for (int i = 400; i <= 800; i += 200) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
|
||||
cfg_.ts_number_layers, cfg_.temporal_layering_mode,
|
||||
cfg_.rc_target_bitrate);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
|
||||
<< " The datarate for the file exceeds the target by too much!";
|
||||
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
|
||||
<< " The datarate for the file is lower than the target by too much!";
|
||||
}
|
||||
}
|
||||
|
||||
VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
|
||||
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
|
||||
::testing::Values(::libvpx_test::kOnePassGood,
|
||||
::libvpx_test::kRealTime),
|
||||
::testing::Range(2, 7));
|
||||
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
|
||||
::testing::Values(::libvpx_test::kRealTime),
|
||||
::testing::Range(5, 8));
|
||||
} // namespace
|
||||
|
@@ -20,10 +20,8 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vp9/common/vp9_scan.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
@@ -358,13 +356,13 @@ class Trans16x16TestBase {
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
@@ -418,9 +416,9 @@ class Trans16x16TestBase {
|
||||
void RunCoeffCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
@@ -439,13 +437,15 @@ class Trans16x16TestBase {
|
||||
void RunMemCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
|
||||
}
|
||||
if (i == 0) {
|
||||
@@ -472,19 +472,24 @@ class Trans16x16TestBase {
|
||||
void RunQuantCheck(int dc_thred, int ac_thred) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
if (bit_depth_ == VPX_BITS_8)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
else
|
||||
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
|
||||
}
|
||||
if (i == 0)
|
||||
@@ -534,13 +539,13 @@ class Trans16x16TestBase {
|
||||
void RunInvAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -594,12 +599,12 @@ class Trans16x16TestBase {
|
||||
const int count_test_block = 10000;
|
||||
const int eob = 10;
|
||||
const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -928,20 +933,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&idct16x16_12,
|
||||
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
|
||||
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct16x16_msa,
|
||||
&vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
|
||||
VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
@@ -23,7 +23,6 @@
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
@@ -120,13 +119,13 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -185,9 +184,9 @@ TEST_P(Trans32x32Test, CoeffCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
@@ -213,13 +212,15 @@ TEST_P(Trans32x32Test, MemCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 2000;
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
|
||||
input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
|
||||
}
|
||||
if (i == 0) {
|
||||
@@ -256,13 +257,13 @@ TEST_P(Trans32x32Test, MemCheck) {
|
||||
TEST_P(Trans32x32Test, InverseAccuracy) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -381,14 +382,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fdct32x32_rd_avx2,
|
||||
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
|
||||
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct32x32_msa,
|
||||
&vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
|
||||
make_tuple(&vp9_fdct32x32_rd_msa,
|
||||
&vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
@@ -133,10 +133,6 @@ class Encoder {
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
|
||||
void Control(int ctrl_id, vpx_active_map_t *arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
|
@@ -22,7 +22,6 @@
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
@@ -103,13 +102,13 @@ class Trans4x4TestBase {
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
@@ -143,7 +142,6 @@ class Trans4x4TestBase {
|
||||
const uint32_t diff =
|
||||
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
|
||||
#else
|
||||
ASSERT_EQ(VPX_BITS_8, bit_depth_);
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
#endif
|
||||
const uint32_t error = diff * diff;
|
||||
@@ -165,9 +163,9 @@ class Trans4x4TestBase {
|
||||
void RunCoeffCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 5000;
|
||||
DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
@@ -186,13 +184,15 @@ class Trans4x4TestBase {
|
||||
void RunMemCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 5000;
|
||||
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-mask_, mask_].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
|
||||
}
|
||||
if (i == 0) {
|
||||
@@ -219,13 +219,13 @@ class Trans4x4TestBase {
|
||||
void RunInvAccuracyCheck(int limit) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -536,18 +536,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
|
||||
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans4x4DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Trans4x4HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
@@ -20,32 +20,11 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vp9/common/vp9_scan.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
|
||||
const int kNumCoeffs = 64;
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
|
||||
const int kSignBiasMaxDiff255 = 1500;
|
||||
const int kSignBiasMaxDiff15 = 10000;
|
||||
|
||||
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
|
||||
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
|
||||
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
|
||||
int tx_type);
|
||||
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
|
||||
int tx_type);
|
||||
|
||||
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
|
||||
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
|
||||
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
|
||||
|
||||
void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 8; k++) {
|
||||
@@ -80,6 +59,23 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
|
||||
}
|
||||
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
|
||||
const int kSignBiasMaxDiff255 = 1500;
|
||||
const int kSignBiasMaxDiff15 = 10000;
|
||||
|
||||
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
|
||||
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
|
||||
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
|
||||
int tx_type);
|
||||
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
|
||||
int tx_type);
|
||||
|
||||
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
|
||||
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
|
||||
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
|
||||
|
||||
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
|
||||
vp9_fdct8x8_c(in, out, stride);
|
||||
@@ -143,8 +139,8 @@ class FwdTrans8x8TestBase {
|
||||
|
||||
void RunSignBiasCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
|
||||
@@ -214,13 +210,13 @@ class FwdTrans8x8TestBase {
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[64]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[64]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[64]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[64]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -291,14 +287,14 @@ class FwdTrans8x8TestBase {
|
||||
int total_error = 0;
|
||||
int total_coeff_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[64]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[64]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[64]);
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[64]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -380,13 +376,13 @@ class FwdTrans8x8TestBase {
|
||||
void RunInvAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -438,9 +434,9 @@ class FwdTrans8x8TestBase {
|
||||
void RunFwdAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
@@ -468,12 +464,12 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 10000;
|
||||
const int eob = 12;
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
|
||||
#endif
|
||||
const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
|
||||
|
||||
@@ -781,18 +777,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
|
||||
VPX_BITS_8)));
|
||||
#endif
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, FwdTrans8x8DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, FwdTrans8x8HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
@@ -137,20 +137,6 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_NEON_ASM
|
||||
|
||||
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh,
|
||||
int count) {
|
||||
vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
|
||||
}
|
||||
|
||||
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh,
|
||||
int count) {
|
||||
vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
|
||||
}
|
||||
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||
|
||||
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
|
||||
public:
|
||||
virtual ~Loop8Test6Param() {}
|
||||
@@ -196,11 +182,11 @@ TEST_P(Loop8Test6Param, OperationCheck) {
|
||||
const int count_test_block = number_of_iterations;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
int32_t bd = bit_depth_;
|
||||
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
|
||||
#else
|
||||
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
@@ -281,11 +267,11 @@ TEST_P(Loop8Test6Param, ValueCheck) {
|
||||
const int count_test_block = number_of_iterations;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const int32_t bd = bit_depth_;
|
||||
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
|
||||
#else
|
||||
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
@@ -352,11 +338,11 @@ TEST_P(Loop8Test9Param, OperationCheck) {
|
||||
const int count_test_block = number_of_iterations;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const int32_t bd = bit_depth_;
|
||||
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
|
||||
#else
|
||||
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
@@ -454,11 +440,11 @@ TEST_P(Loop8Test9Param, ValueCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = number_of_iterations;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
|
||||
#else
|
||||
DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
@@ -690,27 +676,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Loop8Test6Param,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
|
||||
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
|
||||
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
|
||||
make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
|
||||
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, Loop8Test9Param,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_lpf_horizontal_4_dual_msa,
|
||||
&vp9_lpf_horizontal_4_dual_c, 8),
|
||||
make_tuple(&vp9_lpf_horizontal_8_dual_msa,
|
||||
&vp9_lpf_horizontal_8_dual_c, 8),
|
||||
make_tuple(&vp9_lpf_vertical_4_dual_msa,
|
||||
&vp9_lpf_vertical_4_dual_c, 8),
|
||||
make_tuple(&vp9_lpf_vertical_8_dual_msa,
|
||||
&vp9_lpf_vertical_8_dual_c, 8)));
|
||||
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
|
||||
|
||||
} // namespace
|
||||
|
@@ -42,10 +42,6 @@ class MD5 {
|
||||
}
|
||||
}
|
||||
|
||||
void Add(const uint8_t *data, size_t size) {
|
||||
MD5Update(&md5_, data, static_cast<uint32_t>(size));
|
||||
}
|
||||
|
||||
const char *Get(void) {
|
||||
static const char hex[16] = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7',
|
||||
|
@@ -74,16 +74,16 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
|
||||
FAIL() << "Wrong Size!";
|
||||
break;
|
||||
}
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
|
||||
|
||||
const int count_test_block = 1000;
|
||||
const int block_size = size * size;
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);
|
||||
|
||||
int max_error = 0;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
@@ -153,10 +153,10 @@ TEST_P(PartialIDctTest, ResultsMatch) {
|
||||
FAIL() << "Wrong Size!";
|
||||
break;
|
||||
}
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
|
||||
const int count_test_block = 1000;
|
||||
const int max_coeff = 32766 / 4;
|
||||
const int block_size = size * size;
|
||||
@@ -305,38 +305,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TX_8X8, 12)));
|
||||
#endif
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, PartialIDctTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fdct32x32_c,
|
||||
&vp9_idct32x32_1024_add_c,
|
||||
&vp9_idct32x32_34_add_msa,
|
||||
TX_32X32, 34),
|
||||
make_tuple(&vp9_fdct32x32_c,
|
||||
&vp9_idct32x32_1024_add_c,
|
||||
&vp9_idct32x32_1_add_msa,
|
||||
TX_32X32, 1),
|
||||
make_tuple(&vp9_fdct16x16_c,
|
||||
&vp9_idct16x16_256_add_c,
|
||||
&vp9_idct16x16_10_add_msa,
|
||||
TX_16X16, 10),
|
||||
make_tuple(&vp9_fdct16x16_c,
|
||||
&vp9_idct16x16_256_add_c,
|
||||
&vp9_idct16x16_1_add_msa,
|
||||
TX_16X16, 1),
|
||||
make_tuple(&vp9_fdct8x8_c,
|
||||
&vp9_idct8x8_64_add_c,
|
||||
&vp9_idct8x8_12_add_msa,
|
||||
TX_8X8, 10),
|
||||
make_tuple(&vp9_fdct8x8_c,
|
||||
&vp9_idct8x8_64_add_c,
|
||||
&vp9_idct8x8_1_add_msa,
|
||||
TX_8X8, 1),
|
||||
make_tuple(&vp9_fdct4x4_c,
|
||||
&vp9_idct4x4_16_add_c,
|
||||
&vp9_idct4x4_1_add_msa,
|
||||
TX_4X4, 1)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
} // namespace
|
||||
|
1422
test/sad_test.cc
1422
test/sad_test.cc
File diff suppressed because it is too large
Load Diff
@@ -453,7 +453,6 @@ TEST_F(SvcTest, OnePassEncodeOneFrame) {
|
||||
|
||||
TEST_F(SvcTest, OnePassEncodeThreeFrames) {
|
||||
codec_enc_.g_pass = VPX_RC_ONE_PASS;
|
||||
codec_enc_.g_lag_in_frames = 0;
|
||||
vpx_fixed_buf outputs[3];
|
||||
memset(&outputs[0], 0, sizeof(outputs));
|
||||
Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);
|
||||
|
@@ -12,7 +12,6 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
|
||||
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
|
||||
|
1489
test/test-data.sha1
1489
test/test-data.sha1
File diff suppressed because it is too large
Load Diff
@@ -66,7 +66,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc
|
||||
endif
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += decode_api_test.cc
|
||||
@@ -164,9 +163,6 @@ endif # VP9
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
|
||||
|
||||
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) := test_intra_pred_speed.cc
|
||||
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) += ../md5_utils.h ../md5_utils.c
|
||||
|
||||
endif # CONFIG_SHARED
|
||||
|
||||
include $(SRC_PATH_BARE)/test/test-data.mk
|
||||
|
@@ -1,384 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
// Test and time VP9 intra-predictor functions
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/md5_helper.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_ports/vpx_timer.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
const int kNumVp9IntraPredFuncs = 13;
|
||||
const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
|
||||
"DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", "H_PRED",
|
||||
"D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", "D207_PRED", "D63_PRED",
|
||||
"TM_PRED"
|
||||
};
|
||||
|
||||
void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
|
||||
const char *const pred_func_names[], int num_funcs,
|
||||
const char *const signatures[], int block_size,
|
||||
int num_pixels_per_test) {
|
||||
libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
|
||||
const int kBPS = 32;
|
||||
const int kTotalPixels = 32 * kBPS;
|
||||
DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
|
||||
DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
|
||||
uint8_t *const above = above_mem + 16;
|
||||
for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
|
||||
for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
|
||||
for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
|
||||
const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
|
||||
|
||||
// some code assumes the top row has been extended:
|
||||
// d45/d63 C-code, for instance, but not the assembly.
|
||||
// TODO(jzern): this style of extension isn't strictly necessary.
|
||||
ASSERT_LE(block_size, kBPS);
|
||||
memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);
|
||||
|
||||
for (int k = 0; k < num_funcs; ++k) {
|
||||
if (pred_funcs[k] == NULL) continue;
|
||||
memcpy(src, ref_src, sizeof(src));
|
||||
vpx_usec_timer timer;
|
||||
vpx_usec_timer_start(&timer);
|
||||
for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
|
||||
pred_funcs[k](src, kBPS, above, left);
|
||||
}
|
||||
libvpx_test::ClearSystemState();
|
||||
vpx_usec_timer_mark(&timer);
|
||||
const int elapsed_time =
|
||||
static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
|
||||
libvpx_test::MD5 md5;
|
||||
md5.Add(src, sizeof(src));
|
||||
printf("Mode %s[%12s]: %5d ms MD5: %s\n", name, pred_func_names[k],
|
||||
elapsed_time, md5.Get());
|
||||
EXPECT_STREQ(signatures[k], md5.Get());
|
||||
}
|
||||
}
|
||||
|
||||
void TestIntraPred4(VpxPredFunc const *pred_funcs) {
|
||||
static const int kNumVp9IntraFuncs = 13;
|
||||
static const char *const kSignatures[kNumVp9IntraFuncs] = {
|
||||
"4334156168b34ab599d9b5b30f522fe9",
|
||||
"bc4649d5ba47c7ff178d92e475960fb0",
|
||||
"8d316e5933326dcac24e1064794b5d12",
|
||||
"a27270fed024eafd762c95de85f4da51",
|
||||
"c33dff000d4256c2b8f3bf9e9bab14d2",
|
||||
"44d8cddc2ad8f79b8ed3306051722b4f",
|
||||
"eb54839b2bad6699d8946f01ec041cd0",
|
||||
"ecb0d56ae5f677ea45127ce9d5c058e4",
|
||||
"0b7936841f6813da818275944895b574",
|
||||
"9117972ef64f91a58ff73e1731c81db2",
|
||||
"c56d5e8c729e46825f46dd5d3b5d508a",
|
||||
"c0889e2039bcf7bcb5d2f33cdca69adc",
|
||||
"309a618577b27c648f9c5ee45252bc8f",
|
||||
};
|
||||
TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
|
||||
kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
|
||||
}
|
||||
|
||||
void TestIntraPred8(VpxPredFunc const *pred_funcs) {
|
||||
static const int kNumVp9IntraFuncs = 13;
|
||||
static const char *const kSignatures[kNumVp9IntraFuncs] = {
|
||||
"7694ddeeefed887faf9d339d18850928",
|
||||
"7d726b1213591b99f736be6dec65065b",
|
||||
"19c5711281357a485591aaf9c96c0a67",
|
||||
"ba6b66877a089e71cd938e3b8c40caac",
|
||||
"802440c93317e0f8ba93fab02ef74265",
|
||||
"9e09a47a15deb0b9d8372824f9805080",
|
||||
"b7c2d8c662268c0c427da412d7b0311d",
|
||||
"78339c1c60bb1d67d248ab8c4da08b7f",
|
||||
"5c97d70f7d47de1882a6cd86c165c8a9",
|
||||
"8182bf60688b42205acd95e59e967157",
|
||||
"08323400005a297f16d7e57e7fe1eaac",
|
||||
"95f7bfc262329a5849eda66d8f7c68ce",
|
||||
"815b75c8e0d91cc1ae766dc5d3e445a3",
|
||||
};
|
||||
TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
|
||||
kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
|
||||
}
|
||||
|
||||
void TestIntraPred16(VpxPredFunc const *pred_funcs) {
|
||||
static const int kNumVp9IntraFuncs = 13;
|
||||
static const char *const kSignatures[kNumVp9IntraFuncs] = {
|
||||
"b40dbb555d5d16a043dc361e6694fe53",
|
||||
"fb08118cee3b6405d64c1fd68be878c6",
|
||||
"6c190f341475c837cc38c2e566b64875",
|
||||
"db5c34ccbe2c7f595d9b08b0dc2c698c",
|
||||
"a62cbfd153a1f0b9fed13e62b8408a7a",
|
||||
"143df5b4c89335e281103f610f5052e4",
|
||||
"d87feb124107cdf2cfb147655aa0bb3c",
|
||||
"7841fae7d4d47b519322e6a03eeed9dc",
|
||||
"f6ebed3f71cbcf8d6d0516ce87e11093",
|
||||
"3cc480297dbfeed01a1c2d78dd03d0c5",
|
||||
"b9f69fa6532b372c545397dcb78ef311",
|
||||
"a8fe1c70432f09d0c20c67bdb6432c4d",
|
||||
"b8a41aa968ec108af447af4217cba91b",
|
||||
};
|
||||
TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
|
||||
kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
|
||||
}
|
||||
|
||||
void TestIntraPred32(VpxPredFunc const *pred_funcs) {
|
||||
static const int kNumVp9IntraFuncs = 13;
|
||||
static const char *const kSignatures[kNumVp9IntraFuncs] = {
|
||||
"558541656d84f9ae7896db655826febe",
|
||||
"b3587a1f9a01495fa38c8cd3c8e2a1bf",
|
||||
"4c6501e64f25aacc55a2a16c7e8f0255",
|
||||
"b3b01379ba08916ef6b1b35f7d9ad51c",
|
||||
"0f1eb38b6cbddb3d496199ef9f329071",
|
||||
"911c06efb9ed1c3b4c104b232b55812f",
|
||||
"9225beb0ddfa7a1d24eaa1be430a6654",
|
||||
"0a6d584a44f8db9aa7ade2e2fdb9fc9e",
|
||||
"b01c9076525216925f3456f034fb6eee",
|
||||
"d267e20ad9e5cd2915d1a47254d3d149",
|
||||
"ed012a4a5da71f36c2393023184a0e59",
|
||||
"f162b51ed618d28b936974cff4391da5",
|
||||
"9e1370c6d42e08d357d9612c93a71cfc",
|
||||
};
|
||||
TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
|
||||
kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
|
||||
// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
|
||||
#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
|
||||
d45, d135, d117, d153, d207, d63, tm) \
|
||||
TEST(arch, test_func) { \
|
||||
static const VpxPredFunc vp9_intra_pred[] = { \
|
||||
dc, dc_left, dc_top, dc_128, v, h, d45, \
|
||||
d135, d117, d153, d207, d63, tm}; \
|
||||
test_func(vp9_intra_pred); \
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 4x4
|
||||
|
||||
INTRA_PRED_TEST(C, TestIntraPred4, vp9_dc_predictor_4x4_c,
|
||||
vp9_dc_left_predictor_4x4_c, vp9_dc_top_predictor_4x4_c,
|
||||
vp9_dc_128_predictor_4x4_c, vp9_v_predictor_4x4_c,
|
||||
vp9_h_predictor_4x4_c, vp9_d45_predictor_4x4_c,
|
||||
vp9_d135_predictor_4x4_c, vp9_d117_predictor_4x4_c,
|
||||
vp9_d153_predictor_4x4_c, vp9_d207_predictor_4x4_c,
|
||||
vp9_d63_predictor_4x4_c, vp9_tm_predictor_4x4_c)
|
||||
|
||||
#if HAVE_SSE
|
||||
INTRA_PRED_TEST(SSE, TestIntraPred4, vp9_dc_predictor_4x4_sse,
|
||||
vp9_dc_left_predictor_4x4_sse, vp9_dc_top_predictor_4x4_sse,
|
||||
vp9_dc_128_predictor_4x4_sse, vp9_v_predictor_4x4_sse, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_sse)
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#if HAVE_SSSE3
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_h_predictor_4x4_ssse3, vp9_d45_predictor_4x4_ssse3, NULL,
|
||||
NULL, vp9_d153_predictor_4x4_ssse3,
|
||||
vp9_d207_predictor_4x4_ssse3, vp9_d63_predictor_4x4_ssse3, NULL)
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
#if HAVE_DSPR2
|
||||
INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
|
||||
NULL, NULL, vp9_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, vp9_tm_predictor_4x4_dspr2)
|
||||
#endif // HAVE_DSPR2
|
||||
|
||||
#if HAVE_NEON
|
||||
INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon,
|
||||
vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon,
|
||||
vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon,
|
||||
vp9_h_predictor_4x4_neon, vp9_d45_predictor_4x4_neon,
|
||||
vp9_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
|
||||
vp9_tm_predictor_4x4_neon)
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA
|
||||
INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
|
||||
vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
|
||||
vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
|
||||
vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vp9_tm_predictor_4x4_msa)
|
||||
#endif // HAVE_MSA
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 8x8
|
||||
|
||||
INTRA_PRED_TEST(C, TestIntraPred8, vp9_dc_predictor_8x8_c,
|
||||
vp9_dc_left_predictor_8x8_c, vp9_dc_top_predictor_8x8_c,
|
||||
vp9_dc_128_predictor_8x8_c, vp9_v_predictor_8x8_c,
|
||||
vp9_h_predictor_8x8_c, vp9_d45_predictor_8x8_c,
|
||||
vp9_d135_predictor_8x8_c, vp9_d117_predictor_8x8_c,
|
||||
vp9_d153_predictor_8x8_c, vp9_d207_predictor_8x8_c,
|
||||
vp9_d63_predictor_8x8_c, vp9_tm_predictor_8x8_c)
|
||||
|
||||
#if HAVE_SSE
|
||||
INTRA_PRED_TEST(SSE, TestIntraPred8, vp9_dc_predictor_8x8_sse,
|
||||
vp9_dc_left_predictor_8x8_sse, vp9_dc_top_predictor_8x8_sse,
|
||||
vp9_dc_128_predictor_8x8_sse, vp9_v_predictor_8x8_sse, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#if HAVE_SSE2
|
||||
INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_sse2)
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_h_predictor_8x8_ssse3, vp9_d45_predictor_8x8_ssse3, NULL,
|
||||
NULL, vp9_d153_predictor_8x8_ssse3,
|
||||
vp9_d207_predictor_8x8_ssse3, vp9_d63_predictor_8x8_ssse3, NULL)
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
#if HAVE_DSPR2
|
||||
INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
|
||||
NULL, NULL, vp9_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, vp9_tm_predictor_8x8_c)
|
||||
#endif // HAVE_DSPR2
|
||||
|
||||
#if HAVE_NEON
|
||||
INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon,
|
||||
vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon,
|
||||
vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon,
|
||||
vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL,
|
||||
NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon)
|
||||
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA
|
||||
INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
|
||||
vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
|
||||
vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
|
||||
vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vp9_tm_predictor_8x8_msa)
|
||||
#endif // HAVE_MSA
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 16x16
|
||||
|
||||
INTRA_PRED_TEST(C, TestIntraPred16, vp9_dc_predictor_16x16_c,
|
||||
vp9_dc_left_predictor_16x16_c, vp9_dc_top_predictor_16x16_c,
|
||||
vp9_dc_128_predictor_16x16_c, vp9_v_predictor_16x16_c,
|
||||
vp9_h_predictor_16x16_c, vp9_d45_predictor_16x16_c,
|
||||
vp9_d135_predictor_16x16_c, vp9_d117_predictor_16x16_c,
|
||||
vp9_d153_predictor_16x16_c, vp9_d207_predictor_16x16_c,
|
||||
vp9_d63_predictor_16x16_c, vp9_tm_predictor_16x16_c)
|
||||
|
||||
#if HAVE_SSE2
|
||||
INTRA_PRED_TEST(SSE2, TestIntraPred16, vp9_dc_predictor_16x16_sse2,
|
||||
vp9_dc_left_predictor_16x16_sse2,
|
||||
vp9_dc_top_predictor_16x16_sse2,
|
||||
vp9_dc_128_predictor_16x16_sse2, vp9_v_predictor_16x16_sse2,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_tm_predictor_16x16_sse2)
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_h_predictor_16x16_ssse3, vp9_d45_predictor_16x16_ssse3,
|
||||
NULL, NULL, vp9_d153_predictor_16x16_ssse3,
|
||||
vp9_d207_predictor_16x16_ssse3, vp9_d63_predictor_16x16_ssse3,
|
||||
NULL)
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
#if HAVE_DSPR2
|
||||
INTRA_PRED_TEST(DSPR2, TestIntraPred16, vp9_dc_predictor_16x16_dspr2, NULL,
|
||||
NULL, NULL, NULL, vp9_h_predictor_16x16_dspr2, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL)
|
||||
#endif // HAVE_DSPR2
|
||||
|
||||
#if HAVE_NEON
|
||||
INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
|
||||
vp9_dc_left_predictor_16x16_neon,
|
||||
vp9_dc_top_predictor_16x16_neon,
|
||||
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
|
||||
vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
|
||||
NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA
|
||||
INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
|
||||
vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
|
||||
vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
|
||||
vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vp9_tm_predictor_16x16_msa)
|
||||
#endif // HAVE_MSA
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// 32x32
|
||||
|
||||
INTRA_PRED_TEST(C, TestIntraPred32, vp9_dc_predictor_32x32_c,
|
||||
vp9_dc_left_predictor_32x32_c, vp9_dc_top_predictor_32x32_c,
|
||||
vp9_dc_128_predictor_32x32_c, vp9_v_predictor_32x32_c,
|
||||
vp9_h_predictor_32x32_c, vp9_d45_predictor_32x32_c,
|
||||
vp9_d135_predictor_32x32_c, vp9_d117_predictor_32x32_c,
|
||||
vp9_d153_predictor_32x32_c, vp9_d207_predictor_32x32_c,
|
||||
vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
|
||||
|
||||
#if HAVE_SSE2
|
||||
#if ARCH_X86_64
|
||||
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
|
||||
vp9_dc_left_predictor_32x32_sse2,
|
||||
vp9_dc_top_predictor_32x32_sse2,
|
||||
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_tm_predictor_32x32_sse2)
|
||||
#else
|
||||
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
|
||||
vp9_dc_left_predictor_32x32_sse2,
|
||||
vp9_dc_top_predictor_32x32_sse2,
|
||||
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
#endif // ARCH_X86_64
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_h_predictor_32x32_ssse3, vp9_d45_predictor_32x32_ssse3,
|
||||
NULL, NULL, vp9_d153_predictor_32x32_ssse3,
|
||||
vp9_d207_predictor_32x32_ssse3, vp9_d63_predictor_32x32_ssse3,
|
||||
NULL)
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
#if HAVE_NEON
|
||||
INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
|
||||
vp9_dc_left_predictor_32x32_neon,
|
||||
vp9_dc_top_predictor_32x32_neon,
|
||||
vp9_dc_128_predictor_32x32_neon, vp9_v_predictor_32x32_neon,
|
||||
vp9_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
vp9_tm_predictor_32x32_neon)
|
||||
#endif // HAVE_NEON
|
||||
|
||||
#if HAVE_MSA
|
||||
INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
|
||||
vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
|
||||
vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
|
||||
vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vp9_tm_predictor_32x32_msa)
|
||||
#endif // HAVE_MSA
|
||||
|
||||
#include "test/test_libvpx.cc"
|
@@ -19,7 +19,6 @@ extern void vp8_rtcd();
|
||||
#if CONFIG_VP9
|
||||
extern void vp9_rtcd();
|
||||
#endif // CONFIG_VP9
|
||||
extern void vpx_dsp_rtcd();
|
||||
extern void vpx_scale_rtcd();
|
||||
}
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
@@ -38,21 +37,21 @@ int main(int argc, char **argv) {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
const int simd_caps = x86_simd_caps();
|
||||
if (!(simd_caps & HAS_MMX))
|
||||
append_negative_gtest_filter(":MMX.*:MMX/*");
|
||||
append_negative_gtest_filter(":MMX/*");
|
||||
if (!(simd_caps & HAS_SSE))
|
||||
append_negative_gtest_filter(":SSE.*:SSE/*");
|
||||
append_negative_gtest_filter(":SSE/*");
|
||||
if (!(simd_caps & HAS_SSE2))
|
||||
append_negative_gtest_filter(":SSE2.*:SSE2/*");
|
||||
append_negative_gtest_filter(":SSE2/*");
|
||||
if (!(simd_caps & HAS_SSE3))
|
||||
append_negative_gtest_filter(":SSE3.*:SSE3/*");
|
||||
append_negative_gtest_filter(":SSE3/*");
|
||||
if (!(simd_caps & HAS_SSSE3))
|
||||
append_negative_gtest_filter(":SSSE3.*:SSSE3/*");
|
||||
append_negative_gtest_filter(":SSSE3/*");
|
||||
if (!(simd_caps & HAS_SSE4_1))
|
||||
append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*");
|
||||
append_negative_gtest_filter(":SSE4_1/*");
|
||||
if (!(simd_caps & HAS_AVX))
|
||||
append_negative_gtest_filter(":AVX.*:AVX/*");
|
||||
append_negative_gtest_filter(":AVX/*");
|
||||
if (!(simd_caps & HAS_AVX2))
|
||||
append_negative_gtest_filter(":AVX2.*:AVX2/*");
|
||||
append_negative_gtest_filter(":AVX2/*");
|
||||
#endif
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
@@ -65,7 +64,6 @@ int main(int argc, char **argv) {
|
||||
#if CONFIG_VP9
|
||||
vp9_rtcd();
|
||||
#endif // CONFIG_VP9
|
||||
vpx_dsp_rtcd();
|
||||
vpx_scale_rtcd();
|
||||
#endif // !CONFIG_SHARED
|
||||
|
||||
|
@@ -165,10 +165,7 @@ const char *const kVP9TestVectors[] = {
|
||||
"vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
|
||||
"vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
|
||||
"vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
|
||||
#if !CONFIG_SIZE_LIMIT || \
|
||||
(DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120)
|
||||
"vp90-2-13-largescaling.webm",
|
||||
#endif
|
||||
"vp90-2-14-resize-fp-tiles-1-16.webm",
|
||||
"vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
|
||||
"vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
|
||||
|
@@ -408,9 +408,6 @@ YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
|
||||
YUV_RAW_INPUT_WIDTH=352
|
||||
YUV_RAW_INPUT_HEIGHT=288
|
||||
|
||||
Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
|
||||
Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
|
||||
|
||||
# Setup a trap function to clean up after tests complete.
|
||||
trap cleanup EXIT
|
||||
|
||||
@@ -432,7 +429,6 @@ vlog "$(basename "${0%.*}") test configuration:
|
||||
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
|
||||
YUV_RAW_INPUT=${YUV_RAW_INPUT}
|
||||
YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
|
||||
YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
|
||||
Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
|
||||
YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
|
||||
|
||||
fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -52,13 +52,13 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
|
||||
// mc_avg_block is the denoised reference block,
|
||||
// avg_block_c is the denoised result from C code,
|
||||
// avg_block_sse2 is the denoised result from SSE2 code.
|
||||
DECLARE_ALIGNED(16, uint8_t, sig_block_c[kNumPixels]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_c, kNumPixels);
|
||||
// Since in VP8 denoiser, the source signal will be changed,
|
||||
// we need another copy of the source signal as the input of sse2 code.
|
||||
DECLARE_ALIGNED(16, uint8_t, sig_block_sse2[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_sse2, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Generate random motion magnitude, 20% of which exceed the threshold.
|
||||
|
@@ -121,79 +121,6 @@ class AverageTest
|
||||
}
|
||||
};
|
||||
|
||||
typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
|
||||
const int ref_stride, const int height);
|
||||
|
||||
typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
|
||||
|
||||
class IntProRowTest
|
||||
: public AverageTestBase,
|
||||
public ::testing::WithParamInterface<IntProRowParam> {
|
||||
public:
|
||||
IntProRowTest()
|
||||
: AverageTestBase(16, GET_PARAM(0)),
|
||||
hbuf_asm_(NULL),
|
||||
hbuf_c_(NULL) {
|
||||
asm_func_ = GET_PARAM(1);
|
||||
c_func_ = GET_PARAM(2);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
hbuf_asm_ = reinterpret_cast<int16_t*>(
|
||||
vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
|
||||
hbuf_c_ = reinterpret_cast<int16_t*>(
|
||||
vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
vpx_free(hbuf_c_);
|
||||
hbuf_c_ = NULL;
|
||||
vpx_free(hbuf_asm_);
|
||||
hbuf_asm_ = NULL;
|
||||
}
|
||||
|
||||
void RunComparison() {
|
||||
ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
|
||||
ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
|
||||
EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
|
||||
<< "Output mismatch";
|
||||
}
|
||||
|
||||
private:
|
||||
IntProRowFunc asm_func_;
|
||||
IntProRowFunc c_func_;
|
||||
int16_t *hbuf_asm_;
|
||||
int16_t *hbuf_c_;
|
||||
};
|
||||
|
||||
typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
|
||||
|
||||
typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
|
||||
|
||||
class IntProColTest
|
||||
: public AverageTestBase,
|
||||
public ::testing::WithParamInterface<IntProColParam> {
|
||||
public:
|
||||
IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
|
||||
asm_func_ = GET_PARAM(1);
|
||||
c_func_ = GET_PARAM(2);
|
||||
}
|
||||
|
||||
protected:
|
||||
void RunComparison() {
|
||||
ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
|
||||
ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
|
||||
EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
|
||||
}
|
||||
|
||||
private:
|
||||
IntProColFunc asm_func_;
|
||||
IntProColFunc c_func_;
|
||||
int16_t sum_asm_;
|
||||
int16_t sum_c_;
|
||||
};
|
||||
|
||||
|
||||
uint8_t* AverageTestBase::source_data_ = NULL;
|
||||
|
||||
@@ -216,36 +143,6 @@ TEST_P(AverageTest, Random) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(IntProRowTest, MinValue) {
|
||||
FillConstant(0);
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
TEST_P(IntProRowTest, MaxValue) {
|
||||
FillConstant(255);
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
TEST_P(IntProRowTest, Random) {
|
||||
FillRandom();
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
TEST_P(IntProColTest, MinValue) {
|
||||
FillConstant(0);
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
TEST_P(IntProColTest, MaxValue) {
|
||||
FillConstant(255);
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
TEST_P(IntProColTest, Random) {
|
||||
FillRandom();
|
||||
RunComparison();
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
@@ -254,6 +151,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
|
||||
make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
|
||||
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, AverageTest,
|
||||
@@ -265,17 +163,6 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
|
||||
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, IntProRowTest, ::testing::Values(
|
||||
make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
|
||||
make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
|
||||
make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, IntProColTest, ::testing::Values(
|
||||
make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
@@ -288,16 +175,4 @@ INSTANTIATE_TEST_CASE_P(
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_MSA
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MSA, AverageTest,
|
||||
::testing::Values(
|
||||
make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa),
|
||||
make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa),
|
||||
make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa),
|
||||
make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa),
|
||||
make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa),
|
||||
make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa)));
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
@@ -52,10 +52,10 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
|
||||
// mc_avg_block is the denoised reference block,
|
||||
// avg_block_c is the denoised result from C code,
|
||||
// avg_block_sse2 is the denoised result from SSE2 code.
|
||||
DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
|
||||
DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Generate random motion magnitude, 20% of which exceed the threshold.
|
||||
|
@@ -21,7 +21,6 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -58,8 +57,8 @@ class ErrorBlockTest
|
||||
|
||||
TEST_P(ErrorBlockTest, OperationCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
intptr_t block_size;
|
||||
@@ -91,8 +90,8 @@ TEST_P(ErrorBlockTest, OperationCheck) {
|
||||
|
||||
TEST_P(ErrorBlockTest, ExtremeValues) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, 4096);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
intptr_t block_size;
|
||||
|
@@ -120,10 +120,10 @@ class VP9IntraPredTest
|
||||
|
||||
TEST_P(VP9IntraPredTest, IntraPredTests) {
|
||||
// max block size is 32
|
||||
DECLARE_ALIGNED(16, uint16_t, left_col[2*32]);
|
||||
DECLARE_ALIGNED(16, uint16_t, above_data[2*32+32]);
|
||||
DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 2*32);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 2*32+32);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst, 3 * 32 * 32);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_dst, 3 * 32 * 32);
|
||||
RunTest(left_col, above_data, dst, ref_dst);
|
||||
}
|
||||
|
||||
|
@@ -21,8 +21,6 @@
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "vp9/common/vp9_scan.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -82,18 +80,18 @@ class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
|
||||
|
||||
TEST_P(VP9QuantizeTest, OperationCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < number_of_iterations; ++i) {
|
||||
@@ -141,18 +139,18 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
||||
|
||||
TEST_P(VP9Quantize32Test, OperationCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < number_of_iterations; ++i) {
|
||||
@@ -200,18 +198,18 @@ TEST_P(VP9Quantize32Test, OperationCheck) {
|
||||
|
||||
TEST_P(VP9QuantizeTest, EOBCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
|
||||
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < number_of_iterations; ++i) {
|
||||
@@ -264,18 +262,18 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
||||
|
||||
TEST_P(VP9Quantize32Test, EOBCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
|
||||
DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
|
||||
DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
|
||||
DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
|
||||
int err_count_total = 0;
|
||||
int first_failure = -1;
|
||||
for (int i = 0; i < number_of_iterations; ++i) {
|
||||
|
@@ -1,180 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
#include "test/md5_helper.h"
|
||||
#include "test/util.h"
|
||||
#include "test/webm_video_source.h"
|
||||
|
||||
namespace {
|
||||
|
||||
const char kVp9TestFile[] = "vp90-2-08-tile_1x8_frame_parallel.webm";
|
||||
const char kVp9Md5File[] = "vp90-2-08-tile_1x8_frame_parallel.webm.md5";
|
||||
|
||||
// Class for testing shutting off the loop filter.
|
||||
class SkipLoopFilterTest {
|
||||
public:
|
||||
SkipLoopFilterTest()
|
||||
: video_(NULL),
|
||||
decoder_(NULL),
|
||||
md5_file_(NULL) {}
|
||||
|
||||
~SkipLoopFilterTest() {
|
||||
if (md5_file_ != NULL)
|
||||
fclose(md5_file_);
|
||||
delete decoder_;
|
||||
delete video_;
|
||||
}
|
||||
|
||||
// If |threads| > 0 then set the decoder with that number of threads.
|
||||
void Init(int num_threads) {
|
||||
expected_md5_[0] = '\0';
|
||||
junk_[0] = '\0';
|
||||
video_ = new libvpx_test::WebMVideoSource(kVp9TestFile);
|
||||
ASSERT_TRUE(video_ != NULL);
|
||||
video_->Init();
|
||||
video_->Begin();
|
||||
|
||||
vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
|
||||
if (num_threads > 0)
|
||||
cfg.threads = num_threads;
|
||||
decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
|
||||
ASSERT_TRUE(decoder_ != NULL);
|
||||
|
||||
OpenMd5File(kVp9Md5File);
|
||||
}
|
||||
|
||||
// Set the VP9 skipLoopFilter control value.
|
||||
void SetSkipLoopFilter(int value, vpx_codec_err_t expected_value) {
|
||||
decoder_->Control(VP9_SET_SKIP_LOOP_FILTER, value, expected_value);
|
||||
}
|
||||
|
||||
vpx_codec_err_t DecodeOneFrame() {
|
||||
const vpx_codec_err_t res =
|
||||
decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
|
||||
if (res == VPX_CODEC_OK) {
|
||||
ReadMd5();
|
||||
video_->Next();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
vpx_codec_err_t DecodeRemainingFrames() {
|
||||
for (; video_->cxdata() != NULL; video_->Next()) {
|
||||
const vpx_codec_err_t res =
|
||||
decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
|
||||
if (res != VPX_CODEC_OK)
|
||||
return res;
|
||||
ReadMd5();
|
||||
}
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
|
||||
// Checks if MD5 matches or doesn't.
|
||||
void CheckMd5(bool matches) {
|
||||
libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
|
||||
const vpx_image_t *img = dec_iter.Next();
|
||||
CheckMd5Vpx(*img, matches);
|
||||
}
|
||||
|
||||
private:
|
||||
// TODO(fgalligan): Move the MD5 testing code into another class.
|
||||
void OpenMd5File(const std::string &md5_file_name) {
|
||||
md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name);
|
||||
ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
|
||||
<< md5_file_name;
|
||||
}
|
||||
|
||||
// Reads the next line of the MD5 file.
|
||||
void ReadMd5() {
|
||||
ASSERT_TRUE(md5_file_ != NULL);
|
||||
const int res = fscanf(md5_file_, "%s %s", expected_md5_, junk_);
|
||||
ASSERT_NE(EOF, res) << "Read md5 data failed";
|
||||
expected_md5_[32] = '\0';
|
||||
}
|
||||
|
||||
// Checks if the last read MD5 matches |img| or doesn't.
|
||||
void CheckMd5Vpx(const vpx_image_t &img, bool matches) {
|
||||
::libvpx_test::MD5 md5_res;
|
||||
md5_res.Add(&img);
|
||||
const char *const actual_md5 = md5_res.Get();
|
||||
|
||||
// Check MD5.
|
||||
if (matches)
|
||||
ASSERT_STREQ(expected_md5_, actual_md5) << "MD5 checksums don't match";
|
||||
else
|
||||
ASSERT_STRNE(expected_md5_, actual_md5) << "MD5 checksums match";
|
||||
}
|
||||
|
||||
libvpx_test::WebMVideoSource *video_;
|
||||
libvpx_test::VP9Decoder *decoder_;
|
||||
FILE *md5_file_;
|
||||
char expected_md5_[33];
|
||||
char junk_[128];
|
||||
};
|
||||
|
||||
TEST(SkipLoopFilterTest, ShutOffLoopFilter) {
  // Thread count 0: Init() leaves the decoder config's thread field alone.
  const int kThreads = 0;
  const int kSkipOn = 1;

  SkipLoopFilterTest fixture;
  fixture.Init(kThreads);
  fixture.SetSkipLoopFilter(kSkipOn, VPX_CODEC_OK);

  // With the loop filter skipped the final frame must NOT match the
  // reference MD5.
  ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeRemainingFrames());
  fixture.CheckMd5(false);
}
|
||||
|
||||
TEST(SkipLoopFilterTest, ShutOffLoopFilterSingleThread) {
  // Same scenario as the default-thread case, but the decoder is explicitly
  // configured with one thread.
  const int kThreads = 1;
  const int kSkipOn = 1;

  SkipLoopFilterTest fixture;
  fixture.Init(kThreads);
  fixture.SetSkipLoopFilter(kSkipOn, VPX_CODEC_OK);

  // Skipping the loop filter must make the last frame differ from the
  // reference MD5.
  ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeRemainingFrames());
  fixture.CheckMd5(false);
}
|
||||
|
||||
TEST(SkipLoopFilterTest, ShutOffLoopFilter8Threads) {
  // Same scenario again with the decoder configured for eight threads.
  const int kThreads = 8;
  const int kSkipOn = 1;

  SkipLoopFilterTest fixture;
  fixture.Init(kThreads);
  fixture.SetSkipLoopFilter(kSkipOn, VPX_CODEC_OK);

  // Skipping the loop filter must make the last frame differ from the
  // reference MD5.
  ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeRemainingFrames());
  fixture.CheckMd5(false);
}
|
||||
|
||||
TEST(SkipLoopFilterTest, WithLoopFilter) {
  const int kThreads = 0;
  const int kSkipOn = 1;

  SkipLoopFilterTest fixture;
  fixture.Init(kThreads);

  // Turn skipping on and then straight back off before any frame is
  // decoded; the control value in effect for decoding is therefore 0.
  fixture.SetSkipLoopFilter(kSkipOn, VPX_CODEC_OK);
  fixture.SetSkipLoopFilter(0, VPX_CODEC_OK);

  // With the loop filter active the last frame must match the reference MD5.
  ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeRemainingFrames());
  fixture.CheckMd5(true);
}
|
||||
|
||||
TEST(SkipLoopFilterTest, ToggleLoopFilter) {
  const int kThreads = 0;
  const int kToggledFrames = 10;

  SkipLoopFilterTest fixture;
  fixture.Init(kThreads);

  // Alternate the control value 0, 1, 0, 1, ... for the first ten frames,
  // decoding exactly one frame after each change. The last value set is 1.
  for (int frame = 0; frame < kToggledFrames; ++frame) {
    fixture.SetSkipLoopFilter(frame % 2, VPX_CODEC_OK);
    ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeOneFrame());
  }

  // The rest of the clip is decoded with the last setting; the final frame
  // is expected to differ from the reference MD5.
  ASSERT_EQ(VPX_CODEC_OK, fixture.DecodeRemainingFrames());
  fixture.CheckMd5(false);
}
|
||||
|
||||
} // namespace
|
@@ -23,13 +23,6 @@ vpxenc_verify_environment() {
|
||||
elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH."
|
||||
return 1
|
||||
fi
|
||||
if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
|
||||
if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
|
||||
elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
|
||||
elog "LIBVPX_TEST_DATA_PATH."
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
if [ -z "$(vpx_tool_path vpxenc)" ]; then
|
||||
elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
|
||||
return 1
|
||||
@@ -56,14 +49,6 @@ yuv_input_hantro_collage() {
|
||||
--height="${YUV_RAW_INPUT_HEIGHT}""
|
||||
}
|
||||
|
||||
# Echoes the path stored in Y4M_NOSQ_PAR_INPUT.
y4m_input_non_square_par() {
  # The expansion is quoted so echo receives the value as a single word,
  # without field splitting or pathname expansion. The previous
  # ""${VAR}"" form left the expansion itself unquoted.
  echo "${Y4M_NOSQ_PAR_INPUT}"
}
|
||||
|
||||
# Echoes the path stored in Y4M_720P_INPUT.
y4m_input_720p() {
  # The expansion is quoted so echo receives the value as a single word,
  # without field splitting or pathname expansion. The previous
  # ""${VAR}"" form left the expansion itself unquoted.
  echo "${Y4M_720P_INPUT}"
}
|
||||
|
||||
# Echo default vpxenc real time encoding params. $1 is the codec, which defaults
|
||||
# to vp8 if unspecified.
|
||||
vpxenc_rt_params() {
|
||||
@@ -72,7 +57,7 @@ vpxenc_rt_params() {
|
||||
--buf-initial-sz=500
|
||||
--buf-optimal-sz=600
|
||||
--buf-sz=1000
|
||||
--cpu-used=-6
|
||||
--cpu-used=-5
|
||||
--end-usage=cbr
|
||||
--error-resilient=1
|
||||
--kf-max-dist=90000
|
||||
@@ -262,63 +247,6 @@ vpxenc_vp9_webm_rt() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Encodes the 720p Y4M input to WebM with the VP9 real-time parameters for
# every combination of 2..4 threads and 2..4 tile columns. Does nothing when
# VP9 encoding or WebM I/O is unavailable. Returns 1 as soon as one encode
# fails, or when no output file exists afterwards.
vpxenc_vp9_webm_rt_multithread_tiled() {
  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
     [ "$(webm_io_available)" = "yes" ]; then
    # "local readonly x=..." would declare a local named "readonly" rather
    # than make x read-only, so plain "local" is used.
    local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm"
    local tilethread_min=2
    local tilethread_max=4
    local num_threads="$(seq ${tilethread_min} ${tilethread_max})"
    local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
    # Keep the loop variables out of the caller's scope.
    local threads
    local tile_cols

    for threads in ${num_threads}; do
      for tile_cols in ${num_tile_cols}; do
        # Every iteration writes the same output file, so a failed encode
        # must be caught here; the existence check below cannot detect it
        # once an earlier iteration has produced the file.
        vpxenc $(y4m_input_720p) \
          $(vpxenc_rt_params vp9) \
          --threads=${threads} \
          --tile-columns=${tile_cols} \
          --output="${output}" || return 1
      done
    done

    if [ ! -e "${output}" ]; then
      elog "Output file does not exist."
      return 1
    fi

    rm "${output}"
  fi
}
|
||||
|
||||
# Same sweep as the multithread/tiled real-time test (2..4 threads by 2..4
# tile columns on the 720p Y4M input), with --frame-parallel=1 added to each
# encode. Does nothing when VP9 encoding or WebM I/O is unavailable. Returns
# 1 as soon as one encode fails, or when no output file exists afterwards.
vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() {
  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
     [ "$(webm_io_available)" = "yes" ]; then
    # "local readonly x=..." would declare a local named "readonly" rather
    # than make x read-only, so plain "local" is used.
    local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm"
    local tilethread_min=2
    local tilethread_max=4
    local num_threads="$(seq ${tilethread_min} ${tilethread_max})"
    local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
    # Keep the loop variables out of the caller's scope.
    local threads
    local tile_cols

    for threads in ${num_threads}; do
      for tile_cols in ${num_tile_cols}; do
        # Every iteration writes the same output file, so a failed encode
        # must be caught here; the existence check below cannot detect it
        # once an earlier iteration has produced the file.
        vpxenc $(y4m_input_720p) \
          $(vpxenc_rt_params vp9) \
          --threads=${threads} \
          --tile-columns=${tile_cols} \
          --frame-parallel=1 \
          --output="${output}" || return 1
      done
    done

    if [ ! -e "${output}" ]; then
      elog "Output file does not exist."
      return 1
    fi

    rm "${output}"
  fi
}
|
||||
|
||||
vpxenc_vp9_webm_2pass() {
|
||||
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
|
||||
[ "$(webm_io_available)" = "yes" ]; then
|
||||
@@ -392,23 +320,6 @@ vpxenc_vp9_webm_lag10_frames20() {
|
||||
fi
|
||||
}
|
||||
|
||||
# TODO(fgalligan): Test that DisplayWidth is different than video width.
|
||||
# Encodes the non-square pixel-aspect-ratio Y4M input to a VP9 WebM file,
# limited to TEST_FRAMES frames. Does nothing when VP9 encoding or WebM I/O
# is unavailable. Returns 1 when vpxenc fails or the output file is missing.
vpxenc_vp9_webm_non_square_par() {
  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
     [ "$(webm_io_available)" = "yes" ]; then
    # "local readonly x=..." would declare a local named "readonly" rather
    # than make x read-only, so plain "local" is used.
    local output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm"
    # Propagate an encoder failure instead of relying only on the
    # existence check, which a stale file from an earlier run would satisfy.
    vpxenc $(y4m_input_non_square_par) \
      --codec=vp9 \
      --limit="${TEST_FRAMES}" \
      --output="${output}" || return 1

    if [ ! -e "${output}" ]; then
      elog "Output file does not exist."
      return 1
    fi
  fi
}
|
||||
|
||||
vpxenc_tests="vpxenc_vp8_ivf
|
||||
vpxenc_vp8_webm
|
||||
vpxenc_vp8_webm_rt
|
||||
@@ -418,12 +329,9 @@ vpxenc_tests="vpxenc_vp8_ivf
|
||||
vpxenc_vp9_ivf
|
||||
vpxenc_vp9_webm
|
||||
vpxenc_vp9_webm_rt
|
||||
vpxenc_vp9_webm_rt_multithread_tiled
|
||||
vpxenc_vp9_webm_rt_multithread_tiled_frameparallel
|
||||
vpxenc_vp9_webm_2pass
|
||||
vpxenc_vp9_ivf_lossless
|
||||
vpxenc_vp9_ivf_minq0_maxq0
|
||||
vpxenc_vp9_webm_lag10_frames20
|
||||
vpxenc_vp9_webm_non_square_par"
|
||||
vpxenc_vp9_webm_lag10_frames20"
|
||||
|
||||
run_tests vpxenc_verify_environment "${vpxenc_tests}"
|
||||
|
@@ -140,7 +140,7 @@ static const VpxInterface vpx_encoders[] = {
|
||||
#endif
|
||||
};
|
||||
|
||||
int get_vpx_encoder_count(void) {
|
||||
int get_vpx_encoder_count() {
|
||||
return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]);
|
||||
}
|
||||
|
||||
@@ -170,7 +170,7 @@ static const VpxInterface vpx_decoders[] = {
|
||||
#endif
|
||||
};
|
||||
|
||||
int get_vpx_decoder_count(void) {
|
||||
int get_vpx_decoder_count() {
|
||||
return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]);
|
||||
}
|
||||
|
||||
|
@@ -16,7 +16,6 @@
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vpx_image.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/msvc.h"
|
||||
|
||||
#if CONFIG_ENCODERS
|
||||
#include "./y4minput.h"
|
||||
@@ -35,6 +34,7 @@
|
||||
#if CONFIG_OS_SUPPORT
|
||||
#if defined(_MSC_VER)
|
||||
#include <io.h> /* NOLINT */
|
||||
#define snprintf _snprintf
|
||||
#define isatty _isatty
|
||||
#define fileno _fileno
|
||||
#else
|
||||
@@ -89,7 +89,6 @@ struct VpxInputContext {
|
||||
enum VideoFileType file_type;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
struct VpxRational pixel_aspect_ratio;
|
||||
vpx_img_fmt_t fmt;
|
||||
vpx_bit_depth_t bit_depth;
|
||||
int only_i420;
|
||||
@@ -120,7 +119,7 @@ void warn(const char *fmt, ...);
|
||||
void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
|
||||
|
||||
/* The tool including this file must define usage_exit() */
|
||||
void usage_exit(void) VPX_NO_RETURN;
|
||||
void usage_exit() VPX_NO_RETURN;
|
||||
|
||||
#undef VPX_NO_RETURN
|
||||
|
||||
@@ -132,11 +131,11 @@ typedef struct VpxInterface {
|
||||
vpx_codec_iface_t *(*const codec_interface)();
|
||||
} VpxInterface;
|
||||
|
||||
int get_vpx_encoder_count(void);
|
||||
int get_vpx_encoder_count();
|
||||
const VpxInterface *get_vpx_encoder_by_index(int i);
|
||||
const VpxInterface *get_vpx_encoder_by_name(const char *name);
|
||||
|
||||
int get_vpx_decoder_count(void);
|
||||
int get_vpx_decoder_count();
|
||||
const VpxInterface *get_vpx_decoder_by_index(int i);
|
||||
const VpxInterface *get_vpx_decoder_by_name(const char *name);
|
||||
const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc);
|
||||
|
@@ -10,7 +10,6 @@
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "alloccommon.h"
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "onyxc_int.h"
|
||||
|
@@ -9,7 +9,7 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vpx_sad16x16_media|
|
||||
EXPORT |vp8_sad16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
@@ -21,7 +21,8 @@
|
||||
; r1 int src_stride
|
||||
; r2 const unsigned char *ref_ptr
|
||||
; r3 int ref_stride
|
||||
|vpx_sad16x16_media| PROC
|
||||
; stack max_sad (not used)
|
||||
|vp8_sad16x16_armv6| PROC
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
154
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
Normal file
154
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
Normal file
@@ -0,0 +1,154 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance16x16_armv6| PROC

    ; Register use inside the routine:
    ;   r8  = running signed sum of (src - ref)
    ;   r11 = running sum of squared differences (sse)
    ;   r12 = rows left to process
    ;   lr  = constant zero used by sel/usad8
    push    {r4-r12, lr}        ; 10 words saved, so the stack arg sits at [sp, #40]

    pld     [r0, r1, lsl #0]
    pld     [r2, r3, lsl #0]

    mov     r8, #0              ; sum = 0
    mov     r11, #0             ; sse = 0
    mov     r12, #16            ; 16 rows in the block

variance16x16_row_loop
    ; ---- pixels 0..3 of the row ----
    ldr     r4, [r0, #0]        ; 4 src pixels
    ldr     r5, [r2, #0]        ; 4 ref pixels

    mov     lr, #0              ; zero operand for sel/usad8

    usub8   r6, r4, r5          ; per-byte src - ref
    pld     [r0, r1, lsl #1]
    sel     r7, r6, lr          ; keep the bytes where src >= ref, 0 elsewhere
    usub8   r9, r5, r4          ; per-byte ref - src
    pld     [r2, r3, lsl #1]
    sel     r6, r9, lr          ; keep the bytes where ref >= src, 0 elsewhere

    ; partial sums
    usad8   r4, r7, lr          ; total of the positive differences
    usad8   r5, r6, lr          ; total of the negative differences (magnitude)
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels
    ; fold into the signed sum
    adds    r8, r8, r4
    subs    r8, r8, r5

    ; squared differences
    uxtb16  r5, r6              ; bytes 0 and 2 widened to halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 widened to halfwords
    smlad   r11, r5, r5, r11    ; sse += squares of bytes 0 and 2

    ; ---- pixels 4..7 of the row ----
    ldr     r4, [r0, #4]        ; 4 src pixels
    ldr     r5, [r2, #4]        ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += squares of bytes 1 and 3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; negative differences (magnitude)

    ; partial sums
    usad8   r4, r7, lr
    usad8   r5, r6, lr
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    ; fold into the signed sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6
    uxtb16  r10, r6, ror #8
    smlad   r11, r5, r5, r11

    ; ---- pixels 8..11 of the row ----
    ldr     r4, [r0, #8]        ; 4 src pixels
    ldr     r5, [r2, #8]        ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; finish the sse update of the previous group

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; negative differences (magnitude)

    ; partial sums
    usad8   r4, r7, lr
    usad8   r5, r6, lr
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    ; fold into the signed sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6
    uxtb16  r10, r6, ror #8
    smlad   r11, r5, r5, r11

    ; ---- pixels 12..15 of the row ----
    ldr     r4, [r0, #12]       ; 4 src pixels
    ldr     r5, [r2, #12]       ; 4 ref pixels
    smlad   r11, r10, r10, r11  ; finish the sse update of the previous group

    usub8   r6, r4, r5          ; per-byte src - ref
    add     r0, r0, r1          ; src_ptr += source_stride
    sel     r7, r6, lr          ; positive differences
    usub8   r9, r5, r4          ; per-byte ref - src
    add     r2, r2, r3          ; ref_ptr += recon_stride
    sel     r6, r9, lr          ; negative differences (magnitude)

    ; partial sums
    usad8   r4, r7, lr
    usad8   r5, r6, lr
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    ; fold into the signed sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6
    uxtb16  r10, r6, ror #8
    smlad   r11, r5, r5, r11
    smlad   r11, r10, r10, r11


    subs    r12, r12, #1        ; one row done

    bne     variance16x16_row_loop

    ; write *sse and compute the return value
    ldr     r6, [sp, #40]       ; sse pointer (first stack argument)
    mul     r0, r8, r8          ; sum * sum
    str     r11, [r6]           ; *sse = sse
    sub     r0, r11, r0, lsr #8 ; return sse - ((sum * sum) >> 8)

    pop     {r4-r12, pc}

    ENDP
|
||||
|
||||
END
|
||||
|
101
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
Normal file
101
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
Normal file
@@ -0,0 +1,101 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance8x8_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance8x8_armv6| PROC

    ; Register use inside the routine:
    ;   r4  = running signed sum of (src - ref)
    ;   r5  = running sum of squared differences (sse)
    ;   r12 = rows left to process
    ;   lr  = constant zero used by sel/usad8
    stmfd   sp!, {r4-r10, lr}   ; 8 words saved, so the stack arg sits at [sp, #32]

    pld     [r0, r1, lsl #0]
    pld     [r2, r3, lsl #0]

    mov     r12, #8             ; 8 rows in the block
    mov     r4, #0              ; sum = 0
    mov     r5, #0              ; sse = 0

variance8x8_row_loop
    ; ---- pixels 0..3 of the row ----
    ldr     r6, [r0, #0x0]      ; 4 src pixels
    ldr     r7, [r2, #0x0]      ; 4 ref pixels

    mov     lr, #0              ; zero operand for sel/usad8

    usub8   r8, r6, r7          ; per-byte src - ref
    pld     [r0, r1, lsl #1]
    sel     r10, r8, lr         ; keep the bytes where src >= ref, 0 elsewhere
    usub8   r9, r7, r6          ; per-byte ref - src
    pld     [r2, r3, lsl #1]
    sel     r8, r9, lr          ; keep the bytes where ref >= src, 0 elsewhere

    ; partial sums
    usad8   r6, r10, lr         ; total of the positive differences
    usad8   r7, r8, lr          ; total of the negative differences (magnitude)
    orr     r8, r8, r10         ; |src - ref| for all 4 pixels
    ; fold into the signed sum
    add     r4, r4, r6
    sub     r4, r4, r7

    ; squared differences
    uxtb16  r7, r8              ; bytes 0 and 2 widened to halfwords
    uxtb16  r10, r8, ror #8     ; bytes 1 and 3 widened to halfwords
    smlad   r5, r7, r7, r5      ; sse += squares of bytes 0 and 2

    ; ---- pixels 4..7 of the row ----
    ldr     r6, [r0, #0x4]      ; 4 src pixels
    ldr     r7, [r2, #0x4]      ; 4 ref pixels
    smlad   r5, r10, r10, r5    ; sse += squares of bytes 1 and 3 (previous group)

    usub8   r8, r6, r7          ; per-byte src - ref
    add     r0, r0, r1          ; src_ptr += source_stride
    sel     r10, r8, lr         ; positive differences
    usub8   r9, r7, r6          ; per-byte ref - src
    add     r2, r2, r3          ; ref_ptr += recon_stride
    sel     r8, r9, lr          ; negative differences (magnitude)

    ; partial sums
    usad8   r6, r10, lr
    usad8   r7, r8, lr
    orr     r8, r8, r10         ; |src - ref| for all 4 pixels

    ; fold into the signed sum
    add     r4, r4, r6
    sub     r4, r4, r7

    ; squared differences
    uxtb16  r7, r8
    uxtb16  r10, r8, ror #8
    smlad   r5, r7, r7, r5
    subs    r12, r12, #1        ; one row done; smlad below leaves the flags alone
    smlad   r5, r10, r10, r5

    bne     variance8x8_row_loop

    ; write *sse and compute the return value
    ldr     r8, [sp, #32]       ; sse pointer (first stack argument)
    mul     r1, r4, r4          ; sum * sum
    str     r5, [r8]            ; *sse = sse
    sub     r0, r5, r1, ASR #6  ; return sse - ((sum * sum) >> 6)

    ldmfd   sp!, {r4-r10, pc}

    ENDP
|
||||
|
||||
END
|
@@ -99,7 +99,7 @@ void vp8_sixtap_predict4x4_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
@@ -147,7 +147,7 @@ void vp8_sixtap_predict8x8_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
@@ -189,7 +189,7 @@ void vp8_sixtap_predict16x16_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
184
vp8/common/arm/neon/sad_neon.c
Normal file
184
vp8/common/arm/neon/sad_neon.c
Normal file
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
unsigned int vp8_sad8x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Sum of absolute differences over 8 rows of 8 bytes each.
     * col_sad holds one 16-bit running total per column. */
    uint16x8_t col_sad = vdupq_n_u16(0);
    uint32x4_t sad_x4;
    uint64x2_t sad_x2;
    uint32x2_t sad_total;
    int row;

    for (row = 0; row < 8; ++row) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);

        col_sad = vabal_u8(col_sad, src_row, ref_row);
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64, then add the two
     * halves and return the low 32 bits. */
    sad_x4 = vpaddlq_u16(col_sad);
    sad_x2 = vpaddlq_u32(sad_x4);
    sad_total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sad_x2)),
                         vreinterpret_u32_u64(vget_high_u64(sad_x2)));

    return vget_lane_u32(sad_total, 0);
}
|
||||
|
||||
unsigned int vp8_sad8x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Sum of absolute differences over 16 rows of 8 bytes each.
     * col_sad holds one 16-bit running total per column. */
    uint16x8_t col_sad = vdupq_n_u16(0);
    uint32x4_t sad_x4;
    uint64x2_t sad_x2;
    uint32x2_t sad_total;
    int row;

    for (row = 0; row < 16; ++row) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);

        col_sad = vabal_u8(col_sad, src_row, ref_row);
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64, then add the two
     * halves and return the low 32 bits. */
    sad_x4 = vpaddlq_u16(col_sad);
    sad_x2 = vpaddlq_u32(sad_x4);
    sad_total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sad_x2)),
                         vreinterpret_u32_u64(vget_high_u64(sad_x2)));

    return vget_lane_u32(sad_total, 0);
}
|
||||
|
||||
unsigned int vp8_sad4x4_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Sum of absolute differences over a 4x4 block.
     * Each row is fetched with an 8-byte load, so 8 bytes are read per row
     * from both buffers; only the 4 low column totals enter the result. */
    uint16x8_t col_sad = vdupq_n_u16(0);
    uint32x2_t sad_x2;
    uint64x1_t sad_total;
    int row;

    for (row = 0; row < 4; ++row) {
        const uint8x8_t src_row = vld1_u8(src_ptr);
        const uint8x8_t ref_row = vld1_u8(ref_ptr);

        col_sad = vabal_u8(col_sad, src_row, ref_row);
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Reduce the four low lanes: 4 x u16 -> 2 x u32 -> 1 x u64, and return
     * the low 32 bits. */
    sad_x2 = vpaddl_u16(vget_low_u16(col_sad));
    sad_total = vpaddl_u32(sad_x2);

    return vget_lane_u32(vreinterpret_u32_u64(sad_total), 0);
}
|
||||
|
||||
unsigned int vp8_sad16x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Sum of absolute differences over 16 rows of 16 bytes each.
     * Columns 0..7 and 8..15 are accumulated in separate 16-bit vectors. */
    uint16x8_t sad_lo = vdupq_n_u16(0);
    uint16x8_t sad_hi = vdupq_n_u16(0);
    uint32x4_t sad_x4;
    uint64x2_t sad_x2;
    uint32x2_t sad_total;
    int row;

    for (row = 0; row < 16; ++row) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        sad_lo = vabal_u8(sad_lo, vget_low_u8(src_row), vget_low_u8(ref_row));
        sad_hi = vabal_u8(sad_hi, vget_high_u8(src_row),
                          vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Merge the two halves, then reduce 8 x u16 -> 4 x u32 -> 2 x u64, add
     * the two 64-bit lanes and return the low 32 bits. */
    sad_lo = vaddq_u16(sad_lo, sad_hi);
    sad_x4 = vpaddlq_u16(sad_lo);
    sad_x2 = vpaddlq_u32(sad_x4);
    sad_total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sad_x2)),
                         vreinterpret_u32_u64(vget_high_u64(sad_x2)));

    return vget_lane_u32(sad_total, 0);
}
|
||||
|
||||
unsigned int vp8_sad16x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    /* Sum of absolute differences over 8 rows of 16 bytes each.
     * Columns 0..7 and 8..15 are accumulated in separate 16-bit vectors. */
    uint16x8_t sad_lo = vdupq_n_u16(0);
    uint16x8_t sad_hi = vdupq_n_u16(0);
    uint32x4_t sad_x4;
    uint64x2_t sad_x2;
    uint32x2_t sad_total;
    int row;

    for (row = 0; row < 8; ++row) {
        const uint8x16_t src_row = vld1q_u8(src_ptr);
        const uint8x16_t ref_row = vld1q_u8(ref_ptr);

        sad_lo = vabal_u8(sad_lo, vget_low_u8(src_row), vget_low_u8(ref_row));
        sad_hi = vabal_u8(sad_hi, vget_high_u8(src_row),
                          vget_high_u8(ref_row));
        src_ptr += src_stride;
        ref_ptr += ref_stride;
    }

    /* Merge the two halves, then reduce 8 x u16 -> 4 x u32 -> 2 x u64, add
     * the two 64-bit lanes and return the low 32 bits. */
    sad_lo = vaddq_u16(sad_lo, sad_hi);
    sad_x4 = vpaddlq_u16(sad_lo);
    sad_x2 = vpaddlq_u32(sad_x4);
    sad_total = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sad_x2)),
                         vreinterpret_u32_u64(vget_high_u64(sad_x2)));

    return vget_lane_u32(sad_total, 0);
}
|
320
vp8/common/arm/neon/variance_neon.c
Normal file
320
vp8/common/arm/neon/variance_neon.c
Normal file
@@ -0,0 +1,320 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
unsigned int vp8_variance16x16_neon(
        const unsigned char *src_ptr,
        int source_stride,
        const unsigned char *ref_ptr,
        int recon_stride,
        unsigned int *sse) {
    /* Processes 16 rows of 16 bytes, two rows per iteration. Stores the sum
     * of squared differences through |sse| and returns
     * sse - ((sum * sum) >> 8), where sum is the signed sum of (src - ref). */
    int row_pair;
    uint8x16_t src_a, src_b, ref_a, ref_b;
    int16x8_t diff;
    int32x4_t sum_acc = vdupq_n_s32(0);    /* signed differences */
    int32x4_t sq_acc_lo = vdupq_n_s32(0);  /* squares, low 4 lanes of a diff */
    int32x4_t sq_acc_hi = vdupq_n_s32(0);  /* squares, high 4 lanes of a diff */
    int64x2_t sum_x2, sq_x2, sum_sq;
    int64x1_t sum_total, sse_total;
    uint32x2_t sum_sq_shifted, variance;

    for (row_pair = 0; row_pair < 8; row_pair++) {
        src_a = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        src_b = vld1q_u8(src_ptr);
        src_ptr += source_stride;
        __builtin_prefetch(src_ptr);

        ref_a = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        ref_b = vld1q_u8(ref_ptr);
        ref_ptr += recon_stride;
        __builtin_prefetch(ref_ptr);

        /* The widening unsigned subtract wraps modulo 2^16, so viewing the
         * result as int16 gives the signed difference. */

        /* first row, columns 0..7 */
        diff = vreinterpretq_s16_u16(
                   vsubl_u8(vget_low_u8(src_a), vget_low_u8(ref_a)));
        sum_acc = vpadalq_s16(sum_acc, diff);
        sq_acc_lo = vmlal_s16(sq_acc_lo, vget_low_s16(diff),
                              vget_low_s16(diff));
        sq_acc_hi = vmlal_s16(sq_acc_hi, vget_high_s16(diff),
                              vget_high_s16(diff));

        /* first row, columns 8..15 */
        diff = vreinterpretq_s16_u16(
                   vsubl_u8(vget_high_u8(src_a), vget_high_u8(ref_a)));
        sum_acc = vpadalq_s16(sum_acc, diff);
        sq_acc_lo = vmlal_s16(sq_acc_lo, vget_low_s16(diff),
                              vget_low_s16(diff));
        sq_acc_hi = vmlal_s16(sq_acc_hi, vget_high_s16(diff),
                              vget_high_s16(diff));

        /* second row, columns 0..7 */
        diff = vreinterpretq_s16_u16(
                   vsubl_u8(vget_low_u8(src_b), vget_low_u8(ref_b)));
        sum_acc = vpadalq_s16(sum_acc, diff);
        sq_acc_lo = vmlal_s16(sq_acc_lo, vget_low_s16(diff),
                              vget_low_s16(diff));
        sq_acc_hi = vmlal_s16(sq_acc_hi, vget_high_s16(diff),
                              vget_high_s16(diff));

        /* second row, columns 8..15 */
        diff = vreinterpretq_s16_u16(
                   vsubl_u8(vget_high_u8(src_b), vget_high_u8(ref_b)));
        sum_acc = vpadalq_s16(sum_acc, diff);
        sq_acc_lo = vmlal_s16(sq_acc_lo, vget_low_s16(diff),
                              vget_low_s16(diff));
        sq_acc_hi = vmlal_s16(sq_acc_hi, vget_high_s16(diff),
                              vget_high_s16(diff));
    }

    /* Collapse the accumulators to scalars held in 64-bit lanes. */
    sq_acc_hi = vaddq_s32(sq_acc_hi, sq_acc_lo);
    sum_x2 = vpaddlq_s32(sum_acc);
    sq_x2 = vpaddlq_s32(sq_acc_hi);

    sum_total = vadd_s64(vget_low_s64(sum_x2), vget_high_s64(sum_x2));
    sse_total = vadd_s64(vget_low_s64(sq_x2), vget_high_s64(sq_x2));

    /* Lane 0 of sum_sq is the square of the low 32 bits of the sum. */
    sum_sq = vmull_s32(vreinterpret_s32_s64(sum_total),
                       vreinterpret_s32_s64(sum_total));
    vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(sse_total), 0);

    sum_sq_shifted = vshr_n_u32(
                         vreinterpret_u32_s64(vget_low_s64(sum_sq)), 8);
    variance = vsub_u32(vreinterpret_u32_s64(sse_total), sum_sq_shifted);

    return vget_lane_u32(variance, 0);
}
|
||||
|
||||
unsigned int vp8_variance16x8_neon(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
|
||||
uint32x2_t d0u32, d10u32;
|
||||
int64x1_t d0s64, d1s64;
|
||||
uint8x16_t q0u8, q1u8, q2u8, q3u8;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int32x4_t q8s32, q9s32, q10s32;
|
||||
int64x2_t q0s64, q1s64, q5s64;
|
||||
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 4; i++) { // variance16x8_neon_loop
|
||||
q0u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q1u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
__builtin_prefetch(src_ptr);
|
||||
|
||||
q2u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
q3u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
__builtin_prefetch(ref_ptr);
|
||||
|
||||
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
|
||||
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
|
||||
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
|
||||
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
|
||||
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
|
||||
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
|
||||
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
|
||||
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
|
||||
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
|
||||
|
||||
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
|
||||
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
|
||||
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
|
||||
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
|
||||
}
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q9s32);
|
||||
q0s64 = vpaddlq_s32(q8s32);
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
|
||||
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
|
||||
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
|
||||
vreinterpret_s32_s64(d0s64));
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
|
||||
|
||||
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
|
||||
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
|
||||
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x16_neon(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
uint8x8_t d0u8, d2u8, d4u8, d6u8;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16;
|
||||
uint32x2_t d0u32, d10u32;
|
||||
int64x1_t d0s64, d1s64;
|
||||
uint16x8_t q11u16, q12u16;
|
||||
int32x4_t q8s32, q9s32, q10s32;
|
||||
int64x2_t q0s64, q1s64, q5s64;
|
||||
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 8; i++) { // variance8x16_neon_loop
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
__builtin_prefetch(src_ptr);
|
||||
|
||||
d4u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d6u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
__builtin_prefetch(ref_ptr);
|
||||
|
||||
q11u16 = vsubl_u8(d0u8, d4u8);
|
||||
q12u16 = vsubl_u8(d2u8, d6u8);
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
|
||||
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
}
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q9s32);
|
||||
q0s64 = vpaddlq_s32(q8s32);
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
|
||||
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
|
||||
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
|
||||
vreinterpret_s32_s64(d0s64));
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
|
||||
|
||||
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
|
||||
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
|
||||
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x8_neon(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
|
||||
uint32x2_t d0u32, d10u32;
|
||||
int64x1_t d0s64, d1s64;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int32x4_t q8s32, q9s32, q10s32;
|
||||
int64x2_t q0s64, q1s64, q5s64;
|
||||
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 2; i++) { // variance8x8_neon_loop
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d1u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d3u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
|
||||
d4u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d5u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d6u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d7u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
|
||||
q11u16 = vsubl_u8(d0u8, d4u8);
|
||||
q12u16 = vsubl_u8(d1u8, d5u8);
|
||||
q13u16 = vsubl_u8(d2u8, d6u8);
|
||||
q14u16 = vsubl_u8(d3u8, d7u8);
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
|
||||
q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
|
||||
q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
|
||||
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
|
||||
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
|
||||
q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
|
||||
q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
|
||||
|
||||
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
|
||||
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
|
||||
q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
|
||||
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
|
||||
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
|
||||
}
|
||||
|
||||
q10s32 = vaddq_s32(q10s32, q9s32);
|
||||
q0s64 = vpaddlq_s32(q8s32);
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
|
||||
d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
|
||||
d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
|
||||
vreinterpret_s32_s64(d0s64));
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
|
||||
|
||||
d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
|
||||
d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
|
||||
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
@@ -12,7 +12,7 @@
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
static const uint8_t bilinear_taps_coeff[8][2] = {
|
||||
static const uint16_t bilinear_taps_coeff[8][2] = {
|
||||
{128, 0},
|
||||
{112, 16},
|
||||
{ 96, 32},
|
||||
@@ -32,7 +32,7 @@ unsigned int vp8_sub_pixel_variance16x16_neon_func(
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
DECLARE_ALIGNED(16, unsigned char, tmp[528]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 528);
|
||||
unsigned char *tmpp;
|
||||
unsigned char *tmpp2;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
|
||||
@@ -911,6 +911,12 @@ unsigned int vp8_variance_halfpixvar16x16_hv_neon(
|
||||
return vget_lane_u32(d0u32, 0);
|
||||
}
|
||||
|
||||
enum { kWidth8 = 8 };
|
||||
enum { kHeight8 = 8 };
|
||||
enum { kHeight8PlusOne = 9 };
|
||||
enum { kPixelStepOne = 1 };
|
||||
enum { kAlign16 = 16 };
|
||||
|
||||
#define FILTER_BITS 7
|
||||
|
||||
static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
|
||||
@@ -962,8 +968,8 @@ static unsigned int variance8x8_neon(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride,
|
||||
unsigned int *sse) {
|
||||
int sum;
|
||||
variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
|
||||
return *sse - (((int64_t)sum * sum) / (8 * 8));
|
||||
variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
|
||||
return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
|
||||
}
|
||||
|
||||
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
||||
@@ -972,9 +978,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const uint8_t *vpx_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8(vpx_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8(vpx_filter[1]);
|
||||
const uint16_t *vpx_filter) {
|
||||
const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
|
||||
const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]);
|
||||
unsigned int i;
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
|
||||
@@ -997,21 +1003,21 @@ unsigned int vp8_sub_pixel_variance8x8_neon(
|
||||
const unsigned char *dst,
|
||||
int dst_stride,
|
||||
unsigned int *sse) {
|
||||
DECLARE_ALIGNED(16, uint8_t, temp2[9 * 8]);
|
||||
DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
|
||||
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight8PlusOne * kWidth8);
|
||||
DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight8PlusOne * kWidth8);
|
||||
if (xoffset == 0) {
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, 8, 8,
|
||||
8, bilinear_taps_coeff[yoffset]);
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, kWidth8, kHeight8,
|
||||
kWidth8, bilinear_taps_coeff[yoffset]);
|
||||
} else if (yoffset == 0) {
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, 1,
|
||||
9, 8,
|
||||
var_filter_block2d_bil_w8(src, temp2, src_stride, kPixelStepOne,
|
||||
kHeight8PlusOne, kWidth8,
|
||||
bilinear_taps_coeff[xoffset]);
|
||||
} else {
|
||||
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
|
||||
9, 8,
|
||||
var_filter_block2d_bil_w8(src, fdata3, src_stride, kPixelStepOne,
|
||||
kHeight8PlusOne, kWidth8,
|
||||
bilinear_taps_coeff[xoffset]);
|
||||
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
|
||||
8, bilinear_taps_coeff[yoffset]);
|
||||
var_filter_block2d_bil_w8(fdata3, temp2, kWidth8, kWidth8, kHeight8,
|
||||
kWidth8, bilinear_taps_coeff[yoffset]);
|
||||
}
|
||||
return variance8x8_neon(temp2, 8, dst, dst_stride, sse);
|
||||
return variance8x8_neon(temp2, kWidth8, dst, dst_stride, sse);
|
||||
}
|
||||
|
@@ -9,14 +9,10 @@
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vp8/common/filter.h"
|
||||
|
||||
// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder
|
||||
#if CONFIG_VP8_ENCODER
|
||||
|
||||
#if HAVE_MEDIA
|
||||
#include "vp8/common/arm/bilinearfilter_arm.h"
|
||||
|
||||
@@ -44,8 +40,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
8, 8, 8, VFilter);
|
||||
|
||||
return vpx_variance8x8_media(second_pass, 8, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
@@ -90,13 +86,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
16, 16, 16, VFilter);
|
||||
|
||||
var = vpx_variance16x16_media(second_pass, 16, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
return var;
|
||||
}
|
||||
|
||||
#endif // HAVE_MEDIA
|
||||
#endif /* HAVE_MEDIA */
|
||||
|
||||
|
||||
#if HAVE_NEON
|
||||
@@ -133,5 +129,4 @@ unsigned int vp8_sub_pixel_variance16x16_neon
|
||||
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
#endif // HAVE_NEON
|
||||
#endif // CONFIG_VP8_ENCODER
|
||||
#endif
|
||||
|
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Copy 2 macroblocks to a buffer */
|
||||
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
|
||||
unsigned char *dst_ptr, int dst_stride,
|
||||
int height)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < height; r++)
|
||||
{
|
||||
memcpy(dst_ptr, src_ptr, 32);
|
||||
|
||||
src_ptr += src_stride;
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
}
|
||||
}
|
@@ -10,7 +10,6 @@
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
|
@@ -17,7 +17,6 @@
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
|
@@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Notes:
|
||||
|
@@ -17,11 +17,10 @@
|
||||
* higher quality.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "postproc.h"
|
||||
#include "variance.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include <limits.h>
|
||||
@@ -151,36 +150,36 @@ static void multiframe_quality_enhance_block
|
||||
|
||||
if (blksize == 16)
|
||||
{
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
vp8_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128)>>8;
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32)>>6;
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32)>>6;
|
||||
#else
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
|
||||
sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
|
||||
usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
|
||||
vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
|
||||
#endif
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
vp8_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32)>>6;
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8)>>4;
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8)>>4;
|
||||
#else
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
|
||||
usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
|
||||
vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm,
|
||||
}
|
||||
}
|
||||
|
||||
static double gaussian(double sigma, double mu, double x)
|
||||
double vp8_gaussian(double sigma, double mu, double x)
|
||||
{
|
||||
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
|
||||
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
|
||||
@@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
|
||||
|
||||
for (i = -32; i < 32; i++)
|
||||
{
|
||||
const int v = (int)(.5 + 256 * gaussian(sigma, 0, i));
|
||||
const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
|
||||
|
||||
if (v)
|
||||
{
|
||||
|
@@ -10,8 +10,6 @@
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
@@ -32,8 +30,31 @@ void vp8_copy_mem16x16_c(
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
memcpy(dst, src, 16);
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
dst[8] = src[8];
|
||||
dst[9] = src[9];
|
||||
dst[10] = src[10];
|
||||
dst[11] = src[11];
|
||||
dst[12] = src[12];
|
||||
dst[13] = src[13];
|
||||
dst[14] = src[14];
|
||||
dst[15] = src[15];
|
||||
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
|
||||
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
|
||||
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
@@ -51,8 +72,19 @@ void vp8_copy_mem8x8_c(
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
@@ -70,8 +102,19 @@ void vp8_copy_mem8x4_c(
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = src[3];
|
||||
dst[4] = src[4];
|
||||
dst[5] = src[5];
|
||||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
|
@@ -215,7 +215,7 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
|
||||
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
|
||||
|
||||
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
|
||||
# Disable neon while investigating https://code.google.com/p/webm/issues/detail?id=817
|
||||
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2/;
|
||||
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
|
||||
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
|
||||
@@ -233,10 +233,34 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
|
||||
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
|
||||
specialize qw/vp8_bilinear_predict4x4 mmx media/;
|
||||
specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
|
||||
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
|
||||
|
||||
#
|
||||
# Whole-pixel Variance
|
||||
#
|
||||
add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_variance4x4 mmx sse2/;
|
||||
$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_variance8x8 mmx sse2 media neon/;
|
||||
$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
|
||||
$vp8_variance8x8_media=vp8_variance8x8_armv6;
|
||||
|
||||
add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_variance8x16 mmx sse2 neon/;
|
||||
$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_variance16x8 mmx sse2 neon/;
|
||||
$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_variance16x16 mmx sse2 media neon/;
|
||||
$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
|
||||
$vp8_variance16x16_media=vp8_variance16x16_armv6;
|
||||
|
||||
#
|
||||
# Sub-pixel Variance
|
||||
#
|
||||
@@ -245,9 +269,10 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
|
||||
$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
|
||||
specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media/;
|
||||
specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
|
||||
$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
|
||||
$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
|
||||
$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
|
||||
|
||||
add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
|
||||
specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
|
||||
@@ -278,16 +303,119 @@ specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
|
||||
$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
|
||||
$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
|
||||
|
||||
#
|
||||
# Single block SAD
|
||||
#
|
||||
add_proto qw/unsigned int vp8_sad4x4/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
|
||||
specialize qw/vp8_sad4x4 mmx sse2 neon/;
|
||||
$vp8_sad4x4_sse2=vp8_sad4x4_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
|
||||
specialize qw/vp8_sad8x8 mmx sse2 neon/;
|
||||
$vp8_sad8x8_sse2=vp8_sad8x8_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
|
||||
specialize qw/vp8_sad8x16 mmx sse2 neon/;
|
||||
$vp8_sad8x16_sse2=vp8_sad8x16_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
|
||||
specialize qw/vp8_sad16x8 mmx sse2 neon/;
|
||||
$vp8_sad16x8_sse2=vp8_sad16x8_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad";
|
||||
specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/;
|
||||
$vp8_sad16x16_sse2=vp8_sad16x16_wmt;
|
||||
$vp8_sad16x16_media=vp8_sad16x16_armv6;
|
||||
|
||||
#
|
||||
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
|
||||
#
|
||||
add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad4x4x3 sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad8x8x3 sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad8x16x3 sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad16x8x3 sse3 ssse3/;
|
||||
|
||||
add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad16x16x3 sse3 ssse3/;
|
||||
|
||||
# Note the only difference in the following prototypes is that they return into
|
||||
# an array of short
|
||||
add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
|
||||
specialize qw/vp8_sad4x4x8 sse4_1/;
|
||||
$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4;
|
||||
|
||||
add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
|
||||
specialize qw/vp8_sad8x8x8 sse4_1/;
|
||||
$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4;
|
||||
|
||||
add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
|
||||
specialize qw/vp8_sad8x16x8 sse4_1/;
|
||||
$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4;
|
||||
|
||||
add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
|
||||
specialize qw/vp8_sad16x8x8 sse4_1/;
|
||||
$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4;
|
||||
|
||||
add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array";
|
||||
specialize qw/vp8_sad16x16x8 sse4_1/;
|
||||
$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4;
|
||||
|
||||
#
|
||||
# Multi-block SAD, comparing a reference to N independent blocks
|
||||
#
|
||||
add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad4x4x4d sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad8x8x4d sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad8x16x4d sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad16x8x4d sse3/;
|
||||
|
||||
add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array";
|
||||
specialize qw/vp8_sad16x16x4d sse3/;
|
||||
|
||||
#
|
||||
# Encoder functions below this point.
|
||||
#
|
||||
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
|
||||
|
||||
#
|
||||
# Sum of squares (vector)
|
||||
#
|
||||
add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
|
||||
specialize qw/vp8_get_mb_ss mmx sse2/;
|
||||
|
||||
#
|
||||
# SSE (Sum Squared Error)
|
||||
#
|
||||
add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
|
||||
specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
|
||||
$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
|
||||
|
||||
add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
|
||||
specialize qw/vp8_mse16x16 mmx sse2 media neon/;
|
||||
$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
|
||||
$vp8_mse16x16_media=vp8_mse16x16_armv6;
|
||||
|
||||
add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
|
||||
specialize qw/vp8_get4x4sse_cs mmx neon/;
|
||||
|
||||
#
|
||||
# Block copy
|
||||
#
|
||||
if ($opts{arch} =~ /x86/) {
|
||||
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
|
||||
add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
|
||||
specialize qw/vp8_copy32xn sse2 sse3/;
|
||||
}
|
||||
|
||||
|
302
vp8/common/sad_c.c
Normal file
302
vp8/common/sad_c.c
Normal file
@@ -0,0 +1,302 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Sum of absolute differences between an m-wide, n-high block at src_ptr
 * and the block of the same size at ref_ptr.
 *
 * Rows are src_stride / ref_stride bytes apart.  After each completed row
 * the running total is compared against max_sad; once it is exceeded the
 * remaining rows are skipped and the partial total is returned.
 */
static unsigned int sad_mx_n_c(const unsigned char *src_ptr, int src_stride,
                               const unsigned char *ref_ptr, int ref_stride,
                               unsigned int max_sad, int m, int n)
{
    unsigned int total = 0;
    int row;

    for (row = 0; row < n; ++row, src_ptr += src_stride, ref_ptr += ref_stride)
    {
        int col;

        for (col = 0; col < m; ++col)
        {
            const int delta = src_ptr[col] - ref_ptr[col];

            total += (unsigned int)(delta < 0 ? -delta : delta);
        }

        /* Early out: the caller is not interested in totals above max_sad. */
        if (total > max_sad)
            break;
    }

    return total;
}
|
||||
|
||||
/* max_sad is provided as an optional optimization point. Alternative
|
||||
* implementations of these functions are not required to check it.
|
||||
*/
|
||||
|
||||
/* 16x16 SAD: forwards to sad_mx_n_c with a 16-wide, 16-high block. */
unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride,
                            const unsigned char *ref_ptr, int ref_stride,
                            unsigned int max_sad)
{
    enum { block_w = 16, block_h = 16 };
    const unsigned int sad = sad_mx_n_c(src_ptr, src_stride,
                                        ref_ptr, ref_stride,
                                        max_sad, block_w, block_h);

    return sad;
}
|
||||
|
||||
/* 8x8 SAD: forwards to sad_mx_n_c with an 8-wide, 8-high block. */
unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride,
                          const unsigned char *ref_ptr, int ref_stride,
                          unsigned int max_sad)
{
    enum { block_w = 8, block_h = 8 };
    const unsigned int sad = sad_mx_n_c(src_ptr, src_stride,
                                        ref_ptr, ref_stride,
                                        max_sad, block_w, block_h);

    return sad;
}
|
||||
|
||||
/* 16x8 SAD: forwards to sad_mx_n_c with a 16-wide, 8-high block. */
unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride,
                           const unsigned char *ref_ptr, int ref_stride,
                           unsigned int max_sad)
{
    enum { block_w = 16, block_h = 8 };
    const unsigned int sad = sad_mx_n_c(src_ptr, src_stride,
                                        ref_ptr, ref_stride,
                                        max_sad, block_w, block_h);

    return sad;
}
|
||||
|
||||
/* 8x16 SAD: forwards to sad_mx_n_c with an 8-wide, 16-high block. */
unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride,
                           const unsigned char *ref_ptr, int ref_stride,
                           unsigned int max_sad)
{
    enum { block_w = 8, block_h = 16 };
    const unsigned int sad = sad_mx_n_c(src_ptr, src_stride,
                                        ref_ptr, ref_stride,
                                        max_sad, block_w, block_h);

    return sad;
}
|
||||
|
||||
/* 4x4 SAD: forwards to sad_mx_n_c with a 4-wide, 4-high block. */
unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride,
                          const unsigned char *ref_ptr, int ref_stride,
                          unsigned int max_sad)
{
    enum { block_w = 4, block_h = 4 };
    const unsigned int sad = sad_mx_n_c(src_ptr, src_stride,
                                        ref_ptr, ref_stride,
                                        max_sad, block_w, block_h);

    return sad;
}
|
||||
|
||||
/* Computes three 16x16 SADs of the source block against the reference
 * positions ref_ptr + 0, + 1 and + 2, storing them in sad_array[0..2].
 * Each SAD is requested with max_sad = UINT_MAX.
 */
void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride,
                      const unsigned char *ref_ptr, int ref_stride,
                      unsigned int *sad_array)
{
    int shift;

    for (shift = 0; shift < 3; ++shift)
    {
        sad_array[shift] = vp8_sad16x16_c(src_ptr, src_stride,
                                          ref_ptr + shift, ref_stride,
                                          UINT_MAX);
    }
}
|
||||
|
||||
/* Computes eight 16x16 SADs of the source block against the reference
 * positions ref_ptr + 0 .. ref_ptr + 7, storing them in sad_array[0..7].
 * Each result is narrowed to unsigned short; a 16x16 SAD of 8-bit samples
 * is at most 256 * 255 = 65280, so it fits in 16 bits.
 */
void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride,
                      const unsigned char *ref_ptr, int ref_stride,
                      unsigned short *sad_array)
{
    int shift;

    for (shift = 0; shift < 8; ++shift)
    {
        const unsigned int sad = vp8_sad16x16_c(src_ptr, src_stride,
                                                ref_ptr + shift, ref_stride,
                                                UINT_MAX);

        sad_array[shift] = (unsigned short)sad;
    }
}
|
||||
|
||||
/* Computes three 16x8 SADs of the source block against the reference
 * positions ref_ptr + 0, + 1 and + 2, storing them in sad_array[0..2].
 * Each SAD is requested with max_sad = UINT_MAX.
 */
void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char *ref_ptr, int ref_stride,
                     unsigned int *sad_array)
{
    int shift;

    for (shift = 0; shift < 3; ++shift)
    {
        sad_array[shift] = vp8_sad16x8_c(src_ptr, src_stride,
                                         ref_ptr + shift, ref_stride,
                                         UINT_MAX);
    }
}
|
||||
|
||||
/* Computes eight 16x8 SADs of the source block against the reference
 * positions ref_ptr + 0 .. ref_ptr + 7, storing them (narrowed to
 * unsigned short) in sad_array[0..7].
 */
void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char *ref_ptr, int ref_stride,
                     unsigned short *sad_array)
{
    int shift;

    for (shift = 0; shift < 8; ++shift)
    {
        const unsigned int sad = vp8_sad16x8_c(src_ptr, src_stride,
                                               ref_ptr + shift, ref_stride,
                                               UINT_MAX);

        sad_array[shift] = (unsigned short)sad;
    }
}
|
||||
|
||||
/* Computes three 8x8 SADs of the source block against the reference
 * positions ref_ptr + 0, + 1 and + 2, storing them in sad_array[0..2].
 * Each SAD is requested with max_sad = UINT_MAX.
 */
void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride,
                    const unsigned char *ref_ptr, int ref_stride,
                    unsigned int *sad_array)
{
    int shift;

    for (shift = 0; shift < 3; ++shift)
    {
        sad_array[shift] = vp8_sad8x8_c(src_ptr, src_stride,
                                        ref_ptr + shift, ref_stride,
                                        UINT_MAX);
    }
}
|
||||
|
||||
/* Computes eight 8x8 SADs of the source block against the reference
 * positions ref_ptr + 0 .. ref_ptr + 7, storing them (narrowed to
 * unsigned short) in sad_array[0..7].
 */
void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride,
                    const unsigned char *ref_ptr, int ref_stride,
                    unsigned short *sad_array)
{
    int shift;

    for (shift = 0; shift < 8; ++shift)
    {
        const unsigned int sad = vp8_sad8x8_c(src_ptr, src_stride,
                                              ref_ptr + shift, ref_stride,
                                              UINT_MAX);

        sad_array[shift] = (unsigned short)sad;
    }
}
|
||||
|
||||
/* Computes three 8x16 SADs of the source block against the reference
 * positions ref_ptr + 0, + 1 and + 2, storing them in sad_array[0..2].
 * Each SAD is requested with max_sad = UINT_MAX.
 */
void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char *ref_ptr, int ref_stride,
                     unsigned int *sad_array)
{
    int shift;

    for (shift = 0; shift < 3; ++shift)
    {
        sad_array[shift] = vp8_sad8x16_c(src_ptr, src_stride,
                                         ref_ptr + shift, ref_stride,
                                         UINT_MAX);
    }
}
|
||||
|
||||
/* Computes eight 8x16 SADs of the source block against the reference
 * positions ref_ptr + 0 .. ref_ptr + 7, storing them (narrowed to
 * unsigned short) in sad_array[0..7].
 */
void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char *ref_ptr, int ref_stride,
                     unsigned short *sad_array)
{
    int shift;

    for (shift = 0; shift < 8; ++shift)
    {
        const unsigned int sad = vp8_sad8x16_c(src_ptr, src_stride,
                                               ref_ptr + shift, ref_stride,
                                               UINT_MAX);

        sad_array[shift] = (unsigned short)sad;
    }
}
|
||||
|
||||
/* Computes three 4x4 SADs of the source block against the reference
 * positions ref_ptr + 0, + 1 and + 2, storing them in sad_array[0..2].
 * Each SAD is requested with max_sad = UINT_MAX.
 */
void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride,
                    const unsigned char *ref_ptr, int ref_stride,
                    unsigned int *sad_array)
{
    int shift;

    for (shift = 0; shift < 3; ++shift)
    {
        sad_array[shift] = vp8_sad4x4_c(src_ptr, src_stride,
                                        ref_ptr + shift, ref_stride,
                                        UINT_MAX);
    }
}
|
||||
|
||||
/* Computes eight 4x4 SADs of the source block against the reference
 * positions ref_ptr + 0 .. ref_ptr + 7, storing them (narrowed to
 * unsigned short) in sad_array[0..7].
 */
void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride,
                    const unsigned char *ref_ptr, int ref_stride,
                    unsigned short *sad_array)
{
    int shift;

    for (shift = 0; shift < 8; ++shift)
    {
        const unsigned int sad = vp8_sad4x4_c(src_ptr, src_stride,
                                              ref_ptr + shift, ref_stride,
                                              UINT_MAX);

        sad_array[shift] = (unsigned short)sad;
    }
}
|
||||
|
||||
/* Computes four 16x16 SADs of the source block, one against each of the
 * reference pointers ref_ptr[0..3], storing them in sad_array[0..3].
 * All four references share ref_stride; max_sad is UINT_MAX for each.
 */
void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride,
                       const unsigned char * const ref_ptr[], int ref_stride,
                       unsigned int *sad_array)
{
    int idx;

    for (idx = 0; idx < 4; ++idx)
    {
        sad_array[idx] = vp8_sad16x16_c(src_ptr, src_stride,
                                        ref_ptr[idx], ref_stride,
                                        UINT_MAX);
    }
}
|
||||
|
||||
/* Computes four 16x8 SADs of the source block, one against each of the
 * reference pointers ref_ptr[0..3], storing them in sad_array[0..3].
 * All four references share ref_stride; max_sad is UINT_MAX for each.
 */
void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride,
                      const unsigned char * const ref_ptr[], int ref_stride,
                      unsigned int *sad_array)
{
    int idx;

    for (idx = 0; idx < 4; ++idx)
    {
        sad_array[idx] = vp8_sad16x8_c(src_ptr, src_stride,
                                       ref_ptr[idx], ref_stride,
                                       UINT_MAX);
    }
}
|
||||
|
||||
/* Computes four 8x8 SADs of the source block, one against each of the
 * reference pointers ref_ptr[0..3], storing them in sad_array[0..3].
 * All four references share ref_stride; max_sad is UINT_MAX for each.
 */
void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char * const ref_ptr[], int ref_stride,
                     unsigned int *sad_array)
{
    int idx;

    for (idx = 0; idx < 4; ++idx)
    {
        sad_array[idx] = vp8_sad8x8_c(src_ptr, src_stride,
                                      ref_ptr[idx], ref_stride,
                                      UINT_MAX);
    }
}
|
||||
|
||||
/* Computes four 8x16 SADs of the source block, one against each of the
 * reference pointers ref_ptr[0..3], storing them in sad_array[0..3].
 * All four references share ref_stride; max_sad is UINT_MAX for each.
 */
void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride,
                      const unsigned char * const ref_ptr[], int ref_stride,
                      unsigned int *sad_array)
{
    int idx;

    for (idx = 0; idx < 4; ++idx)
    {
        sad_array[idx] = vp8_sad8x16_c(src_ptr, src_stride,
                                       ref_ptr[idx], ref_stride,
                                       UINT_MAX);
    }
}
|
||||
|
||||
/* Computes four 4x4 SADs of the source block, one against each of the
 * reference pointers ref_ptr[0..3], storing them in sad_array[0..3].
 * All four references share ref_stride; max_sad is UINT_MAX for each.
 */
void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride,
                     const unsigned char * const ref_ptr[], int ref_stride,
                     unsigned int *sad_array)
{
    int idx;

    for (idx = 0; idx < 4; ++idx)
    {
        sad_array[idx] = vp8_sad4x4_c(src_ptr, src_stride,
                                      ref_ptr[idx], ref_stride,
                                      UINT_MAX);
    }
}
|
||||
|
||||
/* Copy 2 macroblocks to a buffer.
 *
 * Copies `height` rows of 32 bytes each from src_ptr to dst_ptr.  After
 * every row the source advances by src_stride bytes and the destination
 * by dst_stride bytes.  Nothing is copied when height <= 0.
 *
 * memcpy replaces both the 32 hand-unrolled byte stores and the
 * CONFIG_FAST_UNALIGNED variant, which accessed the unsigned char buffers
 * through uint32_t pointers (an aliasing and alignment violation).  The
 * source is const-qualified because it is only read, matching the
 * prototype declared for vp8_copy32xn in the rtcd definitions.
 */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride,
                    int height)
{
    int r;

    for (r = 0; r < height; r++)
    {
        memcpy(dst_ptr, src_ptr, 32);

        src_ptr += src_stride;
        dst_ptr += dst_stride;
    }
}
|
@@ -14,42 +14,50 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned int(*vpx_sad_fn_t)(
|
||||
const uint8_t *src_ptr,
|
||||
typedef unsigned int(*vp8_sad_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride);
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned int max_sad);
|
||||
|
||||
typedef void (*vp8_copy32xn_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
unsigned char *ref_ptr,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
int n);
|
||||
|
||||
typedef void (*vpx_sad_multi_fn_t)(
|
||||
typedef void (*vp8_sad_multi_fn_t)(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_array,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned int *sad_array);
|
||||
|
||||
typedef void (*vpx_sad_multi_d_fn_t)
|
||||
typedef void (*vp8_sad_multi1_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char * const ref_array[],
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned short *sad_array
|
||||
);
|
||||
|
||||
typedef void (*vp8_sad_multi_d_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char * const ref_ptr[],
|
||||
int ref_stride,
|
||||
unsigned int *sad_array
|
||||
);
|
||||
|
||||
typedef unsigned int (*vpx_variance_fn_t)
|
||||
typedef unsigned int (*vp8_variance_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
@@ -69,17 +77,40 @@ typedef unsigned int (*vp8_subpixvariance_fn_t)
|
||||
unsigned int *sse
|
||||
);
|
||||
|
||||
typedef void (*vp8_ssimpf_fn_t)
|
||||
(
|
||||
unsigned char *s,
|
||||
int sp,
|
||||
unsigned char *r,
|
||||
int rp,
|
||||
unsigned long *sum_s,
|
||||
unsigned long *sum_r,
|
||||
unsigned long *sum_sq_s,
|
||||
unsigned long *sum_sq_r,
|
||||
unsigned long *sum_sxr
|
||||
);
|
||||
|
||||
typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
|
||||
|
||||
typedef unsigned int (*vp8_get16x16prederror_fn_t)
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_stride
|
||||
);
|
||||
|
||||
typedef struct variance_vtable
|
||||
{
|
||||
vpx_sad_fn_t sdf;
|
||||
vpx_variance_fn_t vf;
|
||||
vp8_sad_fn_t sdf;
|
||||
vp8_variance_fn_t vf;
|
||||
vp8_subpixvariance_fn_t svf;
|
||||
vpx_variance_fn_t svf_halfpix_h;
|
||||
vpx_variance_fn_t svf_halfpix_v;
|
||||
vpx_variance_fn_t svf_halfpix_hv;
|
||||
vpx_sad_multi_fn_t sdx3f;
|
||||
vpx_sad_multi_fn_t sdx8f;
|
||||
vpx_sad_multi_d_fn_t sdx4df;
|
||||
vp8_variance_fn_t svf_halfpix_h;
|
||||
vp8_variance_fn_t svf_halfpix_v;
|
||||
vp8_variance_fn_t svf_halfpix_hv;
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_copy32xn_fn_t copymem;
|
||||
#endif
|
||||
|
@@ -8,34 +8,43 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "filter.h"
|
||||
#include "variance.h"
|
||||
|
||||
/* This is a bad idea.
|
||||
* ctz = count trailing zeros */
|
||||
static int ctz(int a) {
|
||||
int b = 0;
|
||||
while (a != 1) {
|
||||
a >>= 1;
|
||||
b++;
|
||||
}
|
||||
return b;
|
||||
#include "variance.h"
|
||||
#include "filter.h"
|
||||
|
||||
|
||||
unsigned int vp8_get_mb_ss_c
|
||||
(
|
||||
const short *src_ptr
|
||||
)
|
||||
{
|
||||
unsigned int i = 0, sum = 0;
|
||||
|
||||
do
|
||||
{
|
||||
sum += (src_ptr[i] * src_ptr[i]);
|
||||
i++;
|
||||
}
|
||||
while (i < 256);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
static unsigned int variance(
|
||||
|
||||
static void variance(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
int w,
|
||||
int h,
|
||||
unsigned int *sse)
|
||||
unsigned int *sse,
|
||||
int *sum)
|
||||
{
|
||||
int i, j;
|
||||
int diff, sum;
|
||||
int diff;
|
||||
|
||||
sum = 0;
|
||||
*sum = 0;
|
||||
*sse = 0;
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
@@ -43,17 +52,114 @@ static unsigned int variance(
|
||||
for (j = 0; j < w; j++)
|
||||
{
|
||||
diff = src_ptr[j] - ref_ptr[j];
|
||||
sum += diff;
|
||||
*sum += diff;
|
||||
*sse += diff * diff;
|
||||
}
|
||||
|
||||
src_ptr += source_stride;
|
||||
ref_ptr += recon_stride;
|
||||
}
|
||||
|
||||
return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h)))));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance16x8_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance8x8_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance4x4_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_mse16x16_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
@@ -197,7 +303,7 @@ unsigned int vp8_sub_pixel_variance4x4_c
|
||||
/* Now filter vertically */
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
|
||||
|
||||
return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse);
|
||||
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
|
||||
@@ -222,7 +328,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
|
||||
|
||||
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse);
|
||||
return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_c
|
||||
@@ -246,7 +352,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
|
||||
|
||||
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse);
|
||||
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
|
||||
@@ -286,6 +392,21 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_c
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_c
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -307,7 +428,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
|
||||
|
||||
return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse);
|
||||
return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance8x16_c
|
||||
@@ -333,5 +454,5 @@ unsigned int vp8_sub_pixel_variance8x16_c
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
|
||||
|
||||
return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse);
|
||||
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
@@ -1,93 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse2(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;dst_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;dst_stride
|
||||
movsxd rcx, dword ptr arg(4) ;height
|
||||
|
||||
.block_copy_sse2_loopx4:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm2, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqu xmm4, XMMWORD PTR [rsi]
|
||||
movdqu xmm5, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm6, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm2
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm4
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm5
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm6
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
sub rcx, 4
|
||||
cmp rcx, 4
|
||||
jge .block_copy_sse2_loopx4
|
||||
|
||||
cmp rcx, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse2_loop:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
lea rsi, [rsi+rax]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
lea rdi, [rdi+rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jne .block_copy_sse2_loop
|
||||
|
||||
.copy_is_done:
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
@@ -1,146 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro STACK_FRAME_CREATE_X3 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define ref_ptr rdi
|
||||
%define ref_stride rdx
|
||||
%define end_ptr rcx
|
||||
%define ret_var rbx
|
||||
%define result_ptr arg(4)
|
||||
%define max_sad arg(4)
|
||||
%define height dword ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
mov rdi, arg(2) ; ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ; src_stride
|
||||
movsxd rdx, dword ptr arg(3) ; ref_stride
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define ref_ptr r8
|
||||
%define ref_stride r9
|
||||
%define end_ptr r10
|
||||
%define ret_var r11
|
||||
%define result_ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%define max_sad [rsp+xmm_stack_space+8+4*8]
|
||||
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define ref_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define end_ptr r9
|
||||
%define ret_var r10
|
||||
%define result_ptr r8
|
||||
%define max_sad r8
|
||||
%define height r8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY_X3 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define ref_ptr
|
||||
%define ref_stride
|
||||
%define end_ptr
|
||||
%define ret_var
|
||||
%define result_ptr
|
||||
%define max_sad
|
||||
%define height
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
.block_copy_sse3_loopx4:
|
||||
lea end_ptr, [src_ptr+src_stride*2]
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
||||
movdqu xmm4, XMMWORD PTR [end_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
||||
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
||||
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*4]
|
||||
|
||||
lea end_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
||||
movdqa XMMWORD PTR [end_ptr], xmm4
|
||||
movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
||||
|
||||
lea ref_ptr, [ref_ptr+ref_stride*4]
|
||||
|
||||
sub height, 4
|
||||
cmp height, 4
|
||||
jge .block_copy_sse3_loopx4
|
||||
|
||||
;Check whether any rows remain to be copied.
|
||||
cmp height, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse3_loop:
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
lea src_ptr, [src_ptr+src_stride]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
lea ref_ptr, [ref_ptr+ref_stride]
|
||||
|
||||
sub height, 1
|
||||
jne .block_copy_sse3_loop
|
||||
|
||||
.copy_is_done:
|
||||
STACK_FRAME_DESTROY_X3
|
@@ -11,18 +11,18 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
global sym(vpx_sad16x16_mmx) PRIVATE
|
||||
global sym(vpx_sad8x16_mmx) PRIVATE
|
||||
global sym(vpx_sad8x8_mmx) PRIVATE
|
||||
global sym(vpx_sad4x4_mmx) PRIVATE
|
||||
global sym(vpx_sad16x8_mmx) PRIVATE
|
||||
global sym(vp8_sad16x16_mmx) PRIVATE
|
||||
global sym(vp8_sad8x16_mmx) PRIVATE
|
||||
global sym(vp8_sad8x8_mmx) PRIVATE
|
||||
global sym(vp8_sad4x4_mmx) PRIVATE
|
||||
global sym(vp8_sad16x8_mmx) PRIVATE
|
||||
|
||||
;unsigned int vpx_sad16x16_mmx(
|
||||
;unsigned int vp8_sad16x16_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
sym(vpx_sad16x16_mmx):
|
||||
sym(vp8_sad16x16_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
@@ -109,12 +109,12 @@ sym(vpx_sad16x16_mmx):
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vpx_sad8x16_mmx(
|
||||
;unsigned int vp8_sad8x16_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
sym(vpx_sad8x16_mmx):
|
||||
sym(vp8_sad8x16_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
@@ -181,12 +181,12 @@ sym(vpx_sad8x16_mmx):
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vpx_sad8x8_mmx(
|
||||
;unsigned int vp8_sad8x8_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
sym(vpx_sad8x8_mmx):
|
||||
sym(vp8_sad8x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
@@ -251,12 +251,12 @@ sym(vpx_sad8x8_mmx):
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vpx_sad4x4_mmx(
|
||||
;unsigned int vp8_sad4x4_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
sym(vpx_sad4x4_mmx):
|
||||
sym(vp8_sad4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
@@ -340,12 +340,12 @@ sym(vpx_sad4x4_mmx):
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vpx_sad16x8_mmx(
|
||||
;unsigned int vp8_sad16x8_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
sym(vpx_sad16x8_mmx):
|
||||
sym(vp8_sad16x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
410
vp8/common/x86/sad_sse2.asm
Normal file
410
vp8/common/x86/sad_sse2.asm
Normal file
@@ -0,0 +1,410 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; Sum of absolute differences (SAD) between the 16x16 block at src_ptr and
; the 16x16 block at ref_ptr. The total is returned in rax.
;unsigned int vp8_sad16x16_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad16x16_wmt) PRIVATE
|
||||
sym(vp8_sad16x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM 6
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; rcx = src_ptr + 16*src_stride (built in two steps): loop end marker
lea rcx, [rsi+rax*8]
|
||||
|
||||
lea rcx, [rcx+rax*8]
|
||||
; xmm6 accumulates the partial sums
pxor xmm6, xmm6
|
||||
|
||||
; each pass of the loop handles two rows
.x16x16sad_wmt_loop:
|
||||
|
||||
movq xmm0, QWORD PTR [rsi]
|
||||
movq xmm2, QWORD PTR [rsi+8]
|
||||
|
||||
movq xmm1, QWORD PTR [rdi]
|
||||
movq xmm3, QWORD PTR [rdi+8]
|
||||
|
||||
movq xmm4, QWORD PTR [rsi+rax]
|
||||
movq xmm5, QWORD PTR [rdi+rdx]
|
||||
|
||||
|
||||
; byte-interleave the two 8-byte halves of a row into one 16-byte vector;
; src and ref are interleaved the same way, so corresponding bytes stay paired
punpcklbw xmm0, xmm2
|
||||
punpcklbw xmm1, xmm3
|
||||
|
||||
psadbw xmm0, xmm1
|
||||
movq xmm2, QWORD PTR [rsi+rax+8]
|
||||
|
||||
movq xmm3, QWORD PTR [rdi+rdx+8]
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
punpcklbw xmm4, xmm2
|
||||
|
||||
punpcklbw xmm5, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
|
||||
paddw xmm6, xmm0
|
||||
paddw xmm6, xmm4
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x16x16sad_wmt_loop
|
||||
|
||||
; psadbw leaves one partial sum in each 64-bit half: add the two halves
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movq rax, xmm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; SAD between an 8-wide, 16-row block at src_ptr and one at ref_ptr.
; The running total is compared with max_sad (unsigned) before every pair
; of rows; once it exceeds max_sad the routine exits early and returns the
; partial total. The result is returned in rax.
;unsigned int vp8_sad8x16_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
global sym(vp8_sad8x16_wmt) PRIVATE
|
||||
sym(vp8_sad8x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; rcx = src_ptr + 16*src_stride (built in two steps): loop end marker
lea rcx, [rsi+rbx*8]
|
||||
|
||||
lea rcx, [rcx+rbx*8]
|
||||
; mm7 accumulates the running SAD
pxor mm7, mm7
|
||||
|
||||
.x8x16sad_wmt_loop:
|
||||
|
||||
; early-out check: rax already holds the partial total if the jump is taken
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x8x16sad_wmt_early_exit
|
||||
|
||||
; two rows per pass
movq mm0, QWORD PTR [rsi]
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
|
||||
movq mm2, QWORD PTR [rsi+rbx]
|
||||
movq mm3, QWORD PTR [rdi+rdx]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
lea rsi, [rsi+rbx*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm2
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x8x16sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
|
||||
.x8x16sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; SAD between an 8x8 block at src_ptr and one at ref_ptr, one row per loop
; pass. The running total is compared (unsigned) with the fifth argument
; before every row; once it is exceeded the routine exits early and returns
; the partial total. The result is returned in rax.
;unsigned int vp8_sad8x8_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
; int max_sad)
|
||||
global sym(vp8_sad8x8_wmt) PRIVATE
|
||||
sym(vp8_sad8x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; rcx = src_ptr + 8*src_stride: loop end marker
lea rcx, [rsi+rbx*8]
|
||||
; mm7 accumulates the running SAD
pxor mm7, mm7
|
||||
|
||||
.x8x8sad_wmt_loop:
|
||||
|
||||
; early-out check: rax already holds the partial total if the jump is taken
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x8x8sad_wmt_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [rsi]
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
|
||||
psadbw mm0, mm1
|
||||
lea rsi, [rsi+rbx]
|
||||
|
||||
add rdi, rdx
|
||||
paddw mm7, mm0
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x8x8sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
.x8x8sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; SAD between a 4x4 block at src_ptr and one at ref_ptr, computed without
; a loop: rows are loaded 4 bytes at a time and combined in pairs so that
; each psadbw covers two rows. The total is returned in rax.
;unsigned int vp8_sad4x4_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad4x4_wmt) PRIVATE
|
||||
sym(vp8_sad4x4_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; rows 0 and 1
movd mm0, DWORD PTR [rsi]
|
||||
movd mm1, DWORD PTR [rdi]
|
||||
|
||||
movd mm2, DWORD PTR [rsi+rax]
|
||||
movd mm3, DWORD PTR [rdi+rdx]
|
||||
|
||||
; byte-interleave row 0 with row 1 (same way for src and ref) so that one
; 8-byte psadbw covers both rows
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
psadbw mm0, mm1
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
; rows 2 and 3, handled the same way
movd mm4, DWORD PTR [rsi]
|
||||
|
||||
movd mm5, DWORD PTR [rdi]
|
||||
movd mm6, DWORD PTR [rsi+rax]
|
||||
|
||||
movd mm7, DWORD PTR [rdi+rdx]
|
||||
punpcklbw mm4, mm6
|
||||
|
||||
punpcklbw mm5, mm7
|
||||
psadbw mm4, mm5
|
||||
|
||||
paddw mm0, mm4
|
||||
movq rax, mm0
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; SAD between a 16-wide, 8-row block at src_ptr and one at ref_ptr, two rows
; per loop pass, each row handled as two 8-byte halves. The running total is
; compared (unsigned) with the fifth argument before every pass; once it is
; exceeded the routine exits early and returns the partial total in rax.
;unsigned int vp8_sad16x8_wmt(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
; int max_sad)
|
||||
global sym(vp8_sad16x8_wmt) PRIVATE
|
||||
sym(vp8_sad16x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; rcx = src_ptr + 8*src_stride: loop end marker
lea rcx, [rsi+rbx*8]
|
||||
; mm7 accumulates the running SAD
pxor mm7, mm7
|
||||
|
||||
.x16x8sad_wmt_loop:
|
||||
|
||||
; early-out check: rax already holds the partial total if the jump is taken
movq rax, mm7
|
||||
cmp eax, arg(4)
|
||||
ja .x16x8sad_wmt_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [rsi]
|
||||
movq mm2, QWORD PTR [rsi+8]
|
||||
|
||||
movq mm1, QWORD PTR [rdi]
|
||||
movq mm3, QWORD PTR [rdi+8]
|
||||
|
||||
movq mm4, QWORD PTR [rsi+rbx]
|
||||
movq mm5, QWORD PTR [rdi+rdx]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
; mm1/mm3 are reused for the right half of the second row
movq mm1, QWORD PTR [rsi+rbx+8]
|
||||
movq mm3, QWORD PTR [rdi+rdx+8]
|
||||
|
||||
psadbw mm4, mm5
|
||||
psadbw mm1, mm3
|
||||
|
||||
lea rsi, [rsi+rbx*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
paddw mm0, mm2
|
||||
paddw mm4, mm1
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm4
|
||||
|
||||
cmp rsi, rcx
|
||||
jne .x16x8sad_wmt_loop
|
||||
|
||||
movq rax, mm7
|
||||
|
||||
.x16x8sad_wmt_early_exit:
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; Copies a 32-byte-wide block of `height` rows from src_ptr to dst_ptr.
; Rows are read with movdqu and written with movdqa, so every destination
; row address must be 16-byte aligned.
; NOTE(review): the four-row loop body runs once before height is tested,
; so this looks like it assumes height >= 4 -- confirm against callers.
;void vp8_copy32xn_sse2(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;dst_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;dst_stride
|
||||
movsxd rcx, dword ptr arg(4) ;height
|
||||
|
||||
.block_copy_sse2_loopx4:
|
||||
; load source rows 0 and 1
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm2, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
; load source rows 2 and 3
movdqu xmm4, XMMWORD PTR [rsi]
|
||||
movdqu xmm5, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm6, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
; store destination rows 0 and 1
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm2
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
; store destination rows 2 and 3
movdqa XMMWORD PTR [rdi], xmm4
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm5
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm6
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
; keep taking four rows at a time while at least four remain
sub rcx, 4
|
||||
cmp rcx, 4
|
||||
jge .block_copy_sse2_loopx4
|
||||
|
||||
; nothing left over: done
cmp rcx, 0
|
||||
je .copy_is_done
|
||||
|
||||
; copy the remaining rows one at a time
.block_copy_sse2_loop:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
lea rsi, [rsi+rax]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
lea rdi, [rdi+rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jne .block_copy_sse2_loop
|
||||
|
||||
.copy_is_done:
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
960
vp8/common/x86/sad_sse3.asm
Normal file
960
vp8/common/x86/sad_sse3.asm
Normal file
@@ -0,0 +1,960 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; Function prologue shared by the routines that take
; (src_ptr, src_stride, ref_ptr, ref_stride, <fifth argument>).
; Defines the symbolic names src_ptr, src_stride, ref_ptr, ref_stride,
; end_ptr, ret_var and three aliases for the fifth argument (result_ptr,
; max_sad, height) for the ABI being assembled. Only the 32-bit branch
; builds a frame and copies arguments into registers; the 64-bit branches
; name the registers the arguments already arrive in.
; NOTE(review): the 64-bit branches use the stride/height argument registers
; as passed, without the movsxd sign-extension done in the 32-bit branch --
; confirm the upper halves are well defined for int arguments.
%macro STACK_FRAME_CREATE_X3 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define ref_ptr rdi
|
||||
%define ref_stride rdx
|
||||
%define end_ptr rcx
|
||||
%define ret_var rbx
|
||||
%define result_ptr arg(4)
|
||||
%define max_sad arg(4)
|
||||
%define height dword ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
mov rdi, arg(2) ; ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ; src_stride
|
||||
movsxd rdx, dword ptr arg(3) ; ref_stride
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
; Win64 branch: the fifth argument is read from the stack, above the area
; reserved by SAVE_XMM
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define ref_ptr r8
|
||||
%define ref_stride r9
|
||||
%define end_ptr r10
|
||||
%define ret_var r11
|
||||
%define result_ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%define max_sad [rsp+xmm_stack_space+8+4*8]
|
||||
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%else
|
||||
; remaining 64-bit branch: all five arguments are in registers
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define ref_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define end_ptr r9
|
||||
%define ret_var r10
|
||||
%define result_ptr r8
|
||||
%define max_sad r8
|
||||
%define height r8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%endmacro
|
||||
|
||||
; Function epilogue matching STACK_FRAME_CREATE_X3: clears the symbolic
; names by redefining them as empty, undoes whatever the prologue pushed or
; saved for the current ABI, and returns.
%macro STACK_FRAME_DESTROY_X3 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define ref_ptr
|
||||
%define ref_stride
|
||||
%define end_ptr
|
||||
%define ret_var
|
||||
%define result_ptr
|
||||
%define max_sad
|
||||
%define height
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
; pops mirror the pushes in STACK_FRAME_CREATE_X3, in reverse order
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
; Function prologue for the x4d routines, which take
; (src_ptr, src_stride, ref_ptr_base, ref_stride, results).
; Defines src_ptr, src_stride, r0_ptr..r3_ptr, ref_stride and result_ptr for
; the ABI being assembled, and loads the four reference pointers from the
; array at ref_ptr_base via LOAD_X4_ADDRESSES.
%macro STACK_FRAME_CREATE_X4 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define r0_ptr rcx
|
||||
%define r1_ptr rdx
|
||||
%define r2_ptr rbx
|
||||
%define r3_ptr rdi
|
||||
%define ref_stride rbp
|
||||
%define result_ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
; rbp is reused as ref_stride below, so a second copy of the frame pointer
; is pushed here; the x4d routines pop it before they read result_ptr
push rbp
|
||||
mov rdi, arg(2) ; ref_ptr_base
|
||||
|
||||
; the third pointer lands in rax for now; it is moved to rbx by the xchg
LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ; src_stride
|
||||
movsxd rbp, dword ptr arg(3) ; ref_stride
|
||||
|
||||
; after the swap: rax = src_stride, rbx = r2_ptr
xchg rbx, rax
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define r0_ptr rsi
|
||||
%define r1_ptr r10
|
||||
%define r2_ptr r11
|
||||
%define r3_ptr r8
|
||||
%define ref_stride r9
|
||||
%define result_ptr [rsp+xmm_stack_space+16+4*8]
|
||||
; rsi is used as r0_ptr in this branch, so it is saved first
push rsi
|
||||
|
||||
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define r0_ptr r9
|
||||
%define r1_ptr r10
|
||||
%define r2_ptr r11
|
||||
%define r3_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define result_ptr r8
|
||||
|
||||
LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
||||
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Function epilogue matching STACK_FRAME_CREATE_X4: clears the symbolic
; names by redefining them as empty, undoes whatever the prologue pushed or
; saved for the current ABI, and returns.
; In the 32-bit branch the extra rbp copy pushed by the prologue is not
; popped here; the x4d routines pop it themselves before using result_ptr.
%macro STACK_FRAME_DESTROY_X4 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define r0_ptr
|
||||
%define r1_ptr
|
||||
%define r2_ptr
|
||||
%define r3_ptr
|
||||
%define ref_stride
|
||||
%define result_ptr
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
; rsi was pushed after SAVE_XMM in the prologue, so it is popped first
pop rsi
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
; Accumulates the SAD of two 16-byte source rows against three reference
; positions: the reference row at byte offsets +0, +1 and +2.
;   %1  stage: 0 = first call (initialises the accumulators),
;              1 = middle call, 2 = last call (pointers are not advanced)
;   %2  source pointer (read with movdqa, so rows must be 16-byte aligned)
;   %3  reference pointer (read with lddqu)
;   %4  source stride
;   %5  reference stride
; Accumulators: xmm5 (+0), xmm6 (+1), xmm7 (+2). Scratch: xmm0-xmm3.
%macro PROCESS_16X2X3 5
|
||||
%if %1==0
|
||||
; first row of the first call: results go straight into the accumulators
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm5, XMMWORD PTR [%3]
|
||||
lddqu xmm6, XMMWORD PTR [%3+1]
|
||||
lddqu xmm7, XMMWORD PTR [%3+2]
|
||||
|
||||
psadbw xmm5, xmm0
|
||||
psadbw xmm6, xmm0
|
||||
psadbw xmm7, xmm0
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm1, XMMWORD PTR [%3]
|
||||
lddqu xmm2, XMMWORD PTR [%3+1]
|
||||
lddqu xmm3, XMMWORD PTR [%3+2]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm5, xmm1
|
||||
paddw xmm6, xmm2
|
||||
paddw xmm7, xmm3
|
||||
%endif
|
||||
; second row
movdqa xmm0, XMMWORD PTR [%2+%4]
|
||||
lddqu xmm1, XMMWORD PTR [%3+%5]
|
||||
lddqu xmm2, XMMWORD PTR [%3+%5+1]
|
||||
lddqu xmm3, XMMWORD PTR [%3+%5+2]
|
||||
|
||||
%if %1==0 || %1==1
|
||||
; advance both pointers by two rows for the next call
lea %2, [%2+%4*2]
|
||||
lea %3, [%3+%5*2]
|
||||
%endif
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm5, xmm1
|
||||
paddw xmm6, xmm2
|
||||
paddw xmm7, xmm3
|
||||
%endmacro
|
||||
|
||||
; 8-byte-wide counterpart of PROCESS_16X2X3, using the MMX registers:
; accumulates the SAD of two 8-byte source rows against the reference row at
; byte offsets +0, +1 and +2.
;   %1  stage: 0 = first call (initialises the accumulators),
;              1 = middle call, 2 = last call (pointers are not advanced)
;   %2  source pointer
;   %3  reference pointer
;   %4  source stride
;   %5  reference stride
; Accumulators: mm5 (+0), mm6 (+1), mm7 (+2). Scratch: mm0-mm3.
%macro PROCESS_8X2X3 5
|
||||
%if %1==0
|
||||
; first row of the first call: results go straight into the accumulators
movq mm0, QWORD PTR [%2]
|
||||
movq mm5, QWORD PTR [%3]
|
||||
movq mm6, QWORD PTR [%3+1]
|
||||
movq mm7, QWORD PTR [%3+2]
|
||||
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
psadbw mm7, mm0
|
||||
%else
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm1, QWORD PTR [%3]
|
||||
movq mm2, QWORD PTR [%3+1]
|
||||
movq mm3, QWORD PTR [%3+2]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm5, mm1
|
||||
paddw mm6, mm2
|
||||
paddw mm7, mm3
|
||||
%endif
|
||||
; second row
movq mm0, QWORD PTR [%2+%4]
|
||||
movq mm1, QWORD PTR [%3+%5]
|
||||
movq mm2, QWORD PTR [%3+%5+1]
|
||||
movq mm3, QWORD PTR [%3+%5+2]
|
||||
|
||||
%if %1==0 || %1==1
|
||||
; advance both pointers by two rows for the next call
lea %2, [%2+%4*2]
|
||||
lea %3, [%3+%5*2]
|
||||
%endif
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm5, mm1
|
||||
paddw mm6, mm2
|
||||
paddw mm7, mm3
|
||||
%endmacro
|
||||
|
||||
; Loads four consecutive register-sized values from the array at %1 into
; %2, %3, %4 and %5 (in that order).
; %5 is written last, so it may be the same register as %1;
; STACK_FRAME_CREATE_X4 relies on this.
%macro LOAD_X4_ADDRESSES 5
|
||||
mov %2, [%1+REG_SZ_BYTES*0]
|
||||
mov %3, [%1+REG_SZ_BYTES*1]
|
||||
|
||||
mov %4, [%1+REG_SZ_BYTES*2]
|
||||
mov %5, [%1+REG_SZ_BYTES*3]
|
||||
%endmacro
|
||||
|
||||
; Accumulates the SAD of two 16-byte source rows against four independent
; reference blocks.
;   %1       stage: 0 = first call (initialises the accumulators),
;                   1 = middle call, 2 = last call (pointers not advanced)
;   %2       source pointer (read with movdqa, so rows must be 16-byte aligned)
;   %3..%6   the four reference pointers (read with lddqu)
;   %7       source stride
;   %8       reference stride (shared by all four references)
; Accumulators: xmm4..xmm7 for %3..%6. Scratch: xmm0-xmm3; xmm1 is reused
; for the fourth reference after its first result has been accumulated.
%macro PROCESS_16X2X4 8
|
||||
%if %1==0
|
||||
; first row of the first call: results go straight into the accumulators
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm4, XMMWORD PTR [%3]
|
||||
lddqu xmm5, XMMWORD PTR [%4]
|
||||
lddqu xmm6, XMMWORD PTR [%5]
|
||||
lddqu xmm7, XMMWORD PTR [%6]
|
||||
|
||||
psadbw xmm4, xmm0
|
||||
psadbw xmm5, xmm0
|
||||
psadbw xmm6, xmm0
|
||||
psadbw xmm7, xmm0
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [%2]
|
||||
lddqu xmm1, XMMWORD PTR [%3]
|
||||
lddqu xmm2, XMMWORD PTR [%4]
|
||||
lddqu xmm3, XMMWORD PTR [%5]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm4, xmm1
|
||||
lddqu xmm1, XMMWORD PTR [%6]
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm6, xmm3
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
paddw xmm7, xmm1
|
||||
%endif
|
||||
; second row
movdqa xmm0, XMMWORD PTR [%2+%7]
|
||||
lddqu xmm1, XMMWORD PTR [%3+%8]
|
||||
lddqu xmm2, XMMWORD PTR [%4+%8]
|
||||
lddqu xmm3, XMMWORD PTR [%5+%8]
|
||||
|
||||
psadbw xmm1, xmm0
|
||||
psadbw xmm2, xmm0
|
||||
psadbw xmm3, xmm0
|
||||
|
||||
paddw xmm4, xmm1
|
||||
lddqu xmm1, XMMWORD PTR [%6+%8]
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm6, xmm3
|
||||
|
||||
%if %1==0 || %1==1
|
||||
; advance all five pointers by two rows for the next call
lea %2, [%2+%7*2]
|
||||
lea %3, [%3+%8*2]
|
||||
|
||||
lea %4, [%4+%8*2]
|
||||
lea %5, [%5+%8*2]
|
||||
|
||||
lea %6, [%6+%8*2]
|
||||
%endif
|
||||
psadbw xmm1, xmm0
|
||||
paddw xmm7, xmm1
|
||||
|
||||
%endmacro
|
||||
|
||||
; 8-byte-wide counterpart of PROCESS_16X2X4, using the MMX registers:
; accumulates the SAD of two 8-byte source rows against four independent
; reference blocks.
;   %1       stage: 0 = first call (initialises the accumulators),
;                   1 = middle call, 2 = last call (pointers not advanced)
;   %2       source pointer
;   %3..%6   the four reference pointers
;   %7       source stride
;   %8       reference stride (shared by all four references)
; Accumulators: mm4..mm7 for %3..%6. Scratch: mm0-mm3; mm1 is reused for
; the fourth reference after its first result has been accumulated.
%macro PROCESS_8X2X4 8
|
||||
%if %1==0
|
||||
; first row of the first call: results go straight into the accumulators
movq mm0, QWORD PTR [%2]
|
||||
movq mm4, QWORD PTR [%3]
|
||||
movq mm5, QWORD PTR [%4]
|
||||
movq mm6, QWORD PTR [%5]
|
||||
movq mm7, QWORD PTR [%6]
|
||||
|
||||
psadbw mm4, mm0
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
psadbw mm7, mm0
|
||||
%else
|
||||
movq mm0, QWORD PTR [%2]
|
||||
movq mm1, QWORD PTR [%3]
|
||||
movq mm2, QWORD PTR [%4]
|
||||
movq mm3, QWORD PTR [%5]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm4, mm1
|
||||
movq mm1, QWORD PTR [%6]
|
||||
paddw mm5, mm2
|
||||
paddw mm6, mm3
|
||||
|
||||
psadbw mm1, mm0
|
||||
paddw mm7, mm1
|
||||
%endif
|
||||
; second row
movq mm0, QWORD PTR [%2+%7]
|
||||
movq mm1, QWORD PTR [%3+%8]
|
||||
movq mm2, QWORD PTR [%4+%8]
|
||||
movq mm3, QWORD PTR [%5+%8]
|
||||
|
||||
psadbw mm1, mm0
|
||||
psadbw mm2, mm0
|
||||
psadbw mm3, mm0
|
||||
|
||||
paddw mm4, mm1
|
||||
movq mm1, QWORD PTR [%6+%8]
|
||||
paddw mm5, mm2
|
||||
paddw mm6, mm3
|
||||
|
||||
%if %1==0 || %1==1
|
||||
; advance all five pointers by two rows for the next call
lea %2, [%2+%7*2]
|
||||
lea %3, [%3+%8*2]
|
||||
|
||||
lea %4, [%4+%8*2]
|
||||
lea %5, [%5+%8*2]
|
||||
|
||||
lea %6, [%6+%8*2]
|
||||
%endif
|
||||
psadbw mm1, mm0
|
||||
paddw mm7, mm1
|
||||
|
||||
%endmacro
|
||||
|
||||
; Computes three 16x16 SADs of the source block against the reference block
; at byte offsets +0, +1 and +2, and stores them as three consecutive
; 32-bit values at results[0..2].
;void vp8_sad16x16x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; eight invocations of two rows each = 16 rows
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
; each accumulator holds one partial sum per 64-bit half: add the halves
; and store the low 32 bits
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+8], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Computes three 16-wide, 8-row SADs of the source block against the
; reference block at byte offsets +0, +1 and +2, and stores them as three
; consecutive 32-bit values at results[0..2].
;void vp8_sad16x8x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; four invocations of two rows each = 8 rows
PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
; each accumulator holds one partial sum per 64-bit half: add the halves
; and store the low 32 bits
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+8], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Computes three 8-wide, 16-row SADs of the source block against the
; reference block at byte offsets +0, +1 and +2, and stores them as three
; consecutive 32-bit values at results[0..2].
;void vp8_sad8x16x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; eight invocations of two rows each = 16 rows
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
; pack the low dwords of mm5 and mm6 so one 8-byte store writes
; results[0] and results[1]
punpckldq mm5, mm6
|
||||
|
||||
movq [rcx], mm5
|
||||
movd [rcx+8], mm7
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Computes three 8x8 SADs of the source block against the reference block
; at byte offsets +0, +1 and +2, and stores them as three consecutive
; 32-bit values at results[0..2].
;void vp8_sad8x8x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; four invocations of two rows each = 8 rows
PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
; pack the low dwords of mm5 and mm6 so one 8-byte store writes
; results[0] and results[1]
punpckldq mm5, mm6
|
||||
|
||||
movq [rcx], mm5
|
||||
movd [rcx+8], mm7
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Computes three 4x4 SADs of the source block against the reference block
; at byte offsets +0, +1 and +2, and stores them as three consecutive
; 32-bit values at results[0..2].
; Rows are loaded 4 bytes at a time and byte-interleaved in pairs so that
; each 8-byte psadbw covers two rows.
;void vp8_sad4x4x3_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x3_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; rows 0-1: mm0 = source pair, mm1 = reference pair at offset +0
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm1, DWORD PTR [ref_ptr]
|
||||
|
||||
movd mm2, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm3, DWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
; rows 0-1: mm4 = reference pair at offset +1, mm5 = at offset +2
movd mm4, DWORD PTR [ref_ptr+1]
|
||||
movd mm5, DWORD PTR [ref_ptr+2]
|
||||
|
||||
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
||||
movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
|
||||
|
||||
psadbw mm1, mm0
|
||||
|
||||
punpcklbw mm4, mm2
|
||||
punpcklbw mm5, mm3
|
||||
|
||||
psadbw mm4, mm0
|
||||
psadbw mm5, mm0
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
; rows 2-3: mm0 = source pair, mm2 = reference pair at offset +0
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm2, DWORD PTR [ref_ptr]
|
||||
|
||||
movd mm3, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm6, DWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm3
|
||||
punpcklbw mm2, mm6
|
||||
|
||||
; rows 2-3: mm3 = reference pair at offset +1, mm7 = at offset +2
movd mm3, DWORD PTR [ref_ptr+1]
|
||||
movd mm7, DWORD PTR [ref_ptr+2]
|
||||
|
||||
psadbw mm2, mm0
|
||||
|
||||
; mm1 = total for offset +0
paddw mm1, mm2
|
||||
|
||||
movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
||||
movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
|
||||
|
||||
punpcklbw mm3, mm2
|
||||
punpcklbw mm7, mm6
|
||||
|
||||
psadbw mm3, mm0
|
||||
psadbw mm7, mm0
|
||||
|
||||
; mm3 = total for offset +1, mm7 = total for offset +2
paddw mm3, mm4
|
||||
paddw mm7, mm5
|
||||
|
||||
mov rcx, result_ptr
|
||||
|
||||
; pack the low dwords of mm1 and mm3 so one 8-byte store writes
; results[0] and results[1]
punpckldq mm1, mm3
|
||||
|
||||
movq [rcx], mm1
|
||||
movd [rcx+8], mm7
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; SAD between the 16x16 block at src_ptr and the 16x16 block at ref_ptr,
; four rows per loop pass. The total is returned in rax.
; Source rows are read with movdqa (must be 16-byte aligned); reference rows
; are read with movdqu.
; max_sad appears in the signature but is not read anywhere in this routine.
;unsigned int vp8_sad16x16_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
;%define lddqu movdqu
|
||||
global sym(vp8_sad16x16_sse3) PRIVATE
|
||||
sym(vp8_sad16x16_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
; end_ptr is used here as the loop counter: 4 passes x 4 rows = 16 rows
mov end_ptr, 4
|
||||
; xmm7 accumulates the partial sums
pxor xmm7, xmm7
|
||||
|
||||
.vp8_sad16x16_sse3_loop:
|
||||
movdqa xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movdqa xmm4, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
|
||||
|
||||
psadbw xmm0, xmm1
|
||||
|
||||
; xmm1 is free again after the psadbw above: reuse it for the fourth
; reference row
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw xmm2, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
psadbw xmm6, xmm1
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw xmm7, xmm0
|
||||
paddw xmm7, xmm2
|
||||
paddw xmm7, xmm4
|
||||
paddw xmm7, xmm6
|
||||
|
||||
sub end_ptr, 1
|
||||
jne .vp8_sad16x16_sse3_loop
|
||||
|
||||
; psadbw leaves one partial sum in each 64-bit half: add the two halves
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
paddw xmm0, xmm7
|
||||
movq rax, xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Copies a 32-byte-wide block of `height` rows from src_ptr to dst_ptr.
; Register aliases come from STACK_FRAME_CREATE_X3; its ref_ptr/ref_stride
; names refer to the destination (dst_ptr/dst_stride) in this routine.
; Rows are read with movdqu and written with movdqa, so every destination
; row address must be 16-byte aligned.
; NOTE(review): the four-row loop body runs once before height is tested,
; so this looks like it assumes height >= 4 -- confirm against callers.
;void vp8_copy32xn_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
.block_copy_sse3_loopx4:
|
||||
; end_ptr is used as a scratch pointer to source row 2
lea end_ptr, [src_ptr+src_stride*2]
|
||||
|
||||
; load four source rows (2 x 16 bytes each) into xmm0-xmm7
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
||||
movdqu xmm4, XMMWORD PTR [end_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
||||
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
||||
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*4]
|
||||
|
||||
; end_ptr now points at destination row 2
lea end_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
; store the four rows to the destination
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
||||
movdqa XMMWORD PTR [end_ptr], xmm4
|
||||
movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
||||
|
||||
lea ref_ptr, [ref_ptr+ref_stride*4]
|
||||
|
||||
; keep taking four rows at a time while at least four remain
sub height, 4
|
||||
cmp height, 4
|
||||
jge .block_copy_sse3_loopx4
|
||||
|
||||
;Check to see if there are more rows that need to be copied.
|
||||
cmp height, 0
|
||||
je .copy_is_done
|
||||
|
||||
; copy the remaining rows one at a time
.block_copy_sse3_loop:
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
lea src_ptr, [src_ptr+src_stride]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
lea ref_ptr, [ref_ptr+ref_stride]
|
||||
|
||||
sub height, 1
|
||||
jne .block_copy_sse3_loop
|
||||
|
||||
.copy_is_done:
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
; Computes four 16x16 SADs of the source block against four separate
; reference blocks, whose addresses are read from the array at ref_ptr_base,
; and stores them as four consecutive 32-bit values at results[0..3].
;void vp8_sad16x16x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; eight invocations of two rows each = 16 rows
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
; rbp served as ref_stride; recover the frame-pointer copy pushed by
; STACK_FRAME_CREATE_X4 before result_ptr (arg(4)) is read
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
; each accumulator holds one partial sum per 64-bit half: add the halves
; and store the low 32 bits
movq xmm0, xmm4
|
||||
psrldq xmm4, 8
|
||||
|
||||
paddw xmm0, xmm4
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+8], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+12], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
; Computes four 16-wide, 8-row SADs of the source block against four
; separate reference blocks, whose addresses are read from the array at
; ref_ptr_base, and stores them as four consecutive 32-bit values at
; results[0..3].
;void vp8_sad16x8x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; four invocations of two rows each = 8 rows
PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
; rbp served as ref_stride; recover the frame-pointer copy pushed by
; STACK_FRAME_CREATE_X4 before result_ptr (arg(4)) is read
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
; each accumulator holds one partial sum per 64-bit half: add the halves
; and store the low 32 bits
movq xmm0, xmm4
|
||||
psrldq xmm4, 8
|
||||
|
||||
paddw xmm0, xmm4
|
||||
movd [rcx], xmm0
|
||||
;-
|
||||
movq xmm0, xmm5
|
||||
psrldq xmm5, 8
|
||||
|
||||
paddw xmm0, xmm5
|
||||
movd [rcx+4], xmm0
|
||||
;-
|
||||
movq xmm0, xmm6
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm0, xmm6
|
||||
movd [rcx+8], xmm0
|
||||
;-
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
|
||||
paddw xmm0, xmm7
|
||||
movd [rcx+12], xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
; vp8_sad8x16x4d_sse3: eight PROCESS_8X2X4 steps against four references;
; results are written through the results pointer (no value is returned
; in a register by the code shown here).
; The frame macros, PROCESS_8X2X4 and the symbolic operand names are
; defined outside this view.
;void vp8_sad8x16x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; accumulators are read from the MMX registers mm4..mm7 below
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
; 32-bit ABI only: restore rbp before the results pointer is fetched
; (presumably rbp was pushed and reused by the frame macro -- confirm)
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
; pack the low dwords pairwise: mm4 = {mm4.lo, mm5.lo}, mm6 = {mm6.lo, mm7.lo}
punpckldq mm4, mm5
|
||||
punpckldq mm6, mm7
|
||||
|
||||
; two 8-byte stores fill results[0..1] and results[2..3]
movq [rcx], mm4
|
||||
movq [rcx+8], mm6
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
; vp8_sad8x8x4d_sse3: four PROCESS_8X2X4 steps against four references;
; results are written through the results pointer (no value is returned
; in a register by the code shown here).
; The frame macros, PROCESS_8X2X4 and the symbolic operand names are
; defined outside this view.
;void vp8_sad8x8x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; accumulators are read from the MMX registers mm4..mm7 below
PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
|
||||
|
||||
; 32-bit ABI only: restore rbp before the results pointer is fetched
; (presumably rbp was pushed and reused by the frame macro -- confirm)
%if ABI_IS_32BIT
|
||||
pop rbp
|
||||
%endif
|
||||
mov rcx, result_ptr
|
||||
|
||||
; pack the low dwords pairwise: mm4 = {mm4.lo, mm5.lo}, mm6 = {mm6.lo, mm7.lo}
punpckldq mm4, mm5
|
||||
punpckldq mm6, mm7
|
||||
|
||||
; two 8-byte stores fill results[0..1] and results[2..3]
movq [rcx], mm4
|
||||
movq [rcx+8], mm6
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
||||
; vp8_sad4x4x4d_sse3: SAD of a 4x4 source block against four 4x4 reference
; blocks (r0_ptr..r3_ptr), written as four 32-bit values to results[0..3].
; Rows are handled two at a time: two 4-byte rows are byte-interleaved into
; one 8-byte MMX register so that a single psadbw covers both rows.
; The frame macros and the symbolic operand names (src_ptr, rN_ptr,
; *_stride, result_ptr) are defined outside this view.
;void vp8_sad4x4x4d_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x4d_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
||||
; rows 0-1: load 4 bytes per row from the source and from reference 0
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm1, DWORD PTR [r0_ptr]
|
||||
|
||||
movd mm2, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm3, DWORD PTR [r0_ptr+ref_stride]
|
||||
|
||||
; interleave row 0 with row 1 (same interleave on both sides keeps bytes paired)
punpcklbw mm0, mm2
|
||||
punpcklbw mm1, mm3
|
||||
|
||||
; rows 0-1 of references 1..3
movd mm4, DWORD PTR [r1_ptr]
|
||||
movd mm5, DWORD PTR [r2_ptr]
|
||||
|
||||
movd mm6, DWORD PTR [r3_ptr]
|
||||
movd mm2, DWORD PTR [r1_ptr+ref_stride]
|
||||
|
||||
movd mm3, DWORD PTR [r2_ptr+ref_stride]
|
||||
movd mm7, DWORD PTR [r3_ptr+ref_stride]
|
||||
|
||||
; mm1 = SAD(rows 0-1) for reference 0
psadbw mm1, mm0
|
||||
|
||||
punpcklbw mm4, mm2
|
||||
punpcklbw mm5, mm3
|
||||
|
||||
punpcklbw mm6, mm7
|
||||
; mm4/mm5/mm6 = SAD(rows 0-1) for references 1/2/3
psadbw mm4, mm0
|
||||
|
||||
psadbw mm5, mm0
|
||||
psadbw mm6, mm0
|
||||
|
||||
|
||||
|
||||
|
||||
; advance all five pointers by two rows
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea r0_ptr, [r0_ptr+ref_stride*2]
|
||||
|
||||
lea r1_ptr, [r1_ptr+ref_stride*2]
|
||||
lea r2_ptr, [r2_ptr+ref_stride*2]
|
||||
|
||||
lea r3_ptr, [r3_ptr+ref_stride*2]
|
||||
|
||||
; rows 2-3: source and reference 0
movd mm0, DWORD PTR [src_ptr]
|
||||
movd mm2, DWORD PTR [r0_ptr]
|
||||
|
||||
movd mm3, DWORD PTR [src_ptr+src_stride]
|
||||
movd mm7, DWORD PTR [r0_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm0, mm3
|
||||
punpcklbw mm2, mm7
|
||||
|
||||
movd mm3, DWORD PTR [r1_ptr]
|
||||
movd mm7, DWORD PTR [r2_ptr]
|
||||
|
||||
; mm2 = SAD(rows 2-3) for reference 0
psadbw mm2, mm0
|
||||
; 32-bit ABI only: the value in rbp is copied to rax before rbp is restored,
; and ref_stride is re-pointed at rax for the remaining loads.
; NOTE(review): a NASM %define stays in effect for the rest of the file
; unless it is redefined -- confirm later code does not rely on the old
; meaning of ref_stride.
%if ABI_IS_32BIT
|
||||
mov rax, rbp
|
||||
|
||||
pop rbp
|
||||
%define ref_stride rax
|
||||
%endif
|
||||
mov rsi, result_ptr
|
||||
|
||||
; results[0] = rows 0-1 + rows 2-3 for reference 0
paddw mm1, mm2
|
||||
movd [rsi], mm1
|
||||
|
||||
; rows 2-3 of references 1 and 2 (mm1/mm2 are free again)
movd mm2, DWORD PTR [r1_ptr+ref_stride]
|
||||
movd mm1, DWORD PTR [r2_ptr+ref_stride]
|
||||
|
||||
punpcklbw mm3, mm2
|
||||
punpcklbw mm7, mm1
|
||||
|
||||
psadbw mm3, mm0
|
||||
psadbw mm7, mm0
|
||||
|
||||
; rows 2-3 of reference 3
movd mm2, DWORD PTR [r3_ptr]
|
||||
movd mm1, DWORD PTR [r3_ptr+ref_stride]
|
||||
|
||||
; add the rows 0-1 partial sums for references 1 and 2
paddw mm3, mm4
|
||||
paddw mm7, mm5
|
||||
|
||||
movd [rsi+4], mm3
|
||||
punpcklbw mm2, mm1
|
||||
|
||||
movd [rsi+8], mm7
|
||||
psadbw mm2, mm0
|
||||
|
||||
; results[3] = rows 0-1 + rows 2-3 for reference 3
paddw mm2, mm6
|
||||
movd [rsi+12], mm2
|
||||
|
||||
|
||||
STACK_FRAME_DESTROY_X4
|
||||
|
353
vp8/common/x86/sad_sse4.asm
Normal file
353
vp8/common/x86/sad_sse4.asm
Normal file
@@ -0,0 +1,353 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; PROCESS_16X2X8 first
;   Accumulates, for two 16-byte source rows, the row SAD against eight
;   horizontally adjacent reference positions (byte offsets 0..7) into the
;   eight 16-bit lanes of xmm1.
;   %1 != 0 : first row initialises xmm1; %1 == 0 : first row is added to xmm1.
;   The second row is always added.
;   Inputs: rsi = source row, rdi = reference row, rax / rdx = the strides
;   added to rsi / rdi. Both pointers are advanced by two rows.
;   Overwrites xmm0 and xmm2..xmm5. Reads 24 bytes from each reference row.
;   The source row is loaded with movdqa, so rsi and rsi+rax must be
;   16-byte aligned.
;   mpsadbw imm 0x0 compares source bytes 0-3 with the windows starting at
;   reference offsets 0..7; imm 0x5 compares source bytes 4-7 with the
;   windows starting at offsets 4..11. Added together they give the 8-byte
;   SAD at offsets 0..7.
%macro PROCESS_16X2X8 1
|
||||
%if %1
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
; xmm1 = ref bytes 0..15, xmm3 = ref bytes 8..23
punpcklqdq xmm1, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
; left half of the row: source bytes 0..7
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
; bring source bytes 8..15 down to the low half
psrldq xmm0, 8
|
||||
|
||||
; right half of the row: source bytes 8..15 against ref bytes 8..23
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm1, xmm3
|
||||
paddw xmm1, xmm4
|
||||
%else
|
||||
; same computation, built in xmm5 and then added to the running sums
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
; second row (one stride further on), always accumulated
movdqa xmm0, XMMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
movq xmm2, MMWORD PTR [rdi+ rdx+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
; step both pointers past the two rows just loaded
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
; PROCESS_8X2X8 first
;   Accumulates, for two 8-byte source rows, the row SAD against eight
;   horizontally adjacent reference positions (byte offsets 0..7) into the
;   eight 16-bit lanes of xmm1.
;   %1 != 0 : first row initialises xmm1; %1 == 0 : first row is added to xmm1.
;   Inputs: rsi = source row, rdi = reference row, rax / rdx = the strides
;   added to rsi / rdi. Both pointers are advanced by two rows.
;   Overwrites xmm0, xmm2, xmm3, xmm5. Reads 16 bytes from each reference row.
;   mpsadbw imm 0x0 handles source bytes 0-3, imm 0x5 handles source bytes
;   4-7 (reference windows shifted by 4 to match).
%macro PROCESS_8X2X8 1
|
||||
%if %1
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
; xmm1 = ref bytes 0..15
punpcklqdq xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm1, xmm2
|
||||
%else
|
||||
; same computation, built in xmm5 and then added to the running sums
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
; second row (one stride further on), always accumulated
movq xmm0, MMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
; step both pointers past the two rows just loaded
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
; PROCESS_4X2X8 first
;   Accumulates, for two 4-byte source rows, the row SAD against eight
;   horizontally adjacent reference positions (byte offsets 0..7) into the
;   eight 16-bit lanes of xmm1. A single mpsadbw (imm 0x0) covers the whole
;   4-byte row.
;   %1 != 0 : first row initialises xmm1; %1 == 0 : first row is added to xmm1.
;   Inputs: rsi = source row, rdi = reference row, rax / rdx = the strides
;   added to rsi / rdi. Both pointers are advanced by two rows.
;   Overwrites xmm0, xmm3, xmm5. Loads 16 bytes from each reference row.
%macro PROCESS_4X2X8 1
|
||||
%if %1
|
||||
movd xmm0, [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
; xmm1 = ref bytes 0..15
punpcklqdq xmm1, xmm3
|
||||
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
%else
|
||||
; same computation, built in xmm5 and then added to the running sums
movd xmm0, [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
; second row (one stride further on), always accumulated
movd xmm0, [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
; step both pointers past the two rows just loaded
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
|
||||
; vp8_sad16x16x8_sse4: SAD of a 16x16 source block against the reference
; block at eight consecutive horizontal byte offsets (ref_ptr+0 .. ref_ptr+7).
; The eight 16-bit sums are written to sad_array[0..7] with one aligned
; 16-byte store, so sad_array must be 16-byte aligned (movdqa faults on an
; unaligned memory operand). PROCESS_16X2X8 also loads the source rows with
; movdqa, so src_ptr and src_stride must keep every row 16-byte aligned.
; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm.
;void vp8_sad16x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array);
|
||||
global sym(vp8_sad16x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
; strides are C ints: sign-extend them for use in address arithmetic
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; 8 steps x 2 rows = 16 rows; the first step initialises xmm1
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
; rdi is reused as the output pointer; aligned store of all eight sums
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; vp8_sad16x8x8_sse4: SAD of a 16x8 source block against the reference block
; at eight consecutive horizontal byte offsets (ref_ptr+0 .. ref_ptr+7).
; The eight 16-bit sums are written to sad_array[0..7] with one aligned
; 16-byte store, so sad_array must be 16-byte aligned. PROCESS_16X2X8 also
; loads the source rows with movdqa, so every source row must be 16-byte
; aligned.
; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm.
;void vp8_sad16x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad16x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
; strides are C ints: sign-extend them for use in address arithmetic
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; 4 steps x 2 rows = 8 rows; the first step initialises xmm1
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
; rdi is reused as the output pointer; aligned store of all eight sums
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; vp8_sad8x8x8_sse4: SAD of an 8x8 source block against the reference block
; at eight consecutive horizontal byte offsets (ref_ptr+0 .. ref_ptr+7).
; The eight 16-bit sums are written to sad_array[0..7] with one aligned
; 16-byte store, so sad_array must be 16-byte aligned.
; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm.
;void vp8_sad8x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
; strides are C ints: sign-extend them for use in address arithmetic
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; 4 steps x 2 rows = 8 rows; the first step initialises xmm1
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
; rdi is reused as the output pointer; aligned store of all eight sums
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; vp8_sad8x16x8_sse4: SAD of an 8x16 source block against the reference block
; at eight consecutive horizontal byte offsets (ref_ptr+0 .. ref_ptr+7).
; The eight 16-bit sums are written to sad_array[0..7] with one aligned
; 16-byte store, so sad_array must be 16-byte aligned.
; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm.
;void vp8_sad8x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
; strides are C ints: sign-extend them for use in address arithmetic
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; 8 steps x 2 rows = 16 rows; the first step initialises xmm1
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
; rdi is reused as the output pointer; aligned store of all eight sums
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; vp8_sad4x4x8_sse4: SAD of a 4x4 source block against the reference block
; at eight consecutive horizontal byte offsets (ref_ptr+0 .. ref_ptr+7).
; The eight 16-bit sums are written to sad_array[0..7] with one aligned
; 16-byte store, so sad_array must be 16-byte aligned.
; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm.
;void vp8_sad4x4x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad4x4x8_sse4) PRIVATE
|
||||
sym(vp8_sad4x4x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
; strides are C ints: sign-extend them for use in address arithmetic
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
; 2 steps x 2 rows = 4 rows; the first step initialises xmm1
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
; rdi is reused as the output pointer; aligned store of all eight sums
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
@@ -146,14 +146,14 @@
|
||||
|
||||
%endmacro
|
||||
|
||||
;void int vpx_sad16x16x3_ssse3(
|
||||
;void int vp8_sad16x16x3_ssse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vpx_sad16x16x3_ssse3) PRIVATE
|
||||
sym(vpx_sad16x16x3_ssse3):
|
||||
global sym(vp8_sad16x16x3_ssse3) PRIVATE
|
||||
sym(vp8_sad16x16x3_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
@@ -169,31 +169,31 @@ sym(vpx_sad16x16x3_ssse3):
|
||||
mov rdx, 0xf
|
||||
and rdx, rdi
|
||||
|
||||
jmp .vpx_sad16x16x3_ssse3_skiptable
|
||||
.vpx_sad16x16x3_ssse3_jumptable:
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_0 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_1 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_2 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_3 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_4 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_5 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_6 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_7 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_8 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_9 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_10 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_11 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_12 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_13 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_14 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
dd .vpx_sad16x16x3_ssse3_aligned_by_15 - .vpx_sad16x16x3_ssse3_do_jump
|
||||
.vpx_sad16x16x3_ssse3_skiptable:
|
||||
jmp .vp8_sad16x16x3_ssse3_skiptable
|
||||
.vp8_sad16x16x3_ssse3_jumptable:
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_0 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_1 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_2 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_3 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_4 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_5 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_6 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_7 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_8 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_9 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump
|
||||
.vp8_sad16x16x3_ssse3_skiptable:
|
||||
|
||||
call .vpx_sad16x16x3_ssse3_do_jump
|
||||
.vpx_sad16x16x3_ssse3_do_jump:
|
||||
call .vp8_sad16x16x3_ssse3_do_jump
|
||||
.vp8_sad16x16x3_ssse3_do_jump:
|
||||
pop rcx ; get the address of do_jump
|
||||
mov rax, .vpx_sad16x16x3_ssse3_jumptable - .vpx_sad16x16x3_ssse3_do_jump
|
||||
add rax, rcx ; get the absolute address of vpx_sad16x16x3_ssse3_jumptable
|
||||
mov rax, .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump
|
||||
add rax, rcx ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable
|
||||
|
||||
movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
|
||||
add rcx, rax
|
||||
@@ -203,23 +203,23 @@ sym(vpx_sad16x16x3_ssse3):
|
||||
|
||||
jmp rcx
|
||||
|
||||
PROCESS_16X16X3_OFFSET 0, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 1, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 2, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 3, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 4, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 5, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 6, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 7, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 8, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 9, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 10, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 11, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 12, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 13, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 14, .vpx_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 0, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 1, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 2, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 3, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 4, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 5, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 6, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 7, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 8, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 9, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3
|
||||
PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3
|
||||
|
||||
.vpx_sad16x16x3_ssse3_aligned_by_15:
|
||||
.vp8_sad16x16x3_ssse3_aligned_by_15:
|
||||
PROCESS_16X2X3 1
|
||||
PROCESS_16X2X3 0
|
||||
PROCESS_16X2X3 0
|
||||
@@ -229,7 +229,7 @@ sym(vpx_sad16x16x3_ssse3):
|
||||
PROCESS_16X2X3 0
|
||||
PROCESS_16X2X3 0
|
||||
|
||||
.vpx_sad16x16x3_ssse3_store_off:
|
||||
.vp8_sad16x16x3_ssse3_store_off:
|
||||
mov rdi, arg(4) ;Results
|
||||
|
||||
movq xmm0, xmm5
|
||||
@@ -259,14 +259,14 @@ sym(vpx_sad16x16x3_ssse3):
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void int vpx_sad16x8x3_ssse3(
|
||||
;void int vp8_sad16x8x3_ssse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vpx_sad16x8x3_ssse3) PRIVATE
|
||||
sym(vpx_sad16x8x3_ssse3):
|
||||
global sym(vp8_sad16x8x3_ssse3) PRIVATE
|
||||
sym(vp8_sad16x8x3_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
@@ -282,31 +282,31 @@ sym(vpx_sad16x8x3_ssse3):
|
||||
mov rdx, 0xf
|
||||
and rdx, rdi
|
||||
|
||||
jmp .vpx_sad16x8x3_ssse3_skiptable
|
||||
.vpx_sad16x8x3_ssse3_jumptable:
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_0 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_1 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_2 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_3 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_4 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_5 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_6 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_7 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_8 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_9 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_10 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_11 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_12 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_13 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_14 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
dd .vpx_sad16x8x3_ssse3_aligned_by_15 - .vpx_sad16x8x3_ssse3_do_jump
|
||||
.vpx_sad16x8x3_ssse3_skiptable:
|
||||
jmp .vp8_sad16x8x3_ssse3_skiptable
|
||||
.vp8_sad16x8x3_ssse3_jumptable:
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_0 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_1 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_2 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_3 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_4 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_5 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_6 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_7 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_8 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_9 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump
|
||||
.vp8_sad16x8x3_ssse3_skiptable:
|
||||
|
||||
call .vpx_sad16x8x3_ssse3_do_jump
|
||||
.vpx_sad16x8x3_ssse3_do_jump:
|
||||
call .vp8_sad16x8x3_ssse3_do_jump
|
||||
.vp8_sad16x8x3_ssse3_do_jump:
|
||||
pop rcx ; get the address of do_jump
|
||||
mov rax, .vpx_sad16x8x3_ssse3_jumptable - .vpx_sad16x8x3_ssse3_do_jump
|
||||
add rax, rcx ; get the absolute address of vpx_sad16x8x3_ssse3_jumptable
|
||||
mov rax, .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump
|
||||
add rax, rcx ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable
|
||||
|
||||
movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
|
||||
add rcx, rax
|
||||
@@ -316,30 +316,30 @@ sym(vpx_sad16x8x3_ssse3):
|
||||
|
||||
jmp rcx
|
||||
|
||||
PROCESS_16X8X3_OFFSET 0, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 1, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 2, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 3, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 4, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 5, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 6, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 7, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 8, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 9, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 10, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 11, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 12, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 13, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 14, .vpx_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 0, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 1, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 2, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 3, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 4, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 5, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 6, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 7, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 8, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 9, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3
|
||||
PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3
|
||||
|
||||
.vpx_sad16x8x3_ssse3_aligned_by_15:
|
||||
.vp8_sad16x8x3_ssse3_aligned_by_15:
|
||||
|
||||
PROCESS_16X2X3 1
|
||||
PROCESS_16X2X3 0
|
||||
PROCESS_16X2X3 0
|
||||
PROCESS_16X2X3 0
|
||||
|
||||
.vpx_sad16x8x3_ssse3_store_off:
|
||||
.vp8_sad16x8x3_ssse3_store_off:
|
||||
mov rdi, arg(4) ;Results
|
||||
|
||||
movq xmm0, xmm5
|
@@ -11,9 +11,9 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;unsigned int vpx_get_mb_ss_mmx( short *src_ptr )
|
||||
global sym(vpx_get_mb_ss_mmx) PRIVATE
|
||||
sym(vpx_get_mb_ss_mmx):
|
||||
;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
|
||||
global sym(vp8_get_mb_ss_mmx) PRIVATE
|
||||
sym(vp8_get_mb_ss_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
@@ -63,7 +63,7 @@ sym(vpx_get_mb_ss_mmx):
|
||||
ret
|
||||
|
||||
|
||||
;void vpx_get8x8var_mmx
|
||||
;unsigned int vp8_get8x8var_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int source_stride,
|
||||
@@ -72,8 +72,8 @@ sym(vpx_get_mb_ss_mmx):
|
||||
; unsigned int *SSE,
|
||||
; int *Sum
|
||||
;)
|
||||
global sym(vpx_get8x8var_mmx) PRIVATE
|
||||
sym(vpx_get8x8var_mmx):
|
||||
global sym(vp8_get8x8var_mmx) PRIVATE
|
||||
sym(vp8_get8x8var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
@@ -310,8 +310,8 @@ sym(vpx_get8x8var_mmx):
|
||||
|
||||
|
||||
|
||||
;void
|
||||
;vpx_get4x4var_mmx
|
||||
;unsigned int
|
||||
;vp8_get4x4var_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int source_stride,
|
||||
@@ -320,8 +320,8 @@ sym(vpx_get8x8var_mmx):
|
||||
; unsigned int *SSE,
|
||||
; int *Sum
|
||||
;)
|
||||
global sym(vpx_get4x4var_mmx) PRIVATE
|
||||
sym(vpx_get4x4var_mmx):
|
||||
global sym(vp8_get4x4var_mmx) PRIVATE
|
||||
sym(vp8_get4x4var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
@@ -422,3 +422,430 @@ sym(vpx_get4x4var_mmx):
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
;unsigned int
|
||||
;vp8_get4x4sse_cs_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int source_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int recon_stride
|
||||
;)
|
||||
global sym(vp8_get4x4sse_cs_mmx) PRIVATE
|
||||
sym(vp8_get4x4sse_cs_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
|
||||
pxor mm6, mm6 ; Blank mmx7
|
||||
pxor mm7, mm7 ; Blank mmx7
|
||||
|
||||
mov rax, arg(0) ;[src_ptr] ; Load base addresses
|
||||
mov rbx, arg(2) ;[ref_ptr]
|
||||
movsxd rcx, dword ptr arg(1) ;[source_stride]
|
||||
movsxd rdx, dword ptr arg(3) ;[recon_stride]
|
||||
; Row 1
|
||||
movd mm0, [rax] ; Copy eight bytes to mm0
|
||||
movd mm1, [rbx] ; Copy eight bytes to mm1
|
||||
punpcklbw mm0, mm6 ; unpack to higher prrcision
|
||||
punpcklbw mm1, mm6
|
||||
psubsw mm0, mm1 ; A-B (low order) to MM0
|
||||
pmaddwd mm0, mm0 ; square and accumulate
|
||||
add rbx,rdx ; Inc pointer into ref data
|
||||
add rax,rcx ; Inc pointer into the new data
|
||||
movd mm1, [rbx] ; Copy eight bytes to mm1
|
||||
paddd mm7, mm0 ; accumulate in mm7
|
||||
|
||||
; Row 2
|
||||
movd mm0, [rax] ; Copy eight bytes to mm0
|
||||
punpcklbw mm0, mm6 ; unpack to higher prrcision
|
||||
punpcklbw mm1, mm6
|
||||
psubsw mm0, mm1 ; A-B (low order) to MM0
|
||||
pmaddwd mm0, mm0 ; square and accumulate
|
||||
add rbx,rdx ; Inc pointer into ref data
|
||||
add rax,rcx ; Inc pointer into the new data
|
||||
movd mm1, [rbx] ; Copy eight bytes to mm1
|
||||
paddd mm7, mm0 ; accumulate in mm7
|
||||
|
||||
; Row 3
|
||||
movd mm0, [rax] ; Copy eight bytes to mm0
|
||||
punpcklbw mm1, mm6
|
||||
punpcklbw mm0, mm6 ; unpack to higher prrcision
|
||||
psubsw mm0, mm1 ; A-B (low order) to MM0
|
||||
|
||||
pmaddwd mm0, mm0 ; square and accumulate
|
||||
add rbx,rdx ; Inc pointer into ref data
|
||||
add rax,rcx ; Inc pointer into the new data
|
||||
movd mm1, [rbx] ; Copy eight bytes to mm1
|
||||
paddd mm7, mm0 ; accumulate in mm7
|
||||
|
||||
; Row 4
|
||||
movd mm0, [rax] ; Copy eight bytes to mm0
|
||||
punpcklbw mm0, mm6 ; unpack to higher prrcision
|
||||
punpcklbw mm1, mm6
|
||||
psubsw mm0, mm1 ; A-B (low order) to MM0
|
||||
pmaddwd mm0, mm0 ; square and accumulate
|
||||
paddd mm7, mm0 ; accumulate in mm7
|
||||
|
||||
movq mm0, mm7 ;
|
||||
psrlq mm7, 32
|
||||
|
||||
paddd mm0, mm7
|
||||
movq rax, mm0
|
||||
|
||||
|
||||
; begin epilog
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
%define mmx_filter_shift 7
|
||||
|
||||
;void vp8_filter_block2d_bil4x4_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil4x4_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
|
||||
pxor mm6, mm6 ;
|
||||
pxor mm7, mm7 ;
|
||||
|
||||
mov rax, arg(4) ;HFilter ;
|
||||
mov rdx, arg(5) ;VFilter ;
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
|
||||
mov rcx, 4 ;
|
||||
pxor mm0, mm0 ;
|
||||
|
||||
movd mm1, [rsi] ;
|
||||
movd mm3, [rsi+1] ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil4x4_var_mmx_loop:
|
||||
|
||||
movd mm1, [rsi] ;
|
||||
movd mm3, [rsi+1] ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm3, mm5 ;
|
||||
|
||||
movq mm5, mm1 ;
|
||||
pmullw mm3, [rdx] ;
|
||||
|
||||
pmullw mm1, [rdx+8] ;
|
||||
paddw mm1, mm3 ;
|
||||
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
movd mm3, [rdi] ;
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
||||
psubw mm1, mm3 ;
|
||||
paddw mm6, mm1 ;
|
||||
|
||||
pmaddwd mm1, mm1 ;
|
||||
paddd mm7, mm1 ;
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz .filter_block2d_bil4x4_var_mmx_loop ;
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ;
|
||||
punpckhwd mm3, mm6 ;
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ;
|
||||
paddd mm2, mm6 ;
|
||||
|
||||
psrad mm2, 16 ;
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ;
|
||||
paddd mm4, mm7 ;
|
||||
|
||||
mov rdi, arg(6) ;sum
|
||||
mov rsi, arg(7) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ;
|
||||
movd dword ptr [rsi], mm4 ;
|
||||
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
;void vp8_filter_block2d_bil_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
pxor mm6, mm6 ;
|
||||
pxor mm7, mm7 ;
|
||||
mov rax, arg(5) ;HFilter ;
|
||||
|
||||
mov rdx, arg(6) ;VFilter ;
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
movq mm1, [rsi] ;
|
||||
|
||||
movq mm3, [rsi+1] ;
|
||||
movq mm2, mm1 ;
|
||||
|
||||
movq mm4, mm3 ;
|
||||
punpcklbw mm1, mm0 ;
|
||||
|
||||
punpckhbw mm2, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
pmullw mm2, [rax] ;
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
||||
punpckhbw mm4, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
pmullw mm4, [rax+8] ;
|
||||
paddw mm1, mm3 ;
|
||||
|
||||
paddw mm2, mm4 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
||||
packuswb mm5, mm2 ;
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil_var_mmx_loop:
|
||||
|
||||
movq mm1, [rsi] ;
|
||||
movq mm3, [rsi+1] ;
|
||||
|
||||
movq mm2, mm1 ;
|
||||
movq mm4, mm3 ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
punpckhbw mm2, mm0 ;
|
||||
|
||||
pmullw mm1, [rax] ;
|
||||
pmullw mm2, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, [rax+8] ;
|
||||
pmullw mm4, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
||||
movq mm3, mm5 ;
|
||||
movq mm4, mm5 ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
movq mm5, mm1 ;
|
||||
packuswb mm5, mm2 ;
|
||||
|
||||
pmullw mm3, [rdx] ;
|
||||
pmullw mm4, [rdx] ;
|
||||
|
||||
pmullw mm1, [rdx+8] ;
|
||||
pmullw mm2, [rdx+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
||||
movq mm3, [rdi] ;
|
||||
movq mm4, mm3 ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
psubw mm1, mm3 ;
|
||||
psubw mm2, mm4 ;
|
||||
|
||||
paddw mm6, mm1 ;
|
||||
pmaddwd mm1, mm1 ;
|
||||
|
||||
paddw mm6, mm2 ;
|
||||
pmaddwd mm2, mm2 ;
|
||||
|
||||
paddd mm7, mm1 ;
|
||||
paddd mm7, mm2 ;
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz .filter_block2d_bil_var_mmx_loop ;
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ;
|
||||
punpckhwd mm3, mm6 ;
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ;
|
||||
paddd mm2, mm6 ;
|
||||
|
||||
psrad mm2, 16 ;
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ;
|
||||
paddd mm4, mm7 ;
|
||||
|
||||
mov rdi, arg(7) ;sum
|
||||
mov rsi, arg(8) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ;
|
||||
movd dword ptr [rsi], mm4 ;
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
|
||||
align 16
|
||||
mmx_bi_rd:
|
||||
times 4 dw 64
|
@@ -13,6 +13,393 @@
|
||||
|
||||
%define xmm_filter_shift 7
|
||||
|
||||
;unsigned int vp8_get_mb_ss_sse2
|
||||
;(
|
||||
; short *src_ptr
|
||||
;)
|
||||
global sym(vp8_get_mb_ss_sse2) PRIVATE
|
||||
sym(vp8_get_mb_ss_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 1
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
|
||||
mov rax, arg(0) ;[src_ptr]
|
||||
mov rcx, 8
|
||||
pxor xmm4, xmm4
|
||||
|
||||
.NEXTROW:
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm1, [rax+16]
|
||||
movdqa xmm2, [rax+32]
|
||||
movdqa xmm3, [rax+48]
|
||||
pmaddwd xmm0, xmm0
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
pmaddwd xmm3, xmm3
|
||||
|
||||
paddd xmm0, xmm1
|
||||
paddd xmm2, xmm3
|
||||
paddd xmm4, xmm0
|
||||
paddd xmm4, xmm2
|
||||
|
||||
add rax, 0x40
|
||||
dec rcx
|
||||
ja .NEXTROW
|
||||
|
||||
movdqa xmm3,xmm4
|
||||
psrldq xmm4,8
|
||||
paddd xmm4,xmm3
|
||||
movdqa xmm3,xmm4
|
||||
psrldq xmm4,4
|
||||
paddd xmm4,xmm3
|
||||
movq rax,xmm4
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;unsigned int vp8_get16x16var_sse2
|
||||
;(
|
||||
; unsigned char * src_ptr,
|
||||
; int source_stride,
|
||||
; unsigned char * ref_ptr,
|
||||
; int recon_stride,
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get16x16var_sse2) PRIVATE
|
||||
sym(vp8_get16x16var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
push rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;[src_ptr]
|
||||
mov rdi, arg(2) ;[ref_ptr]
|
||||
|
||||
movsxd rax, DWORD PTR arg(1) ;[source_stride]
|
||||
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
|
||||
|
||||
; Prefetch data
|
||||
lea rcx, [rax+rax*2]
|
||||
prefetcht0 [rsi]
|
||||
prefetcht0 [rsi+rax]
|
||||
prefetcht0 [rsi+rax*2]
|
||||
prefetcht0 [rsi+rcx]
|
||||
lea rbx, [rsi+rax*4]
|
||||
prefetcht0 [rbx]
|
||||
prefetcht0 [rbx+rax]
|
||||
prefetcht0 [rbx+rax*2]
|
||||
prefetcht0 [rbx+rcx]
|
||||
|
||||
lea rcx, [rdx+rdx*2]
|
||||
prefetcht0 [rdi]
|
||||
prefetcht0 [rdi+rdx]
|
||||
prefetcht0 [rdi+rdx*2]
|
||||
prefetcht0 [rdi+rcx]
|
||||
lea rbx, [rdi+rdx*4]
|
||||
prefetcht0 [rbx]
|
||||
prefetcht0 [rbx+rdx]
|
||||
prefetcht0 [rbx+rdx*2]
|
||||
prefetcht0 [rbx+rcx]
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
|
||||
|
||||
pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
|
||||
mov rcx, 16
|
||||
|
||||
.var16loop:
|
||||
movdqu xmm1, XMMWORD PTR [rsi]
|
||||
movdqu xmm2, XMMWORD PTR [rdi]
|
||||
|
||||
prefetcht0 [rsi+rax*8]
|
||||
prefetcht0 [rdi+rdx*8]
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm2
|
||||
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
punpckhbw xmm3, xmm0
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
|
||||
psubw xmm1, xmm2
|
||||
psubw xmm3, xmm4
|
||||
|
||||
paddw xmm7, xmm1
|
||||
pmaddwd xmm1, xmm1
|
||||
|
||||
paddw xmm7, xmm3
|
||||
pmaddwd xmm3, xmm3
|
||||
|
||||
paddd xmm6, xmm1
|
||||
paddd xmm6, xmm3
|
||||
|
||||
add rsi, rax
|
||||
add rdi, rdx
|
||||
|
||||
sub rcx, 1
|
||||
jnz .var16loop
|
||||
|
||||
|
||||
movdqa xmm1, xmm6
|
||||
pxor xmm6, xmm6
|
||||
|
||||
pxor xmm5, xmm5
|
||||
punpcklwd xmm6, xmm7
|
||||
|
||||
punpckhwd xmm5, xmm7
|
||||
psrad xmm5, 16
|
||||
|
||||
psrad xmm6, 16
|
||||
paddd xmm6, xmm5
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpckldq xmm1, xmm0
|
||||
|
||||
punpckhdq xmm2, xmm0
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
paddd xmm1, xmm2
|
||||
punpckldq xmm6, xmm0
|
||||
|
||||
punpckhdq xmm7, xmm0
|
||||
paddd xmm6, xmm7
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddd xmm7, xmm6
|
||||
paddd xmm1, xmm2
|
||||
|
||||
mov rax, arg(5) ;[Sum]
|
||||
mov rdi, arg(4) ;[SSE]
|
||||
|
||||
movd DWORD PTR [rax], xmm7
|
||||
movd DWORD PTR [rdi], xmm1
|
||||
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
;unsigned int vp8_get8x8var_sse2
|
||||
;(
|
||||
; unsigned char * src_ptr,
|
||||
; int source_stride,
|
||||
; unsigned char * ref_ptr,
|
||||
; int recon_stride,
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get8x8var_sse2) PRIVATE
|
||||
sym(vp8_get8x8var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;[src_ptr]
|
||||
mov rdi, arg(2) ;[ref_ptr]
|
||||
|
||||
movsxd rax, DWORD PTR arg(1) ;[source_stride]
|
||||
movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
|
||||
|
||||
movq xmm1, QWORD PTR [rsi]
|
||||
movq xmm2, QWORD PTR [rdi]
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
punpcklbw xmm2, xmm0
|
||||
|
||||
psubsw xmm1, xmm2
|
||||
paddw xmm7, xmm1
|
||||
|
||||
pmaddwd xmm1, xmm1
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax * 2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx * 2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax *2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx *2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax *2]
|
||||
movq xmm3, QWORD PTR[rdi + rdx *2]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
lea rsi, [rsi + rax * 2]
|
||||
lea rdi, [rdi + rdx * 2]
|
||||
|
||||
movq xmm2, QWORD PTR[rsi + rax]
|
||||
movq xmm3, QWORD PTR[rdi + rdx]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
punpcklbw xmm3, xmm0
|
||||
|
||||
psubsw xmm2, xmm3
|
||||
paddw xmm7, xmm2
|
||||
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm1, xmm2
|
||||
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpcklwd xmm6, xmm0
|
||||
|
||||
punpckhwd xmm7, xmm0
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
paddw xmm6, xmm7
|
||||
punpckldq xmm1, xmm0
|
||||
|
||||
punpckhdq xmm2, xmm0
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
paddd xmm1, xmm2
|
||||
punpckldq xmm6, xmm0
|
||||
|
||||
punpckhdq xmm7, xmm0
|
||||
paddw xmm6, xmm7
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm7, xmm6
|
||||
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm6, 8
|
||||
|
||||
paddw xmm7, xmm6
|
||||
paddd xmm1, xmm2
|
||||
|
||||
mov rax, arg(5) ;[Sum]
|
||||
mov rdi, arg(4) ;[SSE]
|
||||
|
||||
movq rdx, xmm7
|
||||
movsx rcx, dx
|
||||
|
||||
mov dword ptr [rax], ecx
|
||||
movd DWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_filter_block2d_bil_var_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
|
@@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@@ -35,6 +34,25 @@ extern void filter_block1d_v6_mmx
|
||||
short *filter
|
||||
);
|
||||
|
||||
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
|
||||
extern unsigned int vp8_get8x8var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern unsigned int vp8_get4x4var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern void vp8_filter_block2d_bil4x4_var_mmx
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -59,6 +77,127 @@ extern void vp8_filter_block2d_bil_var_mmx
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
|
||||
unsigned int vp8_variance4x4_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x8_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_mse16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
*sse = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_variance16x8_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance8x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_sub_pixel_variance4x4_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -147,6 +286,20 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
@@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@@ -31,6 +30,38 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
extern unsigned int vp8_get4x4var_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
|
||||
unsigned int vp8_get_mb_ss_sse2
|
||||
(
|
||||
const short *src_ptr
|
||||
);
|
||||
unsigned int vp8_get16x16var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
unsigned int vp8_get8x8var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
void vp8_filter_block2d_bil_var_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -104,6 +135,115 @@ void vp8_half_vert_variance16x_h_sse2
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
unsigned int vp8_variance4x4_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 4));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 6));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0;
|
||||
int sum0;
|
||||
|
||||
|
||||
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
*sse = sse0;
|
||||
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
|
||||
}
|
||||
unsigned int vp8_mse16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
|
||||
unsigned int sse0;
|
||||
int sum0;
|
||||
vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
*sse = sse0;
|
||||
return sse0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance16x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_variance8x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
*sse = var;
|
||||
return (var - (((unsigned int)avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance4x4_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -238,6 +378,20 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_mse16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
|
||||
return *sse;
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
@@ -8,11 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/variance.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern unsigned int vp8_get16x16var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
|
@@ -127,7 +127,7 @@ void vp8_sixtap_predict4x4_mmx
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
@@ -148,7 +148,7 @@ void vp8_sixtap_predict16x16_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -180,7 +180,7 @@ void vp8_sixtap_predict8x8_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -206,7 +206,7 @@ void vp8_sixtap_predict8x4_mmx
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -252,7 +252,7 @@ void vp8_sixtap_predict16x16_sse2
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
@@ -292,7 +292,7 @@ void vp8_sixtap_predict8x8_sse2
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
@@ -330,7 +330,7 @@ void vp8_sixtap_predict8x4_sse2
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
@@ -432,7 +432,7 @@ void vp8_sixtap_predict16x16_ssse3
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -480,7 +480,7 @@ void vp8_sixtap_predict8x8_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -528,7 +528,7 @@ void vp8_sixtap_predict8x4_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
@@ -576,7 +576,7 @@ void vp8_sixtap_predict4x4_ssse3
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
|
@@ -1,353 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%define mmx_filter_shift 7
|
||||
|
||||
;void vp8_filter_block2d_bil4x4_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil4x4_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
|
||||
pxor mm6, mm6 ;
|
||||
pxor mm7, mm7 ;
|
||||
|
||||
mov rax, arg(4) ;HFilter ;
|
||||
mov rdx, arg(5) ;VFilter ;
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
|
||||
mov rcx, 4 ;
|
||||
pxor mm0, mm0 ;
|
||||
|
||||
movd mm1, [rsi] ;
|
||||
movd mm3, [rsi+1] ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil4x4_var_mmx_loop:
|
||||
|
||||
movd mm1, [rsi] ;
|
||||
movd mm3, [rsi+1] ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
movq mm3, mm5 ;
|
||||
|
||||
movq mm5, mm1 ;
|
||||
pmullw mm3, [rdx] ;
|
||||
|
||||
pmullw mm1, [rdx+8] ;
|
||||
paddw mm1, mm3 ;
|
||||
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
movd mm3, [rdi] ;
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
||||
psubw mm1, mm3 ;
|
||||
paddw mm6, mm1 ;
|
||||
|
||||
pmaddwd mm1, mm1 ;
|
||||
paddd mm7, mm1 ;
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz .filter_block2d_bil4x4_var_mmx_loop ;
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ;
|
||||
punpckhwd mm3, mm6 ;
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ;
|
||||
paddd mm2, mm6 ;
|
||||
|
||||
psrad mm2, 16 ;
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ;
|
||||
paddd mm4, mm7 ;
|
||||
|
||||
mov rdi, arg(6) ;sum
|
||||
mov rsi, arg(7) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ;
|
||||
movd dword ptr [rsi], mm4 ;
|
||||
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
;void vp8_filter_block2d_bil_var_mmx
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
; end prolog
|
||||
|
||||
pxor mm6, mm6 ;
|
||||
pxor mm7, mm7 ;
|
||||
mov rax, arg(5) ;HFilter ;
|
||||
|
||||
mov rdx, arg(6) ;VFilter ;
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
movq mm1, [rsi] ;
|
||||
|
||||
movq mm3, [rsi+1] ;
|
||||
movq mm2, mm1 ;
|
||||
|
||||
movq mm4, mm3 ;
|
||||
punpcklbw mm1, mm0 ;
|
||||
|
||||
punpckhbw mm2, mm0 ;
|
||||
pmullw mm1, [rax] ;
|
||||
|
||||
pmullw mm2, [rax] ;
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
||||
punpckhbw mm4, mm0 ;
|
||||
pmullw mm3, [rax+8] ;
|
||||
|
||||
pmullw mm4, [rax+8] ;
|
||||
paddw mm1, mm3 ;
|
||||
|
||||
paddw mm2, mm4 ;
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
movq mm5, mm1
|
||||
|
||||
packuswb mm5, mm2 ;
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
.filter_block2d_bil_var_mmx_loop:
|
||||
|
||||
movq mm1, [rsi] ;
|
||||
movq mm3, [rsi+1] ;
|
||||
|
||||
movq mm2, mm1 ;
|
||||
movq mm4, mm3 ;
|
||||
|
||||
punpcklbw mm1, mm0 ;
|
||||
punpckhbw mm2, mm0 ;
|
||||
|
||||
pmullw mm1, [rax] ;
|
||||
pmullw mm2, [rax] ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, [rax+8] ;
|
||||
pmullw mm4, [rax+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
||||
movq mm3, mm5 ;
|
||||
movq mm4, mm5 ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
movq mm5, mm1 ;
|
||||
packuswb mm5, mm2 ;
|
||||
|
||||
pmullw mm3, [rdx] ;
|
||||
pmullw mm4, [rdx] ;
|
||||
|
||||
pmullw mm1, [rdx+8] ;
|
||||
pmullw mm2, [rdx+8] ;
|
||||
|
||||
paddw mm1, mm3 ;
|
||||
paddw mm2, mm4 ;
|
||||
|
||||
paddw mm1, [GLOBAL(mmx_bi_rd)] ;
|
||||
paddw mm2, [GLOBAL(mmx_bi_rd)] ;
|
||||
|
||||
psraw mm1, mmx_filter_shift ;
|
||||
psraw mm2, mmx_filter_shift ;
|
||||
|
||||
movq mm3, [rdi] ;
|
||||
movq mm4, mm3 ;
|
||||
|
||||
punpcklbw mm3, mm0 ;
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
psubw mm1, mm3 ;
|
||||
psubw mm2, mm4 ;
|
||||
|
||||
paddw mm6, mm1 ;
|
||||
pmaddwd mm1, mm1 ;
|
||||
|
||||
paddw mm6, mm2 ;
|
||||
pmaddwd mm2, mm2 ;
|
||||
|
||||
paddd mm7, mm1 ;
|
||||
paddd mm7, mm2 ;
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz .filter_block2d_bil_var_mmx_loop ;
|
||||
|
||||
|
||||
pxor mm3, mm3 ;
|
||||
pxor mm2, mm2 ;
|
||||
|
||||
punpcklwd mm2, mm6 ;
|
||||
punpckhwd mm3, mm6 ;
|
||||
|
||||
paddd mm2, mm3 ;
|
||||
movq mm6, mm2 ;
|
||||
|
||||
psrlq mm6, 32 ;
|
||||
paddd mm2, mm6 ;
|
||||
|
||||
psrad mm2, 16 ;
|
||||
movq mm4, mm7 ;
|
||||
|
||||
psrlq mm4, 32 ;
|
||||
paddd mm4, mm7 ;
|
||||
|
||||
mov rdi, arg(7) ;sum
|
||||
mov rsi, arg(8) ;sumsquared
|
||||
|
||||
movd dword ptr [rdi], mm2 ;
|
||||
movd dword ptr [rsi], mm4 ;
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
|
||||
align 16
|
||||
mmx_bi_rd:
|
||||
times 4 dw 64
|
@@ -259,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int check_fragments_for_errors(VP8D_COMP *pbi)
|
||||
int check_fragments_for_errors(VP8D_COMP *pbi)
|
||||
{
|
||||
if (!pbi->ec_active &&
|
||||
pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)
|
||||
|
138
vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
Normal file
138
vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
Normal file
@@ -0,0 +1,138 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_mse16x16_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
;
|
||||
;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
|
||||
; So, we can remove this part of calculation.
|
||||
|
||||
|vp8_mse16x16_armv6| PROC
|
||||
|
||||
push {r4-r9, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r12, #16 ; set loop counter to 16 (=block height)
|
||||
mov r4, #0 ; initialize sse = 0
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r5, [r0, #0x0] ; load 4 src pixels
|
||||
ldr r6, [r2, #0x0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r8, r5, r6 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r7, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r6, r5 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r5, r7, lr ; calculate sum of positive differences
|
||||
usad8 r6, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r7 ; differences of all 4 pixels
|
||||
|
||||
ldr r5, [r0, #0x4] ; load 4 src pixels
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r6, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r6, [r2, #0x4] ; load 4 ref pixels
|
||||
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r5, r6 ; calculate difference
|
||||
sel r7, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r6, r5 ; calculate difference with reversed operands
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r5, r7, lr ; calculate sum of positive differences
|
||||
usad8 r6, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r7 ; differences of all 4 pixels
|
||||
ldr r5, [r0, #0x8] ; load 4 src pixels
|
||||
; calculate sse
|
||||
uxtb16 r6, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 3rd 4 pixels
|
||||
ldr r6, [r2, #0x8] ; load 4 ref pixels
|
||||
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r5, r6 ; calculate difference
|
||||
sel r7, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r6, r5 ; calculate difference with reversed operands
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r5, r7, lr ; calculate sum of positive differences
|
||||
usad8 r6, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r7 ; differences of all 4 pixels
|
||||
|
||||
ldr r5, [r0, #0xc] ; load 4 src pixels
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r6, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 4th 4 pixels
|
||||
ldr r6, [r2, #0xc] ; load 4 ref pixels
|
||||
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r5, r6 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r7, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r6, r5 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set dst_ptr to next row
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r5, r7, lr ; calculate sum of positive differences
|
||||
usad8 r6, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r7 ; differences of all 4 pixels
|
||||
|
||||
subs r12, r12, #1 ; next row
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r6, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
|
||||
smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r1, [sp, #28] ; get address of sse
|
||||
mov r0, r4 ; return sse
|
||||
str r4, [r1] ; store sse
|
||||
|
||||
pop {r4-r9, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
131
vp8/encoder/arm/neon/vp8_mse16x16_neon.c
Normal file
131
vp8/encoder/arm/neon/vp8_mse16x16_neon.c
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
unsigned int vp8_mse16x16_neon(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse) {
|
||||
int i;
|
||||
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
|
||||
int64x1_t d0s64;
|
||||
uint8x16_t q0u8, q1u8, q2u8, q3u8;
|
||||
int32x4_t q7s32, q8s32, q9s32, q10s32;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int64x2_t q1s64;
|
||||
|
||||
q7s32 = vdupq_n_s32(0);
|
||||
q8s32 = vdupq_n_s32(0);
|
||||
q9s32 = vdupq_n_s32(0);
|
||||
q10s32 = vdupq_n_s32(0);
|
||||
|
||||
for (i = 0; i < 8; i++) { // mse16x16_neon_loop
|
||||
q0u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q1u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
q2u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
q3u8 = vld1q_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
|
||||
q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
|
||||
q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
|
||||
q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
|
||||
q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
|
||||
|
||||
d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
|
||||
d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
|
||||
q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
|
||||
q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
|
||||
|
||||
d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
|
||||
d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
|
||||
q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
|
||||
q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
|
||||
|
||||
d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
|
||||
d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
|
||||
q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
|
||||
q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
|
||||
|
||||
d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
|
||||
d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
|
||||
q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
|
||||
q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
|
||||
}
|
||||
|
||||
q7s32 = vaddq_s32(q7s32, q8s32);
|
||||
q9s32 = vaddq_s32(q9s32, q10s32);
|
||||
q10s32 = vaddq_s32(q7s32, q9s32);
|
||||
|
||||
q1s64 = vpaddlq_s32(q10s32);
|
||||
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
|
||||
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
|
||||
}
|
||||
|
||||
unsigned int vp8_get4x4sse_cs_neon(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride) {
|
||||
int16x4_t d22s16, d24s16, d26s16, d28s16;
|
||||
int64x1_t d0s64;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||
int32x4_t q7s32, q8s32, q9s32, q10s32;
|
||||
uint16x8_t q11u16, q12u16, q13u16, q14u16;
|
||||
int64x2_t q1s64;
|
||||
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d4u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d1u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d5u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d6u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
d3u8 = vld1_u8(src_ptr);
|
||||
src_ptr += source_stride;
|
||||
d7u8 = vld1_u8(ref_ptr);
|
||||
ref_ptr += recon_stride;
|
||||
|
||||
q11u16 = vsubl_u8(d0u8, d4u8);
|
||||
q12u16 = vsubl_u8(d1u8, d5u8);
|
||||
q13u16 = vsubl_u8(d2u8, d6u8);
|
||||
q14u16 = vsubl_u8(d3u8, d7u8);
|
||||
|
||||
d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
|
||||
d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
|
||||
d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
|
||||
d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
|
||||
|
||||
q7s32 = vmull_s16(d22s16, d22s16);
|
||||
q8s32 = vmull_s16(d24s16, d24s16);
|
||||
q9s32 = vmull_s16(d26s16, d26s16);
|
||||
q10s32 = vmull_s16(d28s16, d28s16);
|
||||
|
||||
q7s32 = vaddq_s32(q7s32, q8s32);
|
||||
q9s32 = vaddq_s32(q9s32, q10s32);
|
||||
q9s32 = vaddq_s32(q7s32, q9s32);
|
||||
|
||||
q1s64 = vpaddlq_s32(q9s32);
|
||||
d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
|
||||
}
|
@@ -11,8 +11,6 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
|
||||
{
|
||||
int i;
|
||||
|
@@ -11,7 +11,6 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "encodemb.h"
|
||||
#include "encodemv.h"
|
||||
#include "vp8/common/common.h"
|
||||
@@ -91,7 +90,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
|
||||
* lambda using a non-linear combination (e.g., the smallest, or second
|
||||
* smallest, etc.).
|
||||
*/
|
||||
act = vpx_variance16x16(x->src.y_buffer,
|
||||
act = vp8_variance16x16(x->src.y_buffer,
|
||||
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
|
||||
act = act<<4;
|
||||
|
||||
|
@@ -11,7 +11,6 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "quantize.h"
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
#include "encodemb.h"
|
||||
@@ -45,7 +44,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
|
||||
}
|
||||
}
|
||||
|
||||
intra_pred_var = vpx_get_mb_ss(x->src_diff);
|
||||
intra_pred_var = vp8_get_mb_ss(x->src_diff);
|
||||
|
||||
return intra_pred_var;
|
||||
}
|
||||
|
@@ -19,6 +19,8 @@
|
||||
|
||||
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
|
||||
|
||||
extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
|
||||
|
||||
static THREAD_FUNCTION thread_loopfilter(void *p_data)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
|
||||
|
@@ -12,7 +12,6 @@
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "block.h"
|
||||
#include "onyx_int.h"
|
||||
@@ -35,6 +34,8 @@
|
||||
/* #define OUTPUT_FPF 1 */
|
||||
|
||||
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
|
||||
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
|
||||
extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
|
||||
|
||||
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
|
||||
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
|
||||
@@ -423,14 +424,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
|
||||
/* Set up pointers for this macro block raw buffer */
|
||||
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
|
||||
+ d->offset);
|
||||
vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
|
||||
(unsigned int *)(raw_motion_err));
|
||||
vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
|
||||
(unsigned int *)(raw_motion_err));
|
||||
|
||||
/* Set up pointers for this macro block recon buffer */
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
|
||||
vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
|
||||
(unsigned int *)(best_motion_err));
|
||||
vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
|
||||
(unsigned int *)(best_motion_err));
|
||||
}
|
||||
|
||||
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
@@ -454,7 +455,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
int new_mv_mode_penalty = 256;
|
||||
|
||||
/* override the default variance function to use MSE */
|
||||
v_fn_ptr.vf = vpx_mse16x16;
|
||||
v_fn_ptr.vf = vp8_mse16x16;
|
||||
|
||||
/* Set up pointers for this macro block recon buffer */
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
@@ -1328,6 +1329,8 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
return Q;
|
||||
}
|
||||
|
||||
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
|
||||
|
||||
void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
FIRSTPASS_STATS this_frame;
|
||||
|
@@ -9,8 +9,6 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "onyx_int.h"
|
||||
#include "mcomp.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
@@ -902,7 +900,7 @@ int vp8_hex_search
|
||||
this_offset = base_offset + (br * (pre_stride)) + bc;
|
||||
this_mv.as_mv.row = br;
|
||||
this_mv.as_mv.col = bc;
|
||||
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
|
||||
bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
@@ -929,7 +927,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.row = br + hex[i].row;
|
||||
this_mv.as_mv.col = bc + hex[i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -940,7 +938,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.col = bc + hex[i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -966,7 +964,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.row = br + next_chkpts[k][i].row;
|
||||
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -977,7 +975,7 @@ int vp8_hex_search
|
||||
this_mv.as_mv.col = bc + next_chkpts[k][i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -1008,7 +1006,7 @@ cal_neighbors:
|
||||
this_mv.as_mv.row = br + neighbors[i].row;
|
||||
this_mv.as_mv.col = bc + neighbors[i].col;
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}else
|
||||
@@ -1019,7 +1017,7 @@ cal_neighbors:
|
||||
this_mv.as_mv.col = bc + neighbors[i].col;
|
||||
CHECK_POINT
|
||||
this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
|
||||
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
|
||||
CHECK_BETTER
|
||||
}
|
||||
}
|
||||
@@ -1103,7 +1101,7 @@ int vp8_diamond_search_sad_c
|
||||
best_address = in_what;
|
||||
|
||||
/* Check the starting position */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* search_param determines the length of the initial step and hence
|
||||
@@ -1128,7 +1126,7 @@ int vp8_diamond_search_sad_c
|
||||
|
||||
{
|
||||
check_here = ss[i].offset + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1227,7 +1225,7 @@ int vp8_diamond_search_sadx4
|
||||
best_address = in_what;
|
||||
|
||||
/* Check the starting position */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* search_param determines the length of the initial step and hence the
|
||||
@@ -1295,7 +1293,7 @@ int vp8_diamond_search_sadx4
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = ss[i].offset + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1378,7 +1376,8 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
|
||||
in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that
|
||||
@@ -1403,7 +1402,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
for (c = col_min; c < col_max; c++)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
|
||||
this_mv.as_mv.col = c;
|
||||
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
|
||||
@@ -1475,7 +1474,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
|
||||
in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that stretch
|
||||
@@ -1531,7 +1531,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1590,8 +1590,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
// TODO(johannkoenig): check if this alignment is necessary.
|
||||
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
|
||||
unsigned int sad_array[3];
|
||||
|
||||
int *mvsadcost[2];
|
||||
@@ -1610,7 +1609,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
best_mv->as_mv.col = ref_col;
|
||||
|
||||
/* Baseline value at the centre */
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride,
|
||||
bestaddress, in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
|
||||
|
||||
/* Apply further limits to prevent us looking using vectors that stretch
|
||||
@@ -1696,7 +1696,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1754,7 +1754,8 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
|
||||
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
|
||||
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
|
||||
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address,
|
||||
in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
|
||||
|
||||
for (i=0; i<search_range; i++)
|
||||
@@ -1770,7 +1771,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1833,7 +1834,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
|
||||
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
|
||||
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
|
||||
bestsad = fn_ptr->sdf(what, what_stride, best_address,
|
||||
in_what_stride, UINT_MAX)
|
||||
+ mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
|
||||
|
||||
for (i=0; i<search_range; i++)
|
||||
@@ -1884,7 +1886,7 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
(this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
|
||||
{
|
||||
check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
|
@@ -10,7 +10,6 @@
|
||||
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "modecosts.h"
|
||||
#include "onyx_int.h"
|
||||
#include "treewriter.h"
|
||||
#include "vp8/common/entropymode.h"
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user