Compare commits

..

6 Commits

Author SHA1 Message Date
Ami Fischman
28147a449a libvpx: enable building for iOS devices (armv7)
Allow output of gas syntax assembly directly from obj_int_extract

Change-Id: I33a747e87ef1c590a8766dea17f8cb2497e54591
2013-07-19 14:05:59 -07:00
Ronald S. Bultje
33149cbb4c Replace generated quant tables with static lookup tables.
This prevents possible float rounding issues between architectures.

Change-Id: I6ed260aebd49feb4cfb5596a5370c44be5f72167
2013-07-16 14:04:41 -07:00
John Koleszar
3f454060bb Fix above context pointers
In the prior code, the above context pointers used for entropy
decoding were initialized on the first frame, and not updated when
the frame size changed. The per-frame code which initializes the
contexts assumes that the contexts are contiguous, leading to an
incomplete initialization when the frame is smaller. This commit
updates the pointers so that the context is contigous whenever
the frame size changes.

Conflicts:
	vp9/common/vp9_alloccommon.c

Change-Id: I08b53e3a30c8289491212311682ff1b8028cff6c
2013-07-16 14:04:41 -07:00
Yaowu Xu
d19ed5f249 Change to extend full border only when needed
This is a short term optimization till we work out a decoder
implementation requiring no frame border extension.

Change-Id: I02d15bfde4d926b50a4e58b393d8c4062d1be70f
2013-07-16 14:04:39 -07:00
Ronald S. Bultje
a801f7a295 Increase border size from 96 to 160.
This is required because upon downscaling, if a motion vector points
partially into the UMV (e.g. all minus 1 of 64+7 pixels, i.e. 70),
then we can point up to 140 pixels into the larger-resolution (2x)
reference buffer UMV, which means the UMV for reference buffers in
downscaling needs to be 140 rounded up to the nearest multiple of 32,
i.e. 160.

Longer-term, we should probably handle the UMV differently by detecting
edge coverage on-the-fly and using a temporary buffer for edge extensions
instead of adding 160 pixels on all sides of the image (which means a
CIF image uses 3x its own area size for borders).

Change-Id: I5184443e6731cd6721fc6a5d430a53e7d91b4f7e
2013-07-16 12:41:10 -07:00
Dmitry Kovalev
e39bd6407f Fixing vp9_get_pred_context_comp_ref_p function.
Adding missed parenthesis around boolean expressions. Bitstream is changed.
Regenerating test vectors.

Conflicts:
	vp9/common/vp9_pred_common.c

Change-Id: I4cc00b761e9473f92f180a9fc3a0c607f0aaae56
2013-07-16 12:40:48 -07:00
264 changed files with 19822 additions and 31282 deletions

4
.gitignore vendored
View File

@@ -1,8 +1,6 @@
*.a
*.asm.s
*.d
*.gcno
*.gcda
*.o
*~
/*.ivf
@@ -16,7 +14,7 @@
/.install-*
/.libs
/Makefile
/config.log
/config.err
/config.mk
/decode_to_md5
/decode_to_md5.c

36
README
View File

@@ -1,7 +1,7 @@
vpx Multi-Format Codec SDK
README - 1 August 2013
README - 21 June 2012
Welcome to the WebM VP8/VP9 Codec SDK!
Welcome to the WebM VP8 Codec SDK!
COMPILING THE APPLICATIONS/LIBRARIES:
The build system used is similar to autotools. Building generally consists of
@@ -53,63 +53,33 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv5te-android-gcc
armv5te-linux-rvct
armv5te-linux-gcc
armv5te-none-rvct
armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct
armv7-android-gcc
armv7-darwin-gcc
armv7-linux-rvct
armv7-linux-gcc
armv7-none-rvct
armv7-win32-vs11
mips32-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
ppc32-linux-gcc
ppc64-darwin8-gcc
ppc64-darwin9-gcc
ppc64-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc
x86-darwin8-icc
x86-darwin9-gcc
x86-darwin9-icc
x86-darwin10-gcc
x86-darwin11-gcc
x86-darwin12-gcc
x86-darwin13-gcc
x86-linux-gcc
x86-linux-icc
x86-os2-gcc
x86-solaris-gcc
x86-win32-gcc
x86-win32-vs7
x86-win32-vs8
x86-win32-vs9
x86-win32-vs10
x86-win32-vs11
x86_64-darwin9-gcc
x86_64-darwin10-gcc
x86_64-darwin11-gcc
x86_64-darwin12-gcc
x86_64-darwin13-gcc
x86_64-linux-gcc
x86_64-linux-icc
x86_64-solaris-gcc
x86_64-win64-gcc
x86_64-win64-vs8
x86_64-win64-vs9
x86_64-win64-vs10
x86_64-win64-vs11
universal-darwin8-gcc
universal-darwin9-gcc
universal-darwin10-gcc
universal-darwin11-gcc
universal-darwin12-gcc
universal-darwin13-gcc
generic-gnu
The generic-gnu target, in conjunction with the CROSS environment variable,
@@ -127,7 +97,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
5. Configuration errors
If the configuration step fails, the first step is to look in the error log.
This defaults to config.log. This should give a good indication of what went
This defaults to config.err. This should give a good indication of what went
wrong. If not, contact us for support.
SUPPORT

View File

@@ -7,7 +7,18 @@ REM in the file PATENTS. All contributing project authors may
REM be found in the AUTHORS file in the root of the source tree.
echo on
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/common/vp9_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/decoder/vp9_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/encoder/vp9_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/common/vp8_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/decoder/vp8_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"

View File

@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
##
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##
@@ -13,20 +13,20 @@
verbose=0
set -- $*
for i; do
if [ "$i" = "-o" ]; then
if [ "$i" == "-o" ]; then
on_of=1
elif [ "$i" = "-v" ]; then
elif [ "$i" == "-v" ]; then
verbose=1
elif [ "$i" = "-g" ]; then
elif [ "$i" == "-g" ]; then
args="${args} --debug"
elif [ "$on_of" = "1" ]; then
elif [ "$on_of" == "1" ]; then
outfile=$i
on_of=0
elif [ -f "$i" ]; then
infiles="$infiles $i"
elif [ "${i#-l}" != "$i" ]; then
elif [ "${i:0:2}" == "-l" ]; then
libs="$libs ${i#-l}"
elif [ "${i#-L}" != "$i" ]; then
elif [ "${i:0:2}" == "-L" ]; then
libpaths="${libpaths} ${i#-L}"
else
args="${args} ${i}"

View File

@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
##
## configure.sh
##
@@ -75,7 +75,7 @@ Options:
Build options:
--help print this message
--log=yes|no|FILE file configure log is written to [config.log]
--log=yes|no|FILE file configure log is written to [config.err]
--target=TARGET target platform tuple [generic-gnu]
--cpu=CPU optimize for a specific cpu rather than a family
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
@@ -198,11 +198,11 @@ add_extralibs() {
#
# Boolean Manipulation Functions
#
enable_feature(){
enable(){
set_all yes $*
}
disable_feature(){
disable(){
set_all no $*
}
@@ -219,7 +219,7 @@ soft_enable() {
for var in $*; do
if ! disabled $var; then
log_echo " enabling $var"
enable_feature $var
enable $var
fi
done
}
@@ -228,7 +228,7 @@ soft_disable() {
for var in $*; do
if ! enabled $var; then
log_echo " disabling $var"
disable_feature $var
disable $var
fi
done
}
@@ -251,10 +251,10 @@ tolower(){
# Temporary File Functions
#
source_path=${0%/*}
enable_feature source_path_used
enable source_path_used
if test -z "$source_path" -o "$source_path" = "." ; then
source_path="`pwd`"
disable_feature source_path_used
disable source_path_used
fi
if test ! -z "$TMPDIR" ; then
@@ -264,13 +264,12 @@ elif test ! -z "$TEMPDIR" ; then
else
TMPDIRx="/tmp"
fi
RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
TMP_H="${TMPDIRx}/vpx-conf-$$-${RAND}.h"
TMP_C="${TMPDIRx}/vpx-conf-$$-${RAND}.c"
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RAND}.cc"
TMP_O="${TMPDIRx}/vpx-conf-$$-${RAND}.o"
TMP_X="${TMPDIRx}/vpx-conf-$$-${RAND}.x"
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm"
TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
clean_temp_files() {
rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
@@ -317,8 +316,8 @@ check_header(){
header=$1
shift
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
disable_feature $var
check_cpp "$@" <<EOF && enable_feature $var
disable $var
check_cpp "$@" <<EOF && enable $var
#include "$header"
int x;
EOF
@@ -480,7 +479,7 @@ process_common_cmdline() {
for opt in "$@"; do
optval="${opt#*=}"
case "$opt" in
--child) enable_feature child
--child) enable child
;;
--log*)
logging="$optval"
@@ -492,7 +491,7 @@ process_common_cmdline() {
;;
--target=*) toolchain="${toolchain:-${optval}}"
;;
--force-target=*) toolchain="${toolchain:-${optval}}"; enable_feature force_toolchain
--force-target=*) toolchain="${toolchain:-${optval}}"; enable force_toolchain
;;
--cpu)
;;
@@ -512,7 +511,7 @@ process_common_cmdline() {
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
die_unknown $opt
fi
${action}_feature $option
$action $option
;;
--require-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
@@ -524,11 +523,11 @@ process_common_cmdline() {
;;
--force-enable-?*|--force-disable-?*)
eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'`
${action}_feature $option
$action $option
;;
--libc=*)
[ -d "${optval}" ] || die "Not a directory: ${optval}"
disable_feature builtin_libc
disable builtin_libc
alt_libc="${optval}"
;;
--as=*)
@@ -654,10 +653,6 @@ process_common_toolchain() {
tgt_isa=x86_64
tgt_os=darwin12
;;
*darwin13*)
tgt_isa=x86_64
tgt_os=darwin13
;;
x86_64*mingw32*)
tgt_os=win64
;;
@@ -697,13 +692,13 @@ process_common_toolchain() {
# Mark the specific ISA requested as enabled
soft_enable ${tgt_isa}
enable_feature ${tgt_os}
enable_feature ${tgt_cc}
enable ${tgt_os}
enable ${tgt_cc}
# Enable the architecture family
case ${tgt_isa} in
arm*) enable_feature arm;;
mips*) enable_feature mips;;
arm*) enable arm;;
mips*) enable mips;;
esac
# PIC is probably what we want when building shared libs
@@ -756,17 +751,13 @@ process_common_toolchain() {
add_cflags "-mmacosx-version-min=10.8"
add_ldflags "-mmacosx-version-min=10.8"
;;
*-darwin13-*)
add_cflags "-mmacosx-version-min=10.9"
add_ldflags "-mmacosx-version-min=10.9"
;;
esac
# Handle Solaris variants. Solaris 10 needs -lposix4
case ${toolchain} in
sparc-solaris-*)
add_extralibs -lposix4
disable_feature fast_unaligned
disable fast_unaligned
;;
*-solaris-*)
add_extralibs -lposix4
@@ -791,7 +782,7 @@ process_common_toolchain() {
;;
armv5te)
soft_enable edsp
disable_feature fast_unaligned
disable fast_unaligned
;;
esac
@@ -806,7 +797,7 @@ process_common_toolchain() {
arch_int=${arch_int%%te}
check_add_asflags --defsym ARCHITECTURE=${arch_int}
tune_cflags="-mtune="
if [ ${tgt_isa} = "armv7" ]; then
if [ ${tgt_isa} == "armv7" ]; then
if [ -z "${float_abi}" ]; then
check_cpp <<EOF && float_abi=hard || float_abi=softfp
#ifndef __ARM_PCS_VFP
@@ -843,8 +834,8 @@ EOF
asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
AS_SFX=.s
msvs_arch_dir=arm-msvs
disable_feature multithread
disable_feature unit_tests
disable multithread
disable unit_tests
;;
rvct)
CC=armcc
@@ -856,7 +847,7 @@ EOF
tune_cflags="--cpu="
tune_asflags="--cpu="
if [ -z "${tune_cpu}" ]; then
if [ ${tgt_isa} = "armv7" ]; then
if [ ${tgt_isa} == "armv7" ]; then
if enabled neon
then
check_add_cflags --fpu=softvfp+vfpv3
@@ -881,8 +872,8 @@ EOF
case ${tgt_os} in
none*)
disable_feature multithread
disable_feature os_support
disable multithread
disable os_support
;;
android*)
@@ -914,9 +905,9 @@ EOF
# Cortex-A8 implementations (NDK Dev Guide)
add_ldflags "-Wl,--fix-cortex-a8"
enable_feature pic
enable pic
soft_enable realtime_only
if [ ${tgt_isa} = "armv7" ]; then
if [ ${tgt_isa} == "armv7" ]; then
soft_enable runtime_cpu_detect
fi
if enabled runtime_cpu_detect; then
@@ -970,7 +961,7 @@ EOF
;;
linux*)
enable_feature linux
enable linux
if enabled rvct; then
# Check if we have CodeSourcery GCC in PATH. Needed for
# libraries
@@ -1001,14 +992,14 @@ EOF
tune_cflags="-mtune="
if enabled dspr2; then
check_add_cflags -mips32r2 -mdspr2
disable_feature fast_unaligned
disable fast_unaligned
fi
check_add_cflags -march=${tgt_isa}
check_add_asflags -march=${tgt_isa}
check_add_asflags -KPIC
;;
ppc*)
enable_feature ppc
enable ppc
bits=${tgt_isa##ppc}
link_with_cc=gcc
setup_gnu_toolchain
@@ -1156,7 +1147,7 @@ EOF
;;
universal*|*-gcc|generic-gnu)
link_with_cc=gcc
enable_feature gcc
enable gcc
setup_gnu_toolchain
;;
esac
@@ -1190,12 +1181,6 @@ EOF
fi
fi
# default use_x86inc to yes if pic is no or 64bit or we are not on darwin
echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o "${tgt_os#darwin}" = "${tgt_os}" ]; then
soft_enable use_x86inc
fi
# Position Independent Code (PIC) support, for building relocatable
# shared objects
enabled gcc && enabled pic && check_add_cflags -fPIC
@@ -1205,14 +1190,14 @@ EOF
enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
# Check for strip utility variant
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip
# Try to determine target endianness
check_cc <<EOF
unsigned int e = 'O'<<24 | '2'<<16 | 'B'<<8 | 'E';
EOF
[ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable_feature big_endian
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian
# Try to find which inline keywords are supported
check_cc <<EOF && INLINE="inline"
@@ -1237,7 +1222,7 @@ EOF
if enabled dspr2; then
if enabled big_endian; then
echo "dspr2 optimizations are available only for little endian platforms"
disable_feature dspr2
disable dspr2
fi
fi
;;
@@ -1288,8 +1273,8 @@ print_config_h() {
print_webm_license() {
local destination=$1
local prefix="$2"
local suffix="$3"
local prefix=$2
local suffix=$3
shift 3
cat <<EOF > ${destination}
${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix}
@@ -1310,8 +1295,8 @@ process_detect() {
true;
}
enable_feature logging
logfile="config.log"
enable logging
logfile="config.err"
self=$0
process() {
cmdline_args="$@"

View File

@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
##
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##

View File

@@ -381,7 +381,7 @@ generate_vcproj() {
RuntimeLibrary="$debug_runtime" \
UsePrecompiledHeader="0" \
WarningLevel="3" \
DebugInformationFormat="2" \
DebugInformationFormat="1" \
$warn_64bit \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
@@ -395,7 +395,7 @@ generate_vcproj() {
RuntimeLibrary="$debug_runtime" \
UsePrecompiledHeader="0" \
WarningLevel="3" \
DebugInformationFormat="2" \
DebugInformationFormat="1" \
$warn_64bit \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"

View File

@@ -72,21 +72,10 @@ parse_project() {
eval "${var}_name=$name"
eval "${var}_guid=$guid"
if [ "$sfx" = "vcproj" ]; then
cur_config_list=`grep -A1 '<Configuration' $file |
grep Name | cut -d\" -f2`
else
cur_config_list=`grep -B1 'Label="Configuration"' $file |
grep Condition | cut -d\' -f4`
fi
new_config_list=$(for i in $config_list $cur_config_list; do
echo $i
done | sort | uniq)
if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then
mixed_platforms=1
fi
config_list="$new_config_list"
eval "${var}_config_list=\"$cur_config_list\""
# assume that all projects have the same list of possible configurations,
# so overwriting old config_lists is not a problem
config_list=`grep -A1 '<Configuration' $file |
grep Name | cut -d\" -f2`
proj_list="${proj_list} ${var}"
}
@@ -136,11 +125,6 @@ process_global() {
indent_push
IFS_bak=${IFS}
IFS=$'\r'$'\n'
if [ "$mixed_platforms" != "" ]; then
config_list="
Release|Mixed Platforms
Debug|Mixed Platforms"
fi
for config in ${config_list}; do
echo "${indent}$config = $config"
done
@@ -155,17 +139,10 @@ Debug|Mixed Platforms"
indent_push
for proj in ${proj_list}; do
eval "local proj_guid=\${${proj}_guid}"
eval "local proj_config_list=\${${proj}_config_list}"
IFS=$'\r'$'\n'
for config in ${proj_config_list}; do
if [ "$mixed_platforms" != "" ]; then
local c=${config%%|*}
echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}"
else
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
fi
for config in ${config_list}; do
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
done
IFS=${IFS_bak}
@@ -191,14 +168,9 @@ process_makefile() {
IFS=$'\r'$'\n'
local TAB=$'\t'
cat <<EOF
ifeq (\$(CONFIG_VS_VERSION),7)
MSBUILD_TOOL := devenv.com
else
MSBUILD_TOOL := msbuild.exe
endif
found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
found_devenv := \$(shell which devenv.com >/dev/null 2>&1 && echo yes)
.nodevenv.once:
${TAB}@echo " * \$(MSBUILD_TOOL) not found in path."
${TAB}@echo " * devenv.com not found in path."
${TAB}@echo " * "
${TAB}@echo " * You will have to build all configurations manually using the"
${TAB}@echo " * Visual Studio IDE. To allow make to build them automatically,"
@@ -223,17 +195,16 @@ ${TAB}rm -rf "$platform"/"$config"
ifneq (\$(found_devenv),)
ifeq (\$(CONFIG_VS_VERSION),7)
$nows_sln_config: $outfile
${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
${TAB}devenv.com $outfile -build "$config"
else
$nows_sln_config: $outfile
${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
${TAB}devenv.com $outfile -build "$sln_config"
endif
else
$nows_sln_config: $outfile .nodevenv.once
${TAB}@echo " * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
${TAB}@echo " * Skipping build of $sln_config (devenv.com not in path)."
${TAB}@echo " * "
endif

View File

@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
##
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
##

View File

@@ -7,6 +7,17 @@ REM in the file PATENTS. All contributing project authors may
REM be found in the AUTHORS file in the root of the source tree.
echo on
cl /I "./" /I "%1" /nologo /c "%1/vp9/common/vp9_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp9/decoder/vp9_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp9/encoder/vp9_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/vp8_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/vp8_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

99
configure vendored
View File

@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
##
## configure
##
@@ -38,7 +38,6 @@ Advanced options:
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_mem_tracker} track memory usage
${toggle_postproc} postprocessing
${toggle_vp9_postproc} vp9 specific postprocessing
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
@@ -116,7 +115,6 @@ all_platforms="${all_platforms} x86-darwin9-icc"
all_platforms="${all_platforms} x86-darwin10-gcc"
all_platforms="${all_platforms} x86-darwin11-gcc"
all_platforms="${all_platforms} x86-darwin12-gcc"
all_platforms="${all_platforms} x86-darwin13-gcc"
all_platforms="${all_platforms} x86-linux-gcc"
all_platforms="${all_platforms} x86-linux-icc"
all_platforms="${all_platforms} x86-os2-gcc"
@@ -131,7 +129,6 @@ all_platforms="${all_platforms} x86_64-darwin9-gcc"
all_platforms="${all_platforms} x86_64-darwin10-gcc"
all_platforms="${all_platforms} x86_64-darwin11-gcc"
all_platforms="${all_platforms} x86_64-darwin12-gcc"
all_platforms="${all_platforms} x86_64-darwin13-gcc"
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} x86_64-solaris-gcc"
@@ -145,7 +142,6 @@ all_platforms="${all_platforms} universal-darwin9-gcc"
all_platforms="${all_platforms} universal-darwin10-gcc"
all_platforms="${all_platforms} universal-darwin11-gcc"
all_platforms="${all_platforms} universal-darwin12-gcc"
all_platforms="${all_platforms} universal-darwin13-gcc"
all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
@@ -154,7 +150,7 @@ all_targets="libs examples docs"
# all targets available are enabled, by default.
for t in ${all_targets}; do
[ -f ${source_path}/${t}.mk ] && enable_feature ${t}
[ -f ${source_path}/${t}.mk ] && enable ${t}
done
# check installed doxygen version
@@ -165,30 +161,30 @@ if [ ${doxy_major:-0} -ge 1 ]; then
doxy_minor=${doxy_version%%.*}
doxy_patch=${doxy_version##*.}
[ $doxy_major -gt 1 ] && enable_feature doxygen
[ $doxy_minor -gt 5 ] && enable_feature doxygen
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
[ $doxy_major -gt 1 ] && enable doxygen
[ $doxy_minor -gt 5 ] && enable doxygen
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
fi
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
# case.
enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
enable_feature install_bins
enable_feature install_libs
enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
enable install_bins
enable install_libs
enable_feature static
enable_feature optimizations
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
enable_feature md5
enable_feature spatial_resampling
enable_feature multithread
enable_feature os_support
enable_feature temporal_denoising
enable static
enable optimizations
enable fast_unaligned #allow unaligned accesses, if supported by hw
enable md5
enable spatial_resampling
enable multithread
enable os_support
enable temporal_denoising
[ -d ${source_path}/../include ] && enable_feature alt_tree_layout
[ -d ${source_path}/../include ] && enable alt_tree_layout
for d in vp8 vp9; do
[ -d ${source_path}/${d} ] && disable_feature alt_tree_layout;
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
done
if ! enabled alt_tree_layout; then
@@ -201,10 +197,10 @@ else
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable_feature vp8_encoder
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable_feature vp8_decoder
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable_feature vp9_encoder
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable_feature vp9_decoder
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
fi
@@ -251,6 +247,7 @@ EXPERIMENT_LIST="
multiple_arf
non420
alpha
balanced_coeftree
"
CONFIG_LIST="
external_build
@@ -258,7 +255,6 @@ CONFIG_LIST="
install_bins
install_libs
install_srcs
use_x86inc
debug
gprof
gcov
@@ -280,7 +276,6 @@ CONFIG_LIST="
dc_recon
runtime_cpu_detect
postproc
vp9_postproc
multithread
internal_stats
${CODECS}
@@ -316,7 +311,6 @@ CMDLINE_SELECT="
gprof
gcov
pic
use_x86inc
optimizations
ccache
runtime_cpu_detect
@@ -335,7 +329,6 @@ CMDLINE_SELECT="
dequant_tokens
dc_recon
postproc
vp9_postproc
multithread
internal_stats
${CODECS}
@@ -361,12 +354,12 @@ process_cmdline() {
for opt do
optval="${opt#*=}"
case "$opt" in
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
if enabled experimental; then
${action}_feature $option
$action $option
else
log_echo "Ignoring $opt -- not in experimental mode."
fi
@@ -387,8 +380,8 @@ post_process_cmdline() {
# If the codec family is enabled, enable all components of that family.
log_echo "Configuring selected codecs"
for c in ${CODECS}; do
disabled ${c%%_*} && disable_feature ${c}
enabled ${c%%_*} && enable_feature ${c}
disabled ${c%%_*} && disable ${c}
enabled ${c%%_*} && enable ${c}
done
# Enable all detected codecs, if they haven't been disabled
@@ -396,12 +389,12 @@ post_process_cmdline() {
# Enable the codec family if any component of that family is enabled
for c in ${CODECS}; do
enabled $c && enable_feature ${c%_*}
enabled $c && enable ${c%_*}
done
# Set the {en,de}coders variable if any algorithm in that class is enabled
for c in ${CODECS}; do
enabled ${c} && enable_feature ${c##*_}s
enabled ${c} && enable ${c##*_}s
done
}
@@ -441,7 +434,7 @@ process_targets() {
done
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
@@ -511,13 +504,13 @@ process_detect() {
fi
if [ -z "$CC" ] || enabled external_build; then
echo "Bypassing toolchain for environment detection."
enable_feature external_build
enable external_build
check_header() {
log fake_check_header "$@"
header=$1
shift
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
disable_feature $var
disable $var
# Headers common to all environments
case $header in
stdio.h)
@@ -529,7 +522,7 @@ process_detect() {
[ -f "${d##-I}/$header" ] && result=true && break
done
${result:-true}
esac && enable_feature $var
esac && enable $var
# Specialize windows and POSIX environments.
case $toolchain in
@@ -537,7 +530,7 @@ process_detect() {
case $header-$toolchain in
stdint*-gcc) true;;
*) false;;
esac && enable_feature $var
esac && enable $var
;;
*)
case $header in
@@ -546,7 +539,7 @@ process_detect() {
sys/mman.h) true;;
unistd.h) true;;
*) false;;
esac && enable_feature $var
esac && enable $var
esac
enabled $var
}
@@ -564,7 +557,7 @@ EOF
check_header sys/mman.h
check_header unistd.h # for sysconf(3) and friends.
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
}
process_toolchain() {
@@ -646,18 +639,14 @@ process_toolchain() {
# ccache only really works on gcc toolchains
enabled gcc || soft_disable ccache
if enabled mips; then
enable_feature dequant_tokens
enable_feature dc_recon
fi
if enabled internal_stats; then
enable_feature vp9_postproc
enable dequant_tokens
enable dc_recon
fi
# Enable the postbuild target if building for visual studio.
case "$tgt_cc" in
vs*) enable_feature msvs
enable_feature solution
vs*) enable msvs
enable solution
vs_version=${tgt_cc##vs}
case $vs_version in
[789])
@@ -693,14 +682,6 @@ process_toolchain() {
# iOS/ARM builds do not work with gtest. This does not match
# x86 targets.
;;
*-win*)
# Some mingw toolchains don't have pthread available by default.
# Treat these more like visual studio where threading in gtest
# would be disabled for the same reason.
check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
;;
*)
enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
int z;

View File

@@ -49,9 +49,6 @@ vpxenc.DESCRIPTION = Full featured encoder
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c
vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
# Clean up old ivfenc, ivfdec binaries.
ifeq ($(CONFIG_MSVS),yes)

18
libs.mk
View File

@@ -57,13 +57,6 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
RTCD += $$(BUILD_PFX)$(1).h
endef
# x86inc.asm is not compatible with pic 32bit builds. Restrict
# files which use it to 64bit builds or 32bit without pic
USE_X86INC = no
ifeq ($(CONFIG_USE_X86INC),yes)
USE_X86INC = yes
endif
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
@@ -390,11 +383,6 @@ LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
$(call enabled,LIBVPX_TEST_DATA))
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
CLEAN-OBJS += libvpx_test_srcs.txt
$(LIBVPX_TEST_DATA):
@echo " [DOWNLOAD] $@"
$(qexec)trap 'rm -f $@' INT TERM &&\
@@ -455,10 +443,6 @@ else
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
GTEST_OBJS=$(call objs,$(GTEST_SRCS))
ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
endif
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
@@ -483,7 +467,7 @@ $(foreach bin,$(LIBVPX_TEST_BINS),\
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm)\
-L. -lvpx -lgtest -lpthread -lm)\
)))\
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\

View File

@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef TEST_ACM_RANDOM_H_
#define TEST_ACM_RANDOM_H_
#ifndef LIBVPX_TEST_ACM_RANDOM_H_
#define LIBVPX_TEST_ACM_RANDOM_H_
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -59,4 +59,4 @@ class ACMRandom {
} // namespace libvpx_test
#endif // TEST_ACM_RANDOM_H_
#endif // LIBVPX_TEST_ACM_RANDOM_H_

View File

@@ -33,6 +33,10 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
altref_count_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {

View File

@@ -27,10 +27,14 @@ class BordersTest : public ::libvpx_test::EncoderTest,
SetMode(GET_PARAM(1));
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, 1);
if ( video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, 0);
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);

View File

@@ -10,7 +10,7 @@
#ifndef TEST_CLEAR_SYSTEM_STATE_H_
#define TEST_CLEAR_SYSTEM_STATE_H_
#include "./vpx_config.h"
#include "vpx_config.h"
extern "C" {
#if ARCH_X86 || ARCH_X86_64
# include "vpx_ports/x86.h"

View File

@@ -134,14 +134,14 @@ class VP8CodecFactory : public CodecFactory {
const libvpx_test::VP8CodecFactory kVP8;
#define VP8_INSTANTIATE_TEST_CASE(test, ...)\
#define VP8_INSTANTIATE_TEST_CASE(test, params)\
INSTANTIATE_TEST_CASE_P(VP8, test, \
::testing::Combine( \
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
&libvpx_test::kVP8)), \
__VA_ARGS__))
params))
#else
#define VP8_INSTANTIATE_TEST_CASE(test, ...)
#define VP8_INSTANTIATE_TEST_CASE(test, params)
#endif // CONFIG_VP8
@@ -216,14 +216,14 @@ class VP9CodecFactory : public CodecFactory {
const libvpx_test::VP9CodecFactory kVP9;
#define VP9_INSTANTIATE_TEST_CASE(test, ...)\
#define VP9_INSTANTIATE_TEST_CASE(test, params)\
INSTANTIATE_TEST_CASE_P(VP9, test, \
::testing::Combine( \
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
&libvpx_test::kVP9)), \
__VA_ARGS__))
params))
#else
#define VP9_INSTANTIATE_TEST_CASE(test, ...)
#define VP9_INSTANTIATE_TEST_CASE(test, params)
#endif // CONFIG_VP9

View File

@@ -40,6 +40,10 @@ class ConfigTest : public ::libvpx_test::EncoderTest,
++frame_count_out_;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
unsigned int frame_count_in_;
unsigned int frame_count_out_;
unsigned int frame_count_max_;

View File

@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
@@ -23,8 +22,8 @@ extern "C" {
}
namespace {
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h);
@@ -188,7 +187,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
protected:
static const int kDataAlignment = 16;
static const int kOuterBlockSize = 256;
static const int kOuterBlockSize = 128;
static const int kInputStride = kOuterBlockSize;
static const int kOutputStride = kOuterBlockSize;
static const int kMaxDimension = 64;
@@ -212,7 +211,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
virtual void SetUp() {
UUT_ = GET_PARAM(2);
/* Set up guard blocks for an inner block centered in the outer block */
/* Set up guard blocks for an inner block cetered in the outer block */
for (int i = 0; i < kOutputBufferSize; ++i) {
if (IsIndexInBorder(i))
output_[i] = 255;
@@ -225,10 +224,6 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
input_[i] = prng.Rand8Extremes();
}
void SetConstantInput(int value) {
memset(input_, value, kInputBufferSize);
}
void CheckGuardBlocks() {
for (int i = 0; i < kOutputBufferSize; ++i) {
if (IsIndexInBorder(i))
@@ -461,86 +456,45 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
{ 128}
};
/* This test exercises the horizontal and vertical filter functions. */
TEST_P(ConvolveTest, ChangeFilterWorks) {
uint8_t* const in = input();
uint8_t* const out = output();
/* Assume that the first input sample is at the 8/16th position. */
const int kInitialSubPelOffset = 8;
/* Filters are 8-tap, so the first filter tap will be applied to the pixel
* at position -3 with respect to the current filtering position. Since
* kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
* which is non-zero only in the last tap. So, applying the filter at the
* current input position will result in an output equal to the pixel at
* offset +4 (-3 + 7) with respect to the current filtering position.
*/
const int kPixelSelected = 4;
/* Assume that each output pixel requires us to step on by 17/16th pixels in
* the input.
*/
const int kInputPixelStep = 17;
/* The filters are setup in such a way that the expected output produces
* sets of 8 identical output samples. As the filter position moves to the
* next 1/16th pixel position the only active (=128) filter tap moves one
* position to the left, resulting in the same input pixel being replicated
* in to the output for 8 consecutive samples. After each set of 8 positions
* the filters select a different input pixel. kFilterPeriodAdjust below
* computes which input pixel is written to the output for a specified
* x or y position.
*/
/* Test the horizontal filter. */
REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
kChangeFilters[kInitialSubPelOffset],
kInputPixelStep, NULL, 0, Width(), Height()));
kChangeFilters[8], 17, kChangeFilters[4], 16,
Width(), Height()));
for (int x = 0; x < Width(); ++x) {
const int kQ4StepAdjust = x >> 4;
const int kFilterPeriodAdjust = (x >> 3) << 3;
const int ref_x =
kPixelSelected + ((kInitialSubPelOffset
+ kFilterPeriodAdjust * kInputPixelStep)
>> SUBPEL_BITS);
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();
const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
}
/* Test the vertical filter. */
REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
NULL, 0, kChangeFilters[kInitialSubPelOffset],
kInputPixelStep, Width(), Height()));
kChangeFilters[4], 16, kChangeFilters[8], 17,
Width(), Height()));
for (int y = 0; y < Height(); ++y) {
const int kQ4StepAdjust = y >> 4;
const int kFilterPeriodAdjust = (y >> 3) << 3;
const int ref_y =
kPixelSelected + ((kInitialSubPelOffset
+ kFilterPeriodAdjust * kInputPixelStep)
>> SUBPEL_BITS);
const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
}
/* Test the horizontal and vertical filters in combination. */
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
kChangeFilters[kInitialSubPelOffset],
kInputPixelStep,
kChangeFilters[kInitialSubPelOffset],
kInputPixelStep,
kChangeFilters[8], 17, kChangeFilters[8], 17,
Width(), Height()));
for (int y = 0; y < Height(); ++y) {
const int kQ4StepAdjustY = y >> 4;
const int kFilterPeriodAdjustY = (y >> 3) << 3;
const int ref_y =
kPixelSelected + ((kInitialSubPelOffset
+ kFilterPeriodAdjustY * kInputPixelStep)
>> SUBPEL_BITS);
const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
for (int x = 0; x < Width(); ++x) {
const int kQ4StepAdjustX = x >> 4;
const int kFilterPeriodAdjustX = (x >> 3) << 3;
const int ref_x =
kPixelSelected + ((kInitialSubPelOffset
+ kFilterPeriodAdjustX * kInputPixelStep)
>> SUBPEL_BITS);
const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
<< "x == " << x << ", y == " << y;
@@ -548,34 +502,6 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
}
}
/* This test exercises that enough rows and columns are filtered with every
possible initial fractional positions and scaling steps. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
uint8_t* const in = input();
uint8_t* const out = output();
SetConstantInput(127);
for (int frac = 0; frac < 16; ++frac) {
for (int step = 1; step <= 32; ++step) {
/* Test the horizontal and vertical filters in combination. */
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
vp9_sub_pel_filters_8[frac], step,
vp9_sub_pel_filters_8[frac], step,
Width(), Height()));
CheckGuardBlocks();
for (int y = 0; y < Height(); ++y) {
for (int x = 0; x < Width(); ++x) {
ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
<< "x == " << x << ", y == " << y
<< ", frac == " << frac << ", step == " << step;
}
}
}
}
}
using std::tr1::make_tuple;
@@ -601,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
vp9_convolve8_ssse3, vp9_convolve8_avg_c);
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_ssse3),
@@ -620,26 +546,4 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(32, 64, &convolve8_ssse3),
make_tuple(64, 64, &convolve8_ssse3)));
#endif
#if HAVE_NEON
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
vp9_convolve8_neon, vp9_convolve8_avg_neon);
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_neon),
make_tuple(8, 4, &convolve8_neon),
make_tuple(4, 8, &convolve8_neon),
make_tuple(8, 8, &convolve8_neon),
make_tuple(16, 8, &convolve8_neon),
make_tuple(8, 16, &convolve8_neon),
make_tuple(16, 16, &convolve8_neon),
make_tuple(32, 16, &convolve8_neon),
make_tuple(16, 32, &convolve8_neon),
make_tuple(32, 32, &convolve8_neon),
make_tuple(64, 32, &convolve8_neon),
make_tuple(32, 64, &convolve8_neon),
make_tuple(64, 64, &convolve8_neon)));
#endif
} // namespace

View File

@@ -1,112 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <climits>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
class CpuSpeedTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<
libvpx_test::TestMode, int> {
protected:
CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
set_cpu_used_ = GET_PARAM(2);
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
}
}
int set_cpu_used_;
};
TEST_P(CpuSpeedTest, TestQ0) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
20);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 12000;
cfg_.rc_max_quantizer = 10;
cfg_.rc_min_quantizer = 0;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
40);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
TEST_P(CpuSpeedTest, TestLowBitrate) {
// Validate that this clip encodes and decodes without a mismatch
// when passing in a very high min q. This pushes the encoder to producing
// lots of small partitions which might will test the other condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 200;
cfg_.rc_min_quantizer = 40;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
40);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
using std::tr1::make_tuple;
#define VP9_FACTORY \
static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9)
VP9_INSTANTIATE_TEST_CASE(
CpuSpeedTest,
::testing::Values(::libvpx_test::kTwoPassGood),
::testing::Range(0, 5));
} // namespace

View File

@@ -42,6 +42,10 @@ class CQTest : public ::libvpx_test::EncoderTest,
n_frames_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {

View File

@@ -36,6 +36,10 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
duration_ = 0.0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
const vpx_rational_t tb = video->timebase();
@@ -75,7 +79,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
bits_in_buffer_model_ -= frame_size_in_bits;
// Update the running total of bits for end of test datarate checks.
bits_total_ += frame_size_in_bits;
bits_total_ += frame_size_in_bits ;
// If first drop not set and we have a drop set it to this time.
if (!first_drop_ && duration > 1)

View File

@@ -13,16 +13,14 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
extern "C" {
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
#include "vp9_rtcd.h"
void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
}
#include "acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -32,13 +30,12 @@ namespace {
#ifdef _MSC_VER
static int round(double x) {
if (x < 0)
return static_cast<int>(ceil(x - 0.5));
return (int)ceil(x - 0.5);
else
return static_cast<int>(floor(x + 0.5));
return (int)floor(x + 0.5);
}
#endif
const int kNumCoeffs = 256;
const double PI = 3.1415926535898;
void reference2_16x16_idct_2d(double *input, double *output) {
double x;
@@ -47,9 +44,7 @@ void reference2_16x16_idct_2d(double *input, double *output) {
double s = 0;
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 16; ++j) {
x = cos(PI * j * (l + 0.5) / 16.0) *
cos(PI * i * (k + 0.5) / 16.0) *
input[i * 16 + j] / 256;
x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
if (i != 0)
x *= sqrt(2.0);
if (j != 0)
@@ -63,23 +58,23 @@ void reference2_16x16_idct_2d(double *input, double *output) {
}
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;
static const double C1 = 0.995184726672197;
static const double C2 = 0.98078528040323;
static const double C3 = 0.956940335732209;
static const double C4 = 0.923879532511287;
static const double C5 = 0.881921264348355;
static const double C6 = 0.831469612302545;
static const double C7 = 0.773010453362737;
static const double C8 = 0.707106781186548;
static const double C9 = 0.634393284163646;
static const double C10 = 0.555570233019602;
static const double C11 = 0.471396736825998;
static const double C12 = 0.38268343236509;
static const double C13 = 0.290284677254462;
static const double C14 = 0.195090322016128;
static const double C15 = 0.098017140329561;
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
double step[16];
double intermediate[16];
double temp1, temp2;
@@ -112,36 +107,36 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
output[6] = step[1] - step[6];
output[7] = step[0] - step[7];
temp1 = step[ 8] * C7;
temp2 = step[15] * C9;
temp1 = step[ 8]*C7;
temp2 = step[15]*C9;
output[ 8] = temp1 + temp2;
temp1 = step[ 9] * C11;
temp2 = step[14] * C5;
temp1 = step[ 9]*C11;
temp2 = step[14]*C5;
output[ 9] = temp1 - temp2;
temp1 = step[10] * C3;
temp2 = step[13] * C13;
temp1 = step[10]*C3;
temp2 = step[13]*C13;
output[10] = temp1 + temp2;
temp1 = step[11] * C15;
temp2 = step[12] * C1;
temp1 = step[11]*C15;
temp2 = step[12]*C1;
output[11] = temp1 - temp2;
temp1 = step[11] * C1;
temp2 = step[12] * C15;
temp1 = step[11]*C1;
temp2 = step[12]*C15;
output[12] = temp2 + temp1;
temp1 = step[10] * C13;
temp2 = step[13] * C3;
temp1 = step[10]*C13;
temp2 = step[13]*C3;
output[13] = temp2 - temp1;
temp1 = step[ 9] * C5;
temp2 = step[14] * C11;
temp1 = step[ 9]*C5;
temp2 = step[14]*C11;
output[14] = temp2 + temp1;
temp1 = step[ 8] * C9;
temp2 = step[15] * C7;
temp1 = step[ 8]*C9;
temp2 = step[15]*C7;
output[15] = temp2 - temp1;
// step 3
@@ -150,20 +145,20 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
step[ 2] = output[1] - output[2];
step[ 3] = output[0] - output[3];
temp1 = output[4] * C14;
temp2 = output[7] * C2;
temp1 = output[4]*C14;
temp2 = output[7]*C2;
step[ 4] = temp1 + temp2;
temp1 = output[5] * C10;
temp2 = output[6] * C6;
temp1 = output[5]*C10;
temp2 = output[6]*C6;
step[ 5] = temp1 + temp2;
temp1 = output[5] * C6;
temp2 = output[6] * C10;
temp1 = output[5]*C6;
temp2 = output[6]*C10;
step[ 6] = temp2 - temp1;
temp1 = output[4] * C2;
temp2 = output[7] * C14;
temp1 = output[4]*C2;
temp2 = output[7]*C14;
step[ 7] = temp2 - temp1;
step[ 8] = output[ 8] + output[11];
@@ -180,18 +175,18 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
output[ 0] = (step[ 0] + step[ 1]);
output[ 8] = (step[ 0] - step[ 1]);
temp1 = step[2] * C12;
temp2 = step[3] * C4;
temp1 = step[2]*C12;
temp2 = step[3]*C4;
temp1 = temp1 + temp2;
output[ 4] = 2*(temp1 * C8);
output[ 4] = 2*(temp1*C8);
temp1 = step[2] * C4;
temp2 = step[3] * C12;
temp1 = step[2]*C4;
temp2 = step[3]*C12;
temp1 = temp2 - temp1;
output[12] = 2 * (temp1 * C8);
output[12] = 2*(temp1*C8);
output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
output[14] = 2 * ((step[7] - step[ 6]) * C8);
output[ 2] = 2*((step[4] + step[ 5])*C8);
output[14] = 2*((step[7] - step[ 6])*C8);
temp1 = step[4] - step[5];
temp2 = step[6] + step[7];
@@ -201,17 +196,17 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
intermediate[8] = step[8] + step[14];
intermediate[9] = step[9] + step[15];
temp1 = intermediate[8] * C12;
temp2 = intermediate[9] * C4;
temp1 = intermediate[8]*C12;
temp2 = intermediate[9]*C4;
temp1 = temp1 - temp2;
output[3] = 2 * (temp1 * C8);
output[3] = 2*(temp1*C8);
temp1 = intermediate[8] * C4;
temp2 = intermediate[9] * C12;
temp1 = intermediate[8]*C4;
temp2 = intermediate[9]*C12;
temp1 = temp2 + temp1;
output[13] = 2 * (temp1 * C8);
output[13] = 2*(temp1*C8);
output[ 9] = 2 * ((step[10] + step[11]) * C8);
output[ 9] = 2*((step[10] + step[11])*C8);
intermediate[11] = step[10] - step[11];
intermediate[12] = step[12] + step[13];
@@ -222,300 +217,150 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
output[15] = (intermediate[11] + intermediate[12]);
output[ 1] = -(intermediate[11] - intermediate[12]);
output[ 7] = 2 * (intermediate[13] * C8);
output[ 7] = 2*(intermediate[13]*C8);
temp1 = intermediate[14] * C12;
temp2 = intermediate[15] * C4;
temp1 = intermediate[14]*C12;
temp2 = intermediate[15]*C4;
temp1 = temp1 - temp2;
output[11] = -2 * (temp1 * C8);
output[11] = -2*(temp1*C8);
temp1 = intermediate[14] * C4;
temp2 = intermediate[15] * C12;
temp1 = intermediate[14]*C4;
temp2 = intermediate[15]*C12;
temp1 = temp2 + temp1;
output[ 5] = 2 * (temp1 * C8);
output[ 5] = 2*(temp1*C8);
}
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
static void reference_16x16_dct_1d(double in[16], double out[16]) {
const double kPi = 3.141592653589793238462643383279502884;
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 16; k++) {
out[k] = 0.0;
for (int n = 0; n < 16; n++)
out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
if (k == 0)
out[k] = out[k]*kInvSqrt2;
}
}
void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
// First transform columns
for (int i = 0; i < 16; ++i) {
double temp_in[16], temp_out[16];
for (int j = 0; j < 16; ++j)
temp_in[j] = input[j * 16 + i];
temp_in[j] = input[j*16 + i];
butterfly_16x16_dct_1d(temp_in, temp_out);
for (int j = 0; j < 16; ++j)
output[j * 16 + i] = temp_out[j];
output[j*16 + i] = temp_out[j];
}
// Then transform rows
for (int i = 0; i < 16; ++i) {
double temp_in[16], temp_out[16];
for (int j = 0; j < 16; ++j)
temp_in[j] = output[j + i * 16];
temp_in[j] = output[j + i*16];
butterfly_16x16_dct_1d(temp_in, temp_out);
// Scale by some magic number
for (int j = 0; j < 16; ++j)
output[j + i * 16] = temp_out[j]/2;
output[j + i*16] = temp_out[j]/2;
}
}
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fdct16x16_c(in, out, stride);
}
TEST(VP9Idct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t in[256], coeff[256];
uint8_t dst[256], src[256];
double out_r[256];
void fht16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht16x16_c(in, out, stride, tx_type);
}
class Trans16x16TestBase {
public:
virtual ~Trans16x16TestBase() {}
protected:
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
void RunAccuracyCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
uint32_t max_error = 0;
int64_t total_error = 0;
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
test_input_block[j] = src[j] - dst[j];
}
REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
test_temp_block, pitch_));
REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = dst[j] - src[j];
const uint32_t error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
for (int j = 0; j < 256; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
}
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j)
in[j] = src[j] - dst[j];
EXPECT_GE(1u, max_error)
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
EXPECT_GE(count_test_block , total_error)
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
reference_16x16_dct_2d(in, out_r);
for (int j = 0; j < 256; j++)
coeff[j] = round(out_r[j]);
vp9_short_idct16x16_add_c(coeff, dst, 16);
for (int j = 0; j < 256; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
EXPECT_GE(1, error)
<< "Error: 16x16 IDCT has error " << error
<< " at index " << j;
}
}
}
void RunCoeffCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
// we need enable fdct test once we re-do the 16 point fdct.
TEST(VP9Fdct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t test_input_block[256];
int16_t test_temp_block[256];
uint8_t dst[256], src[256];
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j)
input_block[j] = rnd.Rand8() - rnd.Rand8();
for (int j = 0; j < 256; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
}
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j)
test_input_block[j] = src[j] - dst[j];
fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
const int pitch = 32;
vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
vp9_short_idct16x16_add_c(test_temp_block, dst, 16);
// The minimum quant value is 4.
for (int j = 0; j < kNumCoeffs; ++j)
EXPECT_EQ(output_block[j], output_ref_block[j]);
for (int j = 0; j < 256; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
void RunMemCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
EXPECT_GE(1, max_error)
<< "Error: 16x16 FDCT/IDCT has an individual round trip error > 1";
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < kNumCoeffs; ++j)
input_extreme_block[j] = 255;
if (i == 1)
for (int j = 0; j < kNumCoeffs; ++j)
input_extreme_block[j] = -255;
EXPECT_GE(count_test_block , total_error)
<< "Error: 16x16 FDCT/IDCT has average round trip error > 1 per block";
}
fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
output_block, pitch_));
TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t input_block[256], input_extreme_block[256];
int16_t output_block[256], output_extreme_block[256];
// The minimum quant value is 4.
for (int j = 0; j < kNumCoeffs; ++j) {
EXPECT_EQ(output_block[j], output_ref_block[j]);
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
}
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < 256; ++j)
input_extreme_block[j] = 255;
const int pitch = 32;
vp9_short_fdct16x16_c(input_block, output_block, pitch);
vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
// The minimum quant value is 4.
for (int j = 0; j < 256; ++j) {
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
<< "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE";
}
}
void RunInvAccuracyCheck() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
for (int i = 0; i < count_test_block; ++i) {
double out_r[kNumCoeffs];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
in[j] = src[j] - dst[j];
}
reference_16x16_dct_2d(in, out_r);
for (int j = 0; j < kNumCoeffs; ++j)
coeff[j] = round(out_r[j]);
const int pitch = 32;
REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = dst[j] - src[j];
const uint32_t error = diff * diff;
EXPECT_GE(1u, error)
<< "Error: 16x16 IDCT has error " << error
<< " at index " << j;
}
}
}
int pitch_;
int tx_type_;
fht_t fwd_txfm_ref;
};
class Trans16x16DCT : public Trans16x16TestBase,
public PARAMS(fdct_t, idct_t, int) {
public:
virtual ~Trans16x16DCT() {}
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
inv_txfm_ = GET_PARAM(1);
tx_type_ = GET_PARAM(2);
pitch_ = 32;
fwd_txfm_ref = fdct16x16_ref;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
fwd_txfm_(in, out, stride);
}
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride >> 1);
}
fdct_t fwd_txfm_;
idct_t inv_txfm_;
};
TEST_P(Trans16x16DCT, AccuracyCheck) {
RunAccuracyCheck();
}
TEST_P(Trans16x16DCT, CoeffCheck) {
RunCoeffCheck();
}
TEST_P(Trans16x16DCT, MemCheck) {
RunMemCheck();
}
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
RunInvAccuracyCheck();
}
class Trans16x16HT : public Trans16x16TestBase,
public PARAMS(fht_t, iht_t, int) {
public:
virtual ~Trans16x16HT() {}
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
inv_txfm_ = GET_PARAM(1);
tx_type_ = GET_PARAM(2);
pitch_ = 16;
fwd_txfm_ref = fht16x16_ref;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
fwd_txfm_(in, out, stride, tx_type_);
}
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride, tx_type_);
}
fht_t fwd_txfm_;
iht_t inv_txfm_;
};
TEST_P(Trans16x16HT, AccuracyCheck) {
RunAccuracyCheck();
}
TEST_P(Trans16x16HT, CoeffCheck) {
RunCoeffCheck();
}
TEST_P(Trans16x16HT, MemCheck) {
RunMemCheck();
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 0),
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 1),
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 2),
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 3)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16DCT,
::testing::Values(
make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 0),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 1),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 2),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 3)));
#endif
} // namespace

View File

@@ -13,17 +13,15 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
extern "C" {
#include "./vpx_config.h"
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
}
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -32,15 +30,35 @@ namespace {
#ifdef _MSC_VER
static int round(double x) {
if (x < 0)
return static_cast<int>(ceil(x - 0.5));
return (int)ceil(x - 0.5);
else
return static_cast<int>(floor(x + 0.5));
return (int)floor(x + 0.5);
}
#endif
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;
void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
static const double kPi = 3.141592653589793238462643383279502884;
static void reference2_32x32_idct_2d(double *input, double *output) {
double x;
for (int l = 0; l < 32; ++l) {
for (int k = 0; k < 32; ++k) {
double s = 0;
for (int i = 0; i < 32; ++i) {
for (int j = 0; j < 32; ++j) {
x = cos(kPi * j * (l + 0.5) / 32.0) *
cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
if (i != 0)
x *= sqrt(2.0);
if (j != 0)
x *= sqrt(2.0);
s += x;
}
}
output[k * 32 + l] = s / 4;
}
}
}
static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 32; k++) {
out[k] = 0.0;
@@ -51,8 +69,7 @@ void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
}
}
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
double output[kNumCoeffs]) {
static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
// First transform columns
for (int i = 0; i < 32; ++i) {
double temp_in[32], temp_out[32];
@@ -74,165 +91,27 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
}
}
typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
public:
virtual ~Trans32x32Test() {}
virtual void SetUp() {
fwd_txfm_ = GET_PARAM(0);
inv_txfm_ = GET_PARAM(1);
version_ = GET_PARAM(2); // 0: high precision forward transform
// 1: low precision version for rd loop
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
int version_;
fwd_txfm_t fwd_txfm_;
inv_txfm_t inv_txfm_;
};
TEST_P(Trans32x32Test, AccuracyCheck) {
TEST(VP9Idct32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
uint32_t max_error = 0;
int64_t total_error = 0;
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j) {
int16_t in[1024], coeff[1024];
uint8_t dst[1024], src[1024];
double out_r[1024];
for (int j = 0; j < 1024; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
test_input_block[j] = src[j] - dst[j];
}
const int pitch = 64;
REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = dst[j] - src[j];
const uint32_t error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
if (version_ == 1) {
max_error /= 2;
total_error /= 45;
}
EXPECT_GE(1u, max_error)
<< "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
EXPECT_GE(count_test_block, total_error)
<< "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
}
TEST_P(Trans32x32Test, CoeffCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
for (int i = 0; i < count_test_block; ++i) {
for (int j = 0; j < kNumCoeffs; ++j)
input_block[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 64;
vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));
if (version_ == 0) {
for (int j = 0; j < kNumCoeffs; ++j)
EXPECT_EQ(output_block[j], output_ref_block[j])
<< "Error: 32x32 FDCT versions have mismatched coefficients";
} else {
for (int j = 0; j < kNumCoeffs; ++j)
EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
<< "Error: 32x32 FDCT rd has mismatched coefficients";
}
}
}
TEST_P(Trans32x32Test, MemCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 2000;
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < kNumCoeffs; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < kNumCoeffs; ++j)
input_extreme_block[j] = 255;
if (i == 1)
for (int j = 0; j < kNumCoeffs; ++j)
input_extreme_block[j] = -255;
const int pitch = 64;
vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
// The minimum quant value is 4.
for (int j = 0; j < kNumCoeffs; ++j) {
if (version_ == 0) {
EXPECT_EQ(output_block[j], output_ref_block[j])
<< "Error: 32x32 FDCT versions have mismatched coefficients";
} else {
EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
<< "Error: 32x32 FDCT rd has mismatched coefficients";
}
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
<< "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 32x32 FDCT has coefficient larger than "
<< "4*DCT_MAX_VALUE";
}
}
}
TEST_P(Trans32x32Test, InverseAccuracy) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
for (int i = 0; i < count_test_block; ++i) {
double out_r[kNumCoeffs];
// Initialize a test block with input range [-255, 255]
for (int j = 0; j < kNumCoeffs; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
for (int j = 0; j < 1024; ++j)
in[j] = src[j] - dst[j];
}
reference_32x32_dct_2d(in, out_r);
for (int j = 0; j < kNumCoeffs; ++j)
for (int j = 0; j < 1024; j++)
coeff[j] = round(out_r[j]);
REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
for (int j = 0; j < kNumCoeffs; ++j) {
vp9_short_idct32x32_add_c(coeff, dst, 32);
for (int j = 0; j < 1024; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
EXPECT_GE(1, error)
@@ -242,21 +121,72 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
}
}
using std::tr1::make_tuple;
TEST(VP9Fdct32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
unsigned int max_error = 0;
int64_t total_error = 0;
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t test_input_block[1024];
int16_t test_temp_block[1024];
uint8_t dst[1024], src[1024];
INSTANTIATE_TEST_CASE_P(
C, Trans32x32Test,
::testing::Values(
make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),
make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));
for (int j = 0; j < 1024; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
}
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 1024; ++j)
test_input_block[j] = src[j] - dst[j];
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
::testing::Values(
make_tuple(&vp9_short_fdct32x32_sse2,
&vp9_short_idct32x32_add_sse2, 0),
make_tuple(&vp9_short_fdct32x32_rd_sse2,
&vp9_short_idct32x32_add_sse2, 1)));
#endif
const int pitch = 64;
vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
for (int j = 0; j < 1024; ++j) {
const unsigned diff = dst[j] - src[j];
const unsigned error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
EXPECT_GE(1u, max_error)
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
EXPECT_GE(count_test_block, total_error)
<< "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
}
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t input_block[1024], input_extreme_block[1024];
int16_t output_block[1024], output_extreme_block[1024];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 1024; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < 1024; ++j)
input_extreme_block[j] = 255;
const int pitch = 64;
vp9_short_fdct32x32_c(input_block, output_block, pitch);
vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);
// The minimum quant value is 4.
for (int j = 0; j < 1024; ++j) {
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
<< "Error: 32x32 FDCT extreme has coefficient larger than "
"4*DCT_MAX_VALUE";
}
}
}
} // namespace

View File

@@ -12,7 +12,7 @@
#define TEST_DECODE_TEST_DRIVER_H_
#include <cstring>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "vpx_config.h"
#include "vpx/vpx_decoder.h"
namespace libvpx_test {
@@ -36,8 +36,9 @@ class DxDataIterator {
};
// Provides a simplified interface to manage one video decoding.
// Similar to Encoder class, the exact services should be added
// as more tests are added.
//
// TODO: similar to Encoder class, the exact services should be
// added as more tests are added.
class Decoder {
public:
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)

View File

@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "vpx_config.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/decode_test_driver.h"
@@ -114,19 +114,19 @@ static bool compare_img(const vpx_image_t *img1,
const unsigned int height_y = img1->d_h;
unsigned int i;
for (i = 0; i < height_y; ++i)
match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
width_y) == 0) && match;
match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
width_y) == 0) && match;
const unsigned int width_uv = (img1->d_w + 1) >> 1;
const unsigned int height_uv = (img1->d_h + 1) >> 1;
for (i = 0; i < height_uv; ++i)
match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
width_uv) == 0) && match;
match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
width_uv) == 0) && match;
for (i = 0; i < height_uv; ++i)
match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
width_uv) == 0) && match;
match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
width_uv) == 0) && match;
return match;
}
@@ -158,7 +158,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
bool again;
for (again = true, video->Begin(); again; video->Next()) {
again = (video->img() != NULL);
again = video->img() != NULL;
PreEncodeFrameHook(video);
PreEncodeFrameHook(video, encoder);

View File

@@ -190,9 +190,7 @@ class EncoderTest {
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
// Hook to determine whether the encode loop should continue.
virtual bool Continue() const {
return !(::testing::Test::HasFatalFailure() || abort_);
}
virtual bool Continue() const { return !abort_; }
const CodecFactory *codec_;
// Hook to determine whether to decode frame after encoding

View File

@@ -50,6 +50,10 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
mismatch_nframes_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
psnr_ += pkt->data.psnr.psnr[0];
nframes_++;
@@ -62,7 +66,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
if (droppable_nframes_ > 0 &&
(cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
for (unsigned int i = 0; i < droppable_nframes_; ++i) {
if (droppable_frames_[i] == video->frame()) {
if (droppable_frames_[i] == nframes_) {
std::cout << " Encoding droppable frame: "
<< droppable_frames_[i] << "\n";
frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
@@ -148,7 +152,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 2000;
cfg_.g_lag_in_frames = 10;
cfg_.g_lag_in_frames = 25;
init_flags_ = VPX_CODEC_USE_PSNR;
@@ -179,9 +183,6 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
// FIXME(debargha): Fix this to work for any lag.
// Currently this test only works for lag = 0
cfg_.g_lag_in_frames = 0;
init_flags_ = VPX_CODEC_USE_PSNR;

View File

@@ -15,69 +15,68 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vp9_rtcd.h"
#include "vp9_rtcd.h"
}
#include "test/acm_random.h"
#include "acm_random.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
namespace {
void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
void fdct4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
vp9_short_fdct4x4_c(in, out, stride);
}
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
void idct4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_idct4x4_add_c(out, dst, stride >> 1);
}
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
void fht4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
}
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
void iht4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type);
}
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
public:
virtual ~FwdTrans4x4Test() {}
virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm_ = fdct4x4;
inv_txfm_ = idct4x4_add;
FwdTrans4x4Test() {SetUpTestTxfm();}
~FwdTrans4x4Test() {}
void SetUpTestTxfm() {
tx_type = GetParam();
if (tx_type == 0) {
fwd_txfm = fdct4x4;
inv_txfm = idct4x4_add;
} else {
fwd_txfm_ = fht4x4;
inv_txfm_ = iht4x4_add;
fwd_txfm = fht4x4;
inv_txfm = iht4x4_add;
}
}
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*fwd_txfm_)(in, out, dst, stride, tx_type);
(*fwd_txfm)(in, out, dst, stride, tx_type);
}
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*inv_txfm_)(in, out, dst, stride, tx_type);
(*inv_txfm)(in, out, dst, stride, tx_type);
}
int tx_type_;
void (*fwd_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
int tx_type;
void (*fwd_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type);
void (*inv_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
void (*inv_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type);
};
TEST_P(FwdTrans4x4Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
int16_t test_input_block[16];
int16_t test_output_block[16];
const int pitch = 8;
int count_sign_block[16][2];
const int count_test_block = 1000000;
@@ -88,7 +87,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
for (int j = 0; j < 16; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -104,7 +103,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
EXPECT_TRUE(bias_acceptable)
<< "Error: 4x4 FDCT/FHT has a sign bias > 1%"
<< " for input range [-255, 255] at index " << j
<< " tx_type " << tx_type_;
<< " tx_type " << tx_type;
}
memset(count_sign_block, 0, sizeof(count_sign_block));
@@ -113,7 +112,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
for (int j = 0; j < 16; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -136,13 +135,12 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
int total_error = 0;
double total_error = 0;
const int count_test_block = 1000000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
int16_t test_input_block[16];
int16_t test_temp_block[16];
uint8_t dst[16], src[16];
for (int j = 0; j < 16; ++j) {
src[j] = rnd.Rand8();
@@ -153,10 +151,10 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 8;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if (test_temp_block[j] > 0) {
if(test_temp_block[j] > 0) {
test_temp_block[j] += 2;
test_temp_block[j] /= 4;
test_temp_block[j] *= 4;
@@ -168,7 +166,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
}
// inverse transform and reconstruct the pixel block
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
const int diff = dst[j] - src[j];
@@ -183,7 +181,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
EXPECT_GE(count_test_block, total_error)
<< "Error: FDCT/IDCT or FHT/IHT has average "
<< "roundtrip error > 1 per block";
"roundtrip error > 1 per block";
}
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans4x4Test, ::testing::Range(0, 4));

View File

@@ -13,78 +13,23 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "vpx_ports/mem.h"
extern "C" {
#include "./vp9_rtcd.h"
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
#include "vp9_rtcd.h"
void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
}
#include "test/acm_random.h"
#include "acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
vp9_short_fdct8x8_c(in, out, stride);
}
void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
}
void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
// when we have all inverse dct functions done sse2.
#if HAVE_SSE2
vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
#else
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
#endif
}
void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
}
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
public:
virtual ~FwdTrans8x8Test() {}
virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm = fdct8x8;
inv_txfm = idct8x8_add;
} else {
fwd_txfm = fht8x8;
inv_txfm = iht8x8_add;
}
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*fwd_txfm)(in, out, dst, stride, tx_type);
}
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*inv_txfm)(in, out, dst, stride, tx_type);
}
int tx_type_;
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
};
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
TEST(VP9Fdct8x8Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
int16_t test_input_block[64];
int16_t test_output_block[64];
const int pitch = 16;
int count_sign_block[64][2];
const int count_test_block = 100000;
@@ -95,9 +40,8 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 64; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
REGISTER_STATE_CHECK(
RunFwdTxfm(test_input_block, test_output_block,
NULL, pitch, tx_type_));
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
@@ -111,7 +55,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = 1125;
EXPECT_LT(diff, max_diff)
<< "Error: 8x8 FDCT/FHT has a sign bias > "
<< "Error: 8x8 FDCT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-255, 255] at index " << j
<< " count0: " << count_sign_block[j][0]
@@ -125,9 +69,8 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
// Initialize a test block with input range [-15, 15].
for (int j = 0; j < 64; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
REGISTER_STATE_CHECK(
RunFwdTxfm(test_input_block, test_output_block,
NULL, pitch, tx_type_));
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
@@ -141,25 +84,24 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = 10000;
EXPECT_LT(diff, max_diff)
<< "Error: 4x4 FDCT/FHT has a sign bias > "
<< "Error: 4x4 FDCT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-15, 15] at index " << j
<< " count0: " << count_sign_block[j][0]
<< " count1: " << count_sign_block[j][1]
<< " diff: " << diff;
}
}
};
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
int total_error = 0;
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
int16_t test_input_block[64];
int16_t test_temp_block[64];
uint8_t dst[64], src[64];
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8();
@@ -170,11 +112,9 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 16;
REGISTER_STATE_CHECK(
RunFwdTxfm(test_input_block, test_temp_block,
dst, pitch, tx_type_));
for (int j = 0; j < 64; ++j) {
if (test_temp_block[j] > 0) {
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
for (int j = 0; j < 64; ++j){
if(test_temp_block[j] > 0) {
test_temp_block[j] += 2;
test_temp_block[j] /= 4;
test_temp_block[j] *= 4;
@@ -184,9 +124,7 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
test_temp_block[j] *= 4;
}
}
REGISTER_STATE_CHECK(
RunInvTxfm(test_input_block, test_temp_block,
dst, pitch, tx_type_));
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
@@ -198,23 +136,21 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
}
EXPECT_GE(1, max_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
<< "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1";
EXPECT_GE(count_test_block/5, total_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
"error > 1/5 per block";
}
<< "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
};
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
TEST(VP9Fdct8x8Test, ExtremalCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
int total_error = 0;
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
int16_t test_input_block[64];
int16_t test_temp_block[64];
uint8_t dst[64], src[64];
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8() % 2 ? 255 : 0;
@@ -225,12 +161,8 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 16;
REGISTER_STATE_CHECK(
RunFwdTxfm(test_input_block, test_temp_block,
dst, pitch, tx_type_));
REGISTER_STATE_CHECK(
RunInvTxfm(test_input_block, test_temp_block,
dst, pitch, tx_type_));
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
@@ -241,14 +173,13 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
}
EXPECT_GE(1, max_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
<< "Error: Extremal 8x8 FDCT/IDCT has an"
<< " individual roundtrip error > 1";
EXPECT_GE(count_test_block/5, total_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
<< "Error: Extremal 8x8 FDCT/IDCT has average"
<< " roundtrip error > 1/5 per block";
}
}
};
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
} // namespace

View File

@@ -11,7 +11,6 @@
#define TEST_I420_VIDEO_SOURCE_H_
#include <cstdio>
#include <cstdlib>
#include <string>
#include "test/video_source.h"
@@ -35,6 +34,7 @@ class I420VideoSource : public VideoSource {
height_(0),
framerate_numerator_(rate_numerator),
framerate_denominator_(rate_denominator) {
// This initializes raw_sz_, width_, height_ and allocates an img.
SetSize(width, height);
}
@@ -49,7 +49,7 @@ class I420VideoSource : public VideoSource {
if (input_file_)
fclose(input_file_);
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
if (start_) {
fseek(input_file_, raw_sz_ * start_, SEEK_SET);
@@ -92,7 +92,6 @@ class I420VideoSource : public VideoSource {
}
virtual void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
// Read a frame from input_file.
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
@@ -109,8 +108,8 @@ class I420VideoSource : public VideoSource {
unsigned int frame_;
unsigned int width_;
unsigned int height_;
int framerate_numerator_;
int framerate_denominator_;
unsigned int framerate_numerator_;
unsigned int framerate_denominator_;
};
} // namespace libvpx_test

View File

@@ -15,10 +15,10 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vp9_rtcd.h"
#include "vp9_rtcd.h"
}
#include "test/acm_random.h"
#include "acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -27,10 +27,10 @@ namespace {
#ifdef _MSC_VER
static int round(double x) {
if (x < 0)
return static_cast<int>(ceil(x - 0.5));
if(x < 0)
return (int)ceil(x - 0.5);
else
return static_cast<int>(floor(x + 0.5));
return (int)floor(x + 0.5);
}
#endif

View File

@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
@@ -16,101 +17,105 @@ extern "C" {
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx/vpx_integer.h"
typedef void (*idct_fn_t)(int16_t *input, unsigned char *pred_ptr,
typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
int pred_stride, unsigned char *dst_ptr,
int dst_stride);
namespace {
class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
protected:
virtual void SetUp() {
int i;
protected:
virtual void SetUp() {
int i;
UUT = GetParam();
memset(input, 0, sizeof(input));
/* Set up guard blocks */
for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
}
UUT = GetParam();
memset(input, 0, sizeof(input));
/* Set up guard blocks */
for (i = 0; i < 256; i++)
output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
idct_fn_t UUT;
int16_t input[16];
unsigned char output[256];
unsigned char predict[256];
idct_fn_t UUT;
short input[16];
unsigned char output[256];
unsigned char predict[256];
};
TEST_P(IDCTTest, TestGuardBlocks) {
int i;
int i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << i;
else
EXPECT_EQ(255, output[i]);
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << i;
else
EXPECT_EQ(255, output[i]);
}
TEST_P(IDCTTest, TestAllZeros) {
int i;
int i;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestAllOnes) {
int i;
int i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestAddOne) {
int i;
int i;
for (i = 0; i < 256; i++) predict[i] = i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
for (i = 0; i < 256; i++)
predict[i] = i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(i + 1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(i+1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestWithData) {
int i;
int i;
for (i = 0; i < 16; i++) input[i] = i;
for (i = 0; i < 16; i++)
input[i] = i;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) > 3 || i > 63)
EXPECT_EQ(255, output[i]) << "i==" << i;
else if (i == 0)
EXPECT_EQ(11, output[i]) << "i==" << i;
else if (i == 34)
EXPECT_EQ(1, output[i]) << "i==" << i;
else if (i == 2 || i == 17 || i == 32)
EXPECT_EQ(3, output[i]) << "i==" << i;
else
EXPECT_EQ(0, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) > 3 || i > 63)
EXPECT_EQ(255, output[i]) << "i==" << i;
else if (i == 0)
EXPECT_EQ(11, output[i]) << "i==" << i;
else if (i == 34)
EXPECT_EQ(1, output[i]) << "i==" << i;
else if (i == 2 || i == 17 || i == 32)
EXPECT_EQ(3, output[i]) << "i==" << i;
else
EXPECT_EQ(0, output[i]) << "i==" << i;
}
INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
INSTANTIATE_TEST_CASE_P(C, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_c));
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_mmx));

View File

@@ -15,8 +15,8 @@
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
}
@@ -27,8 +27,6 @@ using libvpx_test::ACMRandom;
class IntraPredBase {
public:
virtual ~IntraPredBase() {}
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
@@ -106,9 +104,9 @@ class IntraPredBase {
for (int y = 0; y < block_size_; y++)
sum += data_ptr_[p][y * stride_ - 1];
expected = (sum + (1 << (shift - 1))) >> shift;
} else {
} else
expected = 0x80;
}
// check that all subsequent lines are equal to the first
for (int y = 1; y < block_size_; ++y)
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],

View File

@@ -28,7 +28,7 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
// so that we can do actual file decodes.
class IVFVideoSource : public CompressedVideoSource {
public:
explicit IVFVideoSource(const std::string &file_name)
IVFVideoSource(const std::string &file_name)
: file_name_(file_name),
input_file_(NULL),
compressed_frame_buf_(NULL),
@@ -47,13 +47,12 @@ class IVFVideoSource : public CompressedVideoSource {
virtual void Init() {
// Allocate a buffer for read in the compressed video frame.
compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
ASSERT_TRUE(compressed_frame_buf_ != NULL)
<< "Allocate frame buffer failed";
ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
}
virtual void Begin() {
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
// Read file header
@@ -73,7 +72,6 @@ class IVFVideoSource : public CompressedVideoSource {
}
void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
uint8_t frame_hdr[kIvfFrameHdrSize];
// Check frame header and read a frame from input_file.
if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)

View File

@@ -31,6 +31,10 @@ class KeyframeTest : public ::libvpx_test::EncoderTest,
set_cpu_used_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (kf_do_force_kf_)
@@ -132,6 +136,7 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
// Verify that keyframes match the file keyframes in the file.
for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
iter != kf_pts_list_.end(); ++iter) {
if (deadline_ == VPX_DL_REALTIME && *iter > 0)
EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
<< *iter;

View File

@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef TEST_MD5_HELPER_H_
#define TEST_MD5_HELPER_H_
#ifndef LIBVPX_TEST_MD5_HELPER_H_
#define LIBVPX_TEST_MD5_HELPER_H_
extern "C" {
#include "./md5_utils.h"
@@ -25,15 +25,9 @@ class MD5 {
void Add(const vpx_image_t *img) {
for (int plane = 0; plane < 3; ++plane) {
const uint8_t *buf = img->planes[plane];
// Calculate the width and height to do the md5 check. For the chroma
// plane, we never want to round down and thus skip a pixel so if
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
// This works only for chroma_shift of 0 and 1.
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
img->y_chroma_shift : img->d_h;
const int w = plane ? (img->d_w + img->x_chroma_shift) >>
img->x_chroma_shift : img->d_w;
uint8_t *buf = img->planes[plane];
const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
for (int y = 0; y < h; ++y) {
MD5Update(&md5_, buf, w);
@@ -67,4 +61,4 @@ class MD5 {
} // namespace libvpx_test
#endif // TEST_MD5_HELPER_H_
#endif // LIBVPX_TEST_MD5_HELPER_H_

View File

@@ -11,8 +11,8 @@
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
}
@@ -63,8 +63,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
// Pointers to top-left pixel of block in the input and output images.
uint8_t *const src_image_ptr = src_image + (input_stride << 1);
uint8_t *const dst_image_ptr = dst_image + 8;
uint8_t *const flimits =
reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
uint8_t *const flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
(void)vpx_memset(flimits, 255, block_width);
// Initialize pixels in the input:

View File

@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef TEST_REGISTER_STATE_CHECK_H_
#define TEST_REGISTER_STATE_CHECK_H_
#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_
#ifdef _WIN64
@@ -92,4 +92,4 @@ class RegisterStateCheck {};
#endif // _WIN64
#endif // TEST_REGISTER_STATE_CHECK_H_
#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_

View File

@@ -16,68 +16,8 @@
#include "test/video_source.h"
#include "test/util.h"
// Enable(1) or Disable(0) writing of the compressed bitstream.
#define WRITE_COMPRESSED_STREAM 0
namespace {
#if WRITE_COMPRESSED_STREAM
static void mem_put_le16(char *const mem, const unsigned int val) {
mem[0] = val;
mem[1] = val >> 8;
}
static void mem_put_le32(char *const mem, const unsigned int val) {
mem[0] = val;
mem[1] = val >> 8;
mem[2] = val >> 16;
mem[3] = val >> 24;
}
static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
int frame_cnt, FILE *const outfile) {
char header[32];
header[0] = 'D';
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
mem_put_le16(header + 4, 0); /* version */
mem_put_le16(header + 6, 32); /* headersize */
mem_put_le32(header + 8, 0x30395056); /* fourcc (vp9) */
mem_put_le16(header + 12, cfg->g_w); /* width */
mem_put_le16(header + 14, cfg->g_h); /* height */
mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
mem_put_le32(header + 24, frame_cnt); /* length */
mem_put_le32(header + 28, 0); /* unused */
(void)fwrite(header, 1, 32, outfile);
}
static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
char header[4];
mem_put_le32(header, static_cast<unsigned int>(size));
(void)fwrite(header, 1, 4, outfile);
}
static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
FILE *const outfile) {
char header[12];
vpx_codec_pts_t pts;
if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
return;
pts = pkt->data.frame.pts;
mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
mem_put_le32(header + 4, pts & 0xFFFFFFFF);
mem_put_le32(header + 8, pts >> 32);
(void)fwrite(header, 1, 12, outfile);
}
#endif // WRITE_COMPRESSED_STREAM
const unsigned int kInitialWidth = 320;
const unsigned int kInitialHeight = 240;
@@ -102,8 +42,6 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
limit_ = 60;
}
virtual ~ResizingVideoSource() {}
protected:
virtual void Next() {
++frame_;
@@ -118,15 +56,13 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
protected:
ResizeTest() : EncoderTest(GET_PARAM(0)) {}
virtual ~ResizeTest() {}
struct FrameInfo {
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
: pts(_pts), w(_w), h(_h) {}
vpx_codec_pts_t pts;
unsigned int w;
unsigned int h;
unsigned int w;
unsigned int h;
};
virtual void SetUp() {
@@ -134,6 +70,10 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
SetMode(GET_PARAM(1));
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void DecompressedFrameHook(const vpx_image_t &img,
vpx_codec_pts_t pts) {
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
@@ -159,47 +99,17 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
}
}
const unsigned int kStepDownFrame = 3;
const unsigned int kStepUpFrame = 6;
class ResizeInternalTest : public ResizeTest {
protected:
#if WRITE_COMPRESSED_STREAM
ResizeInternalTest()
: ResizeTest(),
frame0_psnr_(0.0),
outfile_(NULL),
out_frames_(0) {}
#else
ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
#endif
virtual ~ResizeInternalTest() {}
virtual void BeginPassHook(unsigned int /*pass*/) {
#if WRITE_COMPRESSED_STREAM
outfile_ = fopen("vp90-2-05-resize.ivf", "wb");
#endif
}
virtual void EndPassHook() {
#if WRITE_COMPRESSED_STREAM
if (outfile_) {
if (!fseek(outfile_, 0, SEEK_SET))
write_ivf_file_header(&cfg_, out_frames_, outfile_);
fclose(outfile_);
outfile_ = NULL;
}
#endif
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == kStepDownFrame) {
if (video->frame() == 3) {
struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
encoder->Control(VP8E_SET_SCALEMODE, &mode);
}
if (video->frame() == kStepUpFrame) {
if (video->frame() == 6) {
struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
encoder->Control(VP8E_SET_SCALEMODE, &mode);
}
@@ -211,46 +121,21 @@ class ResizeInternalTest : public ResizeTest {
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
#if WRITE_COMPRESSED_STREAM
++out_frames_;
// Write initial file header if first frame.
if (pkt->data.frame.pts == 0)
write_ivf_file_header(&cfg_, 0, outfile_);
// Write frame header and data.
write_ivf_frame_header(pkt, outfile_);
(void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
#endif
}
double frame0_psnr_;
#if WRITE_COMPRESSED_STREAM
FILE *outfile_;
unsigned int out_frames_;
#endif
};
TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 10);
init_flags_ = VPX_CODEC_USE_PSNR;
// q picked such that initial keyframe on this clip is ~30dB PSNR
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
// If the number of frames being encoded is smaller than g_lag_in_frames
// the encoded frame is unavailable using the current API. Comparing
// frames to detect mismatch would then not be possible. Set
// g_lag_in_frames = 0 to get around this.
cfg_.g_lag_in_frames = 0;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
const vpx_codec_pts_t pts = info->pts;
if (pts >= kStepDownFrame && pts < kStepUpFrame) {
if (pts >= 3 && pts < 6) {
ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
} else {

View File

@@ -17,6 +17,7 @@ extern "C" {
#include "./vpx_config.h"
#if CONFIG_VP8_ENCODER
#include "./vp8_rtcd.h"
//#include "vp8/common/blockd.h"
#endif
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
@@ -427,7 +428,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#if HAVE_SSE
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
@@ -441,7 +441,6 @@ INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
make_tuple(4, 4, sad_4x4x4d_sse)));
#endif
#endif
#endif
#if HAVE_SSE2
#if CONFIG_VP8_ENCODER
@@ -452,20 +451,14 @@ const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
#endif
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
#endif
#endif
const sad_m_by_n_test_param_t sse2_tests[] = {
#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_wmt),
@@ -475,25 +468,18 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
make_tuple(4, 4, sad_4x4_wmt),
#endif
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
make_tuple(64, 64, sad_64x64_sse2_vp9),
make_tuple(64, 32, sad_64x32_sse2_vp9),
make_tuple(32, 64, sad_32x64_sse2_vp9),
make_tuple(32, 32, sad_32x32_sse2_vp9),
make_tuple(32, 16, sad_32x16_sse2_vp9),
make_tuple(16, 32, sad_16x32_sse2_vp9),
make_tuple(16, 16, sad_16x16_sse2_vp9),
make_tuple(16, 8, sad_16x8_sse2_vp9),
make_tuple(8, 16, sad_8x16_sse2_vp9),
make_tuple(16, 8, sad_16x8_sse2_vp9),
make_tuple(8, 8, sad_8x8_sse2_vp9),
make_tuple(8, 4, sad_8x4_sse2_vp9),
#endif
#endif
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
@@ -519,7 +505,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
make_tuple(8, 4, sad_8x4x4d_sse2)));
#endif
#endif
#endif
#if HAVE_SSE3
#if CONFIG_VP8_ENCODER
@@ -538,11 +523,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
#endif
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
make_tuple(16, 16, sad_16x16_sse3)));
#endif
#endif
} // namespace

View File

@@ -17,19 +17,15 @@
#include <sys/types.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
extern "C" {
#include "vp8/encoder/onyx_int.h"
}
using libvpx_test::ACMRandom;
namespace {
TEST(Vp8RoiMapTest, ParameterCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
@@ -125,10 +121,10 @@ TEST(Vp8RoiMapTest, ParameterCheck) {
for (int i = 0; i < 1000; ++i) {
int rand_deltas[4];
int deltas_valid;
rand_deltas[0] = rnd(160) - 80;
rand_deltas[1] = rnd(160) - 80;
rand_deltas[2] = rnd(160) - 80;
rand_deltas[3] = rnd(160) - 80;
rand_deltas[0] = (rand() % 160) - 80;
rand_deltas[1] = (rand() % 160) - 80;
rand_deltas[2] = (rand() % 160) - 80;
rand_deltas[3] = (rand() % 160) - 80;
deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
(abs(rand_deltas[1]) <= 63) &&

View File

@@ -13,8 +13,8 @@
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vp8/encoder/block.h"
#include "vpx_mem/vpx_mem.h"
@@ -51,7 +51,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
bd.predictor = reinterpret_cast<unsigned char*>(
vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
for (int i = 0; kSrcStride[i] > 0; ++i) {
for(int i = 0; kSrcStride[i] > 0; ++i) {
// start at block0
be.src = 0;
be.base_src = &source;
@@ -61,7 +61,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
int16_t *src_diff = be.src_diff;
for (int r = 0; r < kBlockHeight; ++r) {
for (int c = 0; c < kBlockWidth; ++c) {
src_diff[c] = static_cast<int16_t>(0xa5a5);
src_diff[c] = 0xa5a5;
}
src_diff += kDiffPredStride;
}

View File

@@ -33,6 +33,10 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
delete[] modified_buf_;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {

View File

@@ -520,12 +520,3 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f vp90-2-03-size-226x202.webm
83c6d8f2969b759e10e5c6542baca1265c874c29 vp90-2-03-size-226x224.webm.md5
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce vp90-2-03-size-226x226.webm
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 vp90-2-03-size-226x226.webm.md5
b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba vp90-2-05-resize.ivf
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe vp90-2-06-bilinear.webm
f6235f937552e11d8eb331ec55da6b3aa596b9ac vp90-2-06-bilinear.webm.md5
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5

View File

@@ -24,9 +24,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
@@ -89,7 +87,6 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
@@ -629,11 +626,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5

View File

@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string>
#include "./vpx_config.h"
#include "vpx_config.h"
extern "C" {
#if ARCH_X86 || ARCH_X86_64
#include "vpx_ports/x86.h"
@@ -48,9 +48,7 @@ int main(int argc, char **argv) {
#endif
#if !CONFIG_SHARED
// Shared library builds don't support whitebox tests
// that exercise internal symbols.
/* Shared library builds don't support whitebox tests that exercise internal symbols. */
#if CONFIG_VP8
vp8_rtcd();
#endif

View File

@@ -159,11 +159,7 @@ const char *kVP9TestVectors[] = {
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
#if CONFIG_NON420
"vp91-2-04-yv444.webm"
#endif
"vp90-2-03-size-226x226.webm"
};
#endif
@@ -185,7 +181,6 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
virtual void DecompressedFrameHook(const vpx_image_t& img,
const unsigned int frame_number) {
ASSERT_TRUE(md5_file_ != NULL);
char expected_md5[33];
char junk[128];

View File

@@ -23,13 +23,10 @@ extern "C" {
namespace {
class TileIndependenceTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<int> {
public ::libvpx_test::CodecTestWithParam<int> {
protected:
TileIndependenceTest()
: EncoderTest(GET_PARAM(0)),
md5_fw_order_(),
md5_inv_order_(),
n_tiles_(GET_PARAM(1)) {
TileIndependenceTest() : EncoderTest(GET_PARAM(0)), n_tiles_(GET_PARAM(1)),
md5_fw_order_(), md5_inv_order_() {
init_flags_ = VPX_CODEC_USE_PSNR;
vpx_codec_dec_cfg_t cfg;
cfg.w = 704;
@@ -59,8 +56,9 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
::libvpx_test::MD5 *md5) {
const vpx_codec_err_t res = dec->DecodeFrame(
reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
const vpx_codec_err_t res =
dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
pkt->data.frame.sz);
if (res != VPX_CODEC_OK) {
abort_ = true;
ASSERT_EQ(VPX_CODEC_OK, res);
@@ -74,11 +72,11 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
}
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
private:
int n_tiles_;
protected:
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
};
// run an encode with 2 or 4 tiles, and do the decode both in normal and
@@ -95,7 +93,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
timebase.den, timebase.num, 0, 30);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const char *md5_fw_str = md5_fw_order_.Get();
const char *md5_fw_str = md5_fw_order_.Get();
const char *md5_inv_str = md5_inv_order_.Get();
// could use ASSERT_EQ(!memcmp(.., .., 16) here, but this gives nicer
@@ -104,6 +102,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
ASSERT_STREQ(md5_fw_str, md5_inv_str);
}
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest,
::testing::Range(0, 2, 1));
} // namespace

View File

@@ -37,7 +37,7 @@ static double compute_psnr(const vpx_image_t *img1,
img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j];
sqrerr += d * d;
}
double mse = static_cast<double>(sqrerr) / (width_y * height_y);
double mse = sqrerr / (width_y * height_y);
double psnr = 100.0;
if (mse > 0.0) {
psnr = 10 * log10(255.0 * 255.0 / mse);

View File

@@ -16,16 +16,16 @@
#include "test/register_state_check.h"
#include "vpx/vpx_integer.h"
#include "./vpx_config.h"
#include "vpx_config.h"
extern "C" {
#include "vpx_mem/vpx_mem.h"
#if CONFIG_VP8_ENCODER
# include "vp8/common/variance.h"
# include "./vp8_rtcd.h"
# include "vp8_rtcd.h"
#endif
#if CONFIG_VP9_ENCODER
# include "vp9/encoder/vp9_variance.h"
# include "./vp9_rtcd.h"
# include "vp9_rtcd.h"
#endif
}
#include "test/acm_random.h"
@@ -107,8 +107,8 @@ static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
}
template<typename VarianceFunctionType>
class VarianceTest
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
class VarianceTest :
public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
public:
virtual void SetUp() {
const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
@@ -191,9 +191,9 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
}
template<typename SubpelVarianceFunctionType>
class SubpelVarianceTest
: public ::testing::TestWithParam<tuple<int, int,
SubpelVarianceFunctionType> > {
class SubpelVarianceTest :
public ::testing::TestWithParam<tuple<int, int,
SubpelVarianceFunctionType> > {
public:
virtual void SetUp() {
const tuple<int, int, SubpelVarianceFunctionType>& params =
@@ -218,7 +218,6 @@ class SubpelVarianceTest
vpx_free(src_);
delete[] ref_;
vpx_free(sec_);
libvpx_test::ClearSystemState();
}
protected:
@@ -483,7 +482,6 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
@@ -597,11 +595,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 5, subpel_avg_variance64x32_sse2),
make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
#endif
#endif
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
vp9_sub_pixel_variance4x4_ssse3;
const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
@@ -686,7 +681,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
#endif
#endif
#endif // CONFIG_VP9_ENCODER
} // namespace vp9

View File

@@ -8,6 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
extern "C" {
#include "vp8/encoder/boolhuff.h"
#include "vp8/decoder/dboolhuff.h"
}
#include <math.h>
#include <stddef.h>
@@ -20,11 +24,6 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx/vpx_integer.h"
extern "C" {
#include "vp8/encoder/boolhuff.h"
#include "vp8/decoder/dboolhuff.h"
}
namespace {
const int num_tests = 10;
@@ -45,7 +44,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
uint8_t *output, int count) {
int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
int offset = input - (uint8_t *)decrypt_state;
for (int i = 0; i < count; i++) {
output[i] = input[i] ^ secret_key[(offset + i) & 15];
}
@@ -59,10 +58,10 @@ TEST(VP8, TestBitIO) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int n = 0; n < num_tests; ++n) {
for (int method = 0; method <= 7; ++method) { // we generate various proba
const int kBitsToTest = 1000;
uint8_t probas[kBitsToTest];
const int bits_to_test = 1000;
uint8_t probas[bits_to_test];
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
const int parity = i & 1;
probas[i] =
(method == 0) ? 0 : (method == 1) ? 255 :
@@ -77,14 +76,14 @@ TEST(VP8, TestBitIO) {
}
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
const int random_seed = 6432;
const int kBufferSize = 10000;
const int buffer_size = 10000;
ACMRandom bit_rnd(random_seed);
BOOL_CODER bw;
uint8_t bw_buffer[kBufferSize];
vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize);
uint8_t bw_buffer[buffer_size];
vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
if (bit_method == 2) {
bit = (i & 1);
} else if (bit_method == 3) {
@@ -99,20 +98,19 @@ TEST(VP8, TestBitIO) {
#if CONFIG_DECRYPT
encrypt_buffer(bw_buffer, buffer_size);
vp8dx_start_decode(&br, bw_buffer, buffer_size,
test_decrypt_cb,
reinterpret_cast<void *>(bw_buffer));
test_decrypt_cb, (void *)bw_buffer);
#else
vp8dx_start_decode(&br, bw_buffer, kBufferSize, NULL, NULL);
vp8dx_start_decode(&br, bw_buffer, buffer_size, NULL, NULL);
#endif
bit_rnd.Reset(random_seed);
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
if (bit_method == 2) {
bit = (i & 1);
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
<< "pos: "<< i << " / " << kBitsToTest
<< "pos: "<< i << " / " << bits_to_test
<< " bit_method: " << bit_method
<< " method: " << method;
}

View File

@@ -26,8 +26,7 @@ const uint8_t test_key[16] = {
0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
};
void encrypt_buffer(const uint8_t *src, uint8_t *dst,
int size, int offset = 0) {
void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0) {
for (int i = 0; i < size; ++i) {
dst[i] = src[i] ^ test_key[(offset + i) & 15];
}
@@ -35,11 +34,10 @@ void encrypt_buffer(const uint8_t *src, uint8_t *dst,
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
uint8_t *output, int count) {
encrypt_buffer(input, output, count,
input - reinterpret_cast<uint8_t *>(decrypt_state));
encrypt_buffer(input, output, count, input - (uint8_t *)decrypt_state);
}
} // namespace
} // namespace
namespace libvpx_test {

View File

@@ -18,7 +18,7 @@
extern "C" {
#include "./vp8_rtcd.h"
#include "vp8_rtcd.h"
}
#include "test/acm_random.h"

View File

@@ -19,7 +19,7 @@ extern "C" {
#include "vp9/decoder/vp9_dboolhuff.h"
}
#include "test/acm_random.h"
#include "acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -32,10 +32,10 @@ TEST(VP9, TestBitIO) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int n = 0; n < num_tests; ++n) {
for (int method = 0; method <= 7; ++method) { // we generate various proba
const int kBitsToTest = 1000;
uint8_t probas[kBitsToTest];
const int bits_to_test = 1000;
uint8_t probas[bits_to_test];
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
const int parity = i & 1;
probas[i] =
(method == 0) ? 0 : (method == 1) ? 255 :
@@ -50,14 +50,14 @@ TEST(VP9, TestBitIO) {
}
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
const int random_seed = 6432;
const int kBufferSize = 10000;
const int buffer_size = 10000;
ACMRandom bit_rnd(random_seed);
vp9_writer bw;
uint8_t bw_buffer[kBufferSize];
uint8_t bw_buffer[buffer_size];
vp9_start_encode(&bw, bw_buffer);
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
if (bit_method == 2) {
bit = (i & 1);
} else if (bit_method == 3) {
@@ -72,16 +72,16 @@ TEST(VP9, TestBitIO) {
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
vp9_reader br;
vp9_reader_init(&br, bw_buffer, kBufferSize);
vp9_reader_init(&br, bw_buffer, buffer_size);
bit_rnd.Reset(random_seed);
for (int i = 0; i < kBitsToTest; ++i) {
for (int i = 0; i < bits_to_test; ++i) {
if (bit_method == 2) {
bit = (i & 1);
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
<< "pos: " << i << " / " << kBitsToTest
<< "pos: " << i << " / " << bits_to_test
<< " bit_method: " << bit_method
<< " method: " << method;
}

View File

@@ -1,75 +0,0 @@
/*
Copyright (c) 2012 The WebM project authors. All Rights Reserved.
Use of this source code is governed by a BSD-style license
that can be found in the LICENSE file in the root of the source
tree. An additional intellectual property rights grant can be found
in the file PATENTS. All contributing project authors may
be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
const int kMaxPsnr = 100;
class LossLessTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
LossLessTest() : EncoderTest(GET_PARAM(0)),
psnr_(kMaxPsnr),
nframes_(0),
encoding_mode_(GET_PARAM(1)) {
}
virtual ~LossLessTest() {}
virtual void SetUp() {
InitializeConfig();
SetMode(encoding_mode_);
}
virtual void BeginPassHook(unsigned int /*pass*/) {
psnr_ = 0.0;
nframes_ = 0;
}
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
if (pkt->data.psnr.psnr[0] < psnr_)
psnr_= pkt->data.psnr.psnr[0];
}
double GetMinPsnr() const {
return psnr_;
}
private:
double psnr_;
unsigned int nframes_;
libvpx_test::TestMode encoding_mode_;
};
TEST_P(LossLessTest, TestLossLessEncoding) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 2000;
cfg_.g_lag_in_frames = 25;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 0;
init_flags_ = VPX_CODEC_USE_PSNR;
// intentionally changed the dimension for better testing coverage
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
timebase.den, timebase.num, 0, 30);
const double psnr_lossless = GetMinPsnr();
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
} // namespace

View File

@@ -39,8 +39,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
// FIXME(rbultje) split in its own file
for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
const int block_width = 4 << b_width_log2(bsize);
const int block_height = 4 << b_height_log2(bsize);
int16_t *diff = reinterpret_cast<int16_t *>(
@@ -93,8 +93,9 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_c));
#if HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_sse2));
#endif
} // namespace vp9

View File

@@ -1,109 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/decoder/vp9_thread.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/md5_helper.h"
#include "test/webm_video_source.h"
namespace {
class VP9WorkerThreadTest : public ::testing::Test {
protected:
virtual ~VP9WorkerThreadTest() {}
virtual void SetUp() {
vp9_worker_init(&worker_);
}
virtual void TearDown() {
vp9_worker_end(&worker_);
}
VP9Worker worker_;
};
int ThreadHook(void* data, void* return_value) {
int* const hook_data = reinterpret_cast<int*>(data);
*hook_data = 5;
return *reinterpret_cast<int*>(return_value);
}
TEST_F(VP9WorkerThreadTest, HookSuccess) {
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
for (int i = 0; i < 2; ++i) {
EXPECT_TRUE(vp9_worker_reset(&worker_));
int hook_data = 0;
int return_value = 1; // return successfully from the hook
worker_.hook = ThreadHook;
worker_.data1 = &hook_data;
worker_.data2 = &return_value;
vp9_worker_launch(&worker_);
EXPECT_TRUE(vp9_worker_sync(&worker_));
EXPECT_FALSE(worker_.had_error);
EXPECT_EQ(5, hook_data);
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
}
}
TEST_F(VP9WorkerThreadTest, HookFailure) {
EXPECT_TRUE(vp9_worker_reset(&worker_));
int hook_data = 0;
int return_value = 0; // return failure from the hook
worker_.hook = ThreadHook;
worker_.data1 = &hook_data;
worker_.data2 = &return_value;
vp9_worker_launch(&worker_);
EXPECT_FALSE(vp9_worker_sync(&worker_));
EXPECT_TRUE(worker_.had_error);
// Ensure _reset() clears the error and _launch() can be called again.
return_value = 1;
EXPECT_TRUE(vp9_worker_reset(&worker_));
EXPECT_FALSE(worker_.had_error);
vp9_worker_launch(&worker_);
EXPECT_TRUE(vp9_worker_sync(&worker_));
EXPECT_FALSE(worker_.had_error);
}
TEST(VP9DecodeMTTest, MTDecode) {
libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
video.Init();
vpx_codec_dec_cfg_t cfg = {0};
cfg.threads = 2;
libvpx_test::VP9Decoder decoder(cfg, 0);
libvpx_test::MD5 md5;
for (video.Begin(); video.cxdata(); video.Next()) {
const vpx_codec_err_t res =
decoder.DecodeFrame(video.cxdata(), video.frame_size());
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
const vpx_image_t *img = NULL;
// Get decompressed data
while ((img = dec_iter.Next())) {
md5.Add(img);
}
}
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
}
} // namespace

View File

@@ -99,7 +99,7 @@ class WebMVideoSource : public CompressedVideoSource {
virtual void Begin() {
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
@@ -130,7 +130,6 @@ class WebMVideoSource : public CompressedVideoSource {
}
void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
if (chunk_ >= chunks_) {
unsigned int track;

View File

@@ -1370,12 +1370,12 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
shr eax, 1
cmp eax, 0
je xloop1
cmp eax, 64
cmp eax, 128
je xloop2
shr eax, 1
mov ah,al
neg al
add al, 128
@@ -2132,11 +2132,11 @@ void ScaleFilterRows_SSSE3(uint8* dst_ptr,
"mov 0x14(%esp),%edx \n"
"mov 0x18(%esp),%ecx \n"
"mov 0x1c(%esp),%eax \n"
"shr %eax \n"
"cmp $0x0,%eax \n"
"je 2f \n"
"cmp $0x40,%eax \n"
"cmp $0x80,%eax \n"
"je 3f \n"
"shr %eax \n"
"mov %al,%ah \n"
"neg %al \n"
"add $0x80,%al \n"
@@ -2662,7 +2662,6 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) {
source_y_fraction >>= 1;
if (source_y_fraction == 0) {
asm volatile (
"1:"
@@ -2681,7 +2680,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
: "memory", "cc", "rax"
);
return;
} else if (source_y_fraction == 64) {
} else if (source_y_fraction == 128) {
asm volatile (
"1:"
"movdqa (%1),%%xmm0 \n"
@@ -2704,6 +2703,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
} else {
asm volatile (
"mov %3,%%eax \n"
"shr %%eax \n"
"mov %%al,%%ah \n"
"neg %%al \n"
"add $0x80,%%al \n"

View File

@@ -173,6 +173,7 @@ void vp8_create_common(VP8_COMMON *oci)
oci->use_bilinear_mc_filter = 0;
oci->full_pixel = 0;
oci->multi_token_partition = ONE_PARTITION;
oci->clr_type = REG_YUV;
oci->clamp_type = RECON_CLAMP_REQUIRED;
/* Initialize reference frame sign bias structure to defaults */

View File

@@ -41,8 +41,7 @@ extern "C"
{
USAGE_STREAM_FROM_SERVER = 0x0,
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2,
USAGE_CONSTANT_QUALITY = 0x3
USAGE_CONSTRAINED_QUALITY = 0x2
} END_USAGE;

View File

@@ -72,6 +72,7 @@ typedef struct VP8Common
int horiz_scale;
int vert_scale;
YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG *frame_to_show;
@@ -114,6 +115,9 @@ typedef struct VP8Common
int uvdc_delta_q;
int uvac_delta_q;
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
@@ -153,6 +157,7 @@ typedef struct VP8Common
unsigned int current_video_frame;
int near_boffset[3];
int version;
TOKEN_PARTITION multi_token_partition;
@@ -160,10 +165,8 @@ typedef struct VP8Common
#ifdef PACKET_TESTING
VP8_HEADER oh;
#endif
#if CONFIG_POSTPROC_VISUALIZER
double bitrate;
double framerate;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;

View File

@@ -923,7 +923,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
{
char message[512];
sprintf(message, "Bitrate: %10.2f framerate: %10.2f ", oci->bitrate, oci->framerate);
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
#include "vp8/common/blockd.h"
#if CONFIG_POSTPROC
#include "postproc.h"
#endif /* CONFIG_POSTPROC */
BEGIN
#if CONFIG_POSTPROC
/* mfqe.c / filter_by_weight */
DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION);
#endif /* CONFIG_POSTPROC */
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
#if HAVE_MEDIA
/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */
ct_assert(B_DC_PRED, B_DC_PRED == 0);
ct_assert(B_TM_PRED, B_TM_PRED == 1);
ct_assert(B_VE_PRED, B_VE_PRED == 2);
ct_assert(B_HE_PRED, B_HE_PRED == 3);
ct_assert(B_LD_PRED, B_LD_PRED == 4);
ct_assert(B_RD_PRED, B_RD_PRED == 5);
ct_assert(B_VR_PRED, B_VR_PRED == 6);
ct_assert(B_VL_PRED, B_VL_PRED == 7);
ct_assert(B_HD_PRED, B_HD_PRED == 8);
ct_assert(B_HU_PRED, B_HU_PRED == 9);
#endif
#if HAVE_SSE2
#if CONFIG_POSTPROC
/* vp8_filter_by_weight16x16 and 8x8 */
ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)
#endif /* CONFIG_POSTPROC */
#endif /* HAVE_SSE2 */

View File

@@ -1095,7 +1095,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
if (pc->frame_type == KEY_FRAME) {
(void)vp8_read_bit(bc); // colorspace
pc->clr_type = (YUV_TYPE)vp8_read_bit(bc);
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
}

View File

@@ -430,6 +430,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
*time_stamp = pbi->last_time_stamp;
*time_end_stamp = 0;
sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
#else

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/asm_offsets.h"
#include "onyxd_int.h"
BEGIN
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */

View File

@@ -1322,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_start_encode(bc, cx_data, cx_data_end);
/* signal clr type */
vp8_write_bit(bc, 0);
vp8_write_bit(bc, pc->clr_type);
vp8_write_bit(bc, pc->clamp_type);
}

View File

@@ -1325,7 +1325,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
{
@@ -1349,9 +1349,9 @@ void vp8_init_second_pass(VP8_COMP *cpi)
* sum duration is not. Its calculated based on the actual durations of
* all frames from the first pass.
*/
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
cpi->output_framerate = cpi->framerate;
cpi->output_frame_rate = cpi->frame_rate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
@@ -2398,7 +2398,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
target_frame_size += cpi->min_frame_bandwidth;
/* Every other frame gets a few extra bits */
if ( (cpi->frames_since_golden & 0x01) &&
if ( (cpi->common.frames_since_golden & 0x01) &&
(cpi->frames_till_gf_update_due > 0) )
{
target_frame_size += cpi->twopass.alt_extra_bits;
@@ -2529,7 +2529,7 @@ void vp8_second_pass(VP8_COMP *cpi)
/* Set nominal per second bandwidth for this frame */
cpi->target_bandwidth = (int)
(cpi->per_frame_bandwidth * cpi->output_framerate);
(cpi->per_frame_bandwidth * cpi->output_frame_rate);
if (cpi->target_bandwidth < 0)
cpi->target_bandwidth = 0;
@@ -3185,7 +3185,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
/* Convert to a per second bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
cpi->output_framerate);
cpi->output_frame_rate);
}
/* Note the total error score of the kf group minus the key frame itself */
@@ -3224,7 +3224,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->common.vert_scale = NORMAL;
/* Calculate Average bits per frame. */
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate);
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
/* CBR... Use the clip average as the target for deciding resample */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -3299,7 +3299,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
{
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate));
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
/* If triggered last time the threshold for triggering again is

View File

@@ -301,11 +301,11 @@ static int rescale(int val, int num, int denom)
static void init_temporal_layer_context(VP8_COMP *cpi,
VP8_CONFIG *oxcf,
const int layer,
double prev_layer_framerate)
double prev_layer_frame_rate)
{
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
lc->frame_rate = cpi->output_frame_rate / cpi->oxcf.rate_decimator[layer];
lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000;
lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
@@ -335,7 +335,7 @@ static void init_temporal_layer_context(VP8_COMP *cpi,
lc->avg_frame_size_for_layer =
(int)((cpi->oxcf.target_bitrate[layer] -
cpi->oxcf.target_bitrate[layer-1]) * 1000 /
(lc->framerate - prev_layer_framerate));
(lc->frame_rate - prev_layer_frame_rate));
lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
lc->active_best_quality = cpi->oxcf.best_allowed_q;
@@ -363,7 +363,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
const int prev_num_layers)
{
int i;
double prev_layer_framerate = 0;
double prev_layer_frame_rate = 0;
const int curr_num_layers = cpi->oxcf.number_of_layers;
// If the previous state was 1 layer, get current layer context from cpi.
// We need this to set the layer context for the new layers below.
@@ -377,7 +377,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
LAYER_CONTEXT *lc = &cpi->layer_context[i];
if (i >= prev_num_layers)
{
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
}
// The initial buffer levels are set based on their starting levels.
// We could set the buffer levels based on the previous state (normalized
@@ -403,8 +403,8 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
lc->bits_off_target = lc->buffer_level;
restore_layer_context(cpi, 0);
}
prev_layer_framerate = cpi->output_framerate /
cpi->oxcf.rate_decimator[i];
prev_layer_frame_rate = cpi->output_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
@@ -1282,21 +1282,21 @@ int vp8_reverse_trans(int x)
return 63;
}
void vp8_new_framerate(VP8_COMP *cpi, double framerate)
void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
{
if(framerate < .1)
framerate = 30;
cpi->framerate = framerate;
cpi->output_framerate = framerate;
cpi->frame_rate = framerate;
cpi->output_frame_rate = framerate;
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth /
cpi->output_framerate);
cpi->output_frame_rate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
/* Set Maximum gf/arf interval */
cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2);
cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
if(cpi->max_gf_interval < 12)
cpi->max_gf_interval = 12;
@@ -1337,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
* seems like a reasonable framerate, then use that as a guess, otherwise
* use 30.
*/
cpi->framerate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
cpi->frame_rate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
if (cpi->framerate > 180)
cpi->framerate = 30;
if (cpi->frame_rate > 180)
cpi->frame_rate = 30;
cpi->ref_framerate = cpi->framerate;
cpi->ref_frame_rate = cpi->frame_rate;
/* change includes all joint functionality */
vp8_change_config(cpi, oxcf);
@@ -1369,13 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
double prev_layer_framerate=0;
double prev_layer_frame_rate=0;
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
prev_layer_framerate = cpi->output_framerate /
cpi->oxcf.rate_decimator[i];
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
prev_layer_frame_rate = cpi->output_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
@@ -1399,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi)
if (oxcf->number_of_layers > 1)
{
unsigned int i;
double prev_layer_framerate=0;
double prev_layer_frame_rate=0;
for (i=0; i<oxcf->number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
lc->framerate =
cpi->ref_framerate / oxcf->rate_decimator[i];
lc->frame_rate =
cpi->ref_frame_rate / oxcf->rate_decimator[i];
lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
lc->starting_buffer_level = rescale(
@@ -1432,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi)
lc->avg_frame_size_for_layer =
(int)((oxcf->target_bitrate[i] -
oxcf->target_bitrate[i-1]) * 1000 /
(lc->framerate - prev_layer_framerate));
(lc->frame_rate - prev_layer_frame_rate));
prev_layer_framerate = lc->framerate;
prev_layer_frame_rate = lc->frame_rate;
}
}
}
@@ -1625,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->oxcf.target_bandwidth, 1000);
/* Set up frame rate and related parameters rate control values. */
vp8_new_framerate(cpi, cpi->framerate);
vp8_new_frame_rate(cpi, cpi->frame_rate);
/* Set absolute upper and lower quality limits */
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
@@ -1945,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate;
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
}
#ifdef OUTPUT_YUV_SRC
@@ -2273,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
{
extern int count_mb_seg[4];
FILE *f = fopen("modes.stt", "a");
double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ;
double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
fprintf(f, "intra_mode in Intra Frames:\n");
fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
@@ -2750,7 +2750,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
/* this frame refreshes means next frames don't unless specified by user */
cpi->frames_since_golden = 0;
cpi->common.frames_since_golden = 0;
/* Clear the alternate reference update pending flag. */
cpi->source_alt_ref_pending = 0;
@@ -2802,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
* user
*/
cm->refresh_golden_frame = 0;
cpi->frames_since_golden = 0;
cpi->common.frames_since_golden = 0;
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
@@ -2834,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
if (cpi->frames_till_alt_ref_frame)
cpi->frames_till_alt_ref_frame --;
if (cpi->common.frames_till_alt_ref_frame)
cpi->common.frames_till_alt_ref_frame --;
cpi->frames_since_golden ++;
cpi->common.frames_since_golden ++;
if (cpi->frames_since_golden > 1)
if (cpi->common.frames_since_golden > 1)
{
cpi->recent_ref_frame_usage[INTRA_FRAME] +=
cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
@@ -2890,11 +2890,11 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
cpi->prob_last_coded = 200;
cpi->prob_gf_coded = 1;
}
else if (cpi->frames_since_golden == 0)
else if (cpi->common.frames_since_golden == 0)
{
cpi->prob_last_coded = 214;
}
else if (cpi->frames_since_golden == 1)
else if (cpi->common.frames_since_golden == 1)
{
cpi->prob_last_coded = 192;
cpi->prob_gf_coded = 220;
@@ -3368,12 +3368,12 @@ static void encode_frame_to_data_rate
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
/* per second target bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
cpi->output_framerate);
cpi->output_frame_rate);
}
}
else
#endif
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate);
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate);
/* Default turn off buffer to buffer copying */
cm->copy_buffer_to_gf = 0;
@@ -4557,7 +4557,7 @@ static void encode_frame_to_data_rate
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
int bits_off_for_this_layer =
(int)(lc->target_bandwidth / lc->framerate -
(int)(lc->target_bandwidth / lc->frame_rate -
cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
@@ -4805,7 +4805,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
{
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
*cpi->oxcf.two_pass_vbrmin_section / 100);
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate);
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
}
}
#endif
@@ -4821,10 +4821,8 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
{
#if HAVE_NEON
int64_t store_reg[8];
#if CONFIG_RUNTIME_CPU_DETECT
#endif
VP8_COMMON *cm = &cpi->common;
#endif
#endif
struct vpx_usec_timer timer;
int res = 0;
@@ -4850,6 +4848,7 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
res = -1;
cm->clr_type = sd->clrtype;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -4934,7 +4933,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->frames_till_gf_update_due);
force_src_buffer = &cpi->alt_ref_buffer;
}
cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->refresh_alt_ref_frame = 1;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 0;
@@ -5039,7 +5038,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (this_duration)
{
if (step)
cpi->ref_framerate = 10000000.0 / this_duration;
cpi->ref_frame_rate = 10000000.0 / this_duration;
else
{
double avg_duration, interval;
@@ -5053,11 +5052,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if(interval > 10000000.0)
interval = 10000000;
avg_duration = 10000000.0 / cpi->ref_framerate;
avg_duration = 10000000.0 / cpi->ref_frame_rate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
cpi->ref_framerate = 10000000.0 / avg_duration;
cpi->ref_frame_rate = 10000000.0 / avg_duration;
}
if (cpi->oxcf.number_of_layers > 1)
@@ -5068,12 +5067,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
lc->framerate = cpi->ref_framerate /
cpi->oxcf.rate_decimator[i];
lc->frame_rate = cpi->ref_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
else
vp8_new_framerate(cpi, cpi->ref_framerate);
vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
}
cpi->last_time_stamp_seen = cpi->source->ts_start;
@@ -5090,7 +5089,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
layer = cpi->oxcf.layer_id[
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
restore_layer_context (cpi, layer);
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
}
if (cpi->compressor_speed == 2)

View File

@@ -232,7 +232,7 @@ enum
typedef struct
{
/* Layer configuration */
double framerate;
double frame_rate;
int target_bandwidth;
/* Layer specific coding parameters */
@@ -320,7 +320,6 @@ typedef struct VP8_COMP
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *last_frame_unscaled_source;
unsigned int frames_till_alt_ref_frame;
/* frame in src_buffers has been identified to be encoded as an alt ref */
int source_alt_ref_pending;
/* an alt ref frame has been encoded and is usable */
@@ -370,7 +369,6 @@ typedef struct VP8_COMP
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
unsigned int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;
@@ -403,7 +401,7 @@ typedef struct VP8_COMP
/* Minimum allocation that should be used for any frame */
int min_frame_bandwidth;
int inter_frame_target;
double output_framerate;
double output_frame_rate;
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
@@ -417,8 +415,8 @@ typedef struct VP8_COMP
int buffered_mode;
double framerate;
double ref_framerate;
double frame_rate;
double ref_frame_rate;
int64_t buffer_level;
int64_t bits_off_target;

View File

@@ -313,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
/* Get baseline error score */
/* Copy the unfiltered / processed recon buffer to the new buffer */
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
@@ -339,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
if(ss_err[filt_low] == 0)
{
/* Get Low filter error score */
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
@@ -367,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
if(ss_err[filt_high] == 0)
{
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);

View File

@@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_key = cpi->frames_since_key;
cc->filter_level = cpi->common.filter_level;
cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due;
cc->frames_since_golden = cpi->frames_since_golden;
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
@@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
cpi->frames_since_key = cc->frames_since_key;
cpi->common.filter_level = cc->filter_level;
cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due;
cpi->frames_since_golden = cc->frames_since_golden;
cpi->common.frames_since_golden = cc->frames_since_golden;
vp8_copy(cpi->common.fc.mvc, cc->mvc);
@@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
}
else {
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
@@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
/* frame separation adjustment ( down) */
if (cpi->frames_since_key < cpi->output_framerate / 2)
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
kf_boost = (int)(kf_boost
* cpi->frames_since_key / (cpi->output_framerate / 2));
* cpi->frames_since_key / (cpi->output_frame_rate / 2));
/* Minimal target size is |2* per_frame_bandwidth|. */
if (kf_boost < 16)
@@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (Adjustment > (cpi->this_frame_target - min_frame_target))
Adjustment = (cpi->this_frame_target - min_frame_target);
if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1))
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
else
cpi->this_frame_target -= Adjustment;
@@ -1360,7 +1360,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
* whichever is smaller.
*/
int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2;
av_key_frame_frequency = 1 + (int)cpi->output_frame_rate * 2;
if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
av_key_frame_frequency = key_freq;

View File

@@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
void vp8_auto_select_speed(VP8_COMP *cpi)
{
int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;

View File

@@ -66,6 +66,7 @@ VP8_COMMON_SRCS-yes += common/setupintrarecon.c
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-yes += common/variance_c.c
VP8_COMMON_SRCS-yes += common/variance.h
VP8_COMMON_SRCS-yes += common/vp8_asm_com_offsets.c
VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h
@@ -191,4 +192,7 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(A
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
$(eval $(call asm_offsets_template,\
vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/vp8_asm_com_offsets.c))
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))

View File

@@ -153,7 +153,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
#else
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
#endif
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
@@ -204,7 +204,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
if(finalize && cfg->rc_end_usage == VPX_CQ)
RANGE_CHECK(vp8_cfg, cq_level,
cfg->rc_min_quantizer, cfg->rc_max_quantizer);
@@ -327,14 +327,17 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->resample_up_water_mark = cfg.rc_resize_up_thresh;
oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh;
if (cfg.rc_end_usage == VPX_VBR) {
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
} else if (cfg.rc_end_usage == VPX_CBR) {
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
} else if (cfg.rc_end_usage == VPX_CQ) {
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
} else if (cfg.rc_end_usage == VPX_Q) {
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
if (cfg.rc_end_usage == VPX_VBR)
{
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
}
else if (cfg.rc_end_usage == VPX_CBR)
{
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
}
else if (cfg.rc_end_usage == VPX_CQ)
{
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
}
oxcf->target_bandwidth = cfg.rc_target_bitrate;
@@ -692,6 +695,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}
@@ -1075,7 +1079,11 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
if (sd.clrtype == REG_YUV)
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
else
ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
ctx->preview_img.x_chroma_shift = 1;
ctx->preview_img.y_chroma_shift = 1;
@@ -1269,7 +1277,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
1, /* g_delete_first_pass_file */
"vp8.fpf" /* first pass filename */
#endif
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
1, /* ts_number_layers */
{0}, /* ts_target_bitrate */
{0}, /* ts_rate_decimator */

View File

@@ -41,6 +41,15 @@ typedef enum
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
typedef struct
{
unsigned int id;
unsigned long sz;
unsigned int align;
unsigned int flags;
unsigned long(*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);
} mem_req_t;
static const mem_req_t vp8_mem_req_segs[] =
{
{VP8_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz},
@@ -84,6 +93,65 @@ static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_
return sizeof(vpx_codec_alg_priv_t);
}
static void vp8_mmap_dtor(vpx_codec_mmap_t *mmap)
{
free(mmap->priv);
}
static vpx_codec_err_t vp8_mmap_alloc(vpx_codec_mmap_t *mmap)
{
vpx_codec_err_t res;
unsigned int align;
align = mmap->align ? mmap->align - 1 : 0;
if (mmap->flags & VPX_CODEC_MEM_ZERO)
mmap->priv = calloc(1, mmap->sz + align);
else
mmap->priv = malloc(mmap->sz + align);
res = (mmap->priv) ? VPX_CODEC_OK : VPX_CODEC_MEM_ERROR;
mmap->base = (void *)((((uintptr_t)mmap->priv) + align) & ~(uintptr_t)align);
mmap->dtor = vp8_mmap_dtor;
return res;
}
static vpx_codec_err_t vp8_validate_mmaps(const vp8_stream_info_t *si,
const vpx_codec_mmap_t *mmaps,
vpx_codec_flags_t init_flags)
{
int i;
vpx_codec_err_t res = VPX_CODEC_OK;
for (i = 0; i < NELEMENTS(vp8_mem_req_segs) - 1; i++)
{
/* Ensure the segment has been allocated */
if (!mmaps[i].base)
{
res = VPX_CODEC_MEM_ERROR;
break;
}
/* Verify variable size segment is big enough for the current si. */
if (vp8_mem_req_segs[i].calc_sz)
{
vpx_codec_dec_cfg_t cfg;
cfg.w = si->w;
cfg.h = si->h;
if (mmaps[i].sz < vp8_mem_req_segs[i].calc_sz(&cfg, init_flags))
{
res = VPX_CODEC_MEM_ERROR;
break;
}
}
}
return res;
}
static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
{
int i;
@@ -110,6 +178,16 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
}
}
static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
{
int i;
for (i = 0; i < NELEMENTS(ctx->mmaps); i++)
if (ctx->mmaps[i].id == id)
return ctx->mmaps[i].base;
return NULL;
}
static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
{
/* nothing to clean up */
@@ -136,7 +214,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
mmap.align = vp8_mem_req_segs[0].align;
mmap.flags = vp8_mem_req_segs[0].flags;
res = vpx_mmap_alloc(&mmap);
res = vp8_mmap_alloc(&mmap);
if (res != VPX_CODEC_OK) return res;
vp8_init_ctx(ctx, &mmap);
@@ -288,7 +366,8 @@ static void yuvconfig2image(vpx_image_t *img,
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
img->fmt = VPX_IMG_FMT_I420;
img->fmt = yv12->clrtype == REG_YUV ?
VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
img->w = yv12->y_stride;
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
img->d_w = yv12->y_width;
@@ -409,7 +488,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
ctx->base.init_flags);
res = vpx_mmap_alloc(&ctx->mmaps[i]);
res = vp8_mmap_alloc(&ctx->mmaps[i]);
}
if (!res)
@@ -421,9 +500,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
/* Initialize the decoder instance on the first frame*/
if (!res && !ctx->decoder_init)
{
res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
vp8_mem_req_segs, NELEMENTS(vp8_mem_req_segs),
ctx->base.init_flags);
res = vp8_validate_mmaps(&ctx->si, ctx->mmaps, ctx->base.init_flags);
if (!res)
{
@@ -720,6 +797,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}

View File

@@ -35,5 +35,9 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h
VP8_DX_SRCS-yes += decoder/treereader.h
VP8_DX_SRCS-yes += decoder/onyxd_if.c
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
VP8_DX_SRCS-yes += decoder/vp8_asm_dec_offsets.c
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
$(eval $(call asm_offsets_template,\
vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/vp8_asm_dec_offsets.c))

View File

@@ -1,116 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_convolve_avg_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
|vp9_convolve_avg_neon| PROC
push {r4-r6, lr}
ldrd r4, r5, [sp, #32]
mov r6, r2
cmp r4, #32
bgt avg64
beq avg32
cmp r4, #8
bgt avg16
beq avg8
b avg4
avg64
sub lr, r1, #32
sub r4, r3, #32
avg64_h
pld [r0, r1, lsl #1]
vld1.8 {q0-q1}, [r0]!
vld1.8 {q2-q3}, [r0], lr
pld [r2, r3]
vld1.8 {q8-q9}, [r6@128]!
vld1.8 {q10-q11}, [r6@128], r4
vrhadd.u8 q0, q0, q8
vrhadd.u8 q1, q1, q9
vrhadd.u8 q2, q2, q10
vrhadd.u8 q3, q3, q11
vst1.8 {q0-q1}, [r2@128]!
vst1.8 {q2-q3}, [r2@128], r4
subs r5, r5, #1
bgt avg64_h
pop {r4-r6, pc}
avg32
vld1.8 {q0-q1}, [r0], r1
vld1.8 {q2-q3}, [r0], r1
vld1.8 {q8-q9}, [r6@128], r3
vld1.8 {q10-q11}, [r6@128], r3
pld [r0]
vrhadd.u8 q0, q0, q8
pld [r0, r1]
vrhadd.u8 q1, q1, q9
pld [r6]
vrhadd.u8 q2, q2, q10
pld [r6, r3]
vrhadd.u8 q3, q3, q11
vst1.8 {q0-q1}, [r2@128], r3
vst1.8 {q2-q3}, [r2@128], r3
subs r5, r5, #2
bgt avg32
pop {r4-r6, pc}
avg16
vld1.8 {q0}, [r0], r1
vld1.8 {q1}, [r0], r1
vld1.8 {q2}, [r6@128], r3
vld1.8 {q3}, [r6@128], r3
pld [r0]
pld [r0, r1]
vrhadd.u8 q0, q0, q2
pld [r6]
pld [r6, r3]
vrhadd.u8 q1, q1, q3
vst1.8 {q0}, [r2@128], r3
vst1.8 {q1}, [r2@128], r3
subs r5, r5, #2
bgt avg16
pop {r4-r6, pc}
avg8
vld1.8 {d0}, [r0], r1
vld1.8 {d1}, [r0], r1
vld1.8 {d2}, [r6@64], r3
vld1.8 {d3}, [r6@64], r3
pld [r0]
pld [r0, r1]
vrhadd.u8 q0, q0, q1
pld [r6]
pld [r6, r3]
vst1.8 {d0}, [r2@64], r3
vst1.8 {d1}, [r2@64], r3
subs r5, r5, #2
bgt avg8
pop {r4-r6, pc}
avg4
vld1.32 {d0[0]}, [r0], r1
vld1.32 {d0[1]}, [r0], r1
vld1.32 {d2[0]}, [r6@32], r3
vld1.32 {d2[1]}, [r6@32], r3
vrhadd.u8 d0, d0, d2
vst1.32 {d0[0]}, [r2@32], r3
vst1.32 {d0[1]}, [r2@32], r3
subs r5, r5, #2
bgt avg4
pop {r4-r6, pc}
ENDP
END

View File

@@ -1,302 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; These functions are only valid when:
; x_step_q4 == 16
; w%4 == 0
; h%4 == 0
; taps == 8
; VP9_FILTER_WEIGHT == 128
; VP9_FILTER_SHIFT == 7
EXPORT |vp9_convolve8_avg_horiz_neon|
EXPORT |vp9_convolve8_avg_vert_neon|
IMPORT |vp9_convolve8_avg_horiz_c|
IMPORT |vp9_convolve8_avg_vert_c|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Multiply and accumulate by q0
MACRO
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
vmull.s16 $dst, $src0, d0[0]
vmlal.s16 $dst, $src1, d0[1]
vmlal.s16 $dst, $src2, d0[2]
vmlal.s16 $dst, $src3, d0[3]
vmlal.s16 $dst, $src4, d1[0]
vmlal.s16 $dst, $src5, d1[1]
vmlal.s16 $dst, $src6, d1[2]
vmlal.s16 $dst, $src7, d1[3]
MEND
; r0 const uint8_t *src
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
; sp[]const int16_t *filter_x
; sp[]int x_step_q4
; sp[]const int16_t *filter_y ; unused
; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
|vp9_convolve8_avg_horiz_neon| PROC
ldr r12, [sp, #4] ; x_step_q4
cmp r12, #16
bne vp9_convolve8_avg_horiz_c
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
ldr r5, [sp, #32] ; filter_x
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
vld1.s16 {q0}, [r5] ; filter_x
sub r8, r1, r1, lsl #2 ; -src_stride * 3
add r8, r8, #4 ; -src_stride * 3 + 4
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
add r4, r4, #4 ; -dst_stride * 3 + 4
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
sub r9, r9, #7
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
mov r10, r6 ; w loop counter
loop_horiz_v
vld1.8 {d24}, [r0], r1
vld1.8 {d25}, [r0], r1
vld1.8 {d26}, [r0], r1
vld1.8 {d27}, [r0], r8
vtrn.16 q12, q13
vtrn.8 d24, d25
vtrn.8 d26, d27
pld [r0, r1, lsl #2]
vmovl.u8 q8, d24
vmovl.u8 q9, d25
vmovl.u8 q10, d26
vmovl.u8 q11, d27
; save a few instructions in the inner loop
vswp d17, d18
vmov d23, d21
add r0, r0, #3
loop_horiz
add r5, r0, #64
vld1.32 {d28[]}, [r0], r1
vld1.32 {d29[]}, [r0], r1
vld1.32 {d31[]}, [r0], r1
vld1.32 {d30[]}, [r0], r8
pld [r5]
vtrn.16 d28, d31
vtrn.16 d29, d30
vtrn.8 d28, d29
vtrn.8 d31, d30
pld [r5, r1]
; extract to s16
vtrn.32 q14, q15
vmovl.u8 q12, d28
vmovl.u8 q13, d29
pld [r5, r1, lsl #1]
; slightly out of order load to match the existing data
vld1.u32 {d6[0]}, [r2], r3
vld1.u32 {d7[0]}, [r2], r3
vld1.u32 {d6[1]}, [r2], r3
vld1.u32 {d7[1]}, [r2], r3
sub r2, r2, r3, lsl #2 ; reset for store
; src[] * filter_x
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
pld [r5, -r8]
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
vtrn.32 d2, d3
vtrn.8 d2, d3
; average the new value and the dst value
vrhadd.u8 q1, q1, q3
vst1.u32 {d2[0]}, [r2@32], r3
vst1.u32 {d3[0]}, [r2@32], r3
vst1.u32 {d2[1]}, [r2@32], r3
vst1.u32 {d3[1]}, [r2@32], r4
vmov q8, q9
vmov d20, d23
vmov q11, q12
vmov q9, q13
subs r6, r6, #4 ; w -= 4
bgt loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r9 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
bgt loop_horiz_v
pop {r4-r10, pc}
ENDP
|vp9_convolve8_avg_vert_neon| PROC
ldr r12, [sp, #12]
cmp r12, #16
bne vp9_convolve8_avg_vert_c
push {r4-r8, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
ldr r4, [sp, #32] ; filter_y
ldr r6, [sp, #40] ; w
ldr lr, [sp, #44] ; h
vld1.s16 {q0}, [r4] ; filter_y
lsl r1, r1, #1
lsl r3, r3, #1
loop_vert_h
mov r4, r0
add r7, r0, r1, asr #1
mov r5, r2
add r8, r2, r3, asr #1
mov r12, lr ; h loop counter
vld1.u32 {d16[0]}, [r4], r1
vld1.u32 {d16[1]}, [r7], r1
vld1.u32 {d18[0]}, [r4], r1
vld1.u32 {d18[1]}, [r7], r1
vld1.u32 {d20[0]}, [r4], r1
vld1.u32 {d20[1]}, [r7], r1
vld1.u32 {d22[0]}, [r4], r1
vmovl.u8 q8, d16
vmovl.u8 q9, d18
vmovl.u8 q10, d20
vmovl.u8 q11, d22
loop_vert
; always process a 4x4 block at a time
vld1.u32 {d24[0]}, [r7], r1
vld1.u32 {d26[0]}, [r4], r1
vld1.u32 {d26[1]}, [r7], r1
vld1.u32 {d24[1]}, [r4], r1
; extract to s16
vmovl.u8 q12, d24
vmovl.u8 q13, d26
vld1.u32 {d6[0]}, [r5@32], r3
vld1.u32 {d6[1]}, [r8@32], r3
vld1.u32 {d7[0]}, [r5@32], r3
vld1.u32 {d7[1]}, [r8@32], r3
pld [r7]
pld [r4]
; src[] * filter_y
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
pld [r7, r1]
pld [r4, r1]
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
pld [r5]
pld [r8]
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
pld [r5, r3]
pld [r8, r3]
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; average the new value and the dst value
vrhadd.u8 q1, q1, q3
sub r5, r5, r3, lsl #1 ; reset for store
sub r8, r8, r3, lsl #1
vst1.u32 {d2[0]}, [r5@32], r3
vst1.u32 {d2[1]}, [r8@32], r3
vst1.u32 {d3[0]}, [r5@32], r3
vst1.u32 {d3[1]}, [r8@32], r3
vmov q8, q10
vmov d18, d22
vmov d19, d24
vmov q10, q13
vmov d22, d25
subs r12, r12, #4 ; h -= 4
bgt loop_vert
; outer loop
add r0, r0, #4
add r2, r2, #4
subs r6, r6, #4 ; w -= 4
bgt loop_vert_h
pop {r4-r8, pc}
ENDP
END

View File

@@ -1,280 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; These functions are only valid when:
; x_step_q4 == 16
; w%4 == 0
; h%4 == 0
; taps == 8
; VP9_FILTER_WEIGHT == 128
; VP9_FILTER_SHIFT == 7
EXPORT |vp9_convolve8_horiz_neon|
EXPORT |vp9_convolve8_vert_neon|
IMPORT |vp9_convolve8_horiz_c|
IMPORT |vp9_convolve8_vert_c|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Multiply and accumulate by q0
MACRO
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
vmull.s16 $dst, $src0, d0[0]
vmlal.s16 $dst, $src1, d0[1]
vmlal.s16 $dst, $src2, d0[2]
vmlal.s16 $dst, $src3, d0[3]
vmlal.s16 $dst, $src4, d1[0]
vmlal.s16 $dst, $src5, d1[1]
vmlal.s16 $dst, $src6, d1[2]
vmlal.s16 $dst, $src7, d1[3]
MEND
; r0 const uint8_t *src
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
; sp[]const int16_t *filter_x
; sp[]int x_step_q4
; sp[]const int16_t *filter_y ; unused
; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
|vp9_convolve8_horiz_neon| PROC
ldr r12, [sp, #4] ; x_step_q4
cmp r12, #16
bne vp9_convolve8_horiz_c
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
ldr r5, [sp, #32] ; filter_x
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
vld1.s16 {q0}, [r5] ; filter_x
sub r8, r1, r1, lsl #2 ; -src_stride * 3
add r8, r8, #4 ; -src_stride * 3 + 4
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
add r4, r4, #4 ; -dst_stride * 3 + 4
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
sub r9, r9, #7
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
mov r10, r6 ; w loop counter
loop_horiz_v
vld1.8 {d24}, [r0], r1
vld1.8 {d25}, [r0], r1
vld1.8 {d26}, [r0], r1
vld1.8 {d27}, [r0], r8
vtrn.16 q12, q13
vtrn.8 d24, d25
vtrn.8 d26, d27
pld [r0, r1, lsl #2]
vmovl.u8 q8, d24
vmovl.u8 q9, d25
vmovl.u8 q10, d26
vmovl.u8 q11, d27
; save a few instructions in the inner loop
vswp d17, d18
vmov d23, d21
add r0, r0, #3
loop_horiz
add r5, r0, #64
vld1.32 {d28[]}, [r0], r1
vld1.32 {d29[]}, [r0], r1
vld1.32 {d31[]}, [r0], r1
vld1.32 {d30[]}, [r0], r8
pld [r5]
vtrn.16 d28, d31
vtrn.16 d29, d30
vtrn.8 d28, d29
vtrn.8 d31, d30
pld [r5, r1]
; extract to s16
vtrn.32 q14, q15
vmovl.u8 q12, d28
vmovl.u8 q13, d29
pld [r5, r1, lsl #1]
; src[] * filter_x
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
pld [r5, -r8]
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
vtrn.32 d2, d3
vtrn.8 d2, d3
vst1.u32 {d2[0]}, [r2@32], r3
vst1.u32 {d3[0]}, [r2@32], r3
vst1.u32 {d2[1]}, [r2@32], r3
vst1.u32 {d3[1]}, [r2@32], r4
vmov q8, q9
vmov d20, d23
vmov q11, q12
vmov q9, q13
subs r6, r6, #4 ; w -= 4
bgt loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r9 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
bgt loop_horiz_v
pop {r4-r10, pc}
ENDP
|vp9_convolve8_vert_neon| PROC
ldr r12, [sp, #12]
cmp r12, #16
bne vp9_convolve8_vert_c
push {r4-r8, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
ldr r4, [sp, #32] ; filter_y
ldr r6, [sp, #40] ; w
ldr lr, [sp, #44] ; h
vld1.s16 {q0}, [r4] ; filter_y
lsl r1, r1, #1
lsl r3, r3, #1
loop_vert_h
mov r4, r0
add r7, r0, r1, asr #1
mov r5, r2
add r8, r2, r3, asr #1
mov r12, lr ; h loop counter
vld1.u32 {d16[0]}, [r4], r1
vld1.u32 {d16[1]}, [r7], r1
vld1.u32 {d18[0]}, [r4], r1
vld1.u32 {d18[1]}, [r7], r1
vld1.u32 {d20[0]}, [r4], r1
vld1.u32 {d20[1]}, [r7], r1
vld1.u32 {d22[0]}, [r4], r1
vmovl.u8 q8, d16
vmovl.u8 q9, d18
vmovl.u8 q10, d20
vmovl.u8 q11, d22
loop_vert
; always process a 4x4 block at a time
vld1.u32 {d24[0]}, [r7], r1
vld1.u32 {d26[0]}, [r4], r1
vld1.u32 {d26[1]}, [r7], r1
vld1.u32 {d24[1]}, [r4], r1
; extract to s16
vmovl.u8 q12, d24
vmovl.u8 q13, d26
pld [r5]
pld [r8]
; src[] * filter_y
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
pld [r5, r3]
pld [r8, r3]
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
pld [r7]
pld [r4]
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
pld [r7, r1]
pld [r4, r1]
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
vst1.u32 {d2[0]}, [r5@32], r3
vst1.u32 {d2[1]}, [r8@32], r3
vst1.u32 {d3[0]}, [r5@32], r3
vst1.u32 {d3[1]}, [r8@32], r3
vmov q8, q10
vmov d18, d22
vmov d19, d24
vmov q10, q13
vmov d22, d25
subs r12, r12, #4 ; h -= 4
bgt loop_vert
; outer loop
add r0, r0, #4
add r2, r2, #4
subs r6, r6, #4 ; w -= 4
bgt loop_vert_h
pop {r4-r8, pc}
ENDP
END

View File

@@ -1,78 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
*/
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
// Account for the vertical phase needing 3 lines prior and 4 lines post
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* Filter starting 3 lines back. The neon implementation will ignore the
* given height and filter a multiple of 4 lines. Since this goes in to
* the temp buffer which has lots of extra room and is subsequently discarded
* this is safe if somewhat less than ideal.
*/
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height);
/* Step into the temp buffer 3 lines to get the actual frame data */
vp9_convolve8_vert_neon(temp + 64 * 3, 64,
dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h);
}
void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_avg_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
*/
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height);
vp9_convolve8_avg_vert_neon(temp + 64 * 3,
64, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h);
}

View File

@@ -1,84 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_convolve_copy_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
|vp9_convolve_copy_neon| PROC
push {r4-r5, lr}
ldrd r4, r5, [sp, #28]
cmp r4, #32
bgt copy64
beq copy32
cmp r4, #8
bgt copy16
beq copy8
b copy4
copy64
sub lr, r1, #32
sub r3, r3, #32
copy64_h
pld [r0, r1, lsl #1]
vld1.8 {q0-q1}, [r0]!
vld1.8 {q2-q3}, [r0], lr
vst1.8 {q0-q1}, [r2@128]!
vst1.8 {q2-q3}, [r2@128], r3
subs r5, r5, #1
bgt copy64_h
pop {r4-r5, pc}
copy32
pld [r0, r1, lsl #1]
vld1.8 {q0-q1}, [r0], r1
pld [r0, r1, lsl #1]
vld1.8 {q2-q3}, [r0], r1
vst1.8 {q0-q1}, [r2@128], r3
vst1.8 {q2-q3}, [r2@128], r3
subs r5, r5, #2
bgt copy32
pop {r4-r5, pc}
copy16
pld [r0, r1, lsl #1]
vld1.8 {q0}, [r0], r1
pld [r0, r1, lsl #1]
vld1.8 {q1}, [r0], r1
vst1.8 {q0}, [r2@128], r3
vst1.8 {q1}, [r2@128], r3
subs r5, r5, #2
bgt copy16
pop {r4-r5, pc}
copy8
pld [r0, r1, lsl #1]
vld1.8 {d0}, [r0], r1
pld [r0, r1, lsl #1]
vld1.8 {d2}, [r0], r1
vst1.8 {d0}, [r2@64], r3
vst1.8 {d2}, [r2@64], r3
subs r5, r5, #2
bgt copy8
pop {r4-r5, pc}
copy4
ldr r12, [r0], r1
str r12, [r2], r3
subs r5, r5, #1
bgt copy4
pop {r4-r5, pc}
ENDP
END

View File

@@ -1,69 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp9_dc_only_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_dc_only_idct_add_neon(int input_dc, uint8_t *pred_ptr,
; uint8_t *dst_ptr, int pitch, int stride)
;
; r0 int input_dc
; r1 uint8_t *pred_ptr
; r2 uint8_t *dst_ptr
; r3 int pitch
; sp int stride
|vp9_dc_only_idct_add_neon| PROC
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
; dct_const_round_shift(input_dc * cospi_16_64)
mul r0, r0, r12 ; input_dc * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; dct_const_round_shift(out * cospi_16_64)
mul r0, r0, r12 ; out * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; ROUND_POWER_OF_TWO(out, 4)
add r0, r0, #8 ; + (1 <<((4) - 1))
asr r0, r0, #4 ; >> 4
vdup.16 q0, r0; ; duplicate a1
ldr r12, [sp] ; load stride
vld1.32 {d2[0]}, [r1], r3
vld1.32 {d2[1]}, [r1], r3
vld1.32 {d4[0]}, [r1], r3
vld1.32 {d4[1]}, [r1]
vaddw.u8 q1, q0, d2 ; a1 + pred_ptr[c]
vaddw.u8 q2, q0, d4
vqmovun.s16 d2, q1 ; clip_pixel
vqmovun.s16 d4, q2
vst1.32 {d2[0]}, [r2], r12
vst1.32 {d2[1]}, [r2], r12
vst1.32 {d4[0]}, [r2], r12
vst1.32 {d4[1]}, [r2]
bx lr
ENDP ; |vp9_dc_only_idct_add_neon|
END

View File

@@ -1,169 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
extern void save_neon_registers();
extern void restore_neon_registers();
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_short_idct16x16_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
dest,
dest_stride);
/* Parallel idct on the lower 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_short_idct16x16_add_neon_pass2(input+8*16+1,
row_idct_output+8,
pass1_output,
0,
dest,
dest_stride);
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
dest,
dest_stride);
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
dest+8,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
return;
}
void vp9_short_idct10_16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
save_neon_registers();
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
vp9_short_idct10_16x16_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
dest,
dest_stride);
/* Skip Parallel idct on the lower 8 rows as they are all 0s */
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
dest,
dest_stride);
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
dest+8,
dest_stride);
// restore d8-d15 register values.
restore_neon_registers();
return;
}

View File

@@ -1,47 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_common.h"
// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
int16_t *output, int16_t *input);
extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
extern void save_neon_registers();
extern void restore_neon_registers();
void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
int dest_stride) {
// TODO(cd): move the creation of these buffers within the ASM file
// internal buffer used to transpose 8 lines into before transforming them
int16_t transpose_buffer[32 * 8];
// results of the first pass (transpose and transform rows)
int16_t pass1[32 * 32];
// results of the second pass (transpose and transform columns)
int16_t pass2[32 * 32];
// save register we need to preserve
save_neon_registers();
// process rows
idct32_transpose_and_transform(transpose_buffer, pass1, input);
// process columns
// TODO(cd): do these two steps/passes within the ASM file
idct32_transpose_and_transform(transpose_buffer, pass2, pass1);
// combine and add to dest
// TODO(cd): integrate this within the last storage step of the second pass
idct32_combine_add(dest, pass2, dest_stride);
// restore register we need to preserve
restore_neon_registers();
}
// TODO(cd): Eliminate this file altogether when everything is in ASM file

View File

@@ -1,708 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_loop_filter_horizontal_edge_neon|
EXPORT |vp9_loop_filter_vertical_edge_neon|
EXPORT |vp9_mbloop_filter_horizontal_edge_neon|
EXPORT |vp9_mbloop_filter_vertical_edge_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_loop_filter_horizontal_edge_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #8] ; load count
ldr r2, [sp, #4] ; load thresh
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
count_lf_h_loop
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r3, r2, r1, lsr #1 ; set to 3 lines down
vld1.u8 {d3}, [r2@64], r1 ; p3
vld1.u8 {d4}, [r3@64], r1 ; p2
vld1.u8 {d5}, [r2@64], r1 ; p1
vld1.u8 {d6}, [r3@64], r1 ; p0
vld1.u8 {d7}, [r2@64], r1 ; q0
vld1.u8 {d16}, [r3@64], r1 ; q1
vld1.u8 {d17}, [r2@64] ; q2
vld1.u8 {d18}, [r3@64] ; q3
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
bl vp9_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
vst1.u8 {d6}, [r2@64], r1 ; store oq0
vst1.u8 {d7}, [r3@64], r1 ; store oq1
add r0, r0, #8
subs r12, r12, #1
bne count_lf_h_loop
end_vp9_lf_h_edge
pop {pc}
ENDP ; |vp9_loop_filter_horizontal_edge_neon|
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_loop_filter_vertical_edge_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #8] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_lf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
vld1.u8 {d6}, [r2], r1
vld1.u8 {d7}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d18}, [r2]
;transpose to 8x16 matrix
vtrn.32 d3, d7
vtrn.32 d4, d16
vtrn.32 d5, d17
vtrn.32 d6, d18
vtrn.16 d3, d5
vtrn.16 d4, d6
vtrn.16 d7, d17
vtrn.16 d16, d18
vtrn.8 d3, d4
vtrn.8 d5, d6
vtrn.8 d7, d16
vtrn.8 d17, d18
bl vp9_loop_filter_neon
sub r0, r0, #2
;store op1, op0, oq0, oq1
vst4.8 {d4[0], d5[0], d6[0], d7[0]}, [r0], r1
vst4.8 {d4[1], d5[1], d6[1], d7[1]}, [r0], r1
vst4.8 {d4[2], d5[2], d6[2], d7[2]}, [r0], r1
vst4.8 {d4[3], d5[3], d6[3], d7[3]}, [r0], r1
vst4.8 {d4[4], d5[4], d6[4], d7[4]}, [r0], r1
vst4.8 {d4[5], d5[5], d6[5], d7[5]}, [r0], r1
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
end_vp9_lf_v_edge
pop {pc}
ENDP ; |vp9_loop_filter_vertical_edge_neon|
; void vp9_loop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
; d2 thresh
; d3 p3
; d4 p2
; d5 p1
; d6 p0
; d7 q0
; d16 q1
; d17 q2
; d18 q3
;
; Outputs:
; d4 op1
; d5 op0
; d6 oq0
; d7 oq1
|vp9_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
vabd.u8 d3, d17, d16 ; m5 = abs(q2 - q1)
vabd.u8 d4, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
vabd.u8 d17, d6, d7 ; abs(p0 - q0)
vmax.u8 d3, d3, d4 ; m3 = max(m5, m6)
vmov.u8 d18, #0x80
vmax.u8 d23, d19, d20 ; m1 = max(m1, m2)
; hevmask
vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d2 ; (abs(q1 - q0) > thresh)*-1
vmax.u8 d23, d23, d3 ; m1 = max(m1, m3)
vabd.u8 d28, d5, d16 ; a = abs(p1 - q1)
vqadd.u8 d17, d17, d17 ; b = abs(p0 - q0) * 2
veor d7, d7, d18 ; qs0
vcge.u8 d23, d1, d23 ; abs(m1) > limit
; filter() function
; convert to signed
vshr.u8 d28, d28, #1 ; a = a / 2
veor d6, d6, d18 ; ps0
veor d5, d5, d18 ; ps1
vqadd.u8 d17, d17, d28 ; a = b + a
veor d16, d16, d18 ; qs1
vmov.u8 d19, #3
vsub.s8 d28, d7, d6 ; ( qs0 - ps0)
vcge.u8 d17, d0, d17 ; a > blimit
vqsub.s8 d27, d5, d16 ; filter = clamp(ps1-qs1)
vorr d22, d21, d22 ; hevmask
vmull.s8 q12, d28, d19 ; 3 * ( qs0 - ps0)
vand d27, d27, d22 ; filter &= hev
vand d23, d23, d17 ; filter_mask
vaddw.s8 q12, q12, d27 ; filter + 3 * (qs0 - ps0)
vmov.u8 d17, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d27, q12
vand d27, d27, d23 ; filter &= mask
vqadd.s8 d28, d27, d19 ; filter2 = clamp(filter+3)
vqadd.s8 d27, d27, d17 ; filter1 = clamp(filter+4)
vshr.s8 d28, d28, #3 ; filter2 >>= 3
vshr.s8 d27, d27, #3 ; filter1 >>= 3
vqadd.s8 d19, d6, d28 ; u = clamp(ps0 + filter2)
vqsub.s8 d26, d7, d27 ; u = clamp(qs0 - filter1)
; outer tap adjustments
vrshr.s8 d27, d27, #1 ; filter = ++filter1 >> 1
veor d6, d26, d18 ; *oq0 = u^0x80
vbic d27, d27, d22 ; filter &= ~hev
vqadd.s8 d21, d5, d27 ; u = clamp(ps1 + filter)
vqsub.s8 d20, d16, d27 ; u = clamp(qs1 - filter)
veor d5, d19, d18 ; *op0 = u^0x80
veor d4, d21, d18 ; *op1 = u^0x80
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_loop_filter_neon|
; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_mbloop_filter_horizontal_edge_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #16] ; load count
ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
count_mblf_h_loop
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r2, r3, r1, lsr #1 ; set to 3 lines down
vld1.u8 {d3}, [r3@64], r1 ; p3
vld1.u8 {d4}, [r2@64], r1 ; p2
vld1.u8 {d5}, [r3@64], r1 ; p1
vld1.u8 {d6}, [r2@64], r1 ; p0
vld1.u8 {d7}, [r3@64], r1 ; q0
vld1.u8 {d16}, [r2@64], r1 ; q1
vld1.u8 {d17}, [r3@64] ; q2
vld1.u8 {d18}, [r2@64], r1 ; q3
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
bl vp9_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
vst1.u8 {d2}, [r2@64], r1 ; store op0
vst1.u8 {d3}, [r3@64], r1 ; store oq0
vst1.u8 {d4}, [r2@64], r1 ; store oq1
vst1.u8 {d5}, [r3@64], r1 ; store oq2
add r0, r0, #8
subs r12, r12, #1
bne count_mblf_h_loop
end_vp9_mblf_h_edge
pop {r4-r5, pc}
ENDP ; |vp9_mbloop_filter_horizontal_edge_neon|
; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int pitch,
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_mbloop_filter_vertical_edge_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #16] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_mblf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
vld1.u8 {d6}, [r2], r1
vld1.u8 {d7}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d18}, [r2]
;transpose to 8x16 matrix
vtrn.32 d3, d7
vtrn.32 d4, d16
vtrn.32 d5, d17
vtrn.32 d6, d18
vtrn.16 d3, d5
vtrn.16 d4, d6
vtrn.16 d7, d17
vtrn.16 d16, d18
vtrn.8 d3, d4
vtrn.8 d5, d6
vtrn.8 d7, d16
vtrn.8 d17, d18
sub r2, r0, #3
add r3, r0, #1
bl vp9_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r2], r1
vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r2], r1
vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r2], r1
vst4.8 {d0[4], d1[4], d2[4], d3[4]}, [r2], r1
vst4.8 {d0[5], d1[5], d2[5], d3[5]}, [r2], r1
vst4.8 {d0[6], d1[6], d2[6], d3[6]}, [r2], r1
vst4.8 {d0[7], d1[7], d2[7], d3[7]}, [r2]
;store oq1, oq2
vst2.8 {d4[0], d5[0]}, [r3], r1
vst2.8 {d4[1], d5[1]}, [r3], r1
vst2.8 {d4[2], d5[2]}, [r3], r1
vst2.8 {d4[3], d5[3]}, [r3], r1
vst2.8 {d4[4], d5[4]}, [r3], r1
vst2.8 {d4[5], d5[5]}, [r3], r1
vst2.8 {d4[6], d5[6]}, [r3], r1
vst2.8 {d4[7], d5[7]}, [r3]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
end_vp9_mblf_v_edge
pop {r4-r5, pc}
ENDP ; |vp9_mbloop_filter_vertical_edge_neon|
; void vp9_mbloop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
; d2 thresh
; d3 p3
; d4 p2
; d5 p1
; d6 p0
; d7 q0
; d16 q1
; d17 q2
; d18 q3
;
; Outputs:
; d0 op2
; d1 op1
; d2 op0
; d3 oq0
; d4 oq1
; d5 oq2
|vp9_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
vabd.u8 d23, d17, d16 ; m5 = abs(q2 - q1)
vabd.u8 d24, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
vabd.u8 d25, d6, d4 ; m7 = abs(p0 - p2)
vmax.u8 d23, d23, d24 ; m3 = max(m5, m6)
vabd.u8 d26, d7, d17 ; m8 = abs(q0 - q2)
vmax.u8 d19, d19, d20
vabd.u8 d24, d6, d7 ; m9 = abs(p0 - q0)
vabd.u8 d27, d3, d6 ; m10 = abs(p3 - p0)
vabd.u8 d28, d18, d7 ; m11 = abs(q3 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d5, d16 ; a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2
; abs () > limit
vcge.u8 d19, d1, d19
; only compare the largest value to thresh
vmax.u8 d25, d25, d26 ; m4 = max(m7, m8)
vmax.u8 d26, d27, d28 ; m5 = max(m10, m11)
vshr.u8 d23, d23, #1 ; a = a / 2
vmax.u8 d25, d25, d26 ; m4 = max(m4, m5)
vqadd.u8 d24, d24, d23 ; a = b + a
vmax.u8 d20, d20, d25 ; m2 = max(m2, m4)
vmov.u8 d23, #1
vcge.u8 d24, d0, d24 ; a > blimit
vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
vcge.u8 d20, d23, d20 ; flat
vand d19, d19, d24 ; mask
vcgt.u8 d23, d22, d2 ; (abs(q1 - q0) > thresh)*-1
vand d20, d20, d19 ; flat & mask
vmov.u8 d22, #0x80
vorr d23, d21, d23 ; hev
; This instruction will truncate the "flat & mask" masks down to 4 bits
; each to fit into one 32 bit arm register. The values are stored in
; q10.64[0].
vshrn.u16 d30, q10, #4
vmov.u32 r4, d30[0] ; flat & mask 4bits
adds r5, r4, #1 ; Check for all 1's
; If mask and flat are 1's for all vectors, then we only need to execute
; the power branch for all vectors.
beq power_branch_only
cmp r4, #0 ; Check for 0, set flag for later
; mbfilter() function
; filter() function
; convert to signed
veor d21, d7, d22 ; qs0
veor d24, d6, d22 ; ps0
veor d25, d5, d22 ; ps1
veor d26, d16, d22 ; qs1
vmov.u8 d27, #3
vsub.s8 d28, d21, d24 ; ( qs0 - ps0)
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
vand d29, d29, d23 ; filter &= hev
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 ; filter &= mask
vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 ; filter2 >>= 3
vshr.s8 d29, d29, #3 ; filter1 >>= 3
vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
vqsub.s8 d21, d21, d29 ; oq0 = clamp(qs0 - filter1)
; outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d23 ; filter &= ~hev
vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
; If mask and flat are 0's for all vectors, then we only need to execute
; the filter branch for all vectors.
beq filter_branch_only
; If mask and flat are mixed then we must perform both branches and
; combine the data.
veor d24, d24, d22 ; *f_op0 = u^0x80
veor d21, d21, d22 ; *f_oq0 = u^0x80
veor d25, d25, d22 ; *f_op1 = u^0x80
veor d26, d26, d22 ; *f_oq1 = u^0x80
; At this point we have already executed the filter branch. The filter
; branch does not set op2 or oq2, so use p2 and q2. Execute the power
; branch and combine the data.
vmov.u8 d23, #2
vaddl.u8 q14, d6, d7 ; r_op2 = p0 + q0
vmlal.u8 q14, d3, d27 ; r_op2 += p3 * 3
vmlal.u8 q14, d4, d23 ; r_op2 += p2 * 2
vbif d0, d4, d20 ; op2 |= p2 & ~(flat & mask)
vaddw.u8 q14, d5 ; r_op2 += p1
vbif d1, d25, d20 ; op1 |= f_op1 & ~(flat & mask)
vqrshrn.u16 d30, q14, #3 ; r_op2
vsubw.u8 q14, d3 ; r_op1 = r_op2 - p3
vsubw.u8 q14, d4 ; r_op1 -= p2
vaddw.u8 q14, d5 ; r_op1 += p1
vaddw.u8 q14, d16 ; r_op1 += q1
vbif d2, d24, d20 ; op0 |= f_op0 & ~(flat & mask)
vqrshrn.u16 d31, q14, #3 ; r_op1
vsubw.u8 q14, d3 ; r_op0 = r_op1 - p3
vsubw.u8 q14, d5 ; r_op0 -= p1
vaddw.u8 q14, d6 ; r_op0 += p0
vaddw.u8 q14, d17 ; r_op0 += q2
vbit d0, d30, d20 ; op2 |= r_op2 & (flat & mask)
vqrshrn.u16 d23, q14, #3 ; r_op0
vsubw.u8 q14, d3 ; r_oq0 = r_op0 - p3
vsubw.u8 q14, d6 ; r_oq0 -= p0
vaddw.u8 q14, d7 ; r_oq0 += q0
vbit d1, d31, d20 ; op1 |= r_op1 & (flat & mask)
vaddw.u8 q14, d18 ; oq0 += q3
vbit d2, d23, d20 ; op0 |= r_op0 & (flat & mask)
vqrshrn.u16 d22, q14, #3 ; r_oq0
vsubw.u8 q14, d4 ; r_oq1 = r_oq0 - p2
vsubw.u8 q14, d7 ; r_oq1 -= q0
vaddw.u8 q14, d16 ; r_oq1 += q1
vbif d3, d21, d20 ; oq0 |= f_oq0 & ~(flat & mask)
vaddw.u8 q14, d18 ; r_oq1 += q3
vbif d4, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask)
vqrshrn.u16 d6, q14, #3 ; r_oq1
vsubw.u8 q14, d5 ; r_oq2 = r_oq1 - p1
vsubw.u8 q14, d16 ; r_oq2 -= q1
vaddw.u8 q14, d17 ; r_oq2 += q2
vaddw.u8 q14, d18 ; r_oq2 += q3
vbif d5, d17, d20 ; oq2 |= q2 & ~(flat & mask)
vqrshrn.u16 d7, q14, #3 ; r_oq2
vbit d3, d22, d20 ; oq0 |= r_oq0 & (flat & mask)
vbit d4, d6, d20 ; oq1 |= r_oq1 & (flat & mask)
vbit d5, d7, d20 ; oq2 |= r_oq2 & (flat & mask)
bx lr
power_branch_only
vmov.u8 d27, #3
vmov.u8 d21, #2
vaddl.u8 q14, d6, d7 ; op2 = p0 + q0
vmlal.u8 q14, d3, d27 ; op2 += p3 * 3
vmlal.u8 q14, d4, d21 ; op2 += p2 * 2
vaddw.u8 q14, d5 ; op2 += p1
vqrshrn.u16 d0, q14, #3 ; op2
vsubw.u8 q14, d3 ; op1 = op2 - p3
vsubw.u8 q14, d4 ; op1 -= p2
vaddw.u8 q14, d5 ; op1 += p1
vaddw.u8 q14, d16 ; op1 += q1
vqrshrn.u16 d1, q14, #3 ; op1
vsubw.u8 q14, d3 ; op0 = op1 - p3
vsubw.u8 q14, d5 ; op0 -= p1
vaddw.u8 q14, d6 ; op0 += p0
vaddw.u8 q14, d17 ; op0 += q2
vqrshrn.u16 d2, q14, #3 ; op0
vsubw.u8 q14, d3 ; oq0 = op0 - p3
vsubw.u8 q14, d6 ; oq0 -= p0
vaddw.u8 q14, d7 ; oq0 += q0
vaddw.u8 q14, d18 ; oq0 += q3
vqrshrn.u16 d3, q14, #3 ; oq0
vsubw.u8 q14, d4 ; oq1 = oq0 - p2
vsubw.u8 q14, d7 ; oq1 -= q0
vaddw.u8 q14, d16 ; oq1 += q1
vaddw.u8 q14, d18 ; oq1 += q3
vqrshrn.u16 d4, q14, #3 ; oq1
vsubw.u8 q14, d5 ; oq2 = oq1 - p1
vsubw.u8 q14, d16 ; oq2 -= q1
vaddw.u8 q14, d17 ; oq2 += q2
vaddw.u8 q14, d18 ; oq2 += q3
vqrshrn.u16 d5, q14, #3 ; oq2
bx lr
filter_branch_only
; TODO(fgalligan): See if we can rearange registers so we do not need to
; do the 2 vswp.
vswp d0, d4 ; op2
vswp d5, d17 ; oq2
veor d2, d24, d22 ; *op0 = u^0x80
veor d3, d21, d22 ; *oq0 = u^0x80
veor d1, d25, d22 ; *op1 = u^0x80
veor d4, d26, d22 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_mbloop_filter_neon|
END

View File

@@ -1,603 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh
; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
|vp9_mb_lpf_horizontal_edge_w_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
ldr r12, [sp, #92] ; load count
h_count
vld1.8 {d16[]}, [r2] ; load *blimit
vld1.8 {d17[]}, [r3] ; load *limit
vld1.8 {d18[]}, [r4] ; load *thresh
sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines
vld1.u8 {d0}, [r8@64], r1 ; p7
vld1.u8 {d1}, [r8@64], r1 ; p6
vld1.u8 {d2}, [r8@64], r1 ; p5
vld1.u8 {d3}, [r8@64], r1 ; p4
vld1.u8 {d4}, [r8@64], r1 ; p3
vld1.u8 {d5}, [r8@64], r1 ; p2
vld1.u8 {d6}, [r8@64], r1 ; p1
vld1.u8 {d7}, [r8@64], r1 ; p0
vld1.u8 {d8}, [r8@64], r1 ; q0
vld1.u8 {d9}, [r8@64], r1 ; q1
vld1.u8 {d10}, [r8@64], r1 ; q2
vld1.u8 {d11}, [r8@64], r1 ; q3
vld1.u8 {d12}, [r8@64], r1 ; q4
vld1.u8 {d13}, [r8@64], r1 ; q5
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
bl vp9_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
; flat && mask were not set for any of the channels. Just store the values
; from filter.
sub r8, r0, r1, lsl #1
vst1.u8 {d25}, [r8@64], r1 ; store op1
vst1.u8 {d24}, [r8@64], r1 ; store op0
vst1.u8 {d23}, [r8@64], r1 ; store oq0
vst1.u8 {d26}, [r8@64], r1 ; store oq1
b h_next
h_mbfilter
tst r7, #2
beq h_wide_mbfilter
; flat2 was not set for any of the channels. Just store the values from
; mbfilter.
sub r8, r0, r1, lsl #1
sub r8, r8, r1
vst1.u8 {d18}, [r8@64], r1 ; store op2
vst1.u8 {d19}, [r8@64], r1 ; store op1
vst1.u8 {d20}, [r8@64], r1 ; store op0
vst1.u8 {d21}, [r8@64], r1 ; store oq0
vst1.u8 {d22}, [r8@64], r1 ; store oq1
vst1.u8 {d23}, [r8@64], r1 ; store oq2
b h_next
h_wide_mbfilter
sub r8, r0, r1, lsl #3
add r8, r8, r1
vst1.u8 {d16}, [r8@64], r1 ; store op6
vst1.u8 {d24}, [r8@64], r1 ; store op5
vst1.u8 {d25}, [r8@64], r1 ; store op4
vst1.u8 {d26}, [r8@64], r1 ; store op3
vst1.u8 {d27}, [r8@64], r1 ; store op2
vst1.u8 {d18}, [r8@64], r1 ; store op1
vst1.u8 {d19}, [r8@64], r1 ; store op0
vst1.u8 {d20}, [r8@64], r1 ; store oq0
vst1.u8 {d21}, [r8@64], r1 ; store oq1
vst1.u8 {d22}, [r8@64], r1 ; store oq2
vst1.u8 {d23}, [r8@64], r1 ; store oq3
vst1.u8 {d1}, [r8@64], r1 ; store oq4
vst1.u8 {d2}, [r8@64], r1 ; store oq5
vst1.u8 {d3}, [r8@64], r1 ; store oq6
h_next
add r0, r0, #8
subs r12, r12, #1
bne h_count
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon|
; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
|vp9_mb_lpf_vertical_edge_w_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
vld1.8 {d16[]}, [r2] ; load *blimit
vld1.8 {d17[]}, [r3] ; load *limit
vld1.8 {d18[]}, [r4] ; load *thresh
sub r8, r0, #8
vld1.8 {d0}, [r8@64], r1
vld1.8 {d8}, [r0@64], r1
vld1.8 {d1}, [r8@64], r1
vld1.8 {d9}, [r0@64], r1
vld1.8 {d2}, [r8@64], r1
vld1.8 {d10}, [r0@64], r1
vld1.8 {d3}, [r8@64], r1
vld1.8 {d11}, [r0@64], r1
vld1.8 {d4}, [r8@64], r1
vld1.8 {d12}, [r0@64], r1
vld1.8 {d5}, [r8@64], r1
vld1.8 {d13}, [r0@64], r1
vld1.8 {d6}, [r8@64], r1
vld1.8 {d14}, [r0@64], r1
vld1.8 {d7}, [r8@64], r1
vld1.8 {d15}, [r0@64], r1
sub r0, r0, r1, lsl #3
vtrn.32 q0, q2
vtrn.32 q1, q3
vtrn.32 q4, q6
vtrn.32 q5, q7
vtrn.16 q0, q1
vtrn.16 q2, q3
vtrn.16 q4, q5
vtrn.16 q6, q7
vtrn.8 d0, d1
vtrn.8 d2, d3
vtrn.8 d4, d5
vtrn.8 d6, d7
vtrn.8 d8, d9
vtrn.8 d10, d11
vtrn.8 d12, d13
vtrn.8 d14, d15
bl vp9_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
; flat && mask were not set for any of the channels. Just store the values
; from filter.
sub r8, r0, #2
vswp d23, d25
vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
b v_end
v_mbfilter
tst r7, #2
beq v_wide_mbfilter
; flat2 was not set for any of the channels. Just store the values from
; mbfilter.
sub r8, r0, #3
vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1
vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1
vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
b v_end
v_wide_mbfilter
sub r8, r0, #8
vtrn.32 d0, d26
vtrn.32 d16, d27
vtrn.32 d24, d18
vtrn.32 d25, d19
vtrn.16 d0, d24
vtrn.16 d16, d25
vtrn.16 d26, d18
vtrn.16 d27, d19
vtrn.8 d0, d16
vtrn.8 d24, d25
vtrn.8 d26, d27
vtrn.8 d18, d19
vtrn.32 d20, d1
vtrn.32 d21, d2
vtrn.32 d22, d3
vtrn.32 d23, d15
vtrn.16 d20, d22
vtrn.16 d21, d23
vtrn.16 d1, d3
vtrn.16 d2, d15
vtrn.8 d20, d21
vtrn.8 d22, d23
vtrn.8 d1, d2
vtrn.8 d3, d15
vst1.8 {d0}, [r8@64], r1
vst1.8 {d20}, [r0@64], r1
vst1.8 {d16}, [r8@64], r1
vst1.8 {d21}, [r0@64], r1
vst1.8 {d24}, [r8@64], r1
vst1.8 {d22}, [r0@64], r1
vst1.8 {d25}, [r8@64], r1
vst1.8 {d23}, [r0@64], r1
vst1.8 {d26}, [r8@64], r1
vst1.8 {d1}, [r0@64], r1
vst1.8 {d27}, [r8@64], r1
vst1.8 {d2}, [r0@64], r1
vst1.8 {d18}, [r8@64], r1
vst1.8 {d3}, [r0@64], r1
vst1.8 {d19}, [r8@64], r1
vst1.8 {d15}, [r0@64], r1
v_end
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store.
;
; r0-r3 PRESERVE
; d16 blimit
; d17 limit
; d18 thresh
; d0 p7
; d1 p6
; d2 p5
; d3 p4
; d4 p3
; d5 p2
; d6 p1
; d7 p0
; d8 q0
; d9 q1
; d10 q2
; d11 q3
; d12 q4
; d13 q5
; d14 q6
; d15 q7
|vp9_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
vabd.u8 d19, d4, d5 ; abs(p3 - p2)
vabd.u8 d20, d5, d6 ; abs(p2 - p1)
vabd.u8 d21, d6, d7 ; abs(p1 - p0)
vabd.u8 d22, d9, d8 ; abs(q1 - q0)
vabd.u8 d23, d10, d9 ; abs(q2 - q1)
vabd.u8 d24, d11, d10 ; abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1))
vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0))
vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2))
vmax.u8 d19, d19, d20
vabd.u8 d24, d7, d8 ; abs(p0 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d6, d9 ; a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2
; abs () > limit
vcge.u8 d19, d17, d19
; flatmask4
vabd.u8 d25, d7, d5 ; abs(p0 - p2)
vabd.u8 d26, d8, d10 ; abs(q0 - q2)
vabd.u8 d27, d4, d7 ; abs(p3 - p0)
vabd.u8 d28, d11, d8 ; abs(q3 - q0)
; only compare the largest value to thresh
vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2))
vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0))
vmax.u8 d25, d25, d26
vmax.u8 d20, d20, d25
vshr.u8 d23, d23, #1 ; a = a / 2
vqadd.u8 d24, d24, d23 ; a = b + a
vmov.u8 d30, #1
vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1
vcge.u8 d20, d30, d20 ; flat
vand d19, d19, d24 ; mask
; hevmask
vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1
vorr d21, d21, d22 ; hev
vand d16, d20, d19 ; flat && mask
vmov r5, r6, d16
; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
vabd.u8 d22, d3, d7 ; abs(p4 - p0)
vabd.u8 d23, d12, d8 ; abs(q4 - q0)
vabd.u8 d24, d7, d2 ; abs(p0 - p5)
vabd.u8 d25, d8, d13 ; abs(q0 - q5)
vabd.u8 d26, d1, d7 ; abs(p6 - p0)
vabd.u8 d27, d14, d8 ; abs(q6 - q0)
vabd.u8 d28, d0, d7 ; abs(p7 - p0)
vabd.u8 d29, d15, d8 ; abs(q7 - q0)
; only compare the largest value to thresh
vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0))
vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5))
vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0))
vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0))
vmax.u8 d26, d22, d23
vmax.u8 d27, d24, d25
vmax.u8 d23, d26, d27
vcge.u8 d18, d30, d23 ; flat2
vmov.u8 d22, #0x80
orrs r5, r5, r6 ; Check for 0
orreq r7, r7, #1 ; Only do filter branch
vand d17, d18, d16 ; flat2 && flat && mask
vmov r5, r6, d17
; mbfilter() function
; filter() function
; convert to signed
veor d23, d8, d22 ; qs0
veor d24, d7, d22 ; ps0
veor d25, d6, d22 ; ps1
veor d26, d9, d22 ; qs1
vmov.u8 d27, #3
vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
vand d29, d29, d21 ; filter &= hev
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 ; filter &= mask
vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 ; filter2 >>= 3
vshr.s8 d29, d29, #3 ; filter1 >>= 3
vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1)
; outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d21 ; filter &= ~hev
vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
veor d24, d24, d22 ; *f_op0 = u^0x80
veor d23, d23, d22 ; *f_oq0 = u^0x80
veor d25, d25, d22 ; *f_op1 = u^0x80
veor d26, d26, d22 ; *f_oq1 = u^0x80
tst r7, #1
bxne lr
; mbfilter flat && mask branch
; TODO(fgalligan): Can I decrease the cycles shifting to consective d's
; and using vibt on the q's?
vmov.u8 d29, #2
vaddl.u8 q15, d7, d8 ; op2 = p0 + q0
vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3
vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2
vaddl.u8 q10, d4, d5
vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
vaddl.u8 q14, d6, d9
vqrshrn.u16 d18, q15, #3 ; r_op2
vsub.i16 q15, q10
vaddl.u8 q10, d4, d6
vadd.i16 q15, q14
vaddl.u8 q14, d7, d10
vqrshrn.u16 d19, q15, #3 ; r_op1
vsub.i16 q15, q10
vadd.i16 q15, q14
vaddl.u8 q14, d8, d11
vqrshrn.u16 d20, q15, #3 ; r_op0
vsubw.u8 q15, d4 ; oq0 = op0 - p3
vsubw.u8 q15, d7 ; oq0 -= p0
vadd.i16 q15, q14
vaddl.u8 q14, d9, d11
vqrshrn.u16 d21, q15, #3 ; r_oq0
vsubw.u8 q15, d5 ; oq1 = oq0 - p2
vsubw.u8 q15, d8 ; oq1 -= q0
vadd.i16 q15, q14
vaddl.u8 q14, d10, d11
vqrshrn.u16 d22, q15, #3 ; r_oq1
vsubw.u8 q15, d6 ; oq2 = oq0 - p1
vsubw.u8 q15, d9 ; oq2 -= q1
vadd.i16 q15, q14
vqrshrn.u16 d27, q15, #3 ; r_oq2
; Filter does not set op2 or oq2, so use p2 and q2.
vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask)
vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask)
vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask)
vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask)
vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask)
vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask)
vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask)
tst r7, #2
bxne lr
; wide_mbfilter flat2 && flat && mask branch
vmov.u8 d16, #7
vaddl.u8 q15, d7, d8 ; op6 = p0 + q0
vaddl.u8 q12, d2, d3
vaddl.u8 q13, d4, d5
vaddl.u8 q14, d1, d6
vmlal.u8 q15, d0, d16 ; op6 += p7 * 3
vadd.i16 q12, q13
vadd.i16 q15, q14
vaddl.u8 q14, d2, d9
vadd.i16 q15, q12
vaddl.u8 q12, d0, d1
vaddw.u8 q15, d1
vaddl.u8 q13, d0, d2
vadd.i16 q14, q15, q14
vqrshrn.u16 d16, q15, #4 ; w_op6
vsub.i16 q15, q14, q12
vaddl.u8 q14, d3, d10
vqrshrn.u16 d24, q15, #4 ; w_op5
vsub.i16 q15, q13
vaddl.u8 q13, d0, d3
vadd.i16 q15, q14
vaddl.u8 q14, d4, d11
vqrshrn.u16 d25, q15, #4 ; w_op4
vadd.i16 q15, q14
vaddl.u8 q14, d0, d4
vsub.i16 q15, q13
vsub.i16 q14, q15, q14
vqrshrn.u16 d26, q15, #4 ; w_op3
vaddw.u8 q15, q14, d5 ; op2 += p2
vaddl.u8 q14, d0, d5
vaddw.u8 q15, d12 ; op2 += q4
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
vqrshrn.u16 d27, q15, #4 ; w_op2
vsub.i16 q15, q14
vaddl.u8 q14, d0, d6
vaddw.u8 q15, d6 ; op1 += p1
vaddw.u8 q15, d13 ; op1 += q5
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
vqrshrn.u16 d18, q15, #4 ; w_op1
vsub.i16 q15, q14
vaddl.u8 q14, d0, d7
vaddw.u8 q15, d7 ; op0 += p0
vaddw.u8 q15, d14 ; op0 += q6
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
vqrshrn.u16 d19, q15, #4 ; w_op0
vsub.i16 q15, q14
vaddl.u8 q14, d1, d8
vaddw.u8 q15, d8 ; oq0 += q0
vaddw.u8 q15, d15 ; oq0 += q7
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
vqrshrn.u16 d20, q15, #4 ; w_oq0
vsub.i16 q15, q14
vaddl.u8 q14, d2, d9
vaddw.u8 q15, d9 ; oq1 += q1
vaddl.u8 q4, d10, d15
vaddw.u8 q15, d15 ; oq1 += q7
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
vqrshrn.u16 d21, q15, #4 ; w_oq1
vsub.i16 q15, q14
vaddl.u8 q14, d3, d10
vadd.i16 q15, q4
vaddl.u8 q4, d11, d15
vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m)
vqrshrn.u16 d22, q15, #4 ; w_oq2
vsub.i16 q15, q14
vaddl.u8 q14, d4, d11
vadd.i16 q15, q4
vaddl.u8 q4, d12, d15
vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m)
vqrshrn.u16 d23, q15, #4 ; w_oq3
vsub.i16 q15, q14
vaddl.u8 q14, d5, d12
vadd.i16 q15, q4
vaddl.u8 q4, d13, d15
vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m)
vqrshrn.u16 d1, q15, #4 ; w_oq4
vsub.i16 q15, q14
vaddl.u8 q14, d6, d13
vadd.i16 q15, q4
vaddl.u8 q4, d14, d15
vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m)
vqrshrn.u16 d2, q15, #4 ; w_oq5
vsub.i16 q15, q14
vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m)
vadd.i16 q15, q4
vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m)
vqrshrn.u16 d3, q15, #4 ; w_oq6
vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m)
vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m)
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
ENDP ; |vp9_wide_mbfilter_neon|
END

View File

@@ -1,198 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp9_short_idct16x16_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct16x16_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; out = dct_const_round_shift(out * cospi_16_64)
mul r0, r0, r12 ; out * cospi_16_64
mov r12, r1 ; save dest
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; a1 = ROUND_POWER_OF_TWO(out, 6)
add r0, r0, #32 ; + (1 <<((6) - 1))
asr r0, r0, #6 ; >> 6
vdup.s16 q0, r0 ; duplicate a1
mov r0, #8
sub r2, #8
; load destination data row0 - row3
vld1.64 {d2}, [r1], r0
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r0
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r0
vld1.64 {d7}, [r1], r2
vld1.64 {d16}, [r1], r0
vld1.64 {d17}, [r1], r2
vaddw.u8 q9, q0, d2 ; dest[x] + a1
vaddw.u8 q10, q0, d3 ; dest[x] + a1
vaddw.u8 q11, q0, d4 ; dest[x] + a1
vaddw.u8 q12, q0, d5 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
vaddw.u8 q9, q0, d6 ; dest[x] + a1
vaddw.u8 q10, q0, d7 ; dest[x] + a1
vaddw.u8 q11, q0, d16 ; dest[x] + a1
vaddw.u8 q12, q0, d17 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
; load destination data row4 - row7
vld1.64 {d2}, [r1], r0
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r0
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r0
vld1.64 {d7}, [r1], r2
vld1.64 {d16}, [r1], r0
vld1.64 {d17}, [r1], r2
vaddw.u8 q9, q0, d2 ; dest[x] + a1
vaddw.u8 q10, q0, d3 ; dest[x] + a1
vaddw.u8 q11, q0, d4 ; dest[x] + a1
vaddw.u8 q12, q0, d5 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
vaddw.u8 q9, q0, d6 ; dest[x] + a1
vaddw.u8 q10, q0, d7 ; dest[x] + a1
vaddw.u8 q11, q0, d16 ; dest[x] + a1
vaddw.u8 q12, q0, d17 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
; load destination data row8 - row11
vld1.64 {d2}, [r1], r0
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r0
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r0
vld1.64 {d7}, [r1], r2
vld1.64 {d16}, [r1], r0
vld1.64 {d17}, [r1], r2
vaddw.u8 q9, q0, d2 ; dest[x] + a1
vaddw.u8 q10, q0, d3 ; dest[x] + a1
vaddw.u8 q11, q0, d4 ; dest[x] + a1
vaddw.u8 q12, q0, d5 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
vaddw.u8 q9, q0, d6 ; dest[x] + a1
vaddw.u8 q10, q0, d7 ; dest[x] + a1
vaddw.u8 q11, q0, d16 ; dest[x] + a1
vaddw.u8 q12, q0, d17 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
; load destination data row12 - row15
vld1.64 {d2}, [r1], r0
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r0
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r0
vld1.64 {d7}, [r1], r2
vld1.64 {d16}, [r1], r0
vld1.64 {d17}, [r1], r2
vaddw.u8 q9, q0, d2 ; dest[x] + a1
vaddw.u8 q10, q0, d3 ; dest[x] + a1
vaddw.u8 q11, q0, d4 ; dest[x] + a1
vaddw.u8 q12, q0, d5 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
vaddw.u8 q9, q0, d6 ; dest[x] + a1
vaddw.u8 q10, q0, d7 ; dest[x] + a1
vaddw.u8 q11, q0, d16 ; dest[x] + a1
vaddw.u8 q12, q0, d17 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r0
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r0
vst1.64 {d31}, [r12], r2
bx lr
ENDP ; |vp9_short_idct16x16_1_add_neon|
END

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,68 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp9_short_idct4x4_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct4x4_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; out = dct_const_round_shift(out * cospi_16_64)
mul r0, r0, r12 ; out * cospi_16_64
mov r12, r1 ; save dest
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; a1 = ROUND_POWER_OF_TWO(out, 4)
add r0, r0, #8 ; + (1 <<((4) - 1))
asr r0, r0, #4 ; >> 4
vdup.s16 q0, r0 ; duplicate a1
vld1.32 {d2[0]}, [r1], r2
vld1.32 {d2[1]}, [r1], r2
vld1.32 {d4[0]}, [r1], r2
vld1.32 {d4[1]}, [r1]
vaddw.u8 q8, q0, d2 ; dest[x] + a1
vaddw.u8 q9, q0, d4
vqmovun.s16 d6, q8 ; clip_pixel
vqmovun.s16 d7, q9
vst1.32 {d6[0]}, [r12], r2
vst1.32 {d6[1]}, [r12], r2
vst1.32 {d7[0]}, [r12], r2
vst1.32 {d7[1]}, [r12]
bx lr
ENDP ; |vp9_short_idct4x4_1_add_neon|
END

View File

@@ -1,190 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_short_idct4x4_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
AREA Block, CODE, READONLY ; name this block of code
;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct4x4_add_neon| PROC
; The 2D transform is done with two passes which are actually pretty
; similar. We first transform the rows. This is done by transposing
; the inputs, doing an SIMD column transform (the columns are the
; transposed rows) and then transpose the results (so that it goes back
; in normal/row positions). Then, we transform the columns by doing
; another SIMD column transform.
; So, two passes of a transpose followed by a column transform.
; load the inputs into q8-q9, d16-d19
vld1.s16 {q8,q9}, [r0]!
; generate scalar constants
; cospi_8_64 = 15137 = 0x3b21
mov r0, #0x3b00
add r0, #0x21
; cospi_16_64 = 11585 = 0x2d41
mov r3, #0x2d00
add r3, #0x41
; cospi_24_64 = 6270 = 0x 187e
mov r12, #0x1800
add r12, #0x7e
; transpose the input data
; 00 01 02 03 d16
; 10 11 12 13 d17
; 20 21 22 23 d18
; 30 31 32 33 d19
vtrn.16 d16, d17
vtrn.16 d18, d19
; generate constant vectors
vdup.16 d20, r0 ; replicate cospi_8_64
vdup.16 d21, r3 ; replicate cospi_16_64
; 00 10 02 12 d16
; 01 11 03 13 d17
; 20 30 22 32 d18
; 21 31 23 33 d19
vtrn.32 q8, q9
; 00 10 20 30 d16
; 01 11 21 31 d17
; 02 12 22 32 d18
; 03 13 23 33 d19
vdup.16 d22, r12 ; replicate cospi_24_64
; do the transform on transposed rows
; stage 1
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
; (input[0] + input[2]) * cospi_16_64;
; (input[0] - input[2]) * cospi_16_64;
vmull.s16 q13, d23, d21
vmull.s16 q14, d24, d21
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
vmlsl.s16 q15, d19, d20
vmlal.s16 q1, d19, d22
; dct_const_round_shift
vqrshrn.s32 d26, q13, #14
vqrshrn.s32 d27, q14, #14
vqrshrn.s32 d29, q15, #14
vqrshrn.s32 d28, q1, #14
; stage 2
; output[0] = step[0] + step[3];
; output[1] = step[1] + step[2];
; output[3] = step[0] - step[3];
; output[2] = step[1] - step[2];
vadd.s16 q8, q13, q14
vsub.s16 q9, q13, q14
vswp d18, d19
; transpose the results
; 00 01 02 03 d16
; 10 11 12 13 d17
; 20 21 22 23 d18
; 30 31 32 33 d19
vtrn.16 d16, d17
vtrn.16 d18, d19
; 00 10 02 12 d16
; 01 11 03 13 d17
; 20 30 22 32 d18
; 21 31 23 33 d19
vtrn.32 q8, q9
; 00 10 20 30 d16
; 01 11 21 31 d17
; 02 12 22 32 d18
; 03 13 23 33 d19
; do the transform on columns
; stage 1
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
; (input[0] + input[2]) * cospi_16_64;
; (input[0] - input[2]) * cospi_16_64;
vmull.s16 q13, d23, d21
vmull.s16 q14, d24, d21
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
vmlsl.s16 q15, d19, d20
vmlal.s16 q1, d19, d22
; dct_const_round_shift
vqrshrn.s32 d26, q13, #14
vqrshrn.s32 d27, q14, #14
vqrshrn.s32 d29, q15, #14
vqrshrn.s32 d28, q1, #14
; stage 2
; output[0] = step[0] + step[3];
; output[1] = step[1] + step[2];
; output[3] = step[0] - step[3];
; output[2] = step[1] - step[2];
vadd.s16 q8, q13, q14
vsub.s16 q9, q13, q14
; The results are in two registers, one of them being swapped. This will
; be taken care of by loading the 'dest' value in a swapped fashion and
; also storing them in the same swapped fashion.
; temp_out[0, 1] = d16, d17 = q8
; temp_out[2, 3] = d19, d18 = q9 swapped
; ROUND_POWER_OF_TWO(temp_out[j], 4)
vrshr.s16 q8, q8, #4
vrshr.s16 q9, q9, #4
vld1.32 {d26[0]}, [r1], r2
vld1.32 {d26[1]}, [r1], r2
vld1.32 {d27[1]}, [r1], r2
vld1.32 {d27[0]}, [r1] ; no post-increment
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d26
vaddw.u8 q9, q9, d27
; clip_pixel
vqmovun.s16 d26, q8
vqmovun.s16 d27, q9
; do the stores in reverse order with negative post-increment, by changing
; the sign of the stride
rsb r2, r2, #0
vst1.32 {d27[0]}, [r1], r2
vst1.32 {d27[1]}, [r1], r2
vst1.32 {d26[1]}, [r1], r2
vst1.32 {d26[0]}, [r1] ; no post-increment
bx lr
ENDP ; |vp9_short_idct4x4_add_neon|
END

View File

@@ -1,88 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp9_short_idct8x8_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct8x8_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; out = dct_const_round_shift(out * cospi_16_64)
mul r0, r0, r12 ; out * cospi_16_64
mov r12, r1 ; save dest
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; a1 = ROUND_POWER_OF_TWO(out, 5)
add r0, r0, #16 ; + (1 <<((5) - 1))
asr r0, r0, #5 ; >> 5
vdup.s16 q0, r0 ; duplicate a1
; load destination data
vld1.64 {d2}, [r1], r2
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r2
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r2
vld1.64 {d7}, [r1], r2
vld1.64 {d16}, [r1], r2
vld1.64 {d17}, [r1]
vaddw.u8 q9, q0, d2 ; dest[x] + a1
vaddw.u8 q10, q0, d3 ; dest[x] + a1
vaddw.u8 q11, q0, d4 ; dest[x] + a1
vaddw.u8 q12, q0, d5 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r2
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r2
vst1.64 {d31}, [r12], r2
vaddw.u8 q9, q0, d6 ; dest[x] + a1
vaddw.u8 q10, q0, d7 ; dest[x] + a1
vaddw.u8 q11, q0, d16 ; dest[x] + a1
vaddw.u8 q12, q0, d17 ; dest[x] + a1
vqmovun.s16 d2, q9 ; clip_pixel
vqmovun.s16 d3, q10 ; clip_pixel
vqmovun.s16 d30, q11 ; clip_pixel
vqmovun.s16 d31, q12 ; clip_pixel
vst1.64 {d2}, [r12], r2
vst1.64 {d3}, [r12], r2
vst1.64 {d30}, [r12], r2
vst1.64 {d31}, [r12], r2
bx lr
ENDP ; |vp9_short_idct8x8_1_add_neon|
END

View File

@@ -1,519 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_short_idct8x8_add_neon|
EXPORT |vp9_short_idct10_8x8_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Parallel 1D IDCT on all the columns of a 8x8 16bit data matrix which are
; loaded in q8-q15. The output will be stored back into q8-q15 registers.
; This macro will touch q0-q7 registers and use them as buffer during
; calculation.
MACRO
IDCT8x8_1D
; stage 1
vdup.16 d0, r3 ; duplicate cospi_28_64
vdup.16 d1, r4 ; duplicate cospi_4_64
vdup.16 d2, r5 ; duplicate cospi_12_64
vdup.16 d3, r6 ; duplicate cospi_20_64
; input[1] * cospi_28_64
vmull.s16 q2, d18, d0
vmull.s16 q3, d19, d0
; input[5] * cospi_12_64
vmull.s16 q5, d26, d2
vmull.s16 q6, d27, d2
; input[1]*cospi_28_64-input[7]*cospi_4_64
vmlsl.s16 q2, d30, d1
vmlsl.s16 q3, d31, d1
; input[5] * cospi_12_64 - input[3] * cospi_20_64
vmlsl.s16 q5, d22, d3
vmlsl.s16 q6, d23, d3
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d8, q2, #14 ; >> 14
vqrshrn.s32 d9, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d10, q5, #14 ; >> 14
vqrshrn.s32 d11, q6, #14 ; >> 14
; input[1] * cospi_4_64
vmull.s16 q2, d18, d1
vmull.s16 q3, d19, d1
; input[5] * cospi_20_64
vmull.s16 q9, d26, d3
vmull.s16 q13, d27, d3
; input[1]*cospi_4_64+input[7]*cospi_28_64
vmlal.s16 q2, d30, d0
vmlal.s16 q3, d31, d0
; input[5] * cospi_20_64 + input[3] * cospi_12_64
vmlal.s16 q9, d22, d2
vmlal.s16 q13, d23, d2
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d14, q2, #14 ; >> 14
vqrshrn.s32 d15, q3, #14 ; >> 14
; stage 2 & stage 3 - even half
vdup.16 d0, r7 ; duplicate cospi_16_64
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d12, q9, #14 ; >> 14
vqrshrn.s32 d13, q13, #14 ; >> 14
; input[0] * cospi_16_64
vmull.s16 q2, d16, d0
vmull.s16 q3, d17, d0
; input[0] * cospi_16_64
vmull.s16 q13, d16, d0
vmull.s16 q15, d17, d0
; (input[0] + input[2]) * cospi_16_64
vmlal.s16 q2, d24, d0
vmlal.s16 q3, d25, d0
; (input[0] - input[2]) * cospi_16_64
vmlsl.s16 q13, d24, d0
vmlsl.s16 q15, d25, d0
vdup.16 d0, r8 ; duplicate cospi_24_64
vdup.16 d1, r9 ; duplicate cospi_8_64
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d18, q2, #14 ; >> 14
vqrshrn.s32 d19, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d22, q13, #14 ; >> 14
vqrshrn.s32 d23, q15, #14 ; >> 14
; input[1] * cospi_24_64 - input[3] * cospi_8_64
; input[1] * cospi_24_64
vmull.s16 q2, d20, d0
vmull.s16 q3, d21, d0
; input[1] * cospi_8_64
vmull.s16 q8, d20, d1
vmull.s16 q12, d21, d1
; input[1] * cospi_24_64 - input[3] * cospi_8_64
vmlsl.s16 q2, d28, d1
vmlsl.s16 q3, d29, d1
; input[1] * cospi_8_64 + input[3] * cospi_24_64
vmlal.s16 q8, d28, d0
vmlal.s16 q12, d29, d0
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d26, q2, #14 ; >> 14
vqrshrn.s32 d27, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d30, q8, #14 ; >> 14
vqrshrn.s32 d31, q12, #14 ; >> 14
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
; stage 3 -odd half
vdup.16 d16, r7 ; duplicate cospi_16_64
; stage 2 - odd half
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
vmull.s16 q10, d29, d16
; step2[6] * cospi_16_64
vmull.s16 q11, d28, d16
vmull.s16 q12, d29, d16
; (step2[6] - step2[5]) * cospi_16_64
vmlsl.s16 q9, d26, d16
vmlsl.s16 q10, d27, d16
; (step2[5] + step2[6]) * cospi_16_64
vmlal.s16 q11, d26, d16
vmlal.s16 q12, d27, d16
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d10, q9, #14 ; >> 14
vqrshrn.s32 d11, q10, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d12, q11, #14 ; >> 14
vqrshrn.s32 d13, q12, #14 ; >> 14
; stage 4
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
MEND
; Transpose a 8x8 16bit data matrix. Datas are loaded in q8-q15.
MACRO
TRANSPOSE8X8
vswp d17, d24
vswp d23, d30
vswp d21, d28
vswp d19, d26
vtrn.32 q8, q10
vtrn.32 q9, q11
vtrn.32 q12, q14
vtrn.32 q13, q15
vtrn.16 q8, q9
vtrn.16 q10, q11
vtrn.16 q12, q13
vtrn.16 q14, q15
MEND
AREA Block, CODE, READONLY ; name this block of code
;void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct8x8_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vld1.s16 {q10,q11}, [r0]!
vld1.s16 {q12,q13}, [r0]!
vld1.s16 {q14,q15}, [r0]!
; transpose the input data
TRANSPOSE8X8
; generate cospi_28_64 = 3196
mov r3, #0x0c00
add r3, #0x7c
; generate cospi_4_64 = 16069
mov r4, #0x3e00
add r4, #0xc5
; generate cospi_12_64 = 13623
mov r5, #0x3500
add r5, #0x37
; generate cospi_20_64 = 9102
mov r6, #0x2300
add r6, #0x8e
; generate cospi_16_64 = 11585
mov r7, #0x2d00
add r7, #0x41
; generate cospi_24_64 = 6270
mov r8, #0x1800
add r8, #0x7e
; generate cospi_8_64 = 15137
mov r9, #0x3b00
add r9, #0x21
; First transform rows
IDCT8x8_1D
; Transpose the matrix
TRANSPOSE8X8
; Then transform columns
IDCT8x8_1D
; ROUND_POWER_OF_TWO(temp_out[j], 5)
vrshr.s16 q8, q8, #5
vrshr.s16 q9, q9, #5
vrshr.s16 q10, q10, #5
vrshr.s16 q11, q11, #5
vrshr.s16 q12, q12, #5
vrshr.s16 q13, q13, #5
vrshr.s16 q14, q14, #5
vrshr.s16 q15, q15, #5
; save dest pointer
mov r0, r1
; load destination data
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r2
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r2
vld1.64 {d7}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d0
vaddw.u8 q9, q9, d1
vaddw.u8 q10, q10, d2
vaddw.u8 q11, q11, d3
vaddw.u8 q12, q12, d4
vaddw.u8 q13, q13, d5
vaddw.u8 q14, q14, d6
vaddw.u8 q15, q15, d7
; clip_pixel
vqmovun.s16 d0, q8
vqmovun.s16 d1, q9
vqmovun.s16 d2, q10
vqmovun.s16 d3, q11
vqmovun.s16 d4, q12
vqmovun.s16 d5, q13
vqmovun.s16 d6, q14
vqmovun.s16 d7, q15
; store the data
vst1.64 {d0}, [r0], r2
vst1.64 {d1}, [r0], r2
vst1.64 {d2}, [r0], r2
vst1.64 {d3}, [r0], r2
vst1.64 {d4}, [r0], r2
vst1.64 {d5}, [r0], r2
vst1.64 {d6}, [r0], r2
vst1.64 {d7}, [r0], r2
vpop {d8-d15}
pop {r4-r9}
bx lr
ENDP ; |vp9_short_idct8x8_add_neon|
;void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
|vp9_short_idct10_8x8_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vld1.s16 {q10,q11}, [r0]!
vld1.s16 {q12,q13}, [r0]!
vld1.s16 {q14,q15}, [r0]!
; transpose the input data
TRANSPOSE8X8
; generate cospi_28_64 = 3196
mov r3, #0x0c00
add r3, #0x7c
; generate cospi_4_64 = 16069
mov r4, #0x3e00
add r4, #0xc5
; generate cospi_12_64 = 13623
mov r5, #0x3500
add r5, #0x37
; generate cospi_20_64 = 9102
mov r6, #0x2300
add r6, #0x8e
; generate cospi_16_64 = 11585
mov r7, #0x2d00
add r7, #0x41
; generate cospi_24_64 = 6270
mov r8, #0x1800
add r8, #0x7e
; generate cospi_8_64 = 15137
mov r9, #0x3b00
add r9, #0x21
; First transform rows
; stage 1
; The following instructions use vqrdmulh to do the
; dct_const_round_shift(input[1] * cospi_28_64). vqrdmulh will do doubling
; multiply and shift the result by 16 bits instead of 14 bits. So we need
; to double the constants before multiplying to compensate this.
mov r12, r3, lsl #1
vdup.16 q0, r12 ; duplicate cospi_28_64*2
mov r12, r4, lsl #1
vdup.16 q1, r12 ; duplicate cospi_4_64*2
; dct_const_round_shift(input[1] * cospi_28_64)
vqrdmulh.s16 q4, q9, q0
mov r12, r6, lsl #1
rsb r12, #0
vdup.16 q0, r12 ; duplicate -cospi_20_64*2
; dct_const_round_shift(input[1] * cospi_4_64)
vqrdmulh.s16 q7, q9, q1
mov r12, r5, lsl #1
vdup.16 q1, r12 ; duplicate cospi_12_64*2
; dct_const_round_shift(- input[3] * cospi_20_64)
vqrdmulh.s16 q5, q11, q0
mov r12, r7, lsl #1
vdup.16 q0, r12 ; duplicate cospi_16_64*2
; dct_const_round_shift(input[3] * cospi_12_64)
vqrdmulh.s16 q6, q11, q1
; stage 2 & stage 3 - even half
mov r12, r8, lsl #1
vdup.16 q1, r12 ; duplicate cospi_24_64*2
; dct_const_round_shift(input_dc * cospi_16_64)
vqrdmulh.s16 q9, q8, q0
mov r12, r9, lsl #1
vdup.16 q0, r12 ; duplicate cospi_8_64*2
; dct_const_round_shift(input[1] * cospi_24_64)
vqrdmulh.s16 q13, q10, q1
; dct_const_round_shift(input[1] * cospi_8_64)
vqrdmulh.s16 q15, q10, q0
; stage 3 -odd half
vdup.16 d16, r7 ; duplicate cospi_16_64
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
vadd.s16 q1, q9, q13 ; output[1] = step[1] + step[2]
vsub.s16 q2, q9, q13 ; output[2] = step[1] - step[2]
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
; stage 2 - odd half
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
vmull.s16 q10, d29, d16
; step2[6] * cospi_16_64
vmull.s16 q11, d28, d16
vmull.s16 q12, d29, d16
; (step2[6] - step2[5]) * cospi_16_64
vmlsl.s16 q9, d26, d16
vmlsl.s16 q10, d27, d16
; (step2[5] + step2[6]) * cospi_16_64
vmlal.s16 q11, d26, d16
vmlal.s16 q12, d27, d16
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d10, q9, #14 ; >> 14
vqrshrn.s32 d11, q10, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d12, q11, #14 ; >> 14
vqrshrn.s32 d13, q12, #14 ; >> 14
; stage 4
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
; Transpose the matrix
TRANSPOSE8X8
; Then transform columns
IDCT8x8_1D
; ROUND_POWER_OF_TWO(temp_out[j], 5)
vrshr.s16 q8, q8, #5
vrshr.s16 q9, q9, #5
vrshr.s16 q10, q10, #5
vrshr.s16 q11, q11, #5
vrshr.s16 q12, q12, #5
vrshr.s16 q13, q13, #5
vrshr.s16 q14, q14, #5
vrshr.s16 q15, q15, #5
; save dest pointer
mov r0, r1
; load destination data
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r2
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r2
vld1.64 {d7}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d0
vaddw.u8 q9, q9, d1
vaddw.u8 q10, q10, d2
vaddw.u8 q11, q11, d3
vaddw.u8 q12, q12, d4
vaddw.u8 q13, q13, d5
vaddw.u8 q14, q14, d6
vaddw.u8 q15, q15, d7
; clip_pixel
vqmovun.s16 d0, q8
vqmovun.s16 d1, q9
vqmovun.s16 d2, q10
vqmovun.s16 d3, q11
vqmovun.s16 d4, q12
vqmovun.s16 d5, q13
vqmovun.s16 d6, q14
vqmovun.s16 d7, q15
; store the data
vst1.64 {d0}, [r0], r2
vst1.64 {d1}, [r0], r2
vst1.64 {d2}, [r0], r2
vst1.64 {d3}, [r0], r2
vst1.64 {d4}, [r0], r2
vst1.64 {d5}, [r0], r2
vst1.64 {d6}, [r0], r2
vst1.64 {d7}, [r0], r2
vpop {d8-d15}
pop {r4-r9}
bx lr
ENDP ; |vp9_short_idct10_8x8_add_neon|
END

View File

@@ -1,237 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_short_iht4x4_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Parallel 1D IDCT on all the columns of a 4x4 16bits data matrix which are
; loaded in d16-d19. d0 must contain cospi_8_64. d1 must contain
; cospi_16_64. d2 must contain cospi_24_64. The output will be stored back
; into d16-d19 registers. This macro will touch q10- q15 registers and use
; them as buffer during calculation.
MACRO
IDCT4x4_1D
; stage 1
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
vmull.s16 q15, d17, d2 ; input[1] * cospi_24_64
vmull.s16 q10, d17, d0 ; input[1] * cospi_8_64
vmull.s16 q13, d23, d1 ; (input[0] + input[2]) * cospi_16_64
vmull.s16 q14, d24, d1 ; (input[0] - input[2]) * cospi_16_64
vmlsl.s16 q15, d19, d0 ; input[1] * cospi_24_64 - input[3] * cospi_8_64
vmlal.s16 q10, d19, d2 ; input[1] * cospi_8_64 + input[3] * cospi_24_64
; dct_const_round_shift
vqrshrn.s32 d26, q13, #14
vqrshrn.s32 d27, q14, #14
vqrshrn.s32 d29, q15, #14
vqrshrn.s32 d28, q10, #14
; stage 2
; output[0] = step[0] + step[3];
; output[1] = step[1] + step[2];
; output[3] = step[0] - step[3];
; output[2] = step[1] - step[2];
vadd.s16 q8, q13, q14
vsub.s16 q9, q13, q14
vswp d18, d19
MEND
; Parallel 1D IADST on all the columns of a 4x4 16bits data matrix which
; loaded in d16-d19. d3 must contain sinpi_1_9. d4 must contain sinpi_2_9.
; d5 must contain sinpi_4_9. d6 must contain sinpi_3_9. The output will be
; stored back into d16-d19 registers. This macro will touch q11,q12,q13,
; q14,q15 registers and use them as buffer during calculation.
MACRO
IADST4x4_1D
vmull.s16 q10, d3, d16 ; s0 = sinpi_1_9 * x0
vmull.s16 q11, d4, d16 ; s1 = sinpi_2_9 * x0
vmull.s16 q12, d6, d17 ; s2 = sinpi_3_9 * x1
vmull.s16 q13, d5, d18 ; s3 = sinpi_4_9 * x2
vmull.s16 q14, d3, d18 ; s4 = sinpi_1_9 * x2
vmovl.s16 q15, d16 ; expand x0 from 16 bit to 32 bit
vaddw.s16 q15, q15, d19 ; x0 + x3
vmull.s16 q8, d4, d19 ; s5 = sinpi_2_9 * x3
vsubw.s16 q15, q15, d18 ; s7 = x0 + x3 - x2
vmull.s16 q9, d5, d19 ; s6 = sinpi_4_9 * x3
vadd.s32 q10, q10, q13 ; x0 = s0 + s3 + s5
vadd.s32 q10, q10, q8
vsub.s32 q11, q11, q14 ; x1 = s1 - s4 - s6
vdup.32 q8, r0 ; duplicate sinpi_3_9
vsub.s32 q11, q11, q9
vmul.s32 q15, q15, q8 ; x2 = sinpi_3_9 * s7
vadd.s32 q13, q10, q12 ; s0 = x0 + x3
vadd.s32 q10, q10, q11 ; x0 + x1
vadd.s32 q14, q11, q12 ; s1 = x1 + x3
vsub.s32 q10, q10, q12 ; s3 = x0 + x1 - x3
; dct_const_round_shift
vqrshrn.s32 d16, q13, #14
vqrshrn.s32 d17, q14, #14
vqrshrn.s32 d18, q15, #14
vqrshrn.s32 d19, q10, #14
MEND
; Generate cosine constants in d6 - d8 for the IDCT
MACRO
GENERATE_COSINE_CONSTANTS
; cospi_8_64 = 15137 = 0x3b21
mov r0, #0x3b00
add r0, #0x21
; cospi_16_64 = 11585 = 0x2d41
mov r3, #0x2d00
add r3, #0x41
; cospi_24_64 = 6270 = 0x187e
mov r12, #0x1800
add r12, #0x7e
; generate constant vectors
vdup.16 d0, r0 ; duplicate cospi_8_64
vdup.16 d1, r3 ; duplicate cospi_16_64
vdup.16 d2, r12 ; duplicate cospi_24_64
MEND
; Generate sine constants in d1 - d4 for the IADST.
MACRO
GENERATE_SINE_CONSTANTS
; sinpi_1_9 = 5283 = 0x14A3
mov r0, #0x1400
add r0, #0xa3
; sinpi_2_9 = 9929 = 0x26C9
mov r3, #0x2600
add r3, #0xc9
; sinpi_4_9 = 15212 = 0x3B6C
mov r12, #0x3b00
add r12, #0x6c
; generate constant vectors
vdup.16 d3, r0 ; duplicate sinpi_1_9
; sinpi_3_9 = 13377 = 0x3441
mov r0, #0x3400
add r0, #0x41
vdup.16 d4, r3 ; duplicate sinpi_2_9
vdup.16 d5, r12 ; duplicate sinpi_4_9
vdup.16 q3, r0 ; duplicate sinpi_3_9
MEND
; Transpose a 4x4 16bits data matrix. Datas are loaded in d16-d19.
MACRO
TRANSPOSE4X4
vtrn.16 d16, d17
vtrn.16 d18, d19
vtrn.32 q8, q9
MEND
AREA Block, CODE, READONLY ; name this block of code
;void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride, int tx_type)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride
; r3 int tx_type)
; This function will only handle tx_type of 1,2,3.
|vp9_short_iht4x4_add_neon| PROC
; load the inputs into d16-d19
vld1.s16 {q8,q9}, [r0]!
; transpose the input data
TRANSPOSE4X4
; decide the type of transform
cmp r3, #2
beq idct_iadst
cmp r3, #3
beq iadst_iadst
iadst_idct
; generate constants
GENERATE_COSINE_CONSTANTS
GENERATE_SINE_CONSTANTS
; first transform rows
IDCT4x4_1D
; transpose the matrix
TRANSPOSE4X4
; then transform columns
IADST4x4_1D
b end_vp9_short_iht4x4_add_neon
idct_iadst
; generate constants
GENERATE_COSINE_CONSTANTS
GENERATE_SINE_CONSTANTS
; first transform rows
IADST4x4_1D
; transpose the matrix
TRANSPOSE4X4
; then transform columns
IDCT4x4_1D
b end_vp9_short_iht4x4_add_neon
iadst_iadst
; generate constants
GENERATE_SINE_CONSTANTS
; first transform rows
IADST4x4_1D
; transpose the matrix
TRANSPOSE4X4
; then transform columns
IADST4x4_1D
end_vp9_short_iht4x4_add_neon
; ROUND_POWER_OF_TWO(temp_out[j], 4)
vrshr.s16 q8, q8, #4
vrshr.s16 q9, q9, #4
vld1.32 {d26[0]}, [r1], r2
vld1.32 {d26[1]}, [r1], r2
vld1.32 {d27[0]}, [r1], r2
vld1.32 {d27[1]}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d26
vaddw.u8 q9, q9, d27
; clip_pixel
vqmovun.s16 d26, q8
vqmovun.s16 d27, q9
; do the stores in reverse order with negative post-increment, by changing
; the sign of the stride
rsb r2, r2, #0
vst1.32 {d27[1]}, [r1], r2
vst1.32 {d27[0]}, [r1], r2
vst1.32 {d26[1]}, [r1], r2
vst1.32 {d26[0]}, [r1] ; no post-increment
bx lr
ENDP ; |vp9_short_iht4x4_add_neon|
END

View File

@@ -1,696 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_short_iht8x8_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Generate IADST constants in r0 - r12 for the IADST.
MACRO
GENERATE_IADST_CONSTANTS
; generate cospi_2_64 = 16305
mov r0, #0x3f00
add r0, #0xb1
; generate cospi_30_64 = 1606
mov r1, #0x600
add r1, #0x46
; generate cospi_10_64 = 14449
mov r2, #0x3800
add r2, #0x71
; generate cospi_22_64 = 7723
mov r3, #0x1e00
add r3, #0x2b
; generate cospi_18_64 = 10394
mov r4, #0x2800
add r4, #0x9a
; generate cospi_14_64 = 12665
mov r5, #0x3100
add r5, #0x79
; generate cospi_26_64 = 4756
mov r6, #0x1200
add r6, #0x94
; generate cospi_6_64 = 15679
mov r7, #0x3d00
add r7, #0x3f
; generate cospi_8_64 = 15137
mov r8, #0x3b00
add r8, #0x21
; generate cospi_24_64 = 6270
mov r9, #0x1800
add r9, #0x7e
; generate 0
mov r10, #0
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
MEND
; Generate IDCT constants in r3 - r9 for the IDCT.
MACRO
GENERATE_IDCT_CONSTANTS
; generate cospi_28_64 = 3196
mov r3, #0x0c00
add r3, #0x7c
; generate cospi_4_64 = 16069
mov r4, #0x3e00
add r4, #0xc5
; generate cospi_12_64 = 13623
mov r5, #0x3500
add r5, #0x37
; generate cospi_20_64 = 9102
mov r6, #0x2300
add r6, #0x8e
; generate cospi_16_64 = 11585
mov r7, #0x2d00
add r7, #0x41
; generate cospi_24_64 = 6270
mov r8, #0x1800
add r8, #0x7e
; generate cospi_8_64 = 15137
mov r9, #0x3b00
add r9, #0x21
MEND
; Transpose a 8x8 16bits data matrix. Datas are loaded in q8-q15.
MACRO
TRANSPOSE8X8
vswp d17, d24
vswp d23, d30
vswp d21, d28
vswp d19, d26
vtrn.32 q8, q10
vtrn.32 q9, q11
vtrn.32 q12, q14
vtrn.32 q13, q15
vtrn.16 q8, q9
vtrn.16 q10, q11
vtrn.16 q12, q13
vtrn.16 q14, q15
MEND
; Parallel 1D IDCT on all the columns of a 8x8 16bits data matrix which are
; loaded in q8-q15. The IDCT constants are loaded in r3 - r9. The output
; will be stored back into q8-q15 registers. This macro will touch q0-q7
; registers and use them as buffer during calculation.
MACRO
IDCT8x8_1D
; stage 1
vdup.16 d0, r3 ; duplicate cospi_28_64
vdup.16 d1, r4 ; duplicate cospi_4_64
vdup.16 d2, r5 ; duplicate cospi_12_64
vdup.16 d3, r6 ; duplicate cospi_20_64
; input[1] * cospi_28_64
vmull.s16 q2, d18, d0
vmull.s16 q3, d19, d0
; input[5] * cospi_12_64
vmull.s16 q5, d26, d2
vmull.s16 q6, d27, d2
; input[1]*cospi_28_64-input[7]*cospi_4_64
vmlsl.s16 q2, d30, d1
vmlsl.s16 q3, d31, d1
; input[5] * cospi_12_64 - input[3] * cospi_20_64
vmlsl.s16 q5, d22, d3
vmlsl.s16 q6, d23, d3
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d8, q2, #14 ; >> 14
vqrshrn.s32 d9, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d10, q5, #14 ; >> 14
vqrshrn.s32 d11, q6, #14 ; >> 14
; input[1] * cospi_4_64
vmull.s16 q2, d18, d1
vmull.s16 q3, d19, d1
; input[5] * cospi_20_64
vmull.s16 q9, d26, d3
vmull.s16 q13, d27, d3
; input[1]*cospi_4_64+input[7]*cospi_28_64
vmlal.s16 q2, d30, d0
vmlal.s16 q3, d31, d0
; input[5] * cospi_20_64 + input[3] * cospi_12_64
vmlal.s16 q9, d22, d2
vmlal.s16 q13, d23, d2
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d14, q2, #14 ; >> 14
vqrshrn.s32 d15, q3, #14 ; >> 14
; stage 2 & stage 3 - even half
vdup.16 d0, r7 ; duplicate cospi_16_64
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d12, q9, #14 ; >> 14
vqrshrn.s32 d13, q13, #14 ; >> 14
; input[0] * cospi_16_64
vmull.s16 q2, d16, d0
vmull.s16 q3, d17, d0
; input[0] * cospi_16_64
vmull.s16 q13, d16, d0
vmull.s16 q15, d17, d0
; (input[0] + input[2]) * cospi_16_64
vmlal.s16 q2, d24, d0
vmlal.s16 q3, d25, d0
; (input[0] - input[2]) * cospi_16_64
vmlsl.s16 q13, d24, d0
vmlsl.s16 q15, d25, d0
vdup.16 d0, r8 ; duplicate cospi_24_64
vdup.16 d1, r9 ; duplicate cospi_8_64
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d18, q2, #14 ; >> 14
vqrshrn.s32 d19, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d22, q13, #14 ; >> 14
vqrshrn.s32 d23, q15, #14 ; >> 14
; input[1] * cospi_24_64
vmull.s16 q2, d20, d0
vmull.s16 q3, d21, d0
; input[1] * cospi_8_64
vmull.s16 q8, d20, d1
vmull.s16 q12, d21, d1
; input[1] * cospi_24_64 - input[3] * cospi_8_64
vmlsl.s16 q2, d28, d1
vmlsl.s16 q3, d29, d1
; input[1] * cospi_8_64 + input[3] * cospi_24_64
vmlal.s16 q8, d28, d0
vmlal.s16 q12, d29, d0
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d26, q2, #14 ; >> 14
vqrshrn.s32 d27, q3, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d30, q8, #14 ; >> 14
vqrshrn.s32 d31, q12, #14 ; >> 14
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
; stage 3 -odd half
vdup.16 d16, r7 ; duplicate cospi_16_64
; stage 2 - odd half
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
vmull.s16 q10, d29, d16
; step2[6] * cospi_16_64
vmull.s16 q11, d28, d16
vmull.s16 q12, d29, d16
; (step2[6] - step2[5]) * cospi_16_64
vmlsl.s16 q9, d26, d16
vmlsl.s16 q10, d27, d16
; (step2[5] + step2[6]) * cospi_16_64
vmlal.s16 q11, d26, d16
vmlal.s16 q12, d27, d16
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d10, q9, #14 ; >> 14
vqrshrn.s32 d11, q10, #14 ; >> 14
; dct_const_round_shift(input_dc * cospi_16_64)
vqrshrn.s32 d12, q11, #14 ; >> 14
vqrshrn.s32 d13, q12, #14 ; >> 14
; stage 4
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
MEND
; Parallel 1D IADST on all the columns of a 8x8 16bits data matrix which
; loaded in q8-q15. IADST constants are loaded in r0 - r12 registers. The
; output will be stored back into q8-q15 registers. This macro will touch
; q0 - q7 registers and use them as buffer during calculation.
MACRO
IADST8X8_1D
vdup.16 d14, r0 ; duplicate cospi_2_64
vdup.16 d15, r1 ; duplicate cospi_30_64
; cospi_2_64 * x0
vmull.s16 q1, d30, d14
vmull.s16 q2, d31, d14
; cospi_30_64 * x0
vmull.s16 q3, d30, d15
vmull.s16 q4, d31, d15
vdup.16 d30, r4 ; duplicate cospi_18_64
vdup.16 d31, r5 ; duplicate cospi_14_64
; s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
vmlal.s16 q1, d16, d15
vmlal.s16 q2, d17, d15
; s1 = cospi_30_64 * x0 - cospi_2_64 * x1
vmlsl.s16 q3, d16, d14
vmlsl.s16 q4, d17, d14
; cospi_18_64 * x4
vmull.s16 q5, d22, d30
vmull.s16 q6, d23, d30
; cospi_14_64 * x4
vmull.s16 q7, d22, d31
vmull.s16 q8, d23, d31
; s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
vmlal.s16 q5, d24, d31
vmlal.s16 q6, d25, d31
; s5 = cospi_14_64 * x4 - cospi_18_64 * x5
vmlsl.s16 q7, d24, d30
vmlsl.s16 q8, d25, d30
; (s0 + s4)
vadd.s32 q11, q1, q5
vadd.s32 q12, q2, q6
vdup.16 d0, r2 ; duplicate cospi_10_64
vdup.16 d1, r3 ; duplicate cospi_22_64
; (s0 - s4)
vsub.s32 q1, q1, q5
vsub.s32 q2, q2, q6
; x0 = dct_const_round_shift(s0 + s4);
vqrshrn.s32 d22, q11, #14 ; >> 14
vqrshrn.s32 d23, q12, #14 ; >> 14
; (s1 + s5)
vadd.s32 q12, q3, q7
vadd.s32 q15, q4, q8
; (s1 - s5)
vsub.s32 q3, q3, q7
vsub.s32 q4, q4, q8
; x4 = dct_const_round_shift(s0 - s4);
vqrshrn.s32 d2, q1, #14 ; >> 14
vqrshrn.s32 d3, q2, #14 ; >> 14
; x1 = dct_const_round_shift(s1 + s5);
vqrshrn.s32 d24, q12, #14 ; >> 14
vqrshrn.s32 d25, q15, #14 ; >> 14
; x5 = dct_const_round_shift(s1 - s5);
vqrshrn.s32 d6, q3, #14 ; >> 14
vqrshrn.s32 d7, q4, #14 ; >> 14
; cospi_10_64 * x2
vmull.s16 q4, d26, d0
vmull.s16 q5, d27, d0
; cospi_22_64 * x2
vmull.s16 q2, d26, d1
vmull.s16 q6, d27, d1
vdup.16 d30, r6 ; duplicate cospi_26_64
vdup.16 d31, r7 ; duplicate cospi_6_64
; s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
vmlal.s16 q4, d20, d1
vmlal.s16 q5, d21, d1
; s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
vmlsl.s16 q2, d20, d0
vmlsl.s16 q6, d21, d0
; cospi_26_64 * x6
vmull.s16 q0, d18, d30
vmull.s16 q13, d19, d30
; s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
vmlal.s16 q0, d28, d31
vmlal.s16 q13, d29, d31
; cospi_6_64 * x6
vmull.s16 q10, d18, d31
vmull.s16 q9, d19, d31
; s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
vmlsl.s16 q10, d28, d30
vmlsl.s16 q9, d29, d30
; (s3 + s7)
vadd.s32 q14, q2, q10
vadd.s32 q15, q6, q9
; (s3 - s7)
vsub.s32 q2, q2, q10
vsub.s32 q6, q6, q9
; x3 = dct_const_round_shift(s3 + s7);
vqrshrn.s32 d28, q14, #14 ; >> 14
vqrshrn.s32 d29, q15, #14 ; >> 14
; x7 = dct_const_round_shift(s3 - s7);
vqrshrn.s32 d4, q2, #14 ; >> 14
vqrshrn.s32 d5, q6, #14 ; >> 14
; (s2 + s6)
vadd.s32 q9, q4, q0
vadd.s32 q10, q5, q13
; (s2 - s6)
vsub.s32 q4, q4, q0
vsub.s32 q5, q5, q13
vdup.16 d30, r8 ; duplicate cospi_8_64
vdup.16 d31, r9 ; duplicate cospi_24_64
; x2 = dct_const_round_shift(s2 + s6);
vqrshrn.s32 d18, q9, #14 ; >> 14
vqrshrn.s32 d19, q10, #14 ; >> 14
; x6 = dct_const_round_shift(s2 - s6);
vqrshrn.s32 d8, q4, #14 ; >> 14
vqrshrn.s32 d9, q5, #14 ; >> 14
; cospi_8_64 * x4
vmull.s16 q5, d2, d30
vmull.s16 q6, d3, d30
; cospi_24_64 * x4
vmull.s16 q7, d2, d31
vmull.s16 q0, d3, d31
; s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
vmlal.s16 q5, d6, d31
vmlal.s16 q6, d7, d31
; s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
vmlsl.s16 q7, d6, d30
vmlsl.s16 q0, d7, d30
; cospi_8_64 * x7
vmull.s16 q1, d4, d30
vmull.s16 q3, d5, d30
; cospi_24_64 * x7
vmull.s16 q10, d4, d31
vmull.s16 q2, d5, d31
; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
vmlsl.s16 q1, d8, d31
vmlsl.s16 q3, d9, d31
; s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
vmlal.s16 q10, d8, d30
vmlal.s16 q2, d9, d30
vadd.s16 q8, q11, q9 ; x0 = s0 + s2;
vsub.s16 q11, q11, q9 ; x2 = s0 - s2;
vadd.s16 q4, q12, q14 ; x1 = s1 + s3;
vsub.s16 q12, q12, q14 ; x3 = s1 - s3;
; (s4 + s6)
vadd.s32 q14, q5, q1
vadd.s32 q15, q6, q3
; (s4 - s6)
vsub.s32 q5, q5, q1
vsub.s32 q6, q6, q3
; x4 = dct_const_round_shift(s4 + s6);
vqrshrn.s32 d18, q14, #14 ; >> 14
vqrshrn.s32 d19, q15, #14 ; >> 14
; x6 = dct_const_round_shift(s4 - s6);
vqrshrn.s32 d10, q5, #14 ; >> 14
vqrshrn.s32 d11, q6, #14 ; >> 14
; (s5 + s7)
vadd.s32 q1, q7, q10
vadd.s32 q3, q0, q2
; (s5 - s7))
vsub.s32 q7, q7, q10
vsub.s32 q0, q0, q2
; x5 = dct_const_round_shift(s5 + s7);
vqrshrn.s32 d28, q1, #14 ; >> 14
vqrshrn.s32 d29, q3, #14 ; >> 14
; x7 = dct_const_round_shift(s5 - s7);
vqrshrn.s32 d14, q7, #14 ; >> 14
vqrshrn.s32 d15, q0, #14 ; >> 14
vdup.16 d30, r12 ; duplicate cospi_16_64
; cospi_16_64 * x2
vmull.s16 q2, d22, d30
vmull.s16 q3, d23, d30
; cospi_6_64 * x6
vmull.s16 q13, d22, d30
vmull.s16 q1, d23, d30
; cospi_16_64 * x2 + cospi_16_64 * x3;
vmlal.s16 q2, d24, d30
vmlal.s16 q3, d25, d30
; cospi_16_64 * x2 - cospi_16_64 * x3;
vmlsl.s16 q13, d24, d30
vmlsl.s16 q1, d25, d30
; x2 = dct_const_round_shift(s2);
vqrshrn.s32 d4, q2, #14 ; >> 14
vqrshrn.s32 d5, q3, #14 ; >> 14
;x3 = dct_const_round_shift(s3);
vqrshrn.s32 d24, q13, #14 ; >> 14
vqrshrn.s32 d25, q1, #14 ; >> 14
; cospi_16_64 * x6
vmull.s16 q13, d10, d30
vmull.s16 q1, d11, d30
; cospi_6_64 * x6
vmull.s16 q11, d10, d30
vmull.s16 q0, d11, d30
; cospi_16_64 * x6 + cospi_16_64 * x7;
vmlal.s16 q13, d14, d30
vmlal.s16 q1, d15, d30
; cospi_16_64 * x6 - cospi_16_64 * x7;
vmlsl.s16 q11, d14, d30
vmlsl.s16 q0, d15, d30
; x6 = dct_const_round_shift(s6);
vqrshrn.s32 d20, q13, #14 ; >> 14
vqrshrn.s32 d21, q1, #14 ; >> 14
;x7 = dct_const_round_shift(s7);
vqrshrn.s32 d12, q11, #14 ; >> 14
vqrshrn.s32 d13, q0, #14 ; >> 14
vdup.16 q5, r10 ; duplicate 0
vsub.s16 q9, q5, q9 ; output[1] = -x4;
vsub.s16 q11, q5, q2 ; output[3] = -x2;
vsub.s16 q13, q5, q6 ; output[5] = -x7;
vsub.s16 q15, q5, q4 ; output[7] = -x1;
MEND
AREA Block, CODE, READONLY ; name this block of code
;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride, int tx_type)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride
; r3 int tx_type)
; This function will only handle tx_type of 1,2,3.
|vp9_short_iht8x8_add_neon| PROC
; load the inputs into d16-d19
vld1.s16 {q8,q9}, [r0]!
vld1.s16 {q10,q11}, [r0]!
vld1.s16 {q12,q13}, [r0]!
vld1.s16 {q14,q15}, [r0]!
push {r0-r10}
; transpose the input data
TRANSPOSE8X8
; decide the type of transform
cmp r3, #2
beq idct_iadst
cmp r3, #3
beq iadst_iadst
iadst_idct
; generate IDCT constants
GENERATE_IDCT_CONSTANTS
; first transform rows
IDCT8x8_1D
; transpose the matrix
TRANSPOSE8X8
; generate IADST constants
GENERATE_IADST_CONSTANTS
; then transform columns
IADST8X8_1D
b end_vp9_short_iht8x8_add_neon
idct_iadst
; generate IADST constants
GENERATE_IADST_CONSTANTS
; first transform rows
IADST8X8_1D
; transpose the matrix
TRANSPOSE8X8
; generate IDCT constants
GENERATE_IDCT_CONSTANTS
; then transform columns
IDCT8x8_1D
b end_vp9_short_iht8x8_add_neon
iadst_iadst
; generate IADST constants
GENERATE_IADST_CONSTANTS
; first transform rows
IADST8X8_1D
; transpose the matrix
TRANSPOSE8X8
; then transform columns
IADST8X8_1D
end_vp9_short_iht8x8_add_neon
pop {r0-r10}
; ROUND_POWER_OF_TWO(temp_out[j], 5)
vrshr.s16 q8, q8, #5
vrshr.s16 q9, q9, #5
vrshr.s16 q10, q10, #5
vrshr.s16 q11, q11, #5
vrshr.s16 q12, q12, #5
vrshr.s16 q13, q13, #5
vrshr.s16 q14, q14, #5
vrshr.s16 q15, q15, #5
; save dest pointer
mov r0, r1
; load destination data
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r2
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r2
vld1.64 {d7}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d0
vaddw.u8 q9, q9, d1
vaddw.u8 q10, q10, d2
vaddw.u8 q11, q11, d3
vaddw.u8 q12, q12, d4
vaddw.u8 q13, q13, d5
vaddw.u8 q14, q14, d6
vaddw.u8 q15, q15, d7
; clip_pixel
vqmovun.s16 d0, q8
vqmovun.s16 d1, q9
vqmovun.s16 d2, q10
vqmovun.s16 d3, q11
vqmovun.s16 d4, q12
vqmovun.s16 d5, q13
vqmovun.s16 d6, q14
vqmovun.s16 d7, q15
; store the data
vst1.64 {d0}, [r0], r2
vst1.64 {d1}, [r0], r2
vst1.64 {d2}, [r0], r2
vst1.64 {d3}, [r0], r2
vst1.64 {d4}, [r0], r2
vst1.64 {d5}, [r0], r2
vst1.64 {d6}, [r0], r2
vst1.64 {d7}, [r0], r2
bx lr
ENDP ; |vp9_short_iht8x8_add_neon|
END

View File

@@ -13,7 +13,6 @@
#include "vp9_rtcd.h"
#include "vp9/common/vp9_onyxc_int.h"
void vp9_machine_specific_config(VP9_COMMON *cm) {
(void)cm;
void vp9_machine_specific_config(VP9_COMMON *ctx) {
vp9_rtcd();
}

Some files were not shown because too many files have changed in this diff Show More