Compare commits
6 Commits
stable-vp9
...
m29-baseli
Author | SHA1 | Date | |
---|---|---|---|
![]() |
28147a449a | ||
![]() |
33149cbb4c | ||
![]() |
3f454060bb | ||
![]() |
d19ed5f249 | ||
![]() |
a801f7a295 | ||
![]() |
e39bd6407f |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,8 +1,6 @@
|
||||
*.a
|
||||
*.asm.s
|
||||
*.d
|
||||
*.gcno
|
||||
*.gcda
|
||||
*.o
|
||||
*~
|
||||
/*.ivf
|
||||
@@ -16,7 +14,7 @@
|
||||
/.install-*
|
||||
/.libs
|
||||
/Makefile
|
||||
/config.log
|
||||
/config.err
|
||||
/config.mk
|
||||
/decode_to_md5
|
||||
/decode_to_md5.c
|
||||
|
36
README
36
README
@@ -1,7 +1,7 @@
|
||||
vpx Multi-Format Codec SDK
|
||||
README - 1 August 2013
|
||||
README - 21 June 2012
|
||||
|
||||
Welcome to the WebM VP8/VP9 Codec SDK!
|
||||
Welcome to the WebM VP8 Codec SDK!
|
||||
|
||||
COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
The build system used is similar to autotools. Building generally consists of
|
||||
@@ -53,63 +53,33 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
armv5te-android-gcc
|
||||
armv5te-linux-rvct
|
||||
armv5te-linux-gcc
|
||||
armv5te-none-rvct
|
||||
armv6-darwin-gcc
|
||||
armv6-linux-rvct
|
||||
armv6-linux-gcc
|
||||
armv6-none-rvct
|
||||
armv7-android-gcc
|
||||
armv7-darwin-gcc
|
||||
armv7-linux-rvct
|
||||
armv7-linux-gcc
|
||||
armv7-none-rvct
|
||||
armv7-win32-vs11
|
||||
mips32-linux-gcc
|
||||
ppc32-darwin8-gcc
|
||||
ppc32-darwin9-gcc
|
||||
ppc32-linux-gcc
|
||||
ppc64-darwin8-gcc
|
||||
ppc64-darwin9-gcc
|
||||
ppc64-linux-gcc
|
||||
sparc-solaris-gcc
|
||||
x86-android-gcc
|
||||
x86-darwin8-gcc
|
||||
x86-darwin8-icc
|
||||
x86-darwin9-gcc
|
||||
x86-darwin9-icc
|
||||
x86-darwin10-gcc
|
||||
x86-darwin11-gcc
|
||||
x86-darwin12-gcc
|
||||
x86-darwin13-gcc
|
||||
x86-linux-gcc
|
||||
x86-linux-icc
|
||||
x86-os2-gcc
|
||||
x86-solaris-gcc
|
||||
x86-win32-gcc
|
||||
x86-win32-vs7
|
||||
x86-win32-vs8
|
||||
x86-win32-vs9
|
||||
x86-win32-vs10
|
||||
x86-win32-vs11
|
||||
x86_64-darwin9-gcc
|
||||
x86_64-darwin10-gcc
|
||||
x86_64-darwin11-gcc
|
||||
x86_64-darwin12-gcc
|
||||
x86_64-darwin13-gcc
|
||||
x86_64-linux-gcc
|
||||
x86_64-linux-icc
|
||||
x86_64-solaris-gcc
|
||||
x86_64-win64-gcc
|
||||
x86_64-win64-vs8
|
||||
x86_64-win64-vs9
|
||||
x86_64-win64-vs10
|
||||
x86_64-win64-vs11
|
||||
universal-darwin8-gcc
|
||||
universal-darwin9-gcc
|
||||
universal-darwin10-gcc
|
||||
universal-darwin11-gcc
|
||||
universal-darwin12-gcc
|
||||
universal-darwin13-gcc
|
||||
generic-gnu
|
||||
|
||||
The generic-gnu target, in conjunction with the CROSS environment variable,
|
||||
@@ -127,7 +97,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
||||
|
||||
5. Configuration errors
|
||||
If the configuration step fails, the first step is to look in the error log.
|
||||
This defaults to config.log. This should give a good indication of what went
|
||||
This defaults to config.err. This should give a good indication of what went
|
||||
wrong. If not, contact us for support.
|
||||
|
||||
SUPPORT
|
||||
|
@@ -7,7 +7,18 @@ REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/common/vp9_asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/decoder/vp9_asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/encoder/vp9_asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/common/vp8_asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/decoder/vp8_asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
@@ -13,20 +13,20 @@
|
||||
verbose=0
|
||||
set -- $*
|
||||
for i; do
|
||||
if [ "$i" = "-o" ]; then
|
||||
if [ "$i" == "-o" ]; then
|
||||
on_of=1
|
||||
elif [ "$i" = "-v" ]; then
|
||||
elif [ "$i" == "-v" ]; then
|
||||
verbose=1
|
||||
elif [ "$i" = "-g" ]; then
|
||||
elif [ "$i" == "-g" ]; then
|
||||
args="${args} --debug"
|
||||
elif [ "$on_of" = "1" ]; then
|
||||
elif [ "$on_of" == "1" ]; then
|
||||
outfile=$i
|
||||
on_of=0
|
||||
elif [ -f "$i" ]; then
|
||||
infiles="$infiles $i"
|
||||
elif [ "${i#-l}" != "$i" ]; then
|
||||
elif [ "${i:0:2}" == "-l" ]; then
|
||||
libs="$libs ${i#-l}"
|
||||
elif [ "${i#-L}" != "$i" ]; then
|
||||
elif [ "${i:0:2}" == "-L" ]; then
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
else
|
||||
args="${args} ${i}"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
##
|
||||
## configure.sh
|
||||
##
|
||||
@@ -75,7 +75,7 @@ Options:
|
||||
|
||||
Build options:
|
||||
--help print this message
|
||||
--log=yes|no|FILE file configure log is written to [config.log]
|
||||
--log=yes|no|FILE file configure log is written to [config.err]
|
||||
--target=TARGET target platform tuple [generic-gnu]
|
||||
--cpu=CPU optimize for a specific cpu rather than a family
|
||||
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
|
||||
@@ -198,11 +198,11 @@ add_extralibs() {
|
||||
#
|
||||
# Boolean Manipulation Functions
|
||||
#
|
||||
enable_feature(){
|
||||
enable(){
|
||||
set_all yes $*
|
||||
}
|
||||
|
||||
disable_feature(){
|
||||
disable(){
|
||||
set_all no $*
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ soft_enable() {
|
||||
for var in $*; do
|
||||
if ! disabled $var; then
|
||||
log_echo " enabling $var"
|
||||
enable_feature $var
|
||||
enable $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -228,7 +228,7 @@ soft_disable() {
|
||||
for var in $*; do
|
||||
if ! enabled $var; then
|
||||
log_echo " disabling $var"
|
||||
disable_feature $var
|
||||
disable $var
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -251,10 +251,10 @@ tolower(){
|
||||
# Temporary File Functions
|
||||
#
|
||||
source_path=${0%/*}
|
||||
enable_feature source_path_used
|
||||
enable source_path_used
|
||||
if test -z "$source_path" -o "$source_path" = "." ; then
|
||||
source_path="`pwd`"
|
||||
disable_feature source_path_used
|
||||
disable source_path_used
|
||||
fi
|
||||
|
||||
if test ! -z "$TMPDIR" ; then
|
||||
@@ -264,13 +264,12 @@ elif test ! -z "$TEMPDIR" ; then
|
||||
else
|
||||
TMPDIRx="/tmp"
|
||||
fi
|
||||
RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RAND}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RAND}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RAND}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RAND}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RAND}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm"
|
||||
TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
|
||||
TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
|
||||
TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
|
||||
TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
|
||||
TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
|
||||
TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
|
||||
|
||||
clean_temp_files() {
|
||||
rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
|
||||
@@ -317,8 +316,8 @@ check_header(){
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable_feature $var
|
||||
check_cpp "$@" <<EOF && enable_feature $var
|
||||
disable $var
|
||||
check_cpp "$@" <<EOF && enable $var
|
||||
#include "$header"
|
||||
int x;
|
||||
EOF
|
||||
@@ -480,7 +479,7 @@ process_common_cmdline() {
|
||||
for opt in "$@"; do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--child) enable_feature child
|
||||
--child) enable child
|
||||
;;
|
||||
--log*)
|
||||
logging="$optval"
|
||||
@@ -492,7 +491,7 @@ process_common_cmdline() {
|
||||
;;
|
||||
--target=*) toolchain="${toolchain:-${optval}}"
|
||||
;;
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable_feature force_toolchain
|
||||
--force-target=*) toolchain="${toolchain:-${optval}}"; enable force_toolchain
|
||||
;;
|
||||
--cpu)
|
||||
;;
|
||||
@@ -512,7 +511,7 @@ process_common_cmdline() {
|
||||
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
|
||||
die_unknown $opt
|
||||
fi
|
||||
${action}_feature $option
|
||||
$action $option
|
||||
;;
|
||||
--require-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
@@ -524,11 +523,11 @@ process_common_cmdline() {
|
||||
;;
|
||||
--force-enable-?*|--force-disable-?*)
|
||||
eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'`
|
||||
${action}_feature $option
|
||||
$action $option
|
||||
;;
|
||||
--libc=*)
|
||||
[ -d "${optval}" ] || die "Not a directory: ${optval}"
|
||||
disable_feature builtin_libc
|
||||
disable builtin_libc
|
||||
alt_libc="${optval}"
|
||||
;;
|
||||
--as=*)
|
||||
@@ -654,10 +653,6 @@ process_common_toolchain() {
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin12
|
||||
;;
|
||||
*darwin13*)
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin13
|
||||
;;
|
||||
x86_64*mingw32*)
|
||||
tgt_os=win64
|
||||
;;
|
||||
@@ -697,13 +692,13 @@ process_common_toolchain() {
|
||||
|
||||
# Mark the specific ISA requested as enabled
|
||||
soft_enable ${tgt_isa}
|
||||
enable_feature ${tgt_os}
|
||||
enable_feature ${tgt_cc}
|
||||
enable ${tgt_os}
|
||||
enable ${tgt_cc}
|
||||
|
||||
# Enable the architecture family
|
||||
case ${tgt_isa} in
|
||||
arm*) enable_feature arm;;
|
||||
mips*) enable_feature mips;;
|
||||
arm*) enable arm;;
|
||||
mips*) enable mips;;
|
||||
esac
|
||||
|
||||
# PIC is probably what we want when building shared libs
|
||||
@@ -756,17 +751,13 @@ process_common_toolchain() {
|
||||
add_cflags "-mmacosx-version-min=10.8"
|
||||
add_ldflags "-mmacosx-version-min=10.8"
|
||||
;;
|
||||
*-darwin13-*)
|
||||
add_cflags "-mmacosx-version-min=10.9"
|
||||
add_ldflags "-mmacosx-version-min=10.9"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle Solaris variants. Solaris 10 needs -lposix4
|
||||
case ${toolchain} in
|
||||
sparc-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
disable_feature fast_unaligned
|
||||
disable fast_unaligned
|
||||
;;
|
||||
*-solaris-*)
|
||||
add_extralibs -lposix4
|
||||
@@ -791,7 +782,7 @@ process_common_toolchain() {
|
||||
;;
|
||||
armv5te)
|
||||
soft_enable edsp
|
||||
disable_feature fast_unaligned
|
||||
disable fast_unaligned
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -806,7 +797,7 @@ process_common_toolchain() {
|
||||
arch_int=${arch_int%%te}
|
||||
check_add_asflags --defsym ARCHITECTURE=${arch_int}
|
||||
tune_cflags="-mtune="
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if [ -z "${float_abi}" ]; then
|
||||
check_cpp <<EOF && float_abi=hard || float_abi=softfp
|
||||
#ifndef __ARM_PCS_VFP
|
||||
@@ -843,8 +834,8 @@ EOF
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
|
||||
AS_SFX=.s
|
||||
msvs_arch_dir=arm-msvs
|
||||
disable_feature multithread
|
||||
disable_feature unit_tests
|
||||
disable multithread
|
||||
disable unit_tests
|
||||
;;
|
||||
rvct)
|
||||
CC=armcc
|
||||
@@ -856,7 +847,7 @@ EOF
|
||||
tune_cflags="--cpu="
|
||||
tune_asflags="--cpu="
|
||||
if [ -z "${tune_cpu}" ]; then
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
if enabled neon
|
||||
then
|
||||
check_add_cflags --fpu=softvfp+vfpv3
|
||||
@@ -881,8 +872,8 @@ EOF
|
||||
|
||||
case ${tgt_os} in
|
||||
none*)
|
||||
disable_feature multithread
|
||||
disable_feature os_support
|
||||
disable multithread
|
||||
disable os_support
|
||||
;;
|
||||
|
||||
android*)
|
||||
@@ -914,9 +905,9 @@ EOF
|
||||
# Cortex-A8 implementations (NDK Dev Guide)
|
||||
add_ldflags "-Wl,--fix-cortex-a8"
|
||||
|
||||
enable_feature pic
|
||||
enable pic
|
||||
soft_enable realtime_only
|
||||
if [ ${tgt_isa} = "armv7" ]; then
|
||||
if [ ${tgt_isa} == "armv7" ]; then
|
||||
soft_enable runtime_cpu_detect
|
||||
fi
|
||||
if enabled runtime_cpu_detect; then
|
||||
@@ -970,7 +961,7 @@ EOF
|
||||
;;
|
||||
|
||||
linux*)
|
||||
enable_feature linux
|
||||
enable linux
|
||||
if enabled rvct; then
|
||||
# Check if we have CodeSourcery GCC in PATH. Needed for
|
||||
# libraries
|
||||
@@ -1001,14 +992,14 @@ EOF
|
||||
tune_cflags="-mtune="
|
||||
if enabled dspr2; then
|
||||
check_add_cflags -mips32r2 -mdspr2
|
||||
disable_feature fast_unaligned
|
||||
disable fast_unaligned
|
||||
fi
|
||||
check_add_cflags -march=${tgt_isa}
|
||||
check_add_asflags -march=${tgt_isa}
|
||||
check_add_asflags -KPIC
|
||||
;;
|
||||
ppc*)
|
||||
enable_feature ppc
|
||||
enable ppc
|
||||
bits=${tgt_isa##ppc}
|
||||
link_with_cc=gcc
|
||||
setup_gnu_toolchain
|
||||
@@ -1156,7 +1147,7 @@ EOF
|
||||
;;
|
||||
universal*|*-gcc|generic-gnu)
|
||||
link_with_cc=gcc
|
||||
enable_feature gcc
|
||||
enable gcc
|
||||
setup_gnu_toolchain
|
||||
;;
|
||||
esac
|
||||
@@ -1190,12 +1181,6 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
# default use_x86inc to yes if pic is no or 64bit or we are not on darwin
|
||||
echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
|
||||
if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o "${tgt_os#darwin}" = "${tgt_os}" ]; then
|
||||
soft_enable use_x86inc
|
||||
fi
|
||||
|
||||
# Position Independent Code (PIC) support, for building relocatable
|
||||
# shared objects
|
||||
enabled gcc && enabled pic && check_add_cflags -fPIC
|
||||
@@ -1205,14 +1190,14 @@ EOF
|
||||
enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
|
||||
|
||||
# Check for strip utility variant
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip
|
||||
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip
|
||||
|
||||
# Try to determine target endianness
|
||||
check_cc <<EOF
|
||||
unsigned int e = 'O'<<24 | '2'<<16 | 'B'<<8 | 'E';
|
||||
EOF
|
||||
[ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable_feature big_endian
|
||||
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian
|
||||
|
||||
# Try to find which inline keywords are supported
|
||||
check_cc <<EOF && INLINE="inline"
|
||||
@@ -1237,7 +1222,7 @@ EOF
|
||||
if enabled dspr2; then
|
||||
if enabled big_endian; then
|
||||
echo "dspr2 optimizations are available only for little endian platforms"
|
||||
disable_feature dspr2
|
||||
disable dspr2
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
@@ -1288,8 +1273,8 @@ print_config_h() {
|
||||
|
||||
print_webm_license() {
|
||||
local destination=$1
|
||||
local prefix="$2"
|
||||
local suffix="$3"
|
||||
local prefix=$2
|
||||
local suffix=$3
|
||||
shift 3
|
||||
cat <<EOF > ${destination}
|
||||
${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix}
|
||||
@@ -1310,8 +1295,8 @@ process_detect() {
|
||||
true;
|
||||
}
|
||||
|
||||
enable_feature logging
|
||||
logfile="config.log"
|
||||
enable logging
|
||||
logfile="config.err"
|
||||
self=$0
|
||||
process() {
|
||||
cmdline_args="$@"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
@@ -381,7 +381,7 @@ generate_vcproj() {
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="2" \
|
||||
DebugInformationFormat="1" \
|
||||
$warn_64bit \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
|
||||
@@ -395,7 +395,7 @@ generate_vcproj() {
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="2" \
|
||||
DebugInformationFormat="1" \
|
||||
$warn_64bit \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
|
||||
|
@@ -72,21 +72,10 @@ parse_project() {
|
||||
eval "${var}_name=$name"
|
||||
eval "${var}_guid=$guid"
|
||||
|
||||
if [ "$sfx" = "vcproj" ]; then
|
||||
cur_config_list=`grep -A1 '<Configuration' $file |
|
||||
grep Name | cut -d\" -f2`
|
||||
else
|
||||
cur_config_list=`grep -B1 'Label="Configuration"' $file |
|
||||
grep Condition | cut -d\' -f4`
|
||||
fi
|
||||
new_config_list=$(for i in $config_list $cur_config_list; do
|
||||
echo $i
|
||||
done | sort | uniq)
|
||||
if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then
|
||||
mixed_platforms=1
|
||||
fi
|
||||
config_list="$new_config_list"
|
||||
eval "${var}_config_list=\"$cur_config_list\""
|
||||
# assume that all projects have the same list of possible configurations,
|
||||
# so overwriting old config_lists is not a problem
|
||||
config_list=`grep -A1 '<Configuration' $file |
|
||||
grep Name | cut -d\" -f2`
|
||||
proj_list="${proj_list} ${var}"
|
||||
}
|
||||
|
||||
@@ -136,11 +125,6 @@ process_global() {
|
||||
indent_push
|
||||
IFS_bak=${IFS}
|
||||
IFS=$'\r'$'\n'
|
||||
if [ "$mixed_platforms" != "" ]; then
|
||||
config_list="
|
||||
Release|Mixed Platforms
|
||||
Debug|Mixed Platforms"
|
||||
fi
|
||||
for config in ${config_list}; do
|
||||
echo "${indent}$config = $config"
|
||||
done
|
||||
@@ -155,17 +139,10 @@ Debug|Mixed Platforms"
|
||||
indent_push
|
||||
for proj in ${proj_list}; do
|
||||
eval "local proj_guid=\${${proj}_guid}"
|
||||
eval "local proj_config_list=\${${proj}_config_list}"
|
||||
IFS=$'\r'$'\n'
|
||||
for config in ${proj_config_list}; do
|
||||
if [ "$mixed_platforms" != "" ]; then
|
||||
local c=${config%%|*}
|
||||
echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}"
|
||||
echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}"
|
||||
else
|
||||
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
|
||||
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
|
||||
fi
|
||||
for config in ${config_list}; do
|
||||
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
|
||||
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
|
||||
|
||||
done
|
||||
IFS=${IFS_bak}
|
||||
@@ -191,14 +168,9 @@ process_makefile() {
|
||||
IFS=$'\r'$'\n'
|
||||
local TAB=$'\t'
|
||||
cat <<EOF
|
||||
ifeq (\$(CONFIG_VS_VERSION),7)
|
||||
MSBUILD_TOOL := devenv.com
|
||||
else
|
||||
MSBUILD_TOOL := msbuild.exe
|
||||
endif
|
||||
found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
|
||||
found_devenv := \$(shell which devenv.com >/dev/null 2>&1 && echo yes)
|
||||
.nodevenv.once:
|
||||
${TAB}@echo " * \$(MSBUILD_TOOL) not found in path."
|
||||
${TAB}@echo " * devenv.com not found in path."
|
||||
${TAB}@echo " * "
|
||||
${TAB}@echo " * You will have to build all configurations manually using the"
|
||||
${TAB}@echo " * Visual Studio IDE. To allow make to build them automatically,"
|
||||
@@ -223,17 +195,16 @@ ${TAB}rm -rf "$platform"/"$config"
|
||||
ifneq (\$(found_devenv),)
|
||||
ifeq (\$(CONFIG_VS_VERSION),7)
|
||||
$nows_sln_config: $outfile
|
||||
${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
|
||||
${TAB}devenv.com $outfile -build "$config"
|
||||
|
||||
else
|
||||
$nows_sln_config: $outfile
|
||||
${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
|
||||
${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
|
||||
${TAB}devenv.com $outfile -build "$sln_config"
|
||||
|
||||
endif
|
||||
else
|
||||
$nows_sln_config: $outfile .nodevenv.once
|
||||
${TAB}@echo " * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
|
||||
${TAB}@echo " * Skipping build of $sln_config (devenv.com not in path)."
|
||||
${TAB}@echo " * "
|
||||
endif
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
|
@@ -7,6 +7,17 @@ REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp9/common/vp9_asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp9/decoder/vp9_asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp9/encoder/vp9_asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/vp8_asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/vp8_asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
|
||||
|
||||
|
99
configure
vendored
99
configure
vendored
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
##
|
||||
## configure
|
||||
##
|
||||
@@ -38,7 +38,6 @@ Advanced options:
|
||||
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
|
||||
${toggle_mem_tracker} track memory usage
|
||||
${toggle_postproc} postprocessing
|
||||
${toggle_vp9_postproc} vp9 specific postprocessing
|
||||
${toggle_multithread} multithreaded encoding and decoding
|
||||
${toggle_spatial_resampling} spatial sampling (scaling) support
|
||||
${toggle_realtime_only} enable this option while building for real-time encoding
|
||||
@@ -116,7 +115,6 @@ all_platforms="${all_platforms} x86-darwin9-icc"
|
||||
all_platforms="${all_platforms} x86-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin11-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin12-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin13-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-icc"
|
||||
all_platforms="${all_platforms} x86-os2-gcc"
|
||||
@@ -131,7 +129,6 @@ all_platforms="${all_platforms} x86_64-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin11-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin12-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin13-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-icc"
|
||||
all_platforms="${all_platforms} x86_64-solaris-gcc"
|
||||
@@ -145,7 +142,6 @@ all_platforms="${all_platforms} universal-darwin9-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin10-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin11-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin12-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin13-gcc"
|
||||
all_platforms="${all_platforms} generic-gnu"
|
||||
|
||||
# all_targets is a list of all targets that can be configured
|
||||
@@ -154,7 +150,7 @@ all_targets="libs examples docs"
|
||||
|
||||
# all targets available are enabled, by default.
|
||||
for t in ${all_targets}; do
|
||||
[ -f ${source_path}/${t}.mk ] && enable_feature ${t}
|
||||
[ -f ${source_path}/${t}.mk ] && enable ${t}
|
||||
done
|
||||
|
||||
# check installed doxygen version
|
||||
@@ -165,30 +161,30 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
||||
doxy_minor=${doxy_version%%.*}
|
||||
doxy_patch=${doxy_version##*.}
|
||||
|
||||
[ $doxy_major -gt 1 ] && enable_feature doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable_feature doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
|
||||
[ $doxy_major -gt 1 ] && enable doxygen
|
||||
[ $doxy_minor -gt 5 ] && enable doxygen
|
||||
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
|
||||
fi
|
||||
|
||||
# install everything except the sources, by default. sources will have
|
||||
# to be enabled when doing dist builds, since that's no longer a common
|
||||
# case.
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
|
||||
enable_feature install_bins
|
||||
enable_feature install_libs
|
||||
enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
|
||||
enable install_bins
|
||||
enable install_libs
|
||||
|
||||
enable_feature static
|
||||
enable_feature optimizations
|
||||
enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable_feature md5
|
||||
enable_feature spatial_resampling
|
||||
enable_feature multithread
|
||||
enable_feature os_support
|
||||
enable_feature temporal_denoising
|
||||
enable static
|
||||
enable optimizations
|
||||
enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
enable temporal_denoising
|
||||
|
||||
[ -d ${source_path}/../include ] && enable_feature alt_tree_layout
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
for d in vp8 vp9; do
|
||||
[ -d ${source_path}/${d} ] && disable_feature alt_tree_layout;
|
||||
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
|
||||
done
|
||||
|
||||
if ! enabled alt_tree_layout; then
|
||||
@@ -201,10 +197,10 @@ else
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable_feature vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable_feature vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable_feature vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable_feature vp9_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
|
||||
|
||||
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
|
||||
fi
|
||||
@@ -251,6 +247,7 @@ EXPERIMENT_LIST="
|
||||
multiple_arf
|
||||
non420
|
||||
alpha
|
||||
balanced_coeftree
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
@@ -258,7 +255,6 @@ CONFIG_LIST="
|
||||
install_bins
|
||||
install_libs
|
||||
install_srcs
|
||||
use_x86inc
|
||||
debug
|
||||
gprof
|
||||
gcov
|
||||
@@ -280,7 +276,6 @@ CONFIG_LIST="
|
||||
dc_recon
|
||||
runtime_cpu_detect
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -316,7 +311,6 @@ CMDLINE_SELECT="
|
||||
gprof
|
||||
gcov
|
||||
pic
|
||||
use_x86inc
|
||||
optimizations
|
||||
ccache
|
||||
runtime_cpu_detect
|
||||
@@ -335,7 +329,6 @@ CMDLINE_SELECT="
|
||||
dequant_tokens
|
||||
dc_recon
|
||||
postproc
|
||||
vp9_postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
@@ -361,12 +354,12 @@ process_cmdline() {
|
||||
for opt do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
|
||||
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
|
||||
if enabled experimental; then
|
||||
${action}_feature $option
|
||||
$action $option
|
||||
else
|
||||
log_echo "Ignoring $opt -- not in experimental mode."
|
||||
fi
|
||||
@@ -387,8 +380,8 @@ post_process_cmdline() {
|
||||
# If the codec family is enabled, enable all components of that family.
|
||||
log_echo "Configuring selected codecs"
|
||||
for c in ${CODECS}; do
|
||||
disabled ${c%%_*} && disable_feature ${c}
|
||||
enabled ${c%%_*} && enable_feature ${c}
|
||||
disabled ${c%%_*} && disable ${c}
|
||||
enabled ${c%%_*} && enable ${c}
|
||||
done
|
||||
|
||||
# Enable all detected codecs, if they haven't been disabled
|
||||
@@ -396,12 +389,12 @@ post_process_cmdline() {
|
||||
|
||||
# Enable the codec family if any component of that family is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled $c && enable_feature ${c%_*}
|
||||
enabled $c && enable ${c%_*}
|
||||
done
|
||||
|
||||
# Set the {en,de}coders variable if any algorithm in that class is enabled
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable_feature ${c##*_}s
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
done
|
||||
}
|
||||
|
||||
@@ -441,7 +434,7 @@ process_targets() {
|
||||
done
|
||||
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
|
||||
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
|
||||
! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
|
||||
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
|
||||
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
|
||||
@@ -511,13 +504,13 @@ process_detect() {
|
||||
fi
|
||||
if [ -z "$CC" ] || enabled external_build; then
|
||||
echo "Bypassing toolchain for environment detection."
|
||||
enable_feature external_build
|
||||
enable external_build
|
||||
check_header() {
|
||||
log fake_check_header "$@"
|
||||
header=$1
|
||||
shift
|
||||
var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
|
||||
disable_feature $var
|
||||
disable $var
|
||||
# Headers common to all environments
|
||||
case $header in
|
||||
stdio.h)
|
||||
@@ -529,7 +522,7 @@ process_detect() {
|
||||
[ -f "${d##-I}/$header" ] && result=true && break
|
||||
done
|
||||
${result:-true}
|
||||
esac && enable_feature $var
|
||||
esac && enable $var
|
||||
|
||||
# Specialize windows and POSIX environments.
|
||||
case $toolchain in
|
||||
@@ -537,7 +530,7 @@ process_detect() {
|
||||
case $header-$toolchain in
|
||||
stdint*-gcc) true;;
|
||||
*) false;;
|
||||
esac && enable_feature $var
|
||||
esac && enable $var
|
||||
;;
|
||||
*)
|
||||
case $header in
|
||||
@@ -546,7 +539,7 @@ process_detect() {
|
||||
sys/mman.h) true;;
|
||||
unistd.h) true;;
|
||||
*) false;;
|
||||
esac && enable_feature $var
|
||||
esac && enable $var
|
||||
esac
|
||||
enabled $var
|
||||
}
|
||||
@@ -564,7 +557,7 @@ EOF
|
||||
check_header sys/mman.h
|
||||
check_header unistd.h # for sysconf(3) and friends.
|
||||
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
|
||||
check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
|
||||
}
|
||||
|
||||
process_toolchain() {
|
||||
@@ -646,18 +639,14 @@ process_toolchain() {
|
||||
# ccache only really works on gcc toolchains
|
||||
enabled gcc || soft_disable ccache
|
||||
if enabled mips; then
|
||||
enable_feature dequant_tokens
|
||||
enable_feature dc_recon
|
||||
fi
|
||||
|
||||
if enabled internal_stats; then
|
||||
enable_feature vp9_postproc
|
||||
enable dequant_tokens
|
||||
enable dc_recon
|
||||
fi
|
||||
|
||||
# Enable the postbuild target if building for visual studio.
|
||||
case "$tgt_cc" in
|
||||
vs*) enable_feature msvs
|
||||
enable_feature solution
|
||||
vs*) enable msvs
|
||||
enable solution
|
||||
vs_version=${tgt_cc##vs}
|
||||
case $vs_version in
|
||||
[789])
|
||||
@@ -693,14 +682,6 @@ process_toolchain() {
|
||||
# iOS/ARM builds do not work with gtest. This does not match
|
||||
# x86 targets.
|
||||
;;
|
||||
*-win*)
|
||||
# Some mingw toolchains don't have pthread available by default.
|
||||
# Treat these more like visual studio where threading in gtest
|
||||
# would be disabled for the same reason.
|
||||
check_cxx "$@" <<EOF && soft_enable unit_tests
|
||||
int z;
|
||||
EOF
|
||||
;;
|
||||
*)
|
||||
enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
|
||||
int z;
|
||||
|
@@ -49,9 +49,6 @@ vpxenc.DESCRIPTION = Full featured encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
|
||||
UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c
|
||||
vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
|
||||
vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
|
||||
|
||||
# Clean up old ivfenc, ivfdec binaries.
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
18
libs.mk
18
libs.mk
@@ -57,13 +57,6 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
|
||||
RTCD += $$(BUILD_PFX)$(1).h
|
||||
endef
|
||||
|
||||
# x86inc.asm is not compatible with pic 32bit builds. Restrict
|
||||
# files which use it to 64bit builds or 32bit without pic
|
||||
USE_X86INC = no
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
USE_X86INC = yes
|
||||
endif
|
||||
|
||||
CODEC_SRCS-yes += CHANGELOG
|
||||
CODEC_SRCS-yes += libs.mk
|
||||
|
||||
@@ -390,11 +383,6 @@ LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
|
||||
$(call enabled,LIBVPX_TEST_DATA))
|
||||
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
|
||||
|
||||
libvpx_test_srcs.txt:
|
||||
@echo " [CREATE] $@"
|
||||
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
|
||||
CLEAN-OBJS += libvpx_test_srcs.txt
|
||||
|
||||
$(LIBVPX_TEST_DATA):
|
||||
@echo " [DOWNLOAD] $@"
|
||||
$(qexec)trap 'rm -f $@' INT TERM &&\
|
||||
@@ -455,10 +443,6 @@ else
|
||||
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
|
||||
GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
|
||||
GTEST_OBJS=$(call objs,$(GTEST_SRCS))
|
||||
ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
|
||||
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
|
||||
endif
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
|
||||
@@ -483,7 +467,7 @@ $(foreach bin,$(LIBVPX_TEST_BINS),\
|
||||
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
|
||||
$(LIBVPX_TEST_OBJS) \
|
||||
-L. -lvpx -lgtest $(extralibs) -lm)\
|
||||
-L. -lvpx -lgtest -lpthread -lm)\
|
||||
)))\
|
||||
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
|
||||
|
||||
|
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_ACM_RANDOM_H_
|
||||
#define TEST_ACM_RANDOM_H_
|
||||
#ifndef LIBVPX_TEST_ACM_RANDOM_H_
|
||||
#define LIBVPX_TEST_ACM_RANDOM_H_
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
@@ -59,4 +59,4 @@ class ACMRandom {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_ACM_RANDOM_H_
|
||||
#endif // LIBVPX_TEST_ACM_RANDOM_H_
|
||||
|
@@ -33,6 +33,10 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
|
||||
altref_count_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
|
@@ -27,10 +27,14 @@ class BordersTest : public ::libvpx_test::EncoderTest,
|
||||
SetMode(GET_PARAM(1));
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 1);
|
||||
if ( video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, 0);
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
|
||||
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
|
||||
|
@@ -10,7 +10,7 @@
|
||||
#ifndef TEST_CLEAR_SYSTEM_STATE_H_
|
||||
#define TEST_CLEAR_SYSTEM_STATE_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
# include "vpx_ports/x86.h"
|
||||
|
@@ -134,14 +134,14 @@ class VP8CodecFactory : public CodecFactory {
|
||||
|
||||
const libvpx_test::VP8CodecFactory kVP8;
|
||||
|
||||
#define VP8_INSTANTIATE_TEST_CASE(test, ...)\
|
||||
#define VP8_INSTANTIATE_TEST_CASE(test, params)\
|
||||
INSTANTIATE_TEST_CASE_P(VP8, test, \
|
||||
::testing::Combine( \
|
||||
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
|
||||
&libvpx_test::kVP8)), \
|
||||
__VA_ARGS__))
|
||||
params))
|
||||
#else
|
||||
#define VP8_INSTANTIATE_TEST_CASE(test, ...)
|
||||
#define VP8_INSTANTIATE_TEST_CASE(test, params)
|
||||
#endif // CONFIG_VP8
|
||||
|
||||
|
||||
@@ -216,14 +216,14 @@ class VP9CodecFactory : public CodecFactory {
|
||||
|
||||
const libvpx_test::VP9CodecFactory kVP9;
|
||||
|
||||
#define VP9_INSTANTIATE_TEST_CASE(test, ...)\
|
||||
#define VP9_INSTANTIATE_TEST_CASE(test, params)\
|
||||
INSTANTIATE_TEST_CASE_P(VP9, test, \
|
||||
::testing::Combine( \
|
||||
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
|
||||
&libvpx_test::kVP9)), \
|
||||
__VA_ARGS__))
|
||||
params))
|
||||
#else
|
||||
#define VP9_INSTANTIATE_TEST_CASE(test, ...)
|
||||
#define VP9_INSTANTIATE_TEST_CASE(test, params)
|
||||
#endif // CONFIG_VP9
|
||||
|
||||
|
||||
|
@@ -40,6 +40,10 @@ class ConfigTest : public ::libvpx_test::EncoderTest,
|
||||
++frame_count_out_;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
unsigned int frame_count_in_;
|
||||
unsigned int frame_count_out_;
|
||||
unsigned int frame_count_max_;
|
||||
|
@@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "test/acm_random.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
@@ -23,8 +22,8 @@ extern "C" {
|
||||
}
|
||||
|
||||
namespace {
|
||||
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const int16_t *filter_x, int filter_x_stride,
|
||||
const int16_t *filter_y, int filter_y_stride,
|
||||
int w, int h);
|
||||
@@ -188,7 +187,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
|
||||
protected:
|
||||
static const int kDataAlignment = 16;
|
||||
static const int kOuterBlockSize = 256;
|
||||
static const int kOuterBlockSize = 128;
|
||||
static const int kInputStride = kOuterBlockSize;
|
||||
static const int kOutputStride = kOuterBlockSize;
|
||||
static const int kMaxDimension = 64;
|
||||
@@ -212,7 +211,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
|
||||
virtual void SetUp() {
|
||||
UUT_ = GET_PARAM(2);
|
||||
/* Set up guard blocks for an inner block centered in the outer block */
|
||||
/* Set up guard blocks for an inner block cetered in the outer block */
|
||||
for (int i = 0; i < kOutputBufferSize; ++i) {
|
||||
if (IsIndexInBorder(i))
|
||||
output_[i] = 255;
|
||||
@@ -225,10 +224,6 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
|
||||
input_[i] = prng.Rand8Extremes();
|
||||
}
|
||||
|
||||
void SetConstantInput(int value) {
|
||||
memset(input_, value, kInputBufferSize);
|
||||
}
|
||||
|
||||
void CheckGuardBlocks() {
|
||||
for (int i = 0; i < kOutputBufferSize; ++i) {
|
||||
if (IsIndexInBorder(i))
|
||||
@@ -461,86 +456,45 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
|
||||
{ 128}
|
||||
};
|
||||
|
||||
/* This test exercises the horizontal and vertical filter functions. */
|
||||
TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
/* Assume that the first input sample is at the 8/16th position. */
|
||||
const int kInitialSubPelOffset = 8;
|
||||
|
||||
/* Filters are 8-tap, so the first filter tap will be applied to the pixel
|
||||
* at position -3 with respect to the current filtering position. Since
|
||||
* kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
|
||||
* which is non-zero only in the last tap. So, applying the filter at the
|
||||
* current input position will result in an output equal to the pixel at
|
||||
* offset +4 (-3 + 7) with respect to the current filtering position.
|
||||
*/
|
||||
const int kPixelSelected = 4;
|
||||
|
||||
/* Assume that each output pixel requires us to step on by 17/16th pixels in
|
||||
* the input.
|
||||
*/
|
||||
const int kInputPixelStep = 17;
|
||||
|
||||
/* The filters are setup in such a way that the expected output produces
|
||||
* sets of 8 identical output samples. As the filter position moves to the
|
||||
* next 1/16th pixel position the only active (=128) filter tap moves one
|
||||
* position to the left, resulting in the same input pixel being replicated
|
||||
* in to the output for 8 consecutive samples. After each set of 8 positions
|
||||
* the filters select a different input pixel. kFilterPeriodAdjust below
|
||||
* computes which input pixel is written to the output for a specified
|
||||
* x or y position.
|
||||
*/
|
||||
|
||||
/* Test the horizontal filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, NULL, 0, Width(), Height()));
|
||||
kChangeFilters[8], 17, kChangeFilters[4], 16,
|
||||
Width(), Height()));
|
||||
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjust = x >> 4;
|
||||
const int kFilterPeriodAdjust = (x >> 3) << 3;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();
|
||||
const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
|
||||
}
|
||||
|
||||
/* Test the vertical filter. */
|
||||
REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
|
||||
NULL, 0, kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep, Width(), Height()));
|
||||
kChangeFilters[4], 16, kChangeFilters[8], 17,
|
||||
Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjust = y >> 4;
|
||||
const int kFilterPeriodAdjust = (y >> 3) << 3;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjust * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
|
||||
ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
|
||||
}
|
||||
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
kChangeFilters[kInitialSubPelOffset],
|
||||
kInputPixelStep,
|
||||
kChangeFilters[8], 17, kChangeFilters[8], 17,
|
||||
Width(), Height()));
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
const int kQ4StepAdjustY = y >> 4;
|
||||
const int kFilterPeriodAdjustY = (y >> 3) << 3;
|
||||
const int ref_y =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustY * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
const int kQ4StepAdjustX = x >> 4;
|
||||
const int kFilterPeriodAdjustX = (x >> 3) << 3;
|
||||
const int ref_x =
|
||||
kPixelSelected + ((kInitialSubPelOffset
|
||||
+ kFilterPeriodAdjustX * kInputPixelStep)
|
||||
>> SUBPEL_BITS);
|
||||
const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
|
||||
|
||||
ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y;
|
||||
@@ -548,34 +502,6 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
/* This test exercises that enough rows and columns are filtered with every
|
||||
possible initial fractional positions and scaling steps. */
|
||||
TEST_P(ConvolveTest, CheckScalingFiltering) {
|
||||
uint8_t* const in = input();
|
||||
uint8_t* const out = output();
|
||||
|
||||
SetConstantInput(127);
|
||||
|
||||
for (int frac = 0; frac < 16; ++frac) {
|
||||
for (int step = 1; step <= 32; ++step) {
|
||||
/* Test the horizontal and vertical filters in combination. */
|
||||
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
vp9_sub_pel_filters_8[frac], step,
|
||||
Width(), Height()));
|
||||
|
||||
CheckGuardBlocks();
|
||||
|
||||
for (int y = 0; y < Height(); ++y) {
|
||||
for (int x = 0; x < Width(); ++x) {
|
||||
ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
|
||||
<< "x == " << x << ", y == " << y
|
||||
<< ", frac == " << frac << ", step == " << step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
@@ -601,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
|
||||
|
||||
#if HAVE_SSSE3
|
||||
const ConvolveFunctions convolve8_ssse3(
|
||||
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
|
||||
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
|
||||
vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
|
||||
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
|
||||
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
|
||||
vp9_convolve8_ssse3, vp9_convolve8_avg_c);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
|
||||
make_tuple(4, 4, &convolve8_ssse3),
|
||||
@@ -620,26 +546,4 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
|
||||
make_tuple(32, 64, &convolve8_ssse3),
|
||||
make_tuple(64, 64, &convolve8_ssse3)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
const ConvolveFunctions convolve8_neon(
|
||||
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
|
||||
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
|
||||
vp9_convolve8_neon, vp9_convolve8_avg_neon);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
|
||||
make_tuple(4, 4, &convolve8_neon),
|
||||
make_tuple(8, 4, &convolve8_neon),
|
||||
make_tuple(4, 8, &convolve8_neon),
|
||||
make_tuple(8, 8, &convolve8_neon),
|
||||
make_tuple(16, 8, &convolve8_neon),
|
||||
make_tuple(8, 16, &convolve8_neon),
|
||||
make_tuple(16, 16, &convolve8_neon),
|
||||
make_tuple(32, 16, &convolve8_neon),
|
||||
make_tuple(16, 32, &convolve8_neon),
|
||||
make_tuple(32, 32, &convolve8_neon),
|
||||
make_tuple(64, 32, &convolve8_neon),
|
||||
make_tuple(32, 64, &convolve8_neon),
|
||||
make_tuple(64, 64, &convolve8_neon)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@@ -1,112 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class CpuSpeedTest : public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWith2Params<
|
||||
libvpx_test::TestMode, int> {
|
||||
protected:
|
||||
CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GET_PARAM(1));
|
||||
set_cpu_used_ = GET_PARAM(2);
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
|
||||
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
|
||||
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||
}
|
||||
}
|
||||
int set_cpu_used_;
|
||||
};
|
||||
|
||||
TEST_P(CpuSpeedTest, TestQ0) {
|
||||
// Validate that this non multiple of 64 wide clip encodes and decodes
|
||||
// without a mismatch when passing in a very low max q. This pushes
|
||||
// the encoder to producing lots of big partitions which will likely
|
||||
// extend into the border and test the border condition.
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 5;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 2000;
|
||||
cfg_.rc_target_bitrate = 400;
|
||||
cfg_.rc_max_quantizer = 0;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
|
||||
20);
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
}
|
||||
|
||||
|
||||
TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
|
||||
// Validate that this non multiple of 64 wide clip encodes and decodes
|
||||
// without a mismatch when passing in a very low max q. This pushes
|
||||
// the encoder to producing lots of big partitions which will likely
|
||||
// extend into the border and test the border condition.
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 5;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 2000;
|
||||
cfg_.rc_target_bitrate = 12000;
|
||||
cfg_.rc_max_quantizer = 10;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
|
||||
40);
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
}
|
||||
TEST_P(CpuSpeedTest, TestLowBitrate) {
|
||||
// Validate that this clip encodes and decodes without a mismatch
|
||||
// when passing in a very high min q. This pushes the encoder to producing
|
||||
// lots of small partitions which might will test the other condition.
|
||||
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 5;
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 2000;
|
||||
cfg_.rc_target_bitrate = 200;
|
||||
cfg_.rc_min_quantizer = 40;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
|
||||
40);
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
#define VP9_FACTORY \
|
||||
static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9)
|
||||
|
||||
VP9_INSTANTIATE_TEST_CASE(
|
||||
CpuSpeedTest,
|
||||
::testing::Values(::libvpx_test::kTwoPassGood),
|
||||
::testing::Range(0, 5));
|
||||
} // namespace
|
@@ -42,6 +42,10 @@ class CQTest : public ::libvpx_test::EncoderTest,
|
||||
n_frames_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
|
@@ -36,6 +36,10 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
|
||||
duration_ = 0.0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
const vpx_rational_t tb = video->timebase();
|
||||
@@ -75,7 +79,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
|
||||
// Update the running total of bits for end of test datarate checks.
|
||||
bits_total_ += frame_size_in_bits;
|
||||
bits_total_ += frame_size_in_bits ;
|
||||
|
||||
// If first drop not set and we have a drop set it to this time.
|
||||
if (!first_drop_ && duration > 1)
|
||||
|
@@ -13,16 +13,14 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -32,13 +30,12 @@ namespace {
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
return (int)ceil(x - 0.5);
|
||||
else
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
return (int)floor(x + 0.5);
|
||||
}
|
||||
#endif
|
||||
|
||||
const int kNumCoeffs = 256;
|
||||
const double PI = 3.1415926535898;
|
||||
void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
@@ -47,9 +44,7 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
x = cos(PI * j * (l + 0.5) / 16.0) *
|
||||
cos(PI * i * (k + 0.5) / 16.0) *
|
||||
input[i * 16 + j] / 256;
|
||||
x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
@@ -63,23 +58,23 @@ void reference2_16x16_idct_2d(double *input, double *output) {
|
||||
}
|
||||
|
||||
|
||||
const double C1 = 0.995184726672197;
|
||||
const double C2 = 0.98078528040323;
|
||||
const double C3 = 0.956940335732209;
|
||||
const double C4 = 0.923879532511287;
|
||||
const double C5 = 0.881921264348355;
|
||||
const double C6 = 0.831469612302545;
|
||||
const double C7 = 0.773010453362737;
|
||||
const double C8 = 0.707106781186548;
|
||||
const double C9 = 0.634393284163646;
|
||||
const double C10 = 0.555570233019602;
|
||||
const double C11 = 0.471396736825998;
|
||||
const double C12 = 0.38268343236509;
|
||||
const double C13 = 0.290284677254462;
|
||||
const double C14 = 0.195090322016128;
|
||||
const double C15 = 0.098017140329561;
|
||||
static const double C1 = 0.995184726672197;
|
||||
static const double C2 = 0.98078528040323;
|
||||
static const double C3 = 0.956940335732209;
|
||||
static const double C4 = 0.923879532511287;
|
||||
static const double C5 = 0.881921264348355;
|
||||
static const double C6 = 0.831469612302545;
|
||||
static const double C7 = 0.773010453362737;
|
||||
static const double C8 = 0.707106781186548;
|
||||
static const double C9 = 0.634393284163646;
|
||||
static const double C10 = 0.555570233019602;
|
||||
static const double C11 = 0.471396736825998;
|
||||
static const double C12 = 0.38268343236509;
|
||||
static const double C13 = 0.290284677254462;
|
||||
static const double C14 = 0.195090322016128;
|
||||
static const double C15 = 0.098017140329561;
|
||||
|
||||
void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
double step[16];
|
||||
double intermediate[16];
|
||||
double temp1, temp2;
|
||||
@@ -112,36 +107,36 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[6] = step[1] - step[6];
|
||||
output[7] = step[0] - step[7];
|
||||
|
||||
temp1 = step[ 8] * C7;
|
||||
temp2 = step[15] * C9;
|
||||
temp1 = step[ 8]*C7;
|
||||
temp2 = step[15]*C9;
|
||||
output[ 8] = temp1 + temp2;
|
||||
|
||||
temp1 = step[ 9] * C11;
|
||||
temp2 = step[14] * C5;
|
||||
temp1 = step[ 9]*C11;
|
||||
temp2 = step[14]*C5;
|
||||
output[ 9] = temp1 - temp2;
|
||||
|
||||
temp1 = step[10] * C3;
|
||||
temp2 = step[13] * C13;
|
||||
temp1 = step[10]*C3;
|
||||
temp2 = step[13]*C13;
|
||||
output[10] = temp1 + temp2;
|
||||
|
||||
temp1 = step[11] * C15;
|
||||
temp2 = step[12] * C1;
|
||||
temp1 = step[11]*C15;
|
||||
temp2 = step[12]*C1;
|
||||
output[11] = temp1 - temp2;
|
||||
|
||||
temp1 = step[11] * C1;
|
||||
temp2 = step[12] * C15;
|
||||
temp1 = step[11]*C1;
|
||||
temp2 = step[12]*C15;
|
||||
output[12] = temp2 + temp1;
|
||||
|
||||
temp1 = step[10] * C13;
|
||||
temp2 = step[13] * C3;
|
||||
temp1 = step[10]*C13;
|
||||
temp2 = step[13]*C3;
|
||||
output[13] = temp2 - temp1;
|
||||
|
||||
temp1 = step[ 9] * C5;
|
||||
temp2 = step[14] * C11;
|
||||
temp1 = step[ 9]*C5;
|
||||
temp2 = step[14]*C11;
|
||||
output[14] = temp2 + temp1;
|
||||
|
||||
temp1 = step[ 8] * C9;
|
||||
temp2 = step[15] * C7;
|
||||
temp1 = step[ 8]*C9;
|
||||
temp2 = step[15]*C7;
|
||||
output[15] = temp2 - temp1;
|
||||
|
||||
// step 3
|
||||
@@ -150,20 +145,20 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
step[ 2] = output[1] - output[2];
|
||||
step[ 3] = output[0] - output[3];
|
||||
|
||||
temp1 = output[4] * C14;
|
||||
temp2 = output[7] * C2;
|
||||
temp1 = output[4]*C14;
|
||||
temp2 = output[7]*C2;
|
||||
step[ 4] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5] * C10;
|
||||
temp2 = output[6] * C6;
|
||||
temp1 = output[5]*C10;
|
||||
temp2 = output[6]*C6;
|
||||
step[ 5] = temp1 + temp2;
|
||||
|
||||
temp1 = output[5] * C6;
|
||||
temp2 = output[6] * C10;
|
||||
temp1 = output[5]*C6;
|
||||
temp2 = output[6]*C10;
|
||||
step[ 6] = temp2 - temp1;
|
||||
|
||||
temp1 = output[4] * C2;
|
||||
temp2 = output[7] * C14;
|
||||
temp1 = output[4]*C2;
|
||||
temp2 = output[7]*C14;
|
||||
step[ 7] = temp2 - temp1;
|
||||
|
||||
step[ 8] = output[ 8] + output[11];
|
||||
@@ -180,18 +175,18 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[ 0] = (step[ 0] + step[ 1]);
|
||||
output[ 8] = (step[ 0] - step[ 1]);
|
||||
|
||||
temp1 = step[2] * C12;
|
||||
temp2 = step[3] * C4;
|
||||
temp1 = step[2]*C12;
|
||||
temp2 = step[3]*C4;
|
||||
temp1 = temp1 + temp2;
|
||||
output[ 4] = 2*(temp1 * C8);
|
||||
output[ 4] = 2*(temp1*C8);
|
||||
|
||||
temp1 = step[2] * C4;
|
||||
temp2 = step[3] * C12;
|
||||
temp1 = step[2]*C4;
|
||||
temp2 = step[3]*C12;
|
||||
temp1 = temp2 - temp1;
|
||||
output[12] = 2 * (temp1 * C8);
|
||||
output[12] = 2*(temp1*C8);
|
||||
|
||||
output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
|
||||
output[14] = 2 * ((step[7] - step[ 6]) * C8);
|
||||
output[ 2] = 2*((step[4] + step[ 5])*C8);
|
||||
output[14] = 2*((step[7] - step[ 6])*C8);
|
||||
|
||||
temp1 = step[4] - step[5];
|
||||
temp2 = step[6] + step[7];
|
||||
@@ -201,17 +196,17 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
intermediate[8] = step[8] + step[14];
|
||||
intermediate[9] = step[9] + step[15];
|
||||
|
||||
temp1 = intermediate[8] * C12;
|
||||
temp2 = intermediate[9] * C4;
|
||||
temp1 = intermediate[8]*C12;
|
||||
temp2 = intermediate[9]*C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[3] = 2 * (temp1 * C8);
|
||||
output[3] = 2*(temp1*C8);
|
||||
|
||||
temp1 = intermediate[8] * C4;
|
||||
temp2 = intermediate[9] * C12;
|
||||
temp1 = intermediate[8]*C4;
|
||||
temp2 = intermediate[9]*C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[13] = 2 * (temp1 * C8);
|
||||
output[13] = 2*(temp1*C8);
|
||||
|
||||
output[ 9] = 2 * ((step[10] + step[11]) * C8);
|
||||
output[ 9] = 2*((step[10] + step[11])*C8);
|
||||
|
||||
intermediate[11] = step[10] - step[11];
|
||||
intermediate[12] = step[12] + step[13];
|
||||
@@ -222,300 +217,150 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
|
||||
output[15] = (intermediate[11] + intermediate[12]);
|
||||
output[ 1] = -(intermediate[11] - intermediate[12]);
|
||||
|
||||
output[ 7] = 2 * (intermediate[13] * C8);
|
||||
output[ 7] = 2*(intermediate[13]*C8);
|
||||
|
||||
temp1 = intermediate[14] * C12;
|
||||
temp2 = intermediate[15] * C4;
|
||||
temp1 = intermediate[14]*C12;
|
||||
temp2 = intermediate[15]*C4;
|
||||
temp1 = temp1 - temp2;
|
||||
output[11] = -2 * (temp1 * C8);
|
||||
output[11] = -2*(temp1*C8);
|
||||
|
||||
temp1 = intermediate[14] * C4;
|
||||
temp2 = intermediate[15] * C12;
|
||||
temp1 = intermediate[14]*C4;
|
||||
temp2 = intermediate[15]*C12;
|
||||
temp1 = temp2 + temp1;
|
||||
output[ 5] = 2 * (temp1 * C8);
|
||||
output[ 5] = 2*(temp1*C8);
|
||||
}
|
||||
|
||||
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
|
||||
static void reference_16x16_dct_1d(double in[16], double out[16]) {
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 16; k++) {
|
||||
out[k] = 0.0;
|
||||
for (int n = 0; n < 16; n++)
|
||||
out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
|
||||
if (k == 0)
|
||||
out[k] = out[k]*kInvSqrt2;
|
||||
}
|
||||
}
|
||||
|
||||
void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = input[j * 16 + i];
|
||||
temp_in[j] = input[j*16 + i];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j * 16 + i] = temp_out[j];
|
||||
output[j*16 + i] = temp_out[j];
|
||||
}
|
||||
// Then transform rows
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
double temp_in[16], temp_out[16];
|
||||
for (int j = 0; j < 16; ++j)
|
||||
temp_in[j] = output[j + i * 16];
|
||||
temp_in[j] = output[j + i*16];
|
||||
butterfly_16x16_dct_1d(temp_in, temp_out);
|
||||
// Scale by some magic number
|
||||
for (int j = 0; j < 16; ++j)
|
||||
output[j + i * 16] = temp_out[j]/2;
|
||||
output[j + i*16] = temp_out[j]/2;
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
|
||||
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
|
||||
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
|
||||
|
||||
void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fdct16x16_c(in, out, stride);
|
||||
}
|
||||
TEST(VP9Idct16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t in[256], coeff[256];
|
||||
uint8_t dst[256], src[256];
|
||||
double out_r[256];
|
||||
|
||||
// Reference wrapper with the fht_t signature: forwards all four arguments
// unchanged to vp9_short_fht16x16_c.
void fht16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
|
||||
vp9_short_fht16x16_c(in, out, stride, tx_type);
|
||||
}
|
||||
|
||||
class Trans16x16TestBase {
|
||||
public:
|
||||
virtual ~Trans16x16TestBase() {}
|
||||
|
||||
protected:
|
||||
virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
|
||||
|
||||
virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
|
||||
|
||||
void RunAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 10000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
|
||||
test_temp_block, pitch_));
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j)
|
||||
in[j] = src[j] - dst[j];
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < 256; j++)
|
||||
coeff[j] = round(out_r[j]);
|
||||
vp9_short_idct16x16_add_c(coeff, dst, 16);
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RunCoeffCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
// We need to enable the fdct test once we redo the 16-point fdct.
|
||||
TEST(VP9Fdct16x16Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t test_input_block[256];
|
||||
int16_t test_temp_block[256];
|
||||
uint8_t dst[256], src[256];
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
|
||||
const int pitch = 32;
|
||||
vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
|
||||
vp9_short_idct16x16_add_c(test_temp_block, dst, 16);
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
void RunMemCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 16x16 FDCT/IDCT has an individual round trip error > 1";
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
if (i == 1)
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
input_extreme_block[j] = -255;
|
||||
EXPECT_GE(count_test_block , total_error)
|
||||
<< "Error: 16x16 FDCT/IDCT has average round trip error > 1 per block";
|
||||
}
|
||||
|
||||
fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
|
||||
REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
|
||||
output_block, pitch_));
|
||||
TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t input_block[256], input_extreme_block[256];
|
||||
int16_t output_block[256], output_extreme_block[256];
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
EXPECT_EQ(output_block[j], output_ref_block[j]);
|
||||
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
|
||||
}
|
||||
if (i == 0)
|
||||
for (int j = 0; j < 256; ++j)
|
||||
input_extreme_block[j] = 255;
|
||||
|
||||
const int pitch = 32;
|
||||
vp9_short_fdct16x16_c(input_block, output_block, pitch);
|
||||
vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
|
||||
|
||||
// The minimum quant value is 4.
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
|
||||
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
|
||||
<< "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE";
|
||||
}
|
||||
}
|
||||
|
||||
void RunInvAccuracyCheck() {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_16x16_dct_2d(in, out_r);
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
coeff[j] = round(out_r[j]);
|
||||
|
||||
const int pitch = 32;
|
||||
REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_GE(1u, error)
|
||||
<< "Error: 16x16 IDCT has error " << error
|
||||
<< " at index " << j;
|
||||
}
|
||||
}
|
||||
}
|
||||
int pitch_;
|
||||
int tx_type_;
|
||||
fht_t fwd_txfm_ref;
|
||||
};
|
||||
|
||||
class Trans16x16DCT : public Trans16x16TestBase,
|
||||
public PARAMS(fdct_t, idct_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16DCT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 32;
|
||||
fwd_txfm_ref = fdct16x16_ref;
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride >> 1);
|
||||
}
|
||||
|
||||
fdct_t fwd_txfm_;
|
||||
idct_t inv_txfm_;
|
||||
};
|
||||
|
||||
// Runs the shared RunAccuracyCheck() for every Trans16x16DCT parameter tuple.
TEST_P(Trans16x16DCT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
// Runs the shared RunCoeffCheck() for every Trans16x16DCT parameter tuple.
TEST_P(Trans16x16DCT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
// Runs the shared RunMemCheck() for every Trans16x16DCT parameter tuple.
TEST_P(Trans16x16DCT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
// Runs the shared RunInvAccuracyCheck() for every Trans16x16DCT parameter
// tuple. Only the DCT fixture registers this check.
TEST_P(Trans16x16DCT, InvAccuracyCheck) {
|
||||
RunInvAccuracyCheck();
|
||||
}
|
||||
|
||||
class Trans16x16HT : public Trans16x16TestBase,
|
||||
public PARAMS(fht_t, iht_t, int) {
|
||||
public:
|
||||
virtual ~Trans16x16HT() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
inv_txfm_ = GET_PARAM(1);
|
||||
tx_type_ = GET_PARAM(2);
|
||||
pitch_ = 16;
|
||||
fwd_txfm_ref = fht16x16_ref;
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
|
||||
fwd_txfm_(in, out, stride, tx_type_);
|
||||
}
|
||||
void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
|
||||
inv_txfm_(out, dst, stride, tx_type_);
|
||||
}
|
||||
|
||||
fht_t fwd_txfm_;
|
||||
iht_t inv_txfm_;
|
||||
};
|
||||
|
||||
// Runs the shared RunAccuracyCheck() for every Trans16x16HT parameter tuple.
TEST_P(Trans16x16HT, AccuracyCheck) {
|
||||
RunAccuracyCheck();
|
||||
}
|
||||
|
||||
// Runs the shared RunCoeffCheck() for every Trans16x16HT parameter tuple.
TEST_P(Trans16x16HT, CoeffCheck) {
|
||||
RunCoeffCheck();
|
||||
}
|
||||
|
||||
// Runs the shared RunMemCheck() for every Trans16x16HT parameter tuple.
TEST_P(Trans16x16HT, MemCheck) {
|
||||
RunMemCheck();
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
// Registers the C forward/inverse pair for the Trans16x16DCT tests. The
// trailing 0 is the value the fixture's SetUp() stores in tx_type_.
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
|
||||
// Registers the C forward/inverse pair for the Trans16x16HT tests, once for
// each transform type value 0 through 3 (third tuple element).
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 0),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 1),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 2),
|
||||
make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 3)));
|
||||
|
||||
// SSE2 registrations, compiled only when HAVE_SSE2 is set.
#if HAVE_SSE2
|
||||
// Note: the DCT entry pairs the SSE2 forward transform with the C inverse
// (vp9_short_idct16x16_add_c), not an SSE2 inverse.
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
|
||||
// The HT entries use SSE2 for both directions, once per transform type 0..3.
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 1),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 2),
|
||||
make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 3)));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@@ -13,17 +13,15 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "test/util.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
|
||||
void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -32,15 +30,35 @@ namespace {
|
||||
#ifdef _MSC_VER
// Rounds x to the nearest integer; halfway cases move away from zero
// (2.5 -> 3, -2.5 -> -3).
// Only compiled when _MSC_VER is defined.
// NOTE(review): presumably that toolchain's math library lacks round() -
// confirm before removing.
static int round(double x) {
  if (x < 0)
    return static_cast<int>(ceil(x - 0.5));
  else
    return static_cast<int>(floor(x + 0.5));
}
#endif
|
||||
|
||||
const int kNumCoeffs = 1024;
|
||||
const double kPi = 3.141592653589793238462643383279502884;
|
||||
void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
|
||||
static const double kPi = 3.141592653589793238462643383279502884;
|
||||
static void reference2_32x32_idct_2d(double *input, double *output) {
|
||||
double x;
|
||||
for (int l = 0; l < 32; ++l) {
|
||||
for (int k = 0; k < 32; ++k) {
|
||||
double s = 0;
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
for (int j = 0; j < 32; ++j) {
|
||||
x = cos(kPi * j * (l + 0.5) / 32.0) *
|
||||
cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
|
||||
if (i != 0)
|
||||
x *= sqrt(2.0);
|
||||
if (j != 0)
|
||||
x *= sqrt(2.0);
|
||||
s += x;
|
||||
}
|
||||
}
|
||||
output[k * 32 + l] = s / 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
|
||||
const double kInvSqrt2 = 0.707106781186547524400844362104;
|
||||
for (int k = 0; k < 32; k++) {
|
||||
out[k] = 0.0;
|
||||
@@ -51,8 +69,7 @@ void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
|
||||
double output[kNumCoeffs]) {
|
||||
static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
|
||||
// First transform columns
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
double temp_in[32], temp_out[32];
|
||||
@@ -74,165 +91,27 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
|
||||
typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
|
||||
|
||||
// Parameterized fixture for the 32x32 transform tests.
//
// Test parameters, in tuple order: forward transform (fwd_txfm_t), inverse
// transform (inv_txfm_t), and a version selector stored in version_.
class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
 public:
  virtual ~Trans32x32Test() {}

  // Unpacks the parameter tuple into the fixture members.
  virtual void SetUp() {
    // version_ values:
    //   0: high precision forward transform
    //   1: low precision version for rd loop
    version_ = GET_PARAM(2);
    inv_txfm_ = GET_PARAM(1);
    fwd_txfm_ = GET_PARAM(0);
  }

  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
  int version_;
  fwd_txfm_t fwd_txfm_;
  inv_txfm_t inv_txfm_;
};
|
||||
|
||||
TEST_P(Trans32x32Test, AccuracyCheck) {
|
||||
TEST(VP9Idct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
uint32_t max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
int16_t in[1024], coeff[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
double out_r[1024];
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
const int pitch = 64;
|
||||
REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
|
||||
REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
if (version_ == 1) {
|
||||
max_error /= 2;
|
||||
total_error /= 45;
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
|
||||
}
|
||||
|
||||
// Compares the forward transform under test with vp9_short_fdct32x32_c on
// 1000 random blocks whose samples are differences of two 8-bit values.
// With version_ == 0 every coefficient must match exactly; otherwise each
// coefficient may differ from the reference by at most 6.
TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int kBlockCount = 1000;
  const int pitch = 64;

  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);

  for (int block = 0; block < kBlockCount; ++block) {
    for (int j = 0; j < kNumCoeffs; ++j)
      input_block[j] = rnd.Rand8() - rnd.Rand8();

    vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
    REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));

    // version_ is fixed for the whole test, so branching per coefficient
    // issues the same expectations in the same order as branching once.
    const bool exact_match = (version_ == 0);
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (exact_match) {
        EXPECT_EQ(output_block[j], output_ref_block[j])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
    }
  }
}
|
||||
|
||||
// Feeds 2000 blocks of extreme samples (every value +255 or -255) through
// both vp9_short_fdct32x32_c and the forward transform under test, then
// checks that the two outputs agree and that no coefficient exceeds
// 4 * DCT_MAX_VALUE in magnitude. The first block is all +255, the second
// all -255, and the rest pick the sign of each sample at random.
TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int kBlockCount = 2000;
  const int pitch = 64;

  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);

  for (int block = 0; block < kBlockCount; ++block) {
    // input_block is filled but never transformed in this test; its Rand8()
    // draws still advance rnd, so they are kept to preserve the sequence.
    for (int j = 0; j < kNumCoeffs; ++j) {
      input_block[j] = rnd.Rand8() - rnd.Rand8();
      input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
    }
    // The first two blocks are overwritten with a single saturated value.
    if (block == 0 || block == 1) {
      const int16_t fill = (block == 0) ? 255 : -255;
      for (int j = 0; j < kNumCoeffs; ++j)
        input_extreme_block[j] = fill;
    }

    vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
    REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));

    // Original note: the minimum quant value is 4.
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (version_ == 0) {
        EXPECT_EQ(output_block[j], output_ref_block[j])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
  }
}
|
||||
|
||||
TEST_P(Trans32x32Test, InverseAccuracy) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
const int count_test_block = 1000;
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
|
||||
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
double out_r[kNumCoeffs];
|
||||
|
||||
// Initialize a test block with input range [-255, 255]
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
in[j] = src[j] - dst[j];
|
||||
}
|
||||
|
||||
reference_32x32_dct_2d(in, out_r);
|
||||
for (int j = 0; j < kNumCoeffs; ++j)
|
||||
for (int j = 0; j < 1024; j++)
|
||||
coeff[j] = round(out_r[j]);
|
||||
REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
vp9_short_idct32x32_add_c(coeff, dst, 32);
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
const int error = diff * diff;
|
||||
EXPECT_GE(1, error)
|
||||
@@ -242,21 +121,72 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
|
||||
}
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
TEST(VP9Fdct32x32Test, AccuracyCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
unsigned int max_error = 0;
|
||||
int64_t total_error = 0;
|
||||
const int count_test_block = 1000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
int16_t test_input_block[1024];
|
||||
int16_t test_temp_block[1024];
|
||||
uint8_t dst[1024], src[1024];
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
dst[j] = rnd.Rand8();
|
||||
}
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 1024; ++j)
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans32x32Test,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct32x32_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 0),
|
||||
make_tuple(&vp9_short_fdct32x32_rd_sse2,
|
||||
&vp9_short_idct32x32_add_sse2, 1)));
|
||||
#endif
|
||||
const int pitch = 64;
|
||||
vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
|
||||
vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
|
||||
|
||||
for (int j = 0; j < 1024; ++j) {
|
||||
const unsigned diff = dst[j] - src[j];
|
||||
const unsigned error = diff * diff;
|
||||
if (max_error < error)
|
||||
max_error = error;
|
||||
total_error += error;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GE(1u, max_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
|
||||
}
|
||||
|
||||
// Checks that vp9_short_fdct32x32_c never produces a coefficient larger in
// magnitude than 4*DCT_MAX_VALUE, for 1000 random blocks and for 1000 blocks
// whose samples are all +255 or -255 (the very first one is all +255).
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int kBlockCount = 1000;
  const int kBlockSize = 1024;
  const int pitch = 64;

  for (int block = 0; block < kBlockCount; ++block) {
    int16_t input_block[kBlockSize];
    int16_t input_extreme_block[kBlockSize];
    int16_t output_block[kBlockSize];
    int16_t output_extreme_block[kBlockSize];

    // Random samples in [-255, 255], plus a block of randomly signed extremes.
    for (int j = 0; j < kBlockSize; ++j) {
      input_block[j] = rnd.Rand8() - rnd.Rand8();
      input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
    }
    if (block == 0) {
      for (int j = 0; j < kBlockSize; ++j)
        input_extreme_block[j] = 255;
    }

    vp9_short_fdct32x32_c(input_block, output_block, pitch);
    vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);

    // Original note: the minimum quant value is 4.
    for (int j = 0; j < kBlockSize; ++j) {
      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
          << "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
          << "Error: 32x32 FDCT extreme has coefficient larger than "
             "4*DCT_MAX_VALUE";
    }
  }
}
|
||||
} // namespace
|
||||
|
@@ -12,7 +12,7 @@
|
||||
#define TEST_DECODE_TEST_DRIVER_H_
|
||||
#include <cstring>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/vpx_decoder.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
@@ -36,8 +36,9 @@ class DxDataIterator {
|
||||
};
|
||||
|
||||
// Provides a simplified interface to manage one video decoding.
|
||||
// Similar to Encoder class, the exact services should be added
|
||||
// as more tests are added.
|
||||
//
|
||||
// TODO: similar to Encoder class, the exact services should be
|
||||
// added as more tests are added.
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
@@ -114,19 +114,19 @@ static bool compare_img(const vpx_image_t *img1,
|
||||
const unsigned int height_y = img1->d_h;
|
||||
unsigned int i;
|
||||
for (i = 0; i < height_y; ++i)
|
||||
match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
const unsigned int width_uv = (img1->d_w + 1) >> 1;
|
||||
const unsigned int height_uv = (img1->d_h + 1) >> 1;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
return match;
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
|
||||
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
|
||||
bool again;
|
||||
for (again = true, video->Begin(); again; video->Next()) {
|
||||
again = (video->img() != NULL);
|
||||
again = video->img() != NULL;
|
||||
|
||||
PreEncodeFrameHook(video);
|
||||
PreEncodeFrameHook(video, encoder);
|
||||
|
@@ -190,9 +190,7 @@ class EncoderTest {
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
|
||||
|
||||
// Hook to determine whether the encode loop should continue.
|
||||
virtual bool Continue() const {
|
||||
return !(::testing::Test::HasFatalFailure() || abort_);
|
||||
}
|
||||
virtual bool Continue() const { return !abort_; }
|
||||
|
||||
const CodecFactory *codec_;
|
||||
// Hook to determine whether to decode frame after encoding
|
||||
|
@@ -50,6 +50,10 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
|
||||
mismatch_nframes_ = 0;
|
||||
}
|
||||
|
||||
// Encode-loop continuation hook: returns true only while no fatal test
// failure has been recorded and abort_ is not set.
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
psnr_ += pkt->data.psnr.psnr[0];
|
||||
nframes_++;
|
||||
@@ -62,7 +66,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
|
||||
if (droppable_nframes_ > 0 &&
|
||||
(cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
|
||||
for (unsigned int i = 0; i < droppable_nframes_; ++i) {
|
||||
if (droppable_frames_[i] == video->frame()) {
|
||||
if (droppable_frames_[i] == nframes_) {
|
||||
std::cout << " Encoding droppable frame: "
|
||||
<< droppable_frames_[i] << "\n";
|
||||
frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
|
||||
@@ -148,7 +152,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 2000;
|
||||
cfg_.g_lag_in_frames = 10;
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
@@ -179,9 +183,6 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 500;
|
||||
// FIXME(debargha): Fix this to work for any lag.
|
||||
// Currently this test only works for lag = 0
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
|
@@ -15,69 +15,68 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
void fdct4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
|
||||
vp9_short_fdct4x4_c(in, out, stride);
|
||||
}
|
||||
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
void idct4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_idct4x4_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
void fht4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
|
||||
vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
|
||||
}
|
||||
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
void iht4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
|
||||
public:
|
||||
virtual ~FwdTrans4x4Test() {}
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm_ = fdct4x4;
|
||||
inv_txfm_ = idct4x4_add;
|
||||
FwdTrans4x4Test() {SetUpTestTxfm();}
|
||||
~FwdTrans4x4Test() {}
|
||||
|
||||
void SetUpTestTxfm() {
|
||||
tx_type = GetParam();
|
||||
if (tx_type == 0) {
|
||||
fwd_txfm = fdct4x4;
|
||||
inv_txfm = idct4x4_add;
|
||||
} else {
|
||||
fwd_txfm_ = fht4x4;
|
||||
inv_txfm_ = iht4x4_add;
|
||||
fwd_txfm = fht4x4;
|
||||
inv_txfm = iht4x4_add;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm_)(in, out, dst, stride, tx_type);
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm_)(in, out, dst, stride, tx_type);
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
|
||||
int tx_type_;
|
||||
void (*fwd_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int tx_type;
|
||||
void (*fwd_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type);
|
||||
void (*inv_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
void (*inv_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type);
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans4x4Test, SignBiasCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
|
||||
int16_t test_input_block[16];
|
||||
int16_t test_output_block[16];
|
||||
const int pitch = 8;
|
||||
int count_sign_block[16][2];
|
||||
const int count_test_block = 1000000;
|
||||
@@ -88,7 +87,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
|
||||
for (int j = 0; j < 16; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -104,7 +103,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
|
||||
EXPECT_TRUE(bias_acceptable)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > 1%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " tx_type " << tx_type_;
|
||||
<< " tx_type " << tx_type;
|
||||
}
|
||||
|
||||
memset(count_sign_block, 0, sizeof(count_sign_block));
|
||||
@@ -113,7 +112,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
|
||||
for (int j = 0; j < 16; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
|
||||
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -136,13 +135,12 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 1000000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
|
||||
int16_t test_input_block[16];
|
||||
int16_t test_temp_block[16];
|
||||
uint8_t dst[16], src[16];
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
@@ -153,10 +151,10 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 8;
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
if(test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
@@ -168,7 +166,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
}
|
||||
|
||||
// inverse transform and reconstruct the pixel block
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
|
||||
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
|
||||
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
@@ -183,7 +181,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
|
||||
|
||||
EXPECT_GE(count_test_block, total_error)
|
||||
<< "Error: FDCT/IDCT or FHT/IHT has average "
|
||||
<< "roundtrip error > 1 per block";
|
||||
"roundtrip error > 1 per block";
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans4x4Test, ::testing::Range(0, 4));
|
||||
|
@@ -13,78 +13,23 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
#include "vp9_rtcd.h"
|
||||
void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_fdct8x8_c(in, out, stride);
|
||||
}
|
||||
void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int /*tx_type*/) {
|
||||
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
|
||||
}
|
||||
void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||
int stride, int tx_type) {
|
||||
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
|
||||
// when we have all inverse dct functions done sse2.
|
||||
#if HAVE_SSE2
|
||||
vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
|
||||
#else
|
||||
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
|
||||
#endif
|
||||
}
|
||||
void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
|
||||
}
|
||||
|
||||
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
|
||||
public:
|
||||
virtual ~FwdTrans8x8Test() {}
|
||||
virtual void SetUp() {
|
||||
tx_type_ = GetParam();
|
||||
if (tx_type_ == 0) {
|
||||
fwd_txfm = fdct8x8;
|
||||
inv_txfm = idct8x8_add;
|
||||
} else {
|
||||
fwd_txfm = fht8x8;
|
||||
inv_txfm = iht8x8_add;
|
||||
}
|
||||
}
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*fwd_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
|
||||
int stride, int tx_type) {
|
||||
(*inv_txfm)(in, out, dst, stride, tx_type);
|
||||
}
|
||||
|
||||
int tx_type_;
|
||||
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
TEST(VP9Fdct8x8Test, SignBiasCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
|
||||
int16_t test_input_block[64];
|
||||
int16_t test_output_block[64];
|
||||
const int pitch = 16;
|
||||
int count_sign_block[64][2];
|
||||
const int count_test_block = 100000;
|
||||
@@ -95,9 +40,8 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
// Initialize a test block with input range [-255, 255].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
|
||||
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -111,7 +55,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 1125;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 8x8 FDCT/FHT has a sign bias > "
|
||||
<< "Error: 8x8 FDCT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-255, 255] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
@@ -125,9 +69,8 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
// Initialize a test block with input range [-15, 15].
|
||||
for (int j = 0; j < 64; ++j)
|
||||
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_output_block,
|
||||
NULL, pitch, tx_type_));
|
||||
|
||||
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_output_block[j] < 0)
|
||||
@@ -141,25 +84,24 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
|
||||
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
|
||||
const int max_diff = 10000;
|
||||
EXPECT_LT(diff, max_diff)
|
||||
<< "Error: 4x4 FDCT/FHT has a sign bias > "
|
||||
<< "Error: 4x4 FDCT has a sign bias > "
|
||||
<< 1. * max_diff / count_test_block * 100 << "%"
|
||||
<< " for input range [-15, 15] at index " << j
|
||||
<< " count0: " << count_sign_block[j][0]
|
||||
<< " count1: " << count_sign_block[j][1]
|
||||
<< " diff: " << diff;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
int16_t test_input_block[64];
|
||||
int16_t test_temp_block[64];
|
||||
uint8_t dst[64], src[64];
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8();
|
||||
@@ -170,11 +112,9 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (test_temp_block[j] > 0) {
|
||||
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
|
||||
for (int j = 0; j < 64; ++j){
|
||||
if(test_temp_block[j] > 0) {
|
||||
test_temp_block[j] += 2;
|
||||
test_temp_block[j] /= 4;
|
||||
test_temp_block[j] *= 4;
|
||||
@@ -184,9 +124,7 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
test_temp_block[j] *= 4;
|
||||
}
|
||||
}
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
@@ -198,23 +136,21 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
|
||||
<< "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
|
||||
"error > 1/5 per block";
|
||||
}
|
||||
<< "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
|
||||
};
|
||||
|
||||
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
TEST(VP9Fdct8x8Test, ExtremalCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int max_error = 0;
|
||||
int total_error = 0;
|
||||
double total_error = 0;
|
||||
const int count_test_block = 100000;
|
||||
for (int i = 0; i < count_test_block; ++i) {
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
|
||||
int16_t test_input_block[64];
|
||||
int16_t test_temp_block[64];
|
||||
uint8_t dst[64], src[64];
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
src[j] = rnd.Rand8() % 2 ? 255 : 0;
|
||||
@@ -225,12 +161,8 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
test_input_block[j] = src[j] - dst[j];
|
||||
|
||||
const int pitch = 16;
|
||||
REGISTER_STATE_CHECK(
|
||||
RunFwdTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
REGISTER_STATE_CHECK(
|
||||
RunInvTxfm(test_input_block, test_temp_block,
|
||||
dst, pitch, tx_type_));
|
||||
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
|
||||
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
|
||||
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
const int diff = dst[j] - src[j];
|
||||
@@ -241,14 +173,13 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
|
||||
}
|
||||
|
||||
EXPECT_GE(1, max_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT has an"
|
||||
<< " individual roundtrip error > 1";
|
||||
|
||||
EXPECT_GE(count_test_block/5, total_error)
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
|
||||
<< "Error: Extremal 8x8 FDCT/IDCT has average"
|
||||
<< " roundtrip error > 1/5 per block";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
|
||||
} // namespace
|
||||
|
@@ -11,7 +11,6 @@
|
||||
#define TEST_I420_VIDEO_SOURCE_H_
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "test/video_source.h"
|
||||
|
||||
@@ -35,6 +34,7 @@ class I420VideoSource : public VideoSource {
|
||||
height_(0),
|
||||
framerate_numerator_(rate_numerator),
|
||||
framerate_denominator_(rate_denominator) {
|
||||
|
||||
// This initializes raw_sz_, width_, height_ and allocates an img.
|
||||
SetSize(width, height);
|
||||
}
|
||||
@@ -49,7 +49,7 @@ class I420VideoSource : public VideoSource {
|
||||
if (input_file_)
|
||||
fclose(input_file_);
|
||||
input_file_ = OpenTestDataFile(file_name_);
|
||||
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
|
||||
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
|
||||
<< file_name_;
|
||||
if (start_) {
|
||||
fseek(input_file_, raw_sz_ * start_, SEEK_SET);
|
||||
@@ -92,7 +92,6 @@ class I420VideoSource : public VideoSource {
|
||||
}
|
||||
|
||||
virtual void FillFrame() {
|
||||
ASSERT_TRUE(input_file_ != NULL);
|
||||
// Read a frame from input_file.
|
||||
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
|
||||
limit_ = frame_;
|
||||
@@ -109,8 +108,8 @@ class I420VideoSource : public VideoSource {
|
||||
unsigned int frame_;
|
||||
unsigned int width_;
|
||||
unsigned int height_;
|
||||
int framerate_numerator_;
|
||||
int framerate_denominator_;
|
||||
unsigned int framerate_numerator_;
|
||||
unsigned int framerate_denominator_;
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
@@ -15,10 +15,10 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -27,10 +27,10 @@ namespace {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static int round(double x) {
|
||||
if (x < 0)
|
||||
return static_cast<int>(ceil(x - 0.5));
|
||||
if(x < 0)
|
||||
return (int)ceil(x - 0.5);
|
||||
else
|
||||
return static_cast<int>(floor(x + 0.5));
|
||||
return (int)floor(x + 0.5);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
@@ -16,101 +17,105 @@ extern "C" {
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
typedef void (*idct_fn_t)(int16_t *input, unsigned char *pred_ptr,
|
||||
typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
namespace {
|
||||
class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
int i;
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
int i;
|
||||
|
||||
UUT = GetParam();
|
||||
memset(input, 0, sizeof(input));
|
||||
/* Set up guard blocks */
|
||||
for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
|
||||
}
|
||||
UUT = GetParam();
|
||||
memset(input, 0, sizeof(input));
|
||||
/* Set up guard blocks */
|
||||
for (i = 0; i < 256; i++)
|
||||
output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
}
|
||||
|
||||
idct_fn_t UUT;
|
||||
int16_t input[16];
|
||||
unsigned char output[256];
|
||||
unsigned char predict[256];
|
||||
idct_fn_t UUT;
|
||||
short input[16];
|
||||
unsigned char output[256];
|
||||
unsigned char predict[256];
|
||||
};
|
||||
|
||||
TEST_P(IDCTTest, TestGuardBlocks) {
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(0, output[i]) << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]);
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(0, output[i]) << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]);
|
||||
}
|
||||
|
||||
TEST_P(IDCTTest, TestAllZeros) {
|
||||
int i;
|
||||
int i;
|
||||
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(0, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(0, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
}
|
||||
|
||||
TEST_P(IDCTTest, TestAllOnes) {
|
||||
int i;
|
||||
int i;
|
||||
|
||||
input[0] = 4;
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
input[0] = 4;
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(1, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(1, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
}
|
||||
|
||||
TEST_P(IDCTTest, TestAddOne) {
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 256; i++) predict[i] = i;
|
||||
input[0] = 4;
|
||||
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
|
||||
for (i = 0; i < 256; i++)
|
||||
predict[i] = i;
|
||||
input[0] = 4;
|
||||
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(i + 1, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) < 4 && i < 64)
|
||||
EXPECT_EQ(i+1, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
}
|
||||
|
||||
TEST_P(IDCTTest, TestWithData) {
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++) input[i] = i;
|
||||
for (i = 0; i < 16; i++)
|
||||
input[i] = i;
|
||||
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) > 3 || i > 63)
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
else if (i == 0)
|
||||
EXPECT_EQ(11, output[i]) << "i==" << i;
|
||||
else if (i == 34)
|
||||
EXPECT_EQ(1, output[i]) << "i==" << i;
|
||||
else if (i == 2 || i == 17 || i == 32)
|
||||
EXPECT_EQ(3, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(0, output[i]) << "i==" << i;
|
||||
for (i = 0; i < 256; i++)
|
||||
if ((i & 0xF) > 3 || i > 63)
|
||||
EXPECT_EQ(255, output[i]) << "i==" << i;
|
||||
else if (i == 0)
|
||||
EXPECT_EQ(11, output[i]) << "i==" << i;
|
||||
else if (i == 34)
|
||||
EXPECT_EQ(1, output[i]) << "i==" << i;
|
||||
else if (i == 2 || i == 17 || i == 32)
|
||||
EXPECT_EQ(3, output[i]) << "i==" << i;
|
||||
else
|
||||
EXPECT_EQ(0, output[i]) << "i==" << i;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
|
||||
INSTANTIATE_TEST_CASE_P(C, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_c));
|
||||
#if HAVE_MMX
|
||||
INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_mmx));
|
||||
|
@@ -15,8 +15,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -27,8 +27,6 @@ using libvpx_test::ACMRandom;
|
||||
|
||||
class IntraPredBase {
|
||||
public:
|
||||
virtual ~IntraPredBase() {}
|
||||
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
}
|
||||
@@ -106,9 +104,9 @@ class IntraPredBase {
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
} else {
|
||||
} else
|
||||
expected = 0x80;
|
||||
}
|
||||
|
||||
// check that all subsequent lines are equal to the first
|
||||
for (int y = 1; y < block_size_; ++y)
|
||||
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
|
||||
|
@@ -28,7 +28,7 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
|
||||
// so that we can do actual file decodes.
|
||||
class IVFVideoSource : public CompressedVideoSource {
|
||||
public:
|
||||
explicit IVFVideoSource(const std::string &file_name)
|
||||
IVFVideoSource(const std::string &file_name)
|
||||
: file_name_(file_name),
|
||||
input_file_(NULL),
|
||||
compressed_frame_buf_(NULL),
|
||||
@@ -47,13 +47,12 @@ class IVFVideoSource : public CompressedVideoSource {
|
||||
virtual void Init() {
|
||||
// Allocate a buffer for read in the compressed video frame.
|
||||
compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
|
||||
ASSERT_TRUE(compressed_frame_buf_ != NULL)
|
||||
<< "Allocate frame buffer failed";
|
||||
ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
|
||||
}
|
||||
|
||||
virtual void Begin() {
|
||||
input_file_ = OpenTestDataFile(file_name_);
|
||||
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
|
||||
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
|
||||
<< file_name_;
|
||||
|
||||
// Read file header
|
||||
@@ -73,7 +72,6 @@ class IVFVideoSource : public CompressedVideoSource {
|
||||
}
|
||||
|
||||
void FillFrame() {
|
||||
ASSERT_TRUE(input_file_ != NULL);
|
||||
uint8_t frame_hdr[kIvfFrameHdrSize];
|
||||
// Check frame header and read a frame from input_file.
|
||||
if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
|
||||
|
@@ -31,6 +31,10 @@ class KeyframeTest : public ::libvpx_test::EncoderTest,
|
||||
set_cpu_used_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (kf_do_force_kf_)
|
||||
@@ -132,6 +136,7 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
|
||||
// Verify that keyframes match the file keyframes in the file.
|
||||
for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
|
||||
iter != kf_pts_list_.end(); ++iter) {
|
||||
|
||||
if (deadline_ == VPX_DL_REALTIME && *iter > 0)
|
||||
EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
|
||||
<< *iter;
|
||||
|
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_MD5_HELPER_H_
|
||||
#define TEST_MD5_HELPER_H_
|
||||
#ifndef LIBVPX_TEST_MD5_HELPER_H_
|
||||
#define LIBVPX_TEST_MD5_HELPER_H_
|
||||
|
||||
extern "C" {
|
||||
#include "./md5_utils.h"
|
||||
@@ -25,15 +25,9 @@ class MD5 {
|
||||
|
||||
void Add(const vpx_image_t *img) {
|
||||
for (int plane = 0; plane < 3; ++plane) {
|
||||
const uint8_t *buf = img->planes[plane];
|
||||
// Calculate the width and height to do the md5 check. For the chroma
|
||||
// plane, we never want to round down and thus skip a pixel so if
|
||||
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
|
||||
// This works only for chroma_shift of 0 and 1.
|
||||
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
|
||||
img->y_chroma_shift : img->d_h;
|
||||
const int w = plane ? (img->d_w + img->x_chroma_shift) >>
|
||||
img->x_chroma_shift : img->d_w;
|
||||
uint8_t *buf = img->planes[plane];
|
||||
const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
|
||||
const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
|
||||
|
||||
for (int y = 0; y < h; ++y) {
|
||||
MD5Update(&md5_, buf, w);
|
||||
@@ -67,4 +61,4 @@ class MD5 {
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_MD5_HELPER_H_
|
||||
#endif // LIBVPX_TEST_MD5_HELPER_H_
|
||||
|
@@ -11,8 +11,8 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
@@ -63,8 +63,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
|
||||
// Pointers to top-left pixel of block in the input and output images.
|
||||
uint8_t *const src_image_ptr = src_image + (input_stride << 1);
|
||||
uint8_t *const dst_image_ptr = dst_image + 8;
|
||||
uint8_t *const flimits =
|
||||
reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
uint8_t *const flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
|
||||
(void)vpx_memset(flimits, 255, block_width);
|
||||
|
||||
// Initialize pixels in the input:
|
||||
|
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_REGISTER_STATE_CHECK_H_
|
||||
#define TEST_REGISTER_STATE_CHECK_H_
|
||||
#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
|
||||
#ifdef _WIN64
|
||||
|
||||
@@ -92,4 +92,4 @@ class RegisterStateCheck {};
|
||||
|
||||
#endif // _WIN64
|
||||
|
||||
#endif // TEST_REGISTER_STATE_CHECK_H_
|
||||
#endif // LIBVPX_TEST_REGISTER_STATE_CHECK_H_
|
||||
|
@@ -16,68 +16,8 @@
|
||||
#include "test/video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
// Enable(1) or Disable(0) writing of the compressed bitstream.
|
||||
#define WRITE_COMPRESSED_STREAM 0
|
||||
|
||||
namespace {
|
||||
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
static void mem_put_le16(char *const mem, const unsigned int val) {
|
||||
mem[0] = val;
|
||||
mem[1] = val >> 8;
|
||||
}
|
||||
|
||||
static void mem_put_le32(char *const mem, const unsigned int val) {
|
||||
mem[0] = val;
|
||||
mem[1] = val >> 8;
|
||||
mem[2] = val >> 16;
|
||||
mem[3] = val >> 24;
|
||||
}
|
||||
|
||||
static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
|
||||
int frame_cnt, FILE *const outfile) {
|
||||
char header[32];
|
||||
|
||||
header[0] = 'D';
|
||||
header[1] = 'K';
|
||||
header[2] = 'I';
|
||||
header[3] = 'F';
|
||||
mem_put_le16(header + 4, 0); /* version */
|
||||
mem_put_le16(header + 6, 32); /* headersize */
|
||||
mem_put_le32(header + 8, 0x30395056); /* fourcc (vp9) */
|
||||
mem_put_le16(header + 12, cfg->g_w); /* width */
|
||||
mem_put_le16(header + 14, cfg->g_h); /* height */
|
||||
mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
|
||||
mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
|
||||
mem_put_le32(header + 24, frame_cnt); /* length */
|
||||
mem_put_le32(header + 28, 0); /* unused */
|
||||
|
||||
(void)fwrite(header, 1, 32, outfile);
|
||||
}
|
||||
|
||||
static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
|
||||
char header[4];
|
||||
mem_put_le32(header, static_cast<unsigned int>(size));
|
||||
(void)fwrite(header, 1, 4, outfile);
|
||||
}
|
||||
|
||||
static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
|
||||
FILE *const outfile) {
|
||||
char header[12];
|
||||
vpx_codec_pts_t pts;
|
||||
|
||||
if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
|
||||
return;
|
||||
|
||||
pts = pkt->data.frame.pts;
|
||||
mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
|
||||
mem_put_le32(header + 4, pts & 0xFFFFFFFF);
|
||||
mem_put_le32(header + 8, pts >> 32);
|
||||
|
||||
(void)fwrite(header, 1, 12, outfile);
|
||||
}
|
||||
#endif // WRITE_COMPRESSED_STREAM
|
||||
|
||||
const unsigned int kInitialWidth = 320;
|
||||
const unsigned int kInitialHeight = 240;
|
||||
|
||||
@@ -102,8 +42,6 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
|
||||
limit_ = 60;
|
||||
}
|
||||
|
||||
virtual ~ResizingVideoSource() {}
|
||||
|
||||
protected:
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
@@ -118,15 +56,13 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
|
||||
protected:
|
||||
ResizeTest() : EncoderTest(GET_PARAM(0)) {}
|
||||
|
||||
virtual ~ResizeTest() {}
|
||||
|
||||
struct FrameInfo {
|
||||
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
|
||||
: pts(_pts), w(_w), h(_h) {}
|
||||
|
||||
vpx_codec_pts_t pts;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
};
|
||||
|
||||
virtual void SetUp() {
|
||||
@@ -134,6 +70,10 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
|
||||
SetMode(GET_PARAM(1));
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void DecompressedFrameHook(const vpx_image_t &img,
|
||||
vpx_codec_pts_t pts) {
|
||||
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
|
||||
@@ -159,47 +99,17 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned int kStepDownFrame = 3;
|
||||
const unsigned int kStepUpFrame = 6;
|
||||
|
||||
class ResizeInternalTest : public ResizeTest {
|
||||
protected:
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
ResizeInternalTest()
|
||||
: ResizeTest(),
|
||||
frame0_psnr_(0.0),
|
||||
outfile_(NULL),
|
||||
out_frames_(0) {}
|
||||
#else
|
||||
ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
|
||||
#endif
|
||||
|
||||
virtual ~ResizeInternalTest() {}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
outfile_ = fopen("vp90-2-05-resize.ivf", "wb");
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void EndPassHook() {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
if (outfile_) {
|
||||
if (!fseek(outfile_, 0, SEEK_SET))
|
||||
write_ivf_file_header(&cfg_, out_frames_, outfile_);
|
||||
fclose(outfile_);
|
||||
outfile_ = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == kStepDownFrame) {
|
||||
if (video->frame() == 3) {
|
||||
struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
if (video->frame() == kStepUpFrame) {
|
||||
if (video->frame() == 6) {
|
||||
struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
|
||||
encoder->Control(VP8E_SET_SCALEMODE, &mode);
|
||||
}
|
||||
@@ -211,46 +121,21 @@ class ResizeInternalTest : public ResizeTest {
|
||||
EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
++out_frames_;
|
||||
|
||||
// Write initial file header if first frame.
|
||||
if (pkt->data.frame.pts == 0)
|
||||
write_ivf_file_header(&cfg_, 0, outfile_);
|
||||
|
||||
// Write frame header and data.
|
||||
write_ivf_frame_header(pkt, outfile_);
|
||||
(void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
|
||||
#endif
|
||||
}
|
||||
|
||||
double frame0_psnr_;
|
||||
#if WRITE_COMPRESSED_STREAM
|
||||
FILE *outfile_;
|
||||
unsigned int out_frames_;
|
||||
#endif
|
||||
};
|
||||
|
||||
TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 10);
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
// q picked such that initial keyframe on this clip is ~30dB PSNR
|
||||
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
|
||||
|
||||
// If the number of frames being encoded is smaller than g_lag_in_frames
|
||||
// the encoded frame is unavailable using the current API. Comparing
|
||||
// frames to detect mismatch would then not be possible. Set
|
||||
// g_lag_in_frames = 0 to get around this.
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
|
||||
info != frame_info_list_.end(); ++info) {
|
||||
const vpx_codec_pts_t pts = info->pts;
|
||||
if (pts >= kStepDownFrame && pts < kStepUpFrame) {
|
||||
if (pts >= 3 && pts < 6) {
|
||||
ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
|
||||
ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
|
||||
} else {
|
||||
|
@@ -17,6 +17,7 @@ extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "./vp8_rtcd.h"
|
||||
//#include "vp8/common/blockd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#include "./vp9_rtcd.h"
|
||||
@@ -427,7 +428,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
|
||||
|
||||
#if HAVE_SSE
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if CONFIG_USE_X86INC
|
||||
const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
|
||||
const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
|
||||
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
|
||||
@@ -441,7 +441,6 @@ INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
|
||||
make_tuple(4, 4, sad_4x4x4d_sse)));
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
#if CONFIG_VP8_ENCODER
|
||||
@@ -452,20 +451,14 @@ const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
|
||||
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if CONFIG_USE_X86INC
|
||||
const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
|
||||
const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
|
||||
const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
|
||||
const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
|
||||
const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
|
||||
const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
|
||||
const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
|
||||
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
|
||||
const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
|
||||
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
|
||||
const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
|
||||
const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
|
||||
#endif
|
||||
#endif
|
||||
const sad_m_by_n_test_param_t sse2_tests[] = {
|
||||
#if CONFIG_VP8_ENCODER
|
||||
make_tuple(16, 16, sad_16x16_wmt),
|
||||
@@ -475,25 +468,18 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
|
||||
make_tuple(4, 4, sad_4x4_wmt),
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if CONFIG_USE_X86INC
|
||||
make_tuple(64, 64, sad_64x64_sse2_vp9),
|
||||
make_tuple(64, 32, sad_64x32_sse2_vp9),
|
||||
make_tuple(32, 64, sad_32x64_sse2_vp9),
|
||||
make_tuple(32, 32, sad_32x32_sse2_vp9),
|
||||
make_tuple(32, 16, sad_32x16_sse2_vp9),
|
||||
make_tuple(16, 32, sad_16x32_sse2_vp9),
|
||||
make_tuple(16, 16, sad_16x16_sse2_vp9),
|
||||
make_tuple(16, 8, sad_16x8_sse2_vp9),
|
||||
make_tuple(8, 16, sad_8x16_sse2_vp9),
|
||||
make_tuple(16, 8, sad_16x8_sse2_vp9),
|
||||
make_tuple(8, 8, sad_8x8_sse2_vp9),
|
||||
make_tuple(8, 4, sad_8x4_sse2_vp9),
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
|
||||
|
||||
#if CONFIG_VP9_ENCODER
|
||||
#if CONFIG_USE_X86INC
|
||||
const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
|
||||
const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
|
||||
const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
|
||||
@@ -519,7 +505,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
|
||||
make_tuple(8, 4, sad_8x4x4d_sse2)));
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE3
|
||||
#if CONFIG_VP8_ENCODER
|
||||
@@ -538,11 +523,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#if CONFIG_USE_X86INC
|
||||
const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
|
||||
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_sse3)));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
@@ -17,19 +17,15 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
extern "C" {
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
|
||||
TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
|
||||
unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
|
||||
@@ -125,10 +121,10 @@ TEST(Vp8RoiMapTest, ParameterCheck) {
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
int rand_deltas[4];
|
||||
int deltas_valid;
|
||||
rand_deltas[0] = rnd(160) - 80;
|
||||
rand_deltas[1] = rnd(160) - 80;
|
||||
rand_deltas[2] = rnd(160) - 80;
|
||||
rand_deltas[3] = rnd(160) - 80;
|
||||
rand_deltas[0] = (rand() % 160) - 80;
|
||||
rand_deltas[1] = (rand() % 160) - 80;
|
||||
rand_deltas[2] = (rand() % 160) - 80;
|
||||
rand_deltas[3] = (rand() % 160) - 80;
|
||||
|
||||
deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
|
||||
(abs(rand_deltas[1]) <= 63) &&
|
||||
|
@@ -13,8 +13,8 @@
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
@@ -51,7 +51,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
|
||||
bd.predictor = reinterpret_cast<unsigned char*>(
|
||||
vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
|
||||
|
||||
for (int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
for(int i = 0; kSrcStride[i] > 0; ++i) {
|
||||
// start at block0
|
||||
be.src = 0;
|
||||
be.base_src = &source;
|
||||
@@ -61,7 +61,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
|
||||
int16_t *src_diff = be.src_diff;
|
||||
for (int r = 0; r < kBlockHeight; ++r) {
|
||||
for (int c = 0; c < kBlockWidth; ++c) {
|
||||
src_diff[c] = static_cast<int16_t>(0xa5a5);
|
||||
src_diff[c] = 0xa5a5;
|
||||
}
|
||||
src_diff += kDiffPredStride;
|
||||
}
|
||||
|
@@ -33,6 +33,10 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
|
||||
delete[] modified_buf_;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
|
@@ -520,12 +520,3 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f vp90-2-03-size-226x202.webm
|
||||
83c6d8f2969b759e10e5c6542baca1265c874c29 vp90-2-03-size-226x224.webm.md5
|
||||
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce vp90-2-03-size-226x226.webm
|
||||
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 vp90-2-03-size-226x226.webm.md5
|
||||
b6524e4084d15b5d0caaa3d3d1368db30cbee69c vp90-2-03-deltaq.webm
|
||||
65f45ec9a55537aac76104818278e0978f94a678 vp90-2-03-deltaq.webm.md5
|
||||
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba vp90-2-05-resize.ivf
|
||||
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
|
||||
bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe vp90-2-06-bilinear.webm
|
||||
f6235f937552e11d8eb331ec55da6b3aa596b9ac vp90-2-06-bilinear.webm.md5
|
||||
495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
|
||||
65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
|
||||
|
||||
|
13
test/test.mk
13
test/test.mk
@@ -24,9 +24,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
|
||||
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
|
||||
@@ -89,7 +87,6 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
|
||||
endif
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
|
||||
@@ -629,11 +626,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
|
||||
|
@@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <string>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_config.h"
|
||||
extern "C" {
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
@@ -48,9 +48,7 @@ int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
// Shared library builds don't support whitebox tests
|
||||
// that exercise internal symbols.
|
||||
|
||||
/* Shared library builds don't support whitebox tests that exercise internal symbols. */
|
||||
#if CONFIG_VP8
|
||||
vp8_rtcd();
|
||||
#endif
|
||||
|
@@ -159,11 +159,7 @@ const char *kVP9TestVectors[] = {
|
||||
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
|
||||
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
|
||||
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
|
||||
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
|
||||
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
|
||||
#if CONFIG_NON420
|
||||
"vp91-2-04-yv444.webm"
|
||||
#endif
|
||||
"vp90-2-03-size-226x226.webm"
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -185,7 +181,6 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
|
||||
|
||||
virtual void DecompressedFrameHook(const vpx_image_t& img,
|
||||
const unsigned int frame_number) {
|
||||
ASSERT_TRUE(md5_file_ != NULL);
|
||||
char expected_md5[33];
|
||||
char junk[128];
|
||||
|
||||
|
@@ -23,13 +23,10 @@ extern "C" {
|
||||
|
||||
namespace {
|
||||
class TileIndependenceTest : public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWithParam<int> {
|
||||
public ::libvpx_test::CodecTestWithParam<int> {
|
||||
protected:
|
||||
TileIndependenceTest()
|
||||
: EncoderTest(GET_PARAM(0)),
|
||||
md5_fw_order_(),
|
||||
md5_inv_order_(),
|
||||
n_tiles_(GET_PARAM(1)) {
|
||||
TileIndependenceTest() : EncoderTest(GET_PARAM(0)), n_tiles_(GET_PARAM(1)),
|
||||
md5_fw_order_(), md5_inv_order_() {
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
vpx_codec_dec_cfg_t cfg;
|
||||
cfg.w = 704;
|
||||
@@ -59,8 +56,9 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
|
||||
|
||||
void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
|
||||
::libvpx_test::MD5 *md5) {
|
||||
const vpx_codec_err_t res = dec->DecodeFrame(
|
||||
reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
|
||||
const vpx_codec_err_t res =
|
||||
dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
|
||||
pkt->data.frame.sz);
|
||||
if (res != VPX_CODEC_OK) {
|
||||
abort_ = true;
|
||||
ASSERT_EQ(VPX_CODEC_OK, res);
|
||||
@@ -74,11 +72,11 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
|
||||
UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
|
||||
}
|
||||
|
||||
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
|
||||
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
|
||||
|
||||
private:
|
||||
int n_tiles_;
|
||||
protected:
|
||||
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
|
||||
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
|
||||
};
|
||||
|
||||
// run an encode with 2 or 4 tiles, and do the decode both in normal and
|
||||
@@ -95,7 +93,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
|
||||
timebase.den, timebase.num, 0, 30);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
|
||||
const char *md5_fw_str = md5_fw_order_.Get();
|
||||
const char *md5_fw_str = md5_fw_order_.Get();
|
||||
const char *md5_inv_str = md5_inv_order_.Get();
|
||||
|
||||
// could use ASSERT_EQ(!memcmp(.., .., 16) here, but this gives nicer
|
||||
@@ -104,6 +102,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
|
||||
ASSERT_STREQ(md5_fw_str, md5_inv_str);
|
||||
}
|
||||
|
||||
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
|
||||
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest,
|
||||
::testing::Range(0, 2, 1));
|
||||
|
||||
} // namespace
|
||||
|
@@ -37,7 +37,7 @@ static double compute_psnr(const vpx_image_t *img1,
|
||||
img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j];
|
||||
sqrerr += d * d;
|
||||
}
|
||||
double mse = static_cast<double>(sqrerr) / (width_y * height_y);
|
||||
double mse = sqrerr / (width_y * height_y);
|
||||
double psnr = 100.0;
|
||||
if (mse > 0.0) {
|
||||
psnr = 10 * log10(255.0 * 255.0 / mse);
|
||||
|
@@ -16,16 +16,16 @@
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_config.h"
|
||||
extern "C" {
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#if CONFIG_VP8_ENCODER
|
||||
# include "vp8/common/variance.h"
|
||||
# include "./vp8_rtcd.h"
|
||||
# include "vp8_rtcd.h"
|
||||
#endif
|
||||
#if CONFIG_VP9_ENCODER
|
||||
# include "vp9/encoder/vp9_variance.h"
|
||||
# include "./vp9_rtcd.h"
|
||||
# include "vp9_rtcd.h"
|
||||
#endif
|
||||
}
|
||||
#include "test/acm_random.h"
|
||||
@@ -107,8 +107,8 @@ static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
|
||||
}
|
||||
|
||||
template<typename VarianceFunctionType>
|
||||
class VarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
class VarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
|
||||
@@ -191,9 +191,9 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
|
||||
}
|
||||
|
||||
template<typename SubpelVarianceFunctionType>
|
||||
class SubpelVarianceTest
|
||||
: public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
class SubpelVarianceTest :
|
||||
public ::testing::TestWithParam<tuple<int, int,
|
||||
SubpelVarianceFunctionType> > {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
const tuple<int, int, SubpelVarianceFunctionType>& params =
|
||||
@@ -218,7 +218,6 @@ class SubpelVarianceTest
|
||||
vpx_free(src_);
|
||||
delete[] ref_;
|
||||
vpx_free(sec_);
|
||||
libvpx_test::ClearSystemState();
|
||||
}
|
||||
|
||||
protected:
|
||||
@@ -483,7 +482,6 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
#if CONFIG_USE_X86INC
|
||||
const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
|
||||
const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
|
||||
const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
|
||||
@@ -597,11 +595,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(6, 5, subpel_avg_variance64x32_sse2),
|
||||
make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#if CONFIG_USE_X86INC
|
||||
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
|
||||
vp9_sub_pixel_variance4x4_ssse3;
|
||||
const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
|
||||
@@ -686,7 +681,6 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
|
||||
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
|
||||
#endif
|
||||
#endif
|
||||
#endif // CONFIG_VP9_ENCODER
|
||||
|
||||
} // namespace vp9
|
||||
|
@@ -8,6 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
@@ -20,11 +24,6 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
const int num_tests = 10;
|
||||
|
||||
@@ -45,7 +44,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
|
||||
int offset = input - (uint8_t *)decrypt_state;
|
||||
for (int i = 0; i < count; i++) {
|
||||
output[i] = input[i] ^ secret_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -59,10 +58,10 @@ TEST(VP8, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -77,14 +76,14 @@ TEST(VP8, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int kBufferSize = 10000;
|
||||
const int buffer_size = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
BOOL_CODER bw;
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize);
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -99,20 +98,19 @@ TEST(VP8, TestBitIO) {
|
||||
#if CONFIG_DECRYPT
|
||||
encrypt_buffer(bw_buffer, buffer_size);
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size,
|
||||
test_decrypt_cb,
|
||||
reinterpret_cast<void *>(bw_buffer));
|
||||
test_decrypt_cb, (void *)bw_buffer);
|
||||
#else
|
||||
vp8dx_start_decode(&br, bw_buffer, kBufferSize, NULL, NULL);
|
||||
vp8dx_start_decode(&br, bw_buffer, buffer_size, NULL, NULL);
|
||||
#endif
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
|
||||
<< "pos: "<< i << " / " << kBitsToTest
|
||||
<< "pos: "<< i << " / " << bits_to_test
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
@@ -26,8 +26,7 @@ const uint8_t test_key[16] = {
|
||||
0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
|
||||
};
|
||||
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst,
|
||||
int size, int offset = 0) {
|
||||
void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
dst[i] = src[i] ^ test_key[(offset + i) & 15];
|
||||
}
|
||||
@@ -35,11 +34,10 @@ void encrypt_buffer(const uint8_t *src, uint8_t *dst,
|
||||
|
||||
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
|
||||
uint8_t *output, int count) {
|
||||
encrypt_buffer(input, output, count,
|
||||
input - reinterpret_cast<uint8_t *>(decrypt_state));
|
||||
encrypt_buffer(input, output, count, input - (uint8_t *)decrypt_state);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
|
@@ -18,7 +18,7 @@
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
|
@@ -19,7 +19,7 @@ extern "C" {
|
||||
#include "vp9/decoder/vp9_dboolhuff.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
@@ -32,10 +32,10 @@ TEST(VP9, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
for (int method = 0; method <= 7; ++method) { // we generate various proba
|
||||
const int kBitsToTest = 1000;
|
||||
uint8_t probas[kBitsToTest];
|
||||
const int bits_to_test = 1000;
|
||||
uint8_t probas[bits_to_test];
|
||||
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
@@ -50,14 +50,14 @@ TEST(VP9, TestBitIO) {
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
const int kBufferSize = 10000;
|
||||
const int buffer_size = 10000;
|
||||
ACMRandom bit_rnd(random_seed);
|
||||
vp9_writer bw;
|
||||
uint8_t bw_buffer[kBufferSize];
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
vp9_start_encode(&bw, bw_buffer);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
@@ -72,16 +72,16 @@ TEST(VP9, TestBitIO) {
|
||||
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
|
||||
|
||||
vp9_reader br;
|
||||
vp9_reader_init(&br, bw_buffer, kBufferSize);
|
||||
vp9_reader_init(&br, bw_buffer, buffer_size);
|
||||
bit_rnd.Reset(random_seed);
|
||||
for (int i = 0; i < kBitsToTest; ++i) {
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
if (bit_method == 2) {
|
||||
bit = (i & 1);
|
||||
} else if (bit_method == 3) {
|
||||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
|
||||
<< "pos: " << i << " / " << kBitsToTest
|
||||
<< "pos: " << i << " / " << bits_to_test
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
|
||||
Use of this source code is governed by a BSD-style license
|
||||
that can be found in the LICENSE file in the root of the source
|
||||
tree. An additional intellectual property rights grant can be found
|
||||
in the file PATENTS. All contributing project authors may
|
||||
be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
namespace {
|
||||
|
||||
const int kMaxPsnr = 100;
|
||||
|
||||
class LossLessTest : public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
|
||||
protected:
|
||||
LossLessTest() : EncoderTest(GET_PARAM(0)),
|
||||
psnr_(kMaxPsnr),
|
||||
nframes_(0),
|
||||
encoding_mode_(GET_PARAM(1)) {
|
||||
}
|
||||
|
||||
virtual ~LossLessTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(encoding_mode_);
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
psnr_ = 0.0;
|
||||
nframes_ = 0;
|
||||
}
|
||||
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
if (pkt->data.psnr.psnr[0] < psnr_)
|
||||
psnr_= pkt->data.psnr.psnr[0];
|
||||
}
|
||||
|
||||
double GetMinPsnr() const {
|
||||
return psnr_;
|
||||
}
|
||||
|
||||
private:
|
||||
double psnr_;
|
||||
unsigned int nframes_;
|
||||
libvpx_test::TestMode encoding_mode_;
|
||||
};
|
||||
|
||||
TEST_P(LossLessTest, TestLossLessEncoding) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = 2000;
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 0;
|
||||
|
||||
init_flags_ = VPX_CODEC_USE_PSNR;
|
||||
|
||||
// intentionally changed the dimension for better testing coverage
|
||||
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
|
||||
timebase.den, timebase.num, 0, 30);
|
||||
|
||||
const double psnr_lossless = GetMinPsnr();
|
||||
EXPECT_GE(psnr_lossless, kMaxPsnr);
|
||||
}
|
||||
VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
|
||||
} // namespace
|
@@ -39,8 +39,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
// FIXME(rbultje) split in its own file
|
||||
for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
|
||||
bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
|
||||
for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
|
||||
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
|
||||
const int block_width = 4 << b_width_log2(bsize);
|
||||
const int block_height = 4 << b_height_log2(bsize);
|
||||
int16_t *diff = reinterpret_cast<int16_t *>(
|
||||
@@ -93,8 +93,9 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
|
||||
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
|
||||
::testing::Values(vp9_subtract_block_c));
|
||||
|
||||
#if HAVE_SSE2 && CONFIG_USE_X86INC
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
|
||||
::testing::Values(vp9_subtract_block_sse2));
|
||||
#endif
|
||||
|
||||
} // namespace vp9
|
||||
|
@@ -1,109 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9/decoder/vp9_thread.h"
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
#include "test/md5_helper.h"
|
||||
#include "test/webm_video_source.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class VP9WorkerThreadTest : public ::testing::Test {
|
||||
protected:
|
||||
virtual ~VP9WorkerThreadTest() {}
|
||||
virtual void SetUp() {
|
||||
vp9_worker_init(&worker_);
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
vp9_worker_end(&worker_);
|
||||
}
|
||||
|
||||
VP9Worker worker_;
|
||||
};
|
||||
|
||||
int ThreadHook(void* data, void* return_value) {
|
||||
int* const hook_data = reinterpret_cast<int*>(data);
|
||||
*hook_data = 5;
|
||||
return *reinterpret_cast<int*>(return_value);
|
||||
}
|
||||
|
||||
TEST_F(VP9WorkerThreadTest, HookSuccess) {
|
||||
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
EXPECT_TRUE(vp9_worker_reset(&worker_));
|
||||
|
||||
int hook_data = 0;
|
||||
int return_value = 1; // return successfully from the hook
|
||||
worker_.hook = ThreadHook;
|
||||
worker_.data1 = &hook_data;
|
||||
worker_.data2 = &return_value;
|
||||
|
||||
vp9_worker_launch(&worker_);
|
||||
EXPECT_TRUE(vp9_worker_sync(&worker_));
|
||||
EXPECT_FALSE(worker_.had_error);
|
||||
EXPECT_EQ(5, hook_data);
|
||||
|
||||
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VP9WorkerThreadTest, HookFailure) {
|
||||
EXPECT_TRUE(vp9_worker_reset(&worker_));
|
||||
|
||||
int hook_data = 0;
|
||||
int return_value = 0; // return failure from the hook
|
||||
worker_.hook = ThreadHook;
|
||||
worker_.data1 = &hook_data;
|
||||
worker_.data2 = &return_value;
|
||||
|
||||
vp9_worker_launch(&worker_);
|
||||
EXPECT_FALSE(vp9_worker_sync(&worker_));
|
||||
EXPECT_TRUE(worker_.had_error);
|
||||
|
||||
// Ensure _reset() clears the error and _launch() can be called again.
|
||||
return_value = 1;
|
||||
EXPECT_TRUE(vp9_worker_reset(&worker_));
|
||||
EXPECT_FALSE(worker_.had_error);
|
||||
vp9_worker_launch(&worker_);
|
||||
EXPECT_TRUE(vp9_worker_sync(&worker_));
|
||||
EXPECT_FALSE(worker_.had_error);
|
||||
}
|
||||
|
||||
// Decodes a WebM test clip with the decoder's thread count set to 2 and
// compares the MD5 accumulated over every returned image with a fixed digest.
// NOTE(review): the expected digest is presumably the reference output for
// this clip -- confirm where it comes from before changing the clip or config.
TEST(VP9DecodeMTTest, MTDecode) {
|
||||
libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
|
||||
video.Init();
|
||||
|
||||
// Zero-initialise the decoder config, then set only the thread count.
vpx_codec_dec_cfg_t cfg = {0};
|
||||
cfg.threads = 2;
|
||||
libvpx_test::VP9Decoder decoder(cfg, 0);
|
||||
|
||||
libvpx_test::MD5 md5;
|
||||
// Walk the clip one compressed frame at a time; the loop ends once cxdata()
// evaluates to false.
for (video.Begin(); video.cxdata(); video.Next()) {
|
||||
const vpx_codec_err_t res =
|
||||
decoder.DecodeFrame(video.cxdata(), video.frame_size());
|
||||
// Fatal assertion: stop at the first decode failure and attach the
// decoder's error text to the report.
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
|
||||
|
||||
libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
|
||||
const vpx_image_t *img = NULL;
|
||||
|
||||
// Get decompressed data
|
||||
// Drain the iterator until Next() returns NULL, hashing each image.
while ((img = dec_iter.Next())) {
|
||||
md5.Add(img);
|
||||
}
|
||||
}
|
||||
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
|
||||
}
|
||||
|
||||
} // namespace
|
@@ -99,7 +99,7 @@ class WebMVideoSource : public CompressedVideoSource {
|
||||
|
||||
virtual void Begin() {
|
||||
input_file_ = OpenTestDataFile(file_name_);
|
||||
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
|
||||
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
|
||||
<< file_name_;
|
||||
|
||||
nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
|
||||
@@ -130,7 +130,6 @@ class WebMVideoSource : public CompressedVideoSource {
|
||||
}
|
||||
|
||||
void FillFrame() {
|
||||
ASSERT_TRUE(input_file_ != NULL);
|
||||
if (chunk_ >= chunks_) {
|
||||
unsigned int track;
|
||||
|
||||
|
12
third_party/libyuv/source/scale.c
vendored
12
third_party/libyuv/source/scale.c
vendored
@@ -1370,12 +1370,12 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
mov edx, [esp + 8 + 12] // src_stride
|
||||
mov ecx, [esp + 8 + 16] // dst_width
|
||||
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
|
||||
shr eax, 1
|
||||
cmp eax, 0
|
||||
je xloop1
|
||||
cmp eax, 64
|
||||
cmp eax, 128
|
||||
je xloop2
|
||||
|
||||
shr eax, 1
|
||||
mov ah,al
|
||||
neg al
|
||||
add al, 128
|
||||
@@ -2132,11 +2132,11 @@ void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
||||
"mov 0x14(%esp),%edx \n"
|
||||
"mov 0x18(%esp),%ecx \n"
|
||||
"mov 0x1c(%esp),%eax \n"
|
||||
"shr %eax \n"
|
||||
"cmp $0x0,%eax \n"
|
||||
"je 2f \n"
|
||||
"cmp $0x40,%eax \n"
|
||||
"cmp $0x80,%eax \n"
|
||||
"je 3f \n"
|
||||
"shr %eax \n"
|
||||
"mov %al,%ah \n"
|
||||
"neg %al \n"
|
||||
"add $0x80,%al \n"
|
||||
@@ -2662,7 +2662,6 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
|
||||
static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
||||
const uint8* src_ptr, int src_stride,
|
||||
int dst_width, int source_y_fraction) {
|
||||
source_y_fraction >>= 1;
|
||||
if (source_y_fraction == 0) {
|
||||
asm volatile (
|
||||
"1:"
|
||||
@@ -2681,7 +2680,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
||||
: "memory", "cc", "rax"
|
||||
);
|
||||
return;
|
||||
} else if (source_y_fraction == 64) {
|
||||
} else if (source_y_fraction == 128) {
|
||||
asm volatile (
|
||||
"1:"
|
||||
"movdqa (%1),%%xmm0 \n"
|
||||
@@ -2704,6 +2703,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
|
||||
} else {
|
||||
asm volatile (
|
||||
"mov %3,%%eax \n"
|
||||
"shr %%eax \n"
|
||||
"mov %%al,%%ah \n"
|
||||
"neg %%al \n"
|
||||
"add $0x80,%%al \n"
|
||||
|
@@ -173,6 +173,7 @@ void vp8_create_common(VP8_COMMON *oci)
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clr_type = REG_YUV;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
|
@@ -41,8 +41,7 @@ extern "C"
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2,
|
||||
USAGE_CONSTANT_QUALITY = 0x3
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
|
@@ -72,6 +72,7 @@ typedef struct VP8Common
|
||||
int horiz_scale;
|
||||
int vert_scale;
|
||||
|
||||
YUV_TYPE clr_type;
|
||||
CLAMP_TYPE clamp_type;
|
||||
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
@@ -114,6 +115,9 @@ typedef struct VP8Common
|
||||
int uvdc_delta_q;
|
||||
int uvac_delta_q;
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
|
||||
@@ -153,6 +157,7 @@ typedef struct VP8Common
|
||||
|
||||
unsigned int current_video_frame;
|
||||
|
||||
int near_boffset[3];
|
||||
int version;
|
||||
|
||||
TOKEN_PARTITION multi_token_partition;
|
||||
@@ -160,10 +165,8 @@ typedef struct VP8Common
|
||||
#ifdef PACKET_TESTING
|
||||
VP8_HEADER oh;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
double bitrate;
|
||||
double framerate;
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
|
@@ -923,7 +923,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
||||
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
|
||||
{
|
||||
char message[512];
|
||||
sprintf(message, "Bitrate: %10.2f framerate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
|
52
vp8/common/vp8_asm_com_offsets.c
Normal file
52
vp8/common/vp8_asm_com_offsets.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx_ports/asm_offsets.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
#include "postproc.h"
|
||||
#endif /* CONFIG_POSTPROC */
|
||||
|
||||
/* Table of named constants bracketed by BEGIN / END.
 * NOTE(review): BEGIN, DEFINE and END come from vpx_ports/asm_offsets.h, which
 * is not visible here; presumably each DEFINE(name, value) publishes `value`
 * under `name` for use by assembly sources -- confirm against that header. */
BEGIN
|
||||
|
||||
/* The single entry is only present when post-processing is configured in. */
#if CONFIG_POSTPROC
|
||||
/* mfqe.c / filter_by_weight */
|
||||
DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION);
|
||||
#endif /* CONFIG_POSTPROC */
|
||||
|
||||
END
|
||||
|
||||
/* add asserts for any offset that is not supported by assembly code */
|
||||
/* add asserts for any size that is not supported by assembly code */
|
||||
|
||||
/* Compile-time checks that pin the numeric values of the B_*_PRED
 * enumerators (0 through 9) when HAVE_MEDIA is set. */
#if HAVE_MEDIA
|
||||
/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */
|
||||
ct_assert(B_DC_PRED, B_DC_PRED == 0);
|
||||
ct_assert(B_TM_PRED, B_TM_PRED == 1);
|
||||
ct_assert(B_VE_PRED, B_VE_PRED == 2);
|
||||
ct_assert(B_HE_PRED, B_HE_PRED == 3);
|
||||
ct_assert(B_LD_PRED, B_LD_PRED == 4);
|
||||
ct_assert(B_RD_PRED, B_RD_PRED == 5);
|
||||
ct_assert(B_VR_PRED, B_VR_PRED == 6);
|
||||
ct_assert(B_VL_PRED, B_VL_PRED == 7);
|
||||
ct_assert(B_HD_PRED, B_HD_PRED == 8);
|
||||
ct_assert(B_HU_PRED, B_HU_PRED == 9);
|
||||
#endif
|
||||
|
||||
/* Checked only when both HAVE_SSE2 and CONFIG_POSTPROC are set. */
#if HAVE_SSE2
|
||||
#if CONFIG_POSTPROC
|
||||
/* vp8_filter_by_weight16x16 and 8x8 */
|
||||
/* Requires MFQE_PRECISION to be exactly 4.
 * NOTE(review): unlike the asserts in the HAVE_MEDIA group, this one has no
 * trailing ';'. Whether that matters depends on how ct_assert expands (not
 * visible here) -- confirm before normalising either way. */
ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)
|
||||
#endif /* CONFIG_POSTPROC */
|
||||
#endif /* HAVE_SSE2 */
|
@@ -1095,7 +1095,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate bool decoder 0");
|
||||
if (pc->frame_type == KEY_FRAME) {
|
||||
(void)vp8_read_bit(bc); // colorspace
|
||||
pc->clr_type = (YUV_TYPE)vp8_read_bit(bc);
|
||||
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
|
||||
}
|
||||
|
||||
|
@@ -430,6 +430,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
|
||||
*time_stamp = pbi->last_time_stamp;
|
||||
*time_end_stamp = 0;
|
||||
|
||||
sd->clrtype = pbi->common.clr_type;
|
||||
#if CONFIG_POSTPROC
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
|
||||
#else
|
||||
|
26
vp8/decoder/vp8_asm_dec_offsets.c
Normal file
26
vp8/decoder/vp8_asm_dec_offsets.c
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/asm_offsets.h"
|
||||
#include "onyxd_int.h"
|
||||
|
||||
/* Table pairing a symbolic name with the byte offset (via offsetof) of each
 * listed BOOL_DECODER field.
 * NOTE(review): BEGIN, DEFINE and END come from vpx_ports/asm_offsets.h, which
 * is not visible here; presumably the names are consumed by assembly sources
 * that access BOOL_DECODER directly -- confirm against that header. */
BEGIN
|
||||
|
||||
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
|
||||
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
|
||||
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
|
||||
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
|
||||
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
|
||||
|
||||
END
|
||||
|
||||
/* add asserts for any offset that is not supported by assembly code */
|
||||
/* add asserts for any size that is not supported by assembly code */
|
@@ -1322,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
|
||||
vp8_start_encode(bc, cx_data, cx_data_end);
|
||||
|
||||
/* signal clr type */
|
||||
vp8_write_bit(bc, 0);
|
||||
vp8_write_bit(bc, pc->clr_type);
|
||||
vp8_write_bit(bc, pc->clamp_type);
|
||||
|
||||
}
|
||||
|
@@ -1325,7 +1325,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
return Q;
|
||||
}
|
||||
|
||||
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
|
||||
extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
|
||||
|
||||
void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
@@ -1349,9 +1349,9 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
* sum duration is not. Its calculated based on the actual durations of
|
||||
* all frames from the first pass.
|
||||
*/
|
||||
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
|
||||
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
|
||||
|
||||
cpi->output_framerate = cpi->framerate;
|
||||
cpi->output_frame_rate = cpi->frame_rate;
|
||||
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
|
||||
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
|
||||
|
||||
@@ -2398,7 +2398,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
target_frame_size += cpi->min_frame_bandwidth;
|
||||
|
||||
/* Every other frame gets a few extra bits */
|
||||
if ( (cpi->frames_since_golden & 0x01) &&
|
||||
if ( (cpi->common.frames_since_golden & 0x01) &&
|
||||
(cpi->frames_till_gf_update_due > 0) )
|
||||
{
|
||||
target_frame_size += cpi->twopass.alt_extra_bits;
|
||||
@@ -2529,7 +2529,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
|
||||
/* Set nominal per second bandwidth for this frame */
|
||||
cpi->target_bandwidth = (int)
|
||||
(cpi->per_frame_bandwidth * cpi->output_framerate);
|
||||
(cpi->per_frame_bandwidth * cpi->output_frame_rate);
|
||||
if (cpi->target_bandwidth < 0)
|
||||
cpi->target_bandwidth = 0;
|
||||
|
||||
@@ -3185,7 +3185,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
/* Convert to a per second bitrate */
|
||||
cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
|
||||
cpi->output_framerate);
|
||||
cpi->output_frame_rate);
|
||||
}
|
||||
|
||||
/* Note the total error score of the kf group minus the key frame itself */
|
||||
@@ -3224,7 +3224,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
cpi->common.vert_scale = NORMAL;
|
||||
|
||||
/* Calculate Average bits per frame. */
|
||||
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate);
|
||||
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
|
||||
|
||||
/* CBR... Use the clip average as the target for deciding resample */
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
@@ -3299,7 +3299,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
}
|
||||
else
|
||||
{
|
||||
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate));
|
||||
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
|
||||
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
|
||||
|
||||
/* If triggered last time the threshold for triggering again is
|
||||
|
@@ -301,11 +301,11 @@ static int rescale(int val, int num, int denom)
|
||||
static void init_temporal_layer_context(VP8_COMP *cpi,
|
||||
VP8_CONFIG *oxcf,
|
||||
const int layer,
|
||||
double prev_layer_framerate)
|
||||
double prev_layer_frame_rate)
|
||||
{
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
|
||||
|
||||
lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
|
||||
lc->frame_rate = cpi->output_frame_rate / cpi->oxcf.rate_decimator[layer];
|
||||
lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000;
|
||||
|
||||
lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
|
||||
@@ -335,7 +335,7 @@ static void init_temporal_layer_context(VP8_COMP *cpi,
|
||||
lc->avg_frame_size_for_layer =
|
||||
(int)((cpi->oxcf.target_bitrate[layer] -
|
||||
cpi->oxcf.target_bitrate[layer-1]) * 1000 /
|
||||
(lc->framerate - prev_layer_framerate));
|
||||
(lc->frame_rate - prev_layer_frame_rate));
|
||||
|
||||
lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
lc->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
@@ -363,7 +363,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
|
||||
const int prev_num_layers)
|
||||
{
|
||||
int i;
|
||||
double prev_layer_framerate = 0;
|
||||
double prev_layer_frame_rate = 0;
|
||||
const int curr_num_layers = cpi->oxcf.number_of_layers;
|
||||
// If the previous state was 1 layer, get current layer context from cpi.
|
||||
// We need this to set the layer context for the new layers below.
|
||||
@@ -377,7 +377,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[i];
|
||||
if (i >= prev_num_layers)
|
||||
{
|
||||
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
|
||||
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
|
||||
}
|
||||
// The initial buffer levels are set based on their starting levels.
|
||||
// We could set the buffer levels based on the previous state (normalized
|
||||
@@ -403,8 +403,8 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
|
||||
lc->bits_off_target = lc->buffer_level;
|
||||
restore_layer_context(cpi, 0);
|
||||
}
|
||||
prev_layer_framerate = cpi->output_framerate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
prev_layer_frame_rate = cpi->output_frame_rate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1282,21 +1282,21 @@ int vp8_reverse_trans(int x)
|
||||
|
||||
return 63;
|
||||
}
|
||||
void vp8_new_framerate(VP8_COMP *cpi, double framerate)
|
||||
void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
|
||||
{
|
||||
if(framerate < .1)
|
||||
framerate = 30;
|
||||
|
||||
cpi->framerate = framerate;
|
||||
cpi->output_framerate = framerate;
|
||||
cpi->frame_rate = framerate;
|
||||
cpi->output_frame_rate = framerate;
|
||||
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth /
|
||||
cpi->output_framerate);
|
||||
cpi->output_frame_rate);
|
||||
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
|
||||
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
|
||||
cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
|
||||
/* Set Maximum gf/arf interval */
|
||||
cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2);
|
||||
cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
|
||||
|
||||
if(cpi->max_gf_interval < 12)
|
||||
cpi->max_gf_interval = 12;
|
||||
@@ -1337,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
* seems like a reasonable framerate, then use that as a guess, otherwise
|
||||
* use 30.
|
||||
*/
|
||||
cpi->framerate = (double)(oxcf->timebase.den) /
|
||||
(double)(oxcf->timebase.num);
|
||||
cpi->frame_rate = (double)(oxcf->timebase.den) /
|
||||
(double)(oxcf->timebase.num);
|
||||
|
||||
if (cpi->framerate > 180)
|
||||
cpi->framerate = 30;
|
||||
if (cpi->frame_rate > 180)
|
||||
cpi->frame_rate = 30;
|
||||
|
||||
cpi->ref_framerate = cpi->framerate;
|
||||
cpi->ref_frame_rate = cpi->frame_rate;
|
||||
|
||||
/* change includes all joint functionality */
|
||||
vp8_change_config(cpi, oxcf);
|
||||
@@ -1369,13 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
{
|
||||
unsigned int i;
|
||||
double prev_layer_framerate=0;
|
||||
double prev_layer_frame_rate=0;
|
||||
|
||||
for (i=0; i<cpi->oxcf.number_of_layers; i++)
|
||||
{
|
||||
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
|
||||
prev_layer_framerate = cpi->output_framerate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
|
||||
prev_layer_frame_rate = cpi->output_frame_rate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1399,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi)
|
||||
if (oxcf->number_of_layers > 1)
|
||||
{
|
||||
unsigned int i;
|
||||
double prev_layer_framerate=0;
|
||||
double prev_layer_frame_rate=0;
|
||||
|
||||
for (i=0; i<oxcf->number_of_layers; i++)
|
||||
{
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[i];
|
||||
|
||||
lc->framerate =
|
||||
cpi->ref_framerate / oxcf->rate_decimator[i];
|
||||
lc->frame_rate =
|
||||
cpi->ref_frame_rate / oxcf->rate_decimator[i];
|
||||
lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
|
||||
|
||||
lc->starting_buffer_level = rescale(
|
||||
@@ -1432,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi)
|
||||
lc->avg_frame_size_for_layer =
|
||||
(int)((oxcf->target_bitrate[i] -
|
||||
oxcf->target_bitrate[i-1]) * 1000 /
|
||||
(lc->framerate - prev_layer_framerate));
|
||||
(lc->frame_rate - prev_layer_frame_rate));
|
||||
|
||||
prev_layer_framerate = lc->framerate;
|
||||
prev_layer_frame_rate = lc->frame_rate;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1625,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
/* Set up frame rate and related parameters rate control values. */
|
||||
vp8_new_framerate(cpi, cpi->framerate);
|
||||
vp8_new_frame_rate(cpi, cpi->frame_rate);
|
||||
|
||||
/* Set absolute upper and lower quality limits */
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
@@ -1945,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
|
||||
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
|
||||
{
|
||||
cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate;
|
||||
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
|
||||
}
|
||||
|
||||
#ifdef OUTPUT_YUV_SRC
|
||||
@@ -2273,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
||||
{
|
||||
extern int count_mb_seg[4];
|
||||
FILE *f = fopen("modes.stt", "a");
|
||||
double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ;
|
||||
double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
|
||||
fprintf(f, "intra_mode in Intra Frames:\n");
|
||||
fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
|
||||
fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
|
||||
@@ -2750,7 +2750,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
|
||||
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
|
||||
/* this frame refreshes means next frames don't unless specified by user */
|
||||
cpi->frames_since_golden = 0;
|
||||
cpi->common.frames_since_golden = 0;
|
||||
|
||||
/* Clear the alternate reference update pending flag. */
|
||||
cpi->source_alt_ref_pending = 0;
|
||||
@@ -2802,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
|
||||
* user
|
||||
*/
|
||||
cm->refresh_golden_frame = 0;
|
||||
cpi->frames_since_golden = 0;
|
||||
cpi->common.frames_since_golden = 0;
|
||||
|
||||
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
|
||||
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
|
||||
@@ -2834,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
|
||||
if (cpi->frames_till_gf_update_due > 0)
|
||||
cpi->frames_till_gf_update_due--;
|
||||
|
||||
if (cpi->frames_till_alt_ref_frame)
|
||||
cpi->frames_till_alt_ref_frame --;
|
||||
if (cpi->common.frames_till_alt_ref_frame)
|
||||
cpi->common.frames_till_alt_ref_frame --;
|
||||
|
||||
cpi->frames_since_golden ++;
|
||||
cpi->common.frames_since_golden ++;
|
||||
|
||||
if (cpi->frames_since_golden > 1)
|
||||
if (cpi->common.frames_since_golden > 1)
|
||||
{
|
||||
cpi->recent_ref_frame_usage[INTRA_FRAME] +=
|
||||
cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
|
||||
@@ -2890,11 +2890,11 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
|
||||
cpi->prob_last_coded = 200;
|
||||
cpi->prob_gf_coded = 1;
|
||||
}
|
||||
else if (cpi->frames_since_golden == 0)
|
||||
else if (cpi->common.frames_since_golden == 0)
|
||||
{
|
||||
cpi->prob_last_coded = 214;
|
||||
}
|
||||
else if (cpi->frames_since_golden == 1)
|
||||
else if (cpi->common.frames_since_golden == 1)
|
||||
{
|
||||
cpi->prob_last_coded = 192;
|
||||
cpi->prob_gf_coded = 220;
|
||||
@@ -3368,12 +3368,12 @@ static void encode_frame_to_data_rate
|
||||
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
|
||||
/* per second target bitrate */
|
||||
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
|
||||
cpi->output_framerate);
|
||||
cpi->output_frame_rate);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate);
|
||||
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate);
|
||||
|
||||
/* Default turn off buffer to buffer copying */
|
||||
cm->copy_buffer_to_gf = 0;
|
||||
@@ -4557,7 +4557,7 @@ static void encode_frame_to_data_rate
|
||||
{
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[i];
|
||||
int bits_off_for_this_layer =
|
||||
(int)(lc->target_bandwidth / lc->framerate -
|
||||
(int)(lc->target_bandwidth / lc->frame_rate -
|
||||
cpi->projected_frame_size);
|
||||
|
||||
lc->bits_off_target += bits_off_for_this_layer;
|
||||
@@ -4805,7 +4805,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
|
||||
{
|
||||
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
|
||||
*cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate);
|
||||
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -4821,10 +4821,8 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
|
||||
{
|
||||
#if HAVE_NEON
|
||||
int64_t store_reg[8];
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#endif
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
#endif
|
||||
#endif
|
||||
struct vpx_usec_timer timer;
|
||||
int res = 0;
|
||||
|
||||
@@ -4850,6 +4848,7 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
|
||||
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
|
||||
frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
|
||||
res = -1;
|
||||
cm->clr_type = sd->clrtype;
|
||||
vpx_usec_timer_mark(&timer);
|
||||
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
|
||||
|
||||
@@ -4934,7 +4933,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
cpi->frames_till_gf_update_due);
|
||||
force_src_buffer = &cpi->alt_ref_buffer;
|
||||
}
|
||||
cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
|
||||
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
|
||||
cm->refresh_alt_ref_frame = 1;
|
||||
cm->refresh_golden_frame = 0;
|
||||
cm->refresh_last_frame = 0;
|
||||
@@ -5039,7 +5038,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
if (this_duration)
|
||||
{
|
||||
if (step)
|
||||
cpi->ref_framerate = 10000000.0 / this_duration;
|
||||
cpi->ref_frame_rate = 10000000.0 / this_duration;
|
||||
else
|
||||
{
|
||||
double avg_duration, interval;
|
||||
@@ -5053,11 +5052,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
if(interval > 10000000.0)
|
||||
interval = 10000000;
|
||||
|
||||
avg_duration = 10000000.0 / cpi->ref_framerate;
|
||||
avg_duration = 10000000.0 / cpi->ref_frame_rate;
|
||||
avg_duration *= (interval - avg_duration + this_duration);
|
||||
avg_duration /= interval;
|
||||
|
||||
cpi->ref_framerate = 10000000.0 / avg_duration;
|
||||
cpi->ref_frame_rate = 10000000.0 / avg_duration;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
@@ -5068,12 +5067,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
for (i=0; i<cpi->oxcf.number_of_layers; i++)
|
||||
{
|
||||
LAYER_CONTEXT *lc = &cpi->layer_context[i];
|
||||
lc->framerate = cpi->ref_framerate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
lc->frame_rate = cpi->ref_frame_rate /
|
||||
cpi->oxcf.rate_decimator[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
vp8_new_framerate(cpi, cpi->ref_framerate);
|
||||
vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
|
||||
}
|
||||
|
||||
cpi->last_time_stamp_seen = cpi->source->ts_start;
|
||||
@@ -5090,7 +5089,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
layer = cpi->oxcf.layer_id[
|
||||
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
|
||||
restore_layer_context (cpi, layer);
|
||||
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
|
||||
vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
|
||||
}
|
||||
|
||||
if (cpi->compressor_speed == 2)
|
||||
|
@@ -232,7 +232,7 @@ enum
|
||||
typedef struct
|
||||
{
|
||||
/* Layer configuration */
|
||||
double framerate;
|
||||
double frame_rate;
|
||||
int target_bandwidth;
|
||||
|
||||
/* Layer specific coding parameters */
|
||||
@@ -320,7 +320,6 @@ typedef struct VP8_COMP
|
||||
YV12_BUFFER_CONFIG scaled_source;
|
||||
YV12_BUFFER_CONFIG *last_frame_unscaled_source;
|
||||
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
/* frame in src_buffers has been identified to be encoded as an alt ref */
|
||||
int source_alt_ref_pending;
|
||||
/* an alt ref frame has been encoded and is usable */
|
||||
@@ -370,7 +369,6 @@ typedef struct VP8_COMP
|
||||
double key_frame_rate_correction_factor;
|
||||
double gf_rate_correction_factor;
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
/* Count down till next GF */
|
||||
int frames_till_gf_update_due;
|
||||
|
||||
@@ -403,7 +401,7 @@ typedef struct VP8_COMP
|
||||
/* Minimum allocation that should be used for any frame */
|
||||
int min_frame_bandwidth;
|
||||
int inter_frame_target;
|
||||
double output_framerate;
|
||||
double output_frame_rate;
|
||||
int64_t last_time_stamp_seen;
|
||||
int64_t last_end_time_stamp_seen;
|
||||
int64_t first_time_stamp_ever;
|
||||
@@ -417,8 +415,8 @@ typedef struct VP8_COMP
|
||||
|
||||
int buffered_mode;
|
||||
|
||||
double framerate;
|
||||
double ref_framerate;
|
||||
double frame_rate;
|
||||
double ref_frame_rate;
|
||||
int64_t buffer_level;
|
||||
int64_t bits_off_target;
|
||||
|
||||
|
@@ -313,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
/* Get baseline error score */
|
||||
|
||||
/* Copy the unfiltered / processed recon buffer to the new buffer */
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
|
||||
vp8cx_set_alt_lf_level(cpi, filt_mid);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
|
||||
@@ -339,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
if(ss_err[filt_low] == 0)
|
||||
{
|
||||
/* Get Low filter error score */
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_low);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
|
||||
|
||||
@@ -367,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
{
|
||||
if(ss_err[filt_high] == 0)
|
||||
{
|
||||
vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
|
||||
vp8cx_set_alt_lf_level(cpi, filt_high);
|
||||
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
|
||||
|
||||
|
@@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
|
||||
cc->frames_since_key = cpi->frames_since_key;
|
||||
cc->filter_level = cpi->common.filter_level;
|
||||
cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due;
|
||||
cc->frames_since_golden = cpi->frames_since_golden;
|
||||
cc->frames_since_golden = cpi->common.frames_since_golden;
|
||||
|
||||
vp8_copy(cc->mvc, cpi->common.fc.mvc);
|
||||
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
|
||||
@@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
|
||||
cpi->frames_since_key = cc->frames_since_key;
|
||||
cpi->common.filter_level = cc->filter_level;
|
||||
cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due;
|
||||
cpi->frames_since_golden = cc->frames_since_golden;
|
||||
cpi->common.frames_since_golden = cc->frames_since_golden;
|
||||
|
||||
vp8_copy(cpi->common.fc.mvc, cc->mvc);
|
||||
|
||||
@@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
|
||||
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
|
||||
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
|
||||
if (cpi->oxcf.number_of_layers == 1) {
|
||||
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
|
||||
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
|
||||
}
|
||||
else {
|
||||
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
|
||||
@@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
|
||||
kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
|
||||
|
||||
/* frame separation adjustment ( down) */
|
||||
if (cpi->frames_since_key < cpi->output_framerate / 2)
|
||||
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
|
||||
kf_boost = (int)(kf_boost
|
||||
* cpi->frames_since_key / (cpi->output_framerate / 2));
|
||||
* cpi->frames_since_key / (cpi->output_frame_rate / 2));
|
||||
|
||||
/* Minimal target size is |2* per_frame_bandwidth|. */
|
||||
if (kf_boost < 16)
|
||||
@@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
|
||||
if (Adjustment > (cpi->this_frame_target - min_frame_target))
|
||||
Adjustment = (cpi->this_frame_target - min_frame_target);
|
||||
|
||||
if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
|
||||
if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1))
|
||||
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
|
||||
else
|
||||
cpi->this_frame_target -= Adjustment;
|
||||
@@ -1360,7 +1360,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
|
||||
* whichever is smaller.
|
||||
*/
|
||||
int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
|
||||
av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2;
|
||||
av_key_frame_frequency = 1 + (int)cpi->output_frame_rate * 2;
|
||||
|
||||
if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
|
||||
av_key_frame_frequency = key_freq;
|
||||
|
@@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
|
||||
|
||||
void vp8_auto_select_speed(VP8_COMP *cpi)
|
||||
{
|
||||
int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
|
||||
int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
|
||||
|
||||
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
|
||||
|
||||
|
@@ -66,6 +66,7 @@ VP8_COMMON_SRCS-yes += common/setupintrarecon.c
|
||||
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
|
||||
VP8_COMMON_SRCS-yes += common/variance_c.c
|
||||
VP8_COMMON_SRCS-yes += common/variance.h
|
||||
VP8_COMMON_SRCS-yes += common/vp8_asm_com_offsets.c
|
||||
VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h
|
||||
|
||||
|
||||
@@ -191,4 +192,7 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(A
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
|
||||
|
||||
$(eval $(call asm_offsets_template,\
|
||||
vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/vp8_asm_com_offsets.c))
|
||||
|
||||
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
|
||||
|
@@ -153,7 +153,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
#else
|
||||
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
|
||||
#endif
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
|
||||
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
|
||||
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
|
||||
@@ -204,7 +204,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
|
||||
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
|
||||
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
|
||||
if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
|
||||
if(finalize && cfg->rc_end_usage == VPX_CQ)
|
||||
RANGE_CHECK(vp8_cfg, cq_level,
|
||||
cfg->rc_min_quantizer, cfg->rc_max_quantizer);
|
||||
|
||||
@@ -327,14 +327,17 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
oxcf->resample_up_water_mark = cfg.rc_resize_up_thresh;
|
||||
oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh;
|
||||
|
||||
if (cfg.rc_end_usage == VPX_VBR) {
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
} else if (cfg.rc_end_usage == VPX_CBR) {
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
} else if (cfg.rc_end_usage == VPX_CQ) {
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
} else if (cfg.rc_end_usage == VPX_Q) {
|
||||
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
|
||||
if (cfg.rc_end_usage == VPX_VBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CBR)
|
||||
{
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CQ)
|
||||
{
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
}
|
||||
|
||||
oxcf->target_bandwidth = cfg.rc_target_bitrate;
|
||||
@@ -692,6 +695,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
|
||||
yv12->uv_stride = img->stride[VPX_PLANE_U];
|
||||
|
||||
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
|
||||
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -1075,7 +1079,11 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
|
||||
ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
|
||||
ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;
|
||||
|
||||
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
|
||||
if (sd.clrtype == REG_YUV)
|
||||
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
|
||||
else
|
||||
ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
|
||||
|
||||
ctx->preview_img.x_chroma_shift = 1;
|
||||
ctx->preview_img.y_chroma_shift = 1;
|
||||
|
||||
@@ -1269,7 +1277,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
||||
1, /* g_delete_first_pass_file */
|
||||
"vp8.fpf" /* first pass filename */
|
||||
#endif
|
||||
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
|
||||
|
||||
1, /* ts_number_layers */
|
||||
{0}, /* ts_target_bitrate */
|
||||
{0}, /* ts_rate_decimator */
|
||||
|
@@ -41,6 +41,15 @@ typedef enum
|
||||
|
||||
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int id;
|
||||
unsigned long sz;
|
||||
unsigned int align;
|
||||
unsigned int flags;
|
||||
unsigned long(*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);
|
||||
} mem_req_t;
|
||||
|
||||
static const mem_req_t vp8_mem_req_segs[] =
|
||||
{
|
||||
{VP8_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz},
|
||||
@@ -84,6 +93,65 @@ static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_
|
||||
return sizeof(vpx_codec_alg_priv_t);
|
||||
}
|
||||
|
||||
|
||||
static void vp8_mmap_dtor(vpx_codec_mmap_t *mmap)
|
||||
{
|
||||
free(mmap->priv);
|
||||
}
|
||||
|
||||
static vpx_codec_err_t vp8_mmap_alloc(vpx_codec_mmap_t *mmap)
|
||||
{
|
||||
vpx_codec_err_t res;
|
||||
unsigned int align;
|
||||
|
||||
align = mmap->align ? mmap->align - 1 : 0;
|
||||
|
||||
if (mmap->flags & VPX_CODEC_MEM_ZERO)
|
||||
mmap->priv = calloc(1, mmap->sz + align);
|
||||
else
|
||||
mmap->priv = malloc(mmap->sz + align);
|
||||
|
||||
res = (mmap->priv) ? VPX_CODEC_OK : VPX_CODEC_MEM_ERROR;
|
||||
mmap->base = (void *)((((uintptr_t)mmap->priv) + align) & ~(uintptr_t)align);
|
||||
mmap->dtor = vp8_mmap_dtor;
|
||||
return res;
|
||||
}
|
||||
|
||||
static vpx_codec_err_t vp8_validate_mmaps(const vp8_stream_info_t *si,
|
||||
const vpx_codec_mmap_t *mmaps,
|
||||
vpx_codec_flags_t init_flags)
|
||||
{
|
||||
int i;
|
||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
||||
|
||||
for (i = 0; i < NELEMENTS(vp8_mem_req_segs) - 1; i++)
|
||||
{
|
||||
/* Ensure the segment has been allocated */
|
||||
if (!mmaps[i].base)
|
||||
{
|
||||
res = VPX_CODEC_MEM_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Verify variable size segment is big enough for the current si. */
|
||||
if (vp8_mem_req_segs[i].calc_sz)
|
||||
{
|
||||
vpx_codec_dec_cfg_t cfg;
|
||||
|
||||
cfg.w = si->w;
|
||||
cfg.h = si->h;
|
||||
|
||||
if (mmaps[i].sz < vp8_mem_req_segs[i].calc_sz(&cfg, init_flags))
|
||||
{
|
||||
res = VPX_CODEC_MEM_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
|
||||
{
|
||||
int i;
|
||||
@@ -110,6 +178,16 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
|
||||
}
|
||||
}
|
||||
|
||||
static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NELEMENTS(ctx->mmaps); i++)
|
||||
if (ctx->mmaps[i].id == id)
|
||||
return ctx->mmaps[i].base;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
|
||||
{
|
||||
/* nothing to clean up */
|
||||
@@ -136,7 +214,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
|
||||
mmap.align = vp8_mem_req_segs[0].align;
|
||||
mmap.flags = vp8_mem_req_segs[0].flags;
|
||||
|
||||
res = vpx_mmap_alloc(&mmap);
|
||||
res = vp8_mmap_alloc(&mmap);
|
||||
if (res != VPX_CODEC_OK) return res;
|
||||
|
||||
vp8_init_ctx(ctx, &mmap);
|
||||
@@ -288,7 +366,8 @@ static void yuvconfig2image(vpx_image_t *img,
|
||||
* the Y, U, and V planes, nor other alignment adjustments that
|
||||
* might be representable by a YV12_BUFFER_CONFIG, so we just
|
||||
* initialize all the fields.*/
|
||||
img->fmt = VPX_IMG_FMT_I420;
|
||||
img->fmt = yv12->clrtype == REG_YUV ?
|
||||
VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
|
||||
img->w = yv12->y_stride;
|
||||
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
|
||||
img->d_w = yv12->y_width;
|
||||
@@ -409,7 +488,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
|
||||
ctx->base.init_flags);
|
||||
|
||||
res = vpx_mmap_alloc(&ctx->mmaps[i]);
|
||||
res = vp8_mmap_alloc(&ctx->mmaps[i]);
|
||||
}
|
||||
|
||||
if (!res)
|
||||
@@ -421,9 +500,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
/* Initialize the decoder instance on the first frame*/
|
||||
if (!res && !ctx->decoder_init)
|
||||
{
|
||||
res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
|
||||
vp8_mem_req_segs, NELEMENTS(vp8_mem_req_segs),
|
||||
ctx->base.init_flags);
|
||||
res = vp8_validate_mmaps(&ctx->si, ctx->mmaps, ctx->base.init_flags);
|
||||
|
||||
if (!res)
|
||||
{
|
||||
@@ -720,6 +797,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
|
||||
yv12->uv_stride = img->stride[VPX_PLANE_U];
|
||||
|
||||
yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
|
||||
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@@ -35,5 +35,9 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h
|
||||
VP8_DX_SRCS-yes += decoder/treereader.h
|
||||
VP8_DX_SRCS-yes += decoder/onyxd_if.c
|
||||
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
|
||||
VP8_DX_SRCS-yes += decoder/vp8_asm_dec_offsets.c
|
||||
|
||||
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
|
||||
|
||||
$(eval $(call asm_offsets_template,\
|
||||
vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/vp8_asm_dec_offsets.c))
|
||||
|
@@ -1,116 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_avg_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_convolve_avg_neon| PROC
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #32]
|
||||
mov r6, r2
|
||||
|
||||
cmp r4, #32
|
||||
bgt avg64
|
||||
beq avg32
|
||||
cmp r4, #8
|
||||
bgt avg16
|
||||
beq avg8
|
||||
b avg4
|
||||
|
||||
avg64
|
||||
sub lr, r1, #32
|
||||
sub r4, r3, #32
|
||||
avg64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
pld [r2, r3]
|
||||
vld1.8 {q8-q9}, [r6@128]!
|
||||
vld1.8 {q10-q11}, [r6@128], r4
|
||||
vrhadd.u8 q0, q0, q8
|
||||
vrhadd.u8 q1, q1, q9
|
||||
vrhadd.u8 q2, q2, q10
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r4
|
||||
subs r5, r5, #1
|
||||
bgt avg64_h
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg32
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vld1.8 {q8-q9}, [r6@128], r3
|
||||
vld1.8 {q10-q11}, [r6@128], r3
|
||||
pld [r0]
|
||||
vrhadd.u8 q0, q0, q8
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q1, q1, q9
|
||||
pld [r6]
|
||||
vrhadd.u8 q2, q2, q10
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q3, q3, q11
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg32
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg16
|
||||
vld1.8 {q0}, [r0], r1
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vld1.8 {q2}, [r6@128], r3
|
||||
vld1.8 {q3}, [r6@128], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q2
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vrhadd.u8 q1, q1, q3
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg16
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg8
|
||||
vld1.8 {d0}, [r0], r1
|
||||
vld1.8 {d1}, [r0], r1
|
||||
vld1.8 {d2}, [r6@64], r3
|
||||
vld1.8 {d3}, [r6@64], r3
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
vrhadd.u8 q0, q0, q1
|
||||
pld [r6]
|
||||
pld [r6, r3]
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d1}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg8
|
||||
pop {r4-r6, pc}
|
||||
|
||||
avg4
|
||||
vld1.32 {d0[0]}, [r0], r1
|
||||
vld1.32 {d0[1]}, [r0], r1
|
||||
vld1.32 {d2[0]}, [r6@32], r3
|
||||
vld1.32 {d2[1]}, [r6@32], r3
|
||||
vrhadd.u8 d0, d0, d2
|
||||
vst1.32 {d0[0]}, [r2@32], r3
|
||||
vst1.32 {d0[1]}, [r2@32], r3
|
||||
subs r5, r5, #2
|
||||
bgt avg4
|
||||
pop {r4-r6, pc}
|
||||
ENDP
|
||||
|
||||
END
|
@@ -1,302 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
; These functions are only valid when:
|
||||
; x_step_q4 == 16
|
||||
; w%4 == 0
|
||||
; h%4 == 0
|
||||
; taps == 8
|
||||
; VP9_FILTER_WEIGHT == 128
|
||||
; VP9_FILTER_SHIFT == 7
|
||||
|
||||
EXPORT |vp9_convolve8_avg_horiz_neon|
|
||||
EXPORT |vp9_convolve8_avg_vert_neon|
|
||||
IMPORT |vp9_convolve8_avg_horiz_c|
|
||||
IMPORT |vp9_convolve8_avg_vert_c|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Multiply and accumulate by q0
|
||||
MACRO
|
||||
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
|
||||
vmull.s16 $dst, $src0, d0[0]
|
||||
vmlal.s16 $dst, $src1, d0[1]
|
||||
vmlal.s16 $dst, $src2, d0[2]
|
||||
vmlal.s16 $dst, $src3, d0[3]
|
||||
vmlal.s16 $dst, $src4, d1[0]
|
||||
vmlal.s16 $dst, $src5, d1[1]
|
||||
vmlal.s16 $dst, $src6, d1[2]
|
||||
vmlal.s16 $dst, $src7, d1[3]
|
||||
MEND
|
||||
|
||||
; r0 const uint8_t *src
|
||||
; r1 int src_stride
|
||||
; r2 uint8_t *dst
|
||||
; r3 int dst_stride
|
||||
; sp[]const int16_t *filter_x
|
||||
; sp[]int x_step_q4
|
||||
; sp[]const int16_t *filter_y ; unused
|
||||
; sp[]int y_step_q4 ; unused
|
||||
; sp[]int w
|
||||
; sp[]int h
|
||||
|
||||
|vp9_convolve8_avg_horiz_neon| PROC
|
||||
ldr r12, [sp, #4] ; x_step_q4
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_avg_horiz_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
sub r0, r0, #3 ; adjust for taps
|
||||
|
||||
ldr r5, [sp, #32] ; filter_x
|
||||
ldr r6, [sp, #48] ; w
|
||||
ldr r7, [sp, #52] ; h
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; slightly out of order load to match the existing data
|
||||
vld1.u32 {d6[0]}, [r2], r3
|
||||
vld1.u32 {d7[0]}, [r2], r3
|
||||
vld1.u32 {d6[1]}, [r2], r3
|
||||
vld1.u32 {d7[1]}, [r2], r3
|
||||
|
||||
sub r2, r2, r3, lsl #2 ; reset for store
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
vqrshrun.s32 d3, q2, #7
|
||||
vqrshrun.s32 d4, q14, #7
|
||||
vqrshrun.s32 d5, q15, #7
|
||||
|
||||
; saturate
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
; transpose
|
||||
vtrn.16 d2, d3
|
||||
vtrn.32 d2, d3
|
||||
vtrn.8 d2, d3
|
||||
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
|vp9_convolve8_avg_vert_neon| PROC
|
||||
ldr r12, [sp, #12]
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_avg_vert_c
|
||||
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
vld1.u32 {d6[0]}, [r5@32], r3
|
||||
vld1.u32 {d6[1]}, [r8@32], r3
|
||||
vld1.u32 {d7[0]}, [r5@32], r3
|
||||
vld1.u32 {d7[1]}, [r8@32], r3
|
||||
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
vqrshrun.s32 d3, q2, #7
|
||||
vqrshrun.s32 d4, q14, #7
|
||||
vqrshrun.s32 d5, q15, #7
|
||||
|
||||
; saturate
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
; average the new value and the dst value
|
||||
vrhadd.u8 q1, q1, q3
|
||||
|
||||
sub r5, r5, r3, lsl #1 ; reset for store
|
||||
sub r8, r8, r3, lsl #1
|
||||
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
@@ -1,280 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
; These functions are only valid when:
|
||||
; x_step_q4 == 16
|
||||
; w%4 == 0
|
||||
; h%4 == 0
|
||||
; taps == 8
|
||||
; VP9_FILTER_WEIGHT == 128
|
||||
; VP9_FILTER_SHIFT == 7
|
||||
|
||||
EXPORT |vp9_convolve8_horiz_neon|
|
||||
EXPORT |vp9_convolve8_vert_neon|
|
||||
IMPORT |vp9_convolve8_horiz_c|
|
||||
IMPORT |vp9_convolve8_vert_c|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Multiply and accumulate by q0
|
||||
MACRO
|
||||
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
|
||||
vmull.s16 $dst, $src0, d0[0]
|
||||
vmlal.s16 $dst, $src1, d0[1]
|
||||
vmlal.s16 $dst, $src2, d0[2]
|
||||
vmlal.s16 $dst, $src3, d0[3]
|
||||
vmlal.s16 $dst, $src4, d1[0]
|
||||
vmlal.s16 $dst, $src5, d1[1]
|
||||
vmlal.s16 $dst, $src6, d1[2]
|
||||
vmlal.s16 $dst, $src7, d1[3]
|
||||
MEND
|
||||
|
||||
; r0 const uint8_t *src
|
||||
; r1 int src_stride
|
||||
; r2 uint8_t *dst
|
||||
; r3 int dst_stride
|
||||
; sp[]const int16_t *filter_x
|
||||
; sp[]int x_step_q4
|
||||
; sp[]const int16_t *filter_y ; unused
|
||||
; sp[]int y_step_q4 ; unused
|
||||
; sp[]int w
|
||||
; sp[]int h
|
||||
|
||||
|vp9_convolve8_horiz_neon| PROC
|
||||
ldr r12, [sp, #4] ; x_step_q4
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_horiz_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
sub r0, r0, #3 ; adjust for taps
|
||||
|
||||
ldr r5, [sp, #32] ; filter_x
|
||||
ldr r6, [sp, #48] ; w
|
||||
ldr r7, [sp, #52] ; h
|
||||
|
||||
vld1.s16 {q0}, [r5] ; filter_x
|
||||
|
||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||
|
||||
sub r4, r3, r3, lsl #2 ; -dst_stride * 3
|
||||
add r4, r4, #4 ; -dst_stride * 3 + 4
|
||||
|
||||
rsb r9, r6, r1, lsl #2 ; reset src for outer loop
|
||||
sub r9, r9, #7
|
||||
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
|
||||
|
||||
mov r10, r6 ; w loop counter
|
||||
|
||||
loop_horiz_v
|
||||
vld1.8 {d24}, [r0], r1
|
||||
vld1.8 {d25}, [r0], r1
|
||||
vld1.8 {d26}, [r0], r1
|
||||
vld1.8 {d27}, [r0], r8
|
||||
|
||||
vtrn.16 q12, q13
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
|
||||
pld [r0, r1, lsl #2]
|
||||
|
||||
vmovl.u8 q8, d24
|
||||
vmovl.u8 q9, d25
|
||||
vmovl.u8 q10, d26
|
||||
vmovl.u8 q11, d27
|
||||
|
||||
; save a few instructions in the inner loop
|
||||
vswp d17, d18
|
||||
vmov d23, d21
|
||||
|
||||
add r0, r0, #3
|
||||
|
||||
loop_horiz
|
||||
add r5, r0, #64
|
||||
|
||||
vld1.32 {d28[]}, [r0], r1
|
||||
vld1.32 {d29[]}, [r0], r1
|
||||
vld1.32 {d31[]}, [r0], r1
|
||||
vld1.32 {d30[]}, [r0], r8
|
||||
|
||||
pld [r5]
|
||||
|
||||
vtrn.16 d28, d31
|
||||
vtrn.16 d29, d30
|
||||
vtrn.8 d28, d29
|
||||
vtrn.8 d31, d30
|
||||
|
||||
pld [r5, r1]
|
||||
|
||||
; extract to s16
|
||||
vtrn.32 q14, q15
|
||||
vmovl.u8 q12, d28
|
||||
vmovl.u8 q13, d29
|
||||
|
||||
pld [r5, r1, lsl #1]
|
||||
|
||||
; src[] * filter_x
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||
MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
|
||||
|
||||
pld [r5, -r8]
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
vqrshrun.s32 d3, q2, #7
|
||||
vqrshrun.s32 d4, q14, #7
|
||||
vqrshrun.s32 d5, q15, #7
|
||||
|
||||
; saturate
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
; transpose
|
||||
vtrn.16 d2, d3
|
||||
vtrn.32 d2, d3
|
||||
vtrn.8 d2, d3
|
||||
|
||||
vst1.u32 {d2[0]}, [r2@32], r3
|
||||
vst1.u32 {d3[0]}, [r2@32], r3
|
||||
vst1.u32 {d2[1]}, [r2@32], r3
|
||||
vst1.u32 {d3[1]}, [r2@32], r4
|
||||
|
||||
vmov q8, q9
|
||||
vmov d20, d23
|
||||
vmov q11, q12
|
||||
vmov q9, q13
|
||||
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_horiz
|
||||
|
||||
; outer loop
|
||||
mov r6, r10 ; restore w counter
|
||||
add r0, r0, r9 ; src += src_stride * 4 - w
|
||||
add r2, r2, r12 ; dst += dst_stride * 4 - w
|
||||
subs r7, r7, #4 ; h -= 4
|
||||
bgt loop_horiz_v
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
|vp9_convolve8_vert_neon| PROC
|
||||
ldr r12, [sp, #12]
|
||||
cmp r12, #16
|
||||
bne vp9_convolve8_vert_c
|
||||
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
sub r0, r0, r1
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
ldr r4, [sp, #32] ; filter_y
|
||||
ldr r6, [sp, #40] ; w
|
||||
ldr lr, [sp, #44] ; h
|
||||
|
||||
vld1.s16 {q0}, [r4] ; filter_y
|
||||
|
||||
lsl r1, r1, #1
|
||||
lsl r3, r3, #1
|
||||
|
||||
loop_vert_h
|
||||
mov r4, r0
|
||||
add r7, r0, r1, asr #1
|
||||
mov r5, r2
|
||||
add r8, r2, r3, asr #1
|
||||
mov r12, lr ; h loop counter
|
||||
|
||||
vld1.u32 {d16[0]}, [r4], r1
|
||||
vld1.u32 {d16[1]}, [r7], r1
|
||||
vld1.u32 {d18[0]}, [r4], r1
|
||||
vld1.u32 {d18[1]}, [r7], r1
|
||||
vld1.u32 {d20[0]}, [r4], r1
|
||||
vld1.u32 {d20[1]}, [r7], r1
|
||||
vld1.u32 {d22[0]}, [r4], r1
|
||||
|
||||
vmovl.u8 q8, d16
|
||||
vmovl.u8 q9, d18
|
||||
vmovl.u8 q10, d20
|
||||
vmovl.u8 q11, d22
|
||||
|
||||
loop_vert
|
||||
; always process a 4x4 block at a time
|
||||
vld1.u32 {d24[0]}, [r7], r1
|
||||
vld1.u32 {d26[0]}, [r4], r1
|
||||
vld1.u32 {d26[1]}, [r7], r1
|
||||
vld1.u32 {d24[1]}, [r4], r1
|
||||
|
||||
; extract to s16
|
||||
vmovl.u8 q12, d24
|
||||
vmovl.u8 q13, d26
|
||||
|
||||
pld [r5]
|
||||
pld [r8]
|
||||
|
||||
; src[] * filter_y
|
||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||
|
||||
pld [r5, r3]
|
||||
pld [r8, r3]
|
||||
|
||||
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
|
||||
|
||||
pld [r7]
|
||||
pld [r4]
|
||||
|
||||
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
|
||||
|
||||
pld [r7, r1]
|
||||
pld [r4, r1]
|
||||
|
||||
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
|
||||
|
||||
; += 64 >> 7
|
||||
vqrshrun.s32 d2, q1, #7
|
||||
vqrshrun.s32 d3, q2, #7
|
||||
vqrshrun.s32 d4, q14, #7
|
||||
vqrshrun.s32 d5, q15, #7
|
||||
|
||||
; saturate
|
||||
vqmovn.u16 d2, q1
|
||||
vqmovn.u16 d3, q2
|
||||
|
||||
vst1.u32 {d2[0]}, [r5@32], r3
|
||||
vst1.u32 {d2[1]}, [r8@32], r3
|
||||
vst1.u32 {d3[0]}, [r5@32], r3
|
||||
vst1.u32 {d3[1]}, [r8@32], r3
|
||||
|
||||
vmov q8, q10
|
||||
vmov d18, d22
|
||||
vmov d19, d24
|
||||
vmov q10, q13
|
||||
vmov d22, d25
|
||||
|
||||
subs r12, r12, #4 ; h -= 4
|
||||
bgt loop_vert
|
||||
|
||||
; outer loop
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
subs r6, r6, #4 ; w -= 4
|
||||
bgt loop_vert_h
|
||||
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP
|
||||
END
|
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
||||
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
|
||||
*/
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
|
||||
// Account for the vertical phase needing 3 lines prior and 4 lines post
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16)
|
||||
return vp9_convolve8_c(src, src_stride,
|
||||
dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
|
||||
/* Filter starting 3 lines back. The neon implementation will ignore the
|
||||
* given height and filter a multiple of 4 lines. Since this goes in to
|
||||
* the temp buffer which has lots of extra room and is subsequently discarded
|
||||
* this is safe if somewhat less than ideal.
|
||||
*/
|
||||
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height);
|
||||
|
||||
/* Step into the temp buffer 3 lines to get the actual frame data */
|
||||
vp9_convolve8_vert_neon(temp + 64 * 3, 64,
|
||||
dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
}
|
||||
|
||||
void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h) {
|
||||
DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16)
|
||||
return vp9_convolve8_avg_c(src, src_stride,
|
||||
dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
|
||||
/* This implementation has the same issues as above. In addition, we only want
|
||||
* to average the values after both passes.
|
||||
*/
|
||||
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
|
||||
temp, 64,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, intermediate_height);
|
||||
vp9_convolve8_avg_vert_neon(temp + 64 * 3,
|
||||
64, dst, dst_stride,
|
||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
||||
w, h);
|
||||
}
|
@@ -1,84 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_convolve_copy_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp9_convolve_copy_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
ldrd r4, r5, [sp, #28]
|
||||
|
||||
cmp r4, #32
|
||||
bgt copy64
|
||||
beq copy32
|
||||
cmp r4, #8
|
||||
bgt copy16
|
||||
beq copy8
|
||||
b copy4
|
||||
|
||||
copy64
|
||||
sub lr, r1, #32
|
||||
sub r3, r3, #32
|
||||
copy64_h
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0]!
|
||||
vld1.8 {q2-q3}, [r0], lr
|
||||
vst1.8 {q0-q1}, [r2@128]!
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy64_h
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy32
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0-q1}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q2-q3}, [r0], r1
|
||||
vst1.8 {q0-q1}, [r2@128], r3
|
||||
vst1.8 {q2-q3}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy32
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy16
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {q1}, [r0], r1
|
||||
vst1.8 {q0}, [r2@128], r3
|
||||
vst1.8 {q1}, [r2@128], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy16
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy8
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d0}, [r0], r1
|
||||
pld [r0, r1, lsl #1]
|
||||
vld1.8 {d2}, [r0], r1
|
||||
vst1.8 {d0}, [r2@64], r3
|
||||
vst1.8 {d2}, [r2@64], r3
|
||||
subs r5, r5, #2
|
||||
bgt copy8
|
||||
pop {r4-r5, pc}
|
||||
|
||||
copy4
|
||||
ldr r12, [r0], r1
|
||||
str r12, [r2], r3
|
||||
subs r5, r5, #1
|
||||
bgt copy4
|
||||
pop {r4-r5, pc}
|
||||
ENDP
|
||||
|
||||
END
|
@@ -1,69 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_dc_only_idct_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_dc_only_idct_add_neon(int input_dc, uint8_t *pred_ptr,
|
||||
; uint8_t *dst_ptr, int pitch, int stride)
|
||||
;
|
||||
; r0 int input_dc
|
||||
; r1 uint8_t *pred_ptr
|
||||
; r2 uint8_t *dst_ptr
|
||||
; r3 int pitch
|
||||
; sp int stride
|
||||
|
||||
|vp9_dc_only_idct_add_neon| PROC

    ; r12 = cospi_16_64 = 0x2d00 + 0x41 = 11585
    mov         r12, #0x2d00
    add         r12, #0x41

    ; first dct_const_round_shift(input_dc * cospi_16_64)
    mul         r0, r0, r12                ; input_dc * cospi_16_64
    add         r0, r0, #0x2000            ; + (1 << (DCT_CONST_BITS - 1))
    asr         r0, r0, #14                ; >> DCT_CONST_BITS

    ; second dct_const_round_shift(out * cospi_16_64)
    mul         r0, r0, r12                ; out * cospi_16_64
    add         r0, r0, #0x2000            ; + (1 << (DCT_CONST_BITS - 1))
    asr         r0, r0, #14                ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 4)
    add         r0, r0, #8                 ; + (1 << (4 - 1))
    asr         r0, r0, #4                 ; >> 4

    vdup.16     q0, r0                     ; a1 in all eight 16-bit lanes
    ldr         r12, [sp]                  ; stride (no push, so still at sp)

    ; Gather four 4-byte prediction rows, two rows per d register.
    vld1.32     {d2[0]}, [r1], r3
    vld1.32     {d2[1]}, [r1], r3
    vld1.32     {d4[0]}, [r1], r3
    vld1.32     {d4[1]}, [r1]

    vaddw.u8    q1, q0, d2                 ; a1 + pred_ptr[c], rows 0-1
    vaddw.u8    q2, q0, d4                 ; a1 + pred_ptr[c], rows 2-3

    vqmovun.s16 d2, q1                     ; clip_pixel: saturate to 0..255
    vqmovun.s16 d4, q2

    ; Scatter the four result rows to dst using the destination stride.
    vst1.32     {d2[0]}, [r2], r12
    vst1.32     {d2[1]}, [r2], r12
    vst1.32     {d4[0]}, [r2], r12
    vst1.32     {d4[1]}, [r2]

    bx          lr
    ENDP        ; |vp9_dc_only_idct_add_neon|
|
||||
|
||||
END
|
@@ -1,169 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
// Half-transform passes called by the wrappers in this file.
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
                                               int16_t *output,
                                               int output_stride);
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
                                               int16_t *output,
                                               int16_t *pass1Output,
                                               int16_t skip_adding,
                                               uint8_t *dest,
                                               int dest_stride);
extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
                                                  int16_t *output,
                                                  int output_stride);
extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
                                                  int16_t *output,
                                                  int16_t *pass1Output,
                                                  int16_t skip_adding,
                                                  uint8_t *dest,
                                                  int dest_stride);

// Save / restore helpers for d8-d15.  Declared with (void) so that these are
// real prototypes: an empty parameter list in C leaves the arguments
// unchecked.
extern void save_neon_registers(void);
extern void restore_neon_registers(void);
|
||||
|
||||
|
||||
void vp9_short_idct16x16_add_neon(int16_t *input,
                                  uint8_t *dest, int dest_stride) {
  int16_t pass1_output[16*16] = {0};
  int16_t row_idct_output[16*16] = {0};
  int half;

  // save d8-d15 register values.
  save_neon_registers();

  // Row transform, eight rows at a time (half 0: upper rows, half 1: lower
  // rows).  Pass 1 processes even elements 0, 2, ..., 14 and leaves the
  // stage 6 result in pass1_output.  Pass 2 processes odd elements
  // 1, 3, ..., 15, combines them with pass1_output and writes the stage 7
  // result into row_idct_output (skip_adding == 0).
  for (half = 0; half < 2; ++half) {
    int16_t *const rows = input + half * 8 * 16;

    vp9_short_idct16x16_add_neon_pass1(rows, pass1_output, 8);
    vp9_short_idct16x16_add_neon_pass2(rows + 1,
                                       row_idct_output + half * 8,
                                       pass1_output,
                                       0,
                                       dest,
                                       dest_stride);
  }

  // Column transform, eight columns at a time (half 0: left, half 1: right).
  // Same two passes, but pass 2 is called with skip_adding == 1 so that the
  // stage 7 result is added to the destination data.
  for (half = 0; half < 2; ++half) {
    int16_t *const cols = row_idct_output + half * 8 * 16;

    vp9_short_idct16x16_add_neon_pass1(cols, pass1_output, 8);
    vp9_short_idct16x16_add_neon_pass2(cols + 1,
                                       row_idct_output + half * 8,
                                       pass1_output,
                                       1,
                                       dest + half * 8,
                                       dest_stride);
  }

  // restore d8-d15 register values.
  restore_neon_registers();
}
|
||||
|
||||
void vp9_short_idct10_16x16_add_neon(int16_t *input,
                                     uint8_t *dest, int dest_stride) {
  int16_t pass1_output[16*16] = {0};
  int16_t row_idct_output[16*16] = {0};
  int half;

  // save d8-d15 register values.
  save_neon_registers();

  // Row transform on the upper 8 rows only; the lower 8 rows are all 0s and
  // are skipped.  Pass 1 processes even elements 0, 2, ..., 14 and leaves the
  // stage 6 result in pass1_output.  Pass 2 processes odd elements
  // 1, 3, ..., 15, combines them with pass1_output and writes the stage 7
  // result into row_idct_output.
  vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
  vp9_short_idct10_16x16_add_neon_pass2(input + 1,
                                        row_idct_output,
                                        pass1_output,
                                        0,
                                        dest,
                                        dest_stride);

  // Column transform, eight columns at a time (half 0: left, half 1: right),
  // using the full 16x16 passes.  Pass 2 is called with skip_adding == 1 so
  // that the stage 7 result is added to the destination data.
  for (half = 0; half < 2; ++half) {
    int16_t *const cols = row_idct_output + half * 8 * 16;

    vp9_short_idct16x16_add_neon_pass1(cols, pass1_output, 8);
    vp9_short_idct16x16_add_neon_pass2(cols + 1,
                                       row_idct_output + half * 8,
                                       pass1_output,
                                       1,
                                       dest + half * 8,
                                       dest_stride);
  }

  // restore d8-d15 register values.
  restore_neon_registers();
}
|
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
                                           int16_t *output, int16_t *input);
extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);

// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
// Declared with (void) so that these are real prototypes: an empty parameter
// list in C leaves the arguments unchecked.
extern void save_neon_registers(void);
extern void restore_neon_registers(void);
|
||||
|
||||
void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
                                  int dest_stride) {
  // TODO(cd): move the creation of these buffers within the ASM file
  int16_t transpose_buffer[32 * 8];  // scratch: 8 lines are transposed into
                                     // it before being transformed
  int16_t pass1[32 * 32];            // first pass output (rows)
  int16_t pass2[32 * 32];            // second pass output (columns)

  // The register state that must be preserved is saved up front and
  // restored after the last assembly call.
  save_neon_registers();

  // Rows: transpose and transform input into pass1.
  idct32_transpose_and_transform(transpose_buffer, pass1, input);

  // Columns: the same routine, fed with the row result.
  // TODO(cd): do these two steps/passes within the ASM file
  idct32_transpose_and_transform(transpose_buffer, pass2, pass1);

  // Combine and add to dest.
  // TODO(cd): integrate this within the last storage step of the second pass
  idct32_combine_add(dest, pass2, dest_stride);

  restore_neon_registers();
}
|
||||
|
||||
// TODO(cd): Eliminate this file altogether when everything is in ASM file
|
@@ -1,708 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_loop_filter_horizontal_edge_neon|
|
||||
EXPORT |vp9_loop_filter_vertical_edge_neon|
|
||||
EXPORT |vp9_mbloop_filter_horizontal_edge_neon|
|
||||
EXPORT |vp9_mbloop_filter_vertical_edge_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
|
||||
; works on 16 iterations at a time.
|
||||
; TODO(fgalligan): See about removing the count code as this function is only
|
||||
; called with a count of 1.
|
||||
;
|
||||
; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
|
||||
; int p /* pitch */,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh,
|
||||
; int count)
|
||||
;
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
; sp+4 int count
|
||||
|vp9_loop_filter_horizontal_edge_neon| PROC
    push        {lr}                       ; 4 bytes: thresh at sp+4, count at sp+8

    vld1.8      {d0[]}, [r2]               ; d0 = *blimit in every lane
    ldr         r12, [sp, #8]              ; r12 = count
    ldr         r2, [sp, #4]               ; r2 = thresh
    add         r1, r1, r1                 ; r1 = 2 * pitch (rows are interleaved
                                           ; over two pointers below)

    cmp         r12, #0
    beq         end_vp9_lf_h_edge

    vld1.8      {d1[]}, [r3]               ; d1 = *limit in every lane
    vld1.8      {d2[]}, [r2]               ; d2 = *thresh in every lane

count_lf_h_loop
    sub         r2, r0, r1, lsl #1         ; r2 = s - 4 * pitch (p3 row)
    add         r3, r2, r1, lsr #1         ; r3 = s - 3 * pitch (p2 row)

    ; r2 walks p3, p1, q0, q2 and r3 walks p2, p0, q1, q3, 8 pixels per row.
    vld1.u8     {d3}, [r2@64], r1          ; p3
    vld1.u8     {d4}, [r3@64], r1          ; p2
    vld1.u8     {d5}, [r2@64], r1          ; p1
    vld1.u8     {d6}, [r3@64], r1          ; p0
    vld1.u8     {d7}, [r2@64], r1          ; q0
    vld1.u8     {d16}, [r3@64], r1         ; q1
    vld1.u8     {d17}, [r2@64]             ; q2
    vld1.u8     {d18}, [r3@64]             ; q3

    sub         r2, r2, r1, lsl #1         ; r2 = s - 2 * pitch (p1 row)
    sub         r3, r3, r1, lsl #1         ; r3 = s - 1 * pitch (p0 row)

    bl          vp9_loop_filter_neon

    vst1.u8     {d4}, [r2@64], r1          ; store op1
    vst1.u8     {d5}, [r3@64], r1          ; store op0
    vst1.u8     {d6}, [r2@64], r1          ; store oq0
    vst1.u8     {d7}, [r3@64], r1          ; store oq1

    add         r0, r0, #8                 ; next group of 8 pixels along the edge
    subs        r12, r12, #1
    bne         count_lf_h_loop

end_vp9_lf_h_edge
    pop         {pc}
    ENDP        ; |vp9_loop_filter_horizontal_edge_neon|
|
||||
|
||||
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
|
||||
; works on 16 iterations at a time.
|
||||
; TODO(fgalligan): See about removing the count code as this function is only
|
||||
; called with a count of 1.
|
||||
;
|
||||
; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
|
||||
; int p /* pitch */,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh,
|
||||
; int count)
|
||||
;
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
; sp+4 int count
|
||||
|vp9_loop_filter_vertical_edge_neon| PROC
    push        {lr}                       ; 4 bytes: thresh at sp+4, count at sp+8

    vld1.8      {d0[]}, [r2]               ; duplicate *blimit
    ldr         r12, [sp, #8]              ; load count
    vld1.8      {d1[]}, [r3]               ; duplicate *limit

    ldr         r3, [sp, #4]               ; load thresh
    sub         r2, r0, #4                 ; r2 = s - 4: leftmost of the 8 columns
    cmp         r12, #0
    beq         end_vp9_lf_v_edge

    vld1.8      {d2[]}, [r3]               ; duplicate *thresh

count_lf_v_loop
    ; Load 8 rows of 8 bytes straddling the edge (4 pixels on each side).
    vld1.u8     {d3}, [r2], r1
    vld1.u8     {d4}, [r2], r1
    vld1.u8     {d5}, [r2], r1
    vld1.u8     {d6}, [r2], r1
    vld1.u8     {d7}, [r2], r1
    vld1.u8     {d16}, [r2], r1
    vld1.u8     {d17}, [r2], r1
    vld1.u8     {d18}, [r2]

    ; Transpose the 8x8 byte block so that d3..d18 hold p3..q3 as the helper
    ; expects.
    vtrn.32     d3, d7
    vtrn.32     d4, d16
    vtrn.32     d5, d17
    vtrn.32     d6, d18

    vtrn.16     d3, d5
    vtrn.16     d4, d6
    vtrn.16     d7, d17
    vtrn.16     d16, d18

    vtrn.8      d3, d4
    vtrn.8      d5, d6
    vtrn.8      d7, d16
    vtrn.8      d17, d18

    bl          vp9_loop_filter_neon

    ; Store through r2 and leave r0 at s.  Walking r0 itself over the 8 rows
    ; and then adding 8 * pitch would leave it at s - 2 + 15 * pitch, which
    ; misplaces every iteration after the first when count > 1.
    sub         r2, r0, #2                 ; r2 = s - 2: column of op1

    ; store op1, op0, oq0, oq1: one 4-byte group per row
    vst4.8      {d4[0], d5[0], d6[0], d7[0]}, [r2], r1
    vst4.8      {d4[1], d5[1], d6[1], d7[1]}, [r2], r1
    vst4.8      {d4[2], d5[2], d6[2], d7[2]}, [r2], r1
    vst4.8      {d4[3], d5[3], d6[3], d7[3]}, [r2], r1
    vst4.8      {d4[4], d5[4], d6[4], d7[4]}, [r2], r1
    vst4.8      {d4[5], d5[5], d6[5], d7[5]}, [r2], r1
    vst4.8      {d4[6], d5[6], d6[6], d7[6]}, [r2], r1
    vst4.8      {d4[7], d5[7], d6[7], d7[7]}, [r2]

    add         r0, r0, r1, lsl #3         ; s += pitch * 8
    subs        r12, r12, #1
    subne       r2, r0, #4                 ; r2 = s - 4 for the next 8 rows
    bne         count_lf_v_loop

end_vp9_lf_v_edge
    pop         {pc}
    ENDP        ; |vp9_loop_filter_vertical_edge_neon|
|
||||
|
||||
; void vp9_loop_filter_neon();
|
||||
; This is a helper function for the loopfilters. The individual functions do the
|
||||
; necessary load, transpose (if necessary) and store. The function does not use
|
||||
; registers d8-d15.
|
||||
;
|
||||
; Inputs:
|
||||
; r0-r3, r12 PRESERVE
|
||||
; d0 blimit
|
||||
; d1 limit
|
||||
; d2 thresh
|
||||
; d3 p3
|
||||
; d4 p2
|
||||
; d5 p1
|
||||
; d6 p0
|
||||
; d7 q0
|
||||
; d16 q1
|
||||
; d17 q2
|
||||
; d18 q3
|
||||
;
|
||||
; Outputs:
|
||||
; d4 op1
|
||||
; d5 op0
|
||||
; d6 oq0
|
||||
; d7 oq1
|
||||
|vp9_loop_filter_neon| PROC
    ; No core register is written.  d0-d2 (blimit, limit, thresh) are left
    ; intact.  d3, d16-d28 are used as scratch.  The instruction order below
    ; is kept exactly as scheduled.

    ; filter_mask
    vabd.u8     d19, d3, d4                ; m1 = abs(p3 - p2)
    vabd.u8     d20, d4, d5                ; m2 = abs(p2 - p1)
    vabd.u8     d21, d5, d6                ; m3 = abs(p1 - p0)
    vabd.u8     d22, d16, d7               ; m4 = abs(q1 - q0)
    vabd.u8     d3, d17, d16               ; m5 = abs(q2 - q1)
    vabd.u8     d4, d18, d17               ; m6 = abs(q3 - q2)

    ; only the largest difference needs comparing against limit
    vmax.u8     d19, d19, d20              ; m1 = max(m1, m2)
    vmax.u8     d20, d21, d22              ; m2 = max(m3, m4)

    vabd.u8     d17, d6, d7                ; abs(p0 - q0)

    vmax.u8     d3, d3, d4                 ; m3 = max(m5, m6)

    vmov.u8     d18, #0x80                 ; sign-flip constant

    vmax.u8     d23, d19, d20              ; m1 = max(m1, m2)

    ; hevmask
    vcgt.u8     d21, d21, d2               ; (abs(p1 - p0) > thresh) * -1
    vcgt.u8     d22, d22, d2               ; (abs(q1 - q0) > thresh) * -1
    vmax.u8     d23, d23, d3               ; m1 = max(m1, m3)

    vabd.u8     d28, d5, d16               ; a = abs(p1 - q1)
    vqadd.u8    d17, d17, d17              ; b = abs(p0 - q0) * 2

    veor        d7, d7, d18                ; qs0

    vcge.u8     d23, d1, d23               ; (limit >= m1) * -1

    ; filter() function
    ; convert to signed

    vshr.u8     d28, d28, #1               ; a = a / 2
    veor        d6, d6, d18                ; ps0

    veor        d5, d5, d18                ; ps1
    vqadd.u8    d17, d17, d28              ; a = b + a

    veor        d16, d16, d18              ; qs1

    vmov.u8     d19, #3

    vsub.s8     d28, d7, d6                ; (qs0 - ps0)

    vcge.u8     d17, d0, d17               ; (blimit >= a) * -1

    vqsub.s8    d27, d5, d16               ; filter = clamp(ps1 - qs1)
    vorr        d22, d21, d22              ; hevmask

    vmull.s8    q12, d28, d19              ; 3 * (qs0 - ps0)

    vand        d27, d27, d22              ; filter &= hev
    vand        d23, d23, d17              ; filter_mask

    vaddw.s8    q12, q12, d27              ; filter + 3 * (qs0 - ps0)

    vmov.u8     d17, #4

    ; filter = clamp(filter + 3 * (qs0 - ps0))
    vqmovn.s16  d27, q12

    vand        d27, d27, d23              ; filter &= mask

    vqadd.s8    d28, d27, d19              ; filter2 = clamp(filter + 3)
    vqadd.s8    d27, d27, d17              ; filter1 = clamp(filter + 4)
    vshr.s8     d28, d28, #3               ; filter2 >>= 3
    vshr.s8     d27, d27, #3               ; filter1 >>= 3

    vqadd.s8    d19, d6, d28               ; u = clamp(ps0 + filter2)
    vqsub.s8    d26, d7, d27               ; u = clamp(qs0 - filter1)

    ; outer tap adjustments
    vrshr.s8    d27, d27, #1               ; filter = ++filter1 >> 1

    veor        d6, d26, d18               ; *oq0 = u ^ 0x80

    vbic        d27, d27, d22              ; filter &= ~hev

    vqadd.s8    d21, d5, d27               ; u = clamp(ps1 + filter)
    vqsub.s8    d20, d16, d27              ; u = clamp(qs1 - filter)

    veor        d5, d19, d18               ; *op0 = u ^ 0x80
    veor        d4, d21, d18               ; *op1 = u ^ 0x80
    veor        d7, d20, d18               ; *oq1 = u ^ 0x80

    bx          lr
    ENDP        ; |vp9_loop_filter_neon|
|
||||
|
||||
; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh,
|
||||
; int count)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
; sp+4 int count
|
||||
|vp9_mbloop_filter_horizontal_edge_neon| PROC
    ; r4/r5 are scratch for vp9_mbloop_filter_neon.  r6/r7 keep the blimit and
    ; limit pointers alive across iterations, because r2/r3 are reused as row
    ; pointers inside the loop.
    push        {r4-r7, lr}                ; 20 bytes: thresh at sp+20, count at sp+24

    ldr         r12, [sp, #24]             ; load count
    mov         r6, r2                     ; r6 = blimit
    mov         r7, r3                     ; r7 = limit
    add         r1, r1, r1                 ; double pitch

    cmp         r12, #0
    beq         end_vp9_mblf_h_edge

count_mblf_h_loop
    ; The helper returns op2/op1/op0 in d0-d2, overwriting the thresholds, so
    ; they must be reloaded on every iteration for count > 1 to be correct.
    vld1.8      {d0[]}, [r6]               ; duplicate *blimit
    vld1.8      {d1[]}, [r7]               ; duplicate *limit
    ldr         r2, [sp, #20]              ; load thresh
    vld1.8      {d2[]}, [r2]               ; duplicate *thresh

    sub         r3, r0, r1, lsl #1         ; r3 = s - 4 * pitch (p3 row)
    add         r2, r3, r1, lsr #1         ; r2 = s - 3 * pitch (p2 row)

    ; r3 walks p3, p1, q0, q2 and r2 walks p2, p0, q1, q3, 8 pixels per row.
    vld1.u8     {d3}, [r3@64], r1          ; p3
    vld1.u8     {d4}, [r2@64], r1          ; p2
    vld1.u8     {d5}, [r3@64], r1          ; p1
    vld1.u8     {d6}, [r2@64], r1          ; p0
    vld1.u8     {d7}, [r3@64], r1          ; q0
    vld1.u8     {d16}, [r2@64], r1         ; q1
    vld1.u8     {d17}, [r3@64]             ; q2
    vld1.u8     {d18}, [r2@64], r1         ; q3

    sub         r3, r3, r1, lsl #1         ; r3 = s - 2 * pitch (p1 row)
    sub         r2, r2, r1, lsl #2         ; r2 = s - 3 * pitch (p2 row)

    bl          vp9_mbloop_filter_neon

    vst1.u8     {d0}, [r2@64], r1          ; store op2
    vst1.u8     {d1}, [r3@64], r1          ; store op1
    vst1.u8     {d2}, [r2@64], r1          ; store op0
    vst1.u8     {d3}, [r3@64], r1          ; store oq0
    vst1.u8     {d4}, [r2@64], r1          ; store oq1
    vst1.u8     {d5}, [r3@64], r1          ; store oq2

    add         r0, r0, #8                 ; next group of 8 pixels along the edge
    subs        r12, r12, #1
    bne         count_mblf_h_loop

end_vp9_mblf_h_edge
    pop         {r4-r7, pc}

    ENDP        ; |vp9_mbloop_filter_horizontal_edge_neon|
|
||||
|
||||
; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
|
||||
; int pitch,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh,
|
||||
; int count)
|
||||
;
|
||||
; r0 uint8_t *s,
|
||||
; r1 int pitch,
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
; sp+4 int count
|
||||
|vp9_mbloop_filter_vertical_edge_neon| PROC
    ; r4/r5 are scratch for vp9_mbloop_filter_neon.  r6/r7 keep the blimit and
    ; limit pointers alive across iterations, because r2/r3 are reused as
    ; load/store pointers inside the loop.
    push        {r4-r7, lr}                ; 20 bytes: thresh at sp+20, count at sp+24

    ldr         r12, [sp, #24]             ; load count
    mov         r6, r2                     ; r6 = blimit
    mov         r7, r3                     ; r7 = limit

    cmp         r12, #0
    beq         end_vp9_mblf_v_edge

count_mblf_v_loop
    ; The helper returns op2/op1/op0 in d0-d2, overwriting the thresholds, so
    ; they must be reloaded on every iteration for count > 1 to be correct.
    vld1.8      {d0[]}, [r6]               ; duplicate *blimit
    vld1.8      {d1[]}, [r7]               ; duplicate *limit
    ldr         r3, [sp, #20]              ; load thresh
    sub         r2, r0, #4                 ; r2 = s - 4: leftmost of the 8 columns
    vld1.8      {d2[]}, [r3]               ; duplicate *thresh

    ; Load 8 rows of 8 bytes straddling the edge (4 pixels on each side).
    vld1.u8     {d3}, [r2], r1
    vld1.u8     {d4}, [r2], r1
    vld1.u8     {d5}, [r2], r1
    vld1.u8     {d6}, [r2], r1
    vld1.u8     {d7}, [r2], r1
    vld1.u8     {d16}, [r2], r1
    vld1.u8     {d17}, [r2], r1
    vld1.u8     {d18}, [r2]

    ; Transpose the 8x8 byte block so that d3..d18 hold p3..q3 as the helper
    ; expects.
    vtrn.32     d3, d7
    vtrn.32     d4, d16
    vtrn.32     d5, d17
    vtrn.32     d6, d18

    vtrn.16     d3, d5
    vtrn.16     d4, d6
    vtrn.16     d7, d17
    vtrn.16     d16, d18

    vtrn.8      d3, d4
    vtrn.8      d5, d6
    vtrn.8      d7, d16
    vtrn.8      d17, d18

    sub         r2, r0, #3                 ; r2 = s - 3: column of op2
    add         r3, r0, #1                 ; r3 = s + 1: column of oq1

    bl          vp9_mbloop_filter_neon

    ; store op2, op1, op0, oq0: one 4-byte group per row
    vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
    vst4.8      {d0[1], d1[1], d2[1], d3[1]}, [r2], r1
    vst4.8      {d0[2], d1[2], d2[2], d3[2]}, [r2], r1
    vst4.8      {d0[3], d1[3], d2[3], d3[3]}, [r2], r1
    vst4.8      {d0[4], d1[4], d2[4], d3[4]}, [r2], r1
    vst4.8      {d0[5], d1[5], d2[5], d3[5]}, [r2], r1
    vst4.8      {d0[6], d1[6], d2[6], d3[6]}, [r2], r1
    vst4.8      {d0[7], d1[7], d2[7], d3[7]}, [r2]

    ; store oq1, oq2: one 2-byte group per row
    vst2.8      {d4[0], d5[0]}, [r3], r1
    vst2.8      {d4[1], d5[1]}, [r3], r1
    vst2.8      {d4[2], d5[2]}, [r3], r1
    vst2.8      {d4[3], d5[3]}, [r3], r1
    vst2.8      {d4[4], d5[4]}, [r3], r1
    vst2.8      {d4[5], d5[5]}, [r3], r1
    vst2.8      {d4[6], d5[6]}, [r3], r1
    vst2.8      {d4[7], d5[7]}, [r3]

    add         r0, r0, r1, lsl #3         ; s += pitch * 8
    subs        r12, r12, #1
    bne         count_mblf_v_loop          ; r2 is recomputed at the loop top

end_vp9_mblf_v_edge
    pop         {r4-r7, pc}
    ENDP        ; |vp9_mbloop_filter_vertical_edge_neon|
|
||||
|
||||
; void vp9_mbloop_filter_neon();
|
||||
; This is a helper function for the loopfilters. The individual functions do the
|
||||
; necessary load, transpose (if necessary) and store. The function does not use
|
||||
; registers d8-d15.
|
||||
;
|
||||
; Inputs:
|
||||
; r0-r3, r12 PRESERVE
|
||||
; d0 blimit
|
||||
; d1 limit
|
||||
; d2 thresh
|
||||
; d3 p3
|
||||
; d4 p2
|
||||
; d5 p1
|
||||
; d6 p0
|
||||
; d7 q0
|
||||
; d16 q1
|
||||
; d17 q2
|
||||
; d18 q3
|
||||
;
|
||||
; Outputs:
|
||||
; d0 op2
|
||||
; d1 op1
|
||||
; d2 op0
|
||||
; d3 oq0
|
||||
; d4 oq1
|
||||
; d5 oq2
|
||||
|vp9_mbloop_filter_neon| PROC
    ; Besides the NEON registers, this routine writes r4 and r5 (packed
    ; flat & mask bits and the all-ones test), so callers must have saved
    ; them.  The outputs land in d0-d5, which overwrites the blimit / limit /
    ; thresh inputs in d0-d2.  The instruction order below is kept exactly as
    ; scheduled.

    ; filter_mask
    vabd.u8     d19, d3, d4                ; m1 = abs(p3 - p2)
    vabd.u8     d20, d4, d5                ; m2 = abs(p2 - p1)
    vabd.u8     d21, d5, d6                ; m3 = abs(p1 - p0)
    vabd.u8     d22, d16, d7               ; m4 = abs(q1 - q0)
    vabd.u8     d23, d17, d16              ; m5 = abs(q2 - q1)
    vabd.u8     d24, d18, d17              ; m6 = abs(q3 - q2)

    ; only the largest difference needs comparing against limit
    vmax.u8     d19, d19, d20              ; m1 = max(m1, m2)
    vmax.u8     d20, d21, d22              ; m2 = max(m3, m4)

    vabd.u8     d25, d6, d4                ; m7 = abs(p0 - p2)

    vmax.u8     d23, d23, d24              ; m3 = max(m5, m6)

    vabd.u8     d26, d7, d17               ; m8 = abs(q0 - q2)

    vmax.u8     d19, d19, d20              ; m1 = max(m1, m2)

    vabd.u8     d24, d6, d7                ; m9 = abs(p0 - q0)
    vabd.u8     d27, d3, d6                ; m10 = abs(p3 - p0)
    vabd.u8     d28, d18, d7               ; m11 = abs(q3 - q0)

    vmax.u8     d19, d19, d23              ; m1 = max(m1, m3)

    vabd.u8     d23, d5, d16               ; a = abs(p1 - q1)
    vqadd.u8    d24, d24, d24              ; b = abs(p0 - q0) * 2

    vcge.u8     d19, d1, d19               ; (limit >= m1) * -1

    ; only the largest difference needs comparing against the flat threshold
    vmax.u8     d25, d25, d26              ; m4 = max(m7, m8)
    vmax.u8     d26, d27, d28              ; m5 = max(m10, m11)

    vshr.u8     d23, d23, #1               ; a = a / 2

    vmax.u8     d25, d25, d26              ; m4 = max(m4, m5)

    vqadd.u8    d24, d24, d23              ; a = b + a

    vmax.u8     d20, d20, d25              ; m2 = max(m2, m4)

    vmov.u8     d23, #1
    vcge.u8     d24, d0, d24               ; (blimit >= a) * -1

    vcgt.u8     d21, d21, d2               ; (abs(p1 - p0) > thresh) * -1

    vcge.u8     d20, d23, d20              ; flat = (1 >= m2) * -1

    vand        d19, d19, d24              ; mask

    vcgt.u8     d23, d22, d2               ; (abs(q1 - q0) > thresh) * -1

    vand        d20, d20, d19              ; flat & mask

    vmov.u8     d22, #0x80                 ; sign-flip constant

    vorr        d23, d21, d23              ; hev

    ; Narrow the eight "flat & mask" byte lanes of d20 down to 4 bits each so
    ; that all of them fit into one 32-bit ARM register (low word of d30).
    vshrn.u16   d30, q10, #4
    vmov.u32    r4, d30[0]                 ; flat & mask, 4 bits per lane

    adds        r5, r4, #1                 ; Z set when r4 is all 1's

    ; If flat & mask is set for every lane only the power branch is needed.
    beq         power_branch_only

    cmp         r4, #0                     ; Z set when no lane is flat; tested
                                           ; by the beq further down

    ; mbfilter() function
    ; filter() function
    ; convert to signed
    veor        d21, d7, d22               ; qs0
    veor        d24, d6, d22               ; ps0
    veor        d25, d5, d22               ; ps1
    veor        d26, d16, d22              ; qs1

    vmov.u8     d27, #3

    vsub.s8     d28, d21, d24              ; (qs0 - ps0)

    vqsub.s8    d29, d25, d26              ; filter = clamp(ps1 - qs1)

    vmull.s8    q15, d28, d27              ; 3 * (qs0 - ps0)

    vand        d29, d29, d23              ; filter &= hev

    vaddw.s8    q15, q15, d29              ; filter + 3 * (qs0 - ps0)

    vmov.u8     d29, #4

    ; filter = clamp(filter + 3 * (qs0 - ps0))
    vqmovn.s16  d28, q15

    vand        d28, d28, d19              ; filter &= mask

    vqadd.s8    d30, d28, d27              ; filter2 = clamp(filter + 3)
    vqadd.s8    d29, d28, d29              ; filter1 = clamp(filter + 4)
    vshr.s8     d30, d30, #3               ; filter2 >>= 3
    vshr.s8     d29, d29, #3               ; filter1 >>= 3

    vqadd.s8    d24, d24, d30              ; op0 = clamp(ps0 + filter2)
    vqsub.s8    d21, d21, d29              ; oq0 = clamp(qs0 - filter1)

    ; outer tap adjustments: ++filter1 >> 1
    vrshr.s8    d29, d29, #1
    vbic        d29, d29, d23              ; filter &= ~hev

    vqadd.s8    d25, d25, d29              ; op1 = clamp(ps1 + filter)
    vqsub.s8    d26, d26, d29              ; oq1 = clamp(qs1 - filter)

    ; If flat & mask is clear for every lane only the filter branch is needed.
    ; The flags still come from the cmp above.
    beq         filter_branch_only

    ; Mixed lanes: both branches are computed and merged per lane.
    veor        d24, d24, d22              ; *f_op0 = u ^ 0x80
    veor        d21, d21, d22              ; *f_oq0 = u ^ 0x80
    veor        d25, d25, d22              ; *f_op1 = u ^ 0x80
    veor        d26, d26, d22              ; *f_oq1 = u ^ 0x80

    ; The filter branch has already run.  It does not produce op2 or oq2, so
    ; p2 and q2 are used for the non-flat lanes.  Run the power branch and
    ; combine the data.
    vmov.u8     d23, #2
    vaddl.u8    q14, d6, d7                ; r_op2 = p0 + q0
    vmlal.u8    q14, d3, d27               ; r_op2 += p3 * 3
    vmlal.u8    q14, d4, d23               ; r_op2 += p2 * 2

    vbif        d0, d4, d20                ; op2 |= p2 & ~(flat & mask)

    vaddw.u8    q14, d5                    ; r_op2 += p1

    vbif        d1, d25, d20               ; op1 |= f_op1 & ~(flat & mask)

    vqrshrn.u16 d30, q14, #3               ; r_op2

    vsubw.u8    q14, d3                    ; r_op1 = r_op2 - p3
    vsubw.u8    q14, d4                    ; r_op1 -= p2
    vaddw.u8    q14, d5                    ; r_op1 += p1
    vaddw.u8    q14, d16                   ; r_op1 += q1

    vbif        d2, d24, d20               ; op0 |= f_op0 & ~(flat & mask)

    vqrshrn.u16 d31, q14, #3               ; r_op1

    vsubw.u8    q14, d3                    ; r_op0 = r_op1 - p3
    vsubw.u8    q14, d5                    ; r_op0 -= p1
    vaddw.u8    q14, d6                    ; r_op0 += p0
    vaddw.u8    q14, d17                   ; r_op0 += q2

    vbit        d0, d30, d20               ; op2 |= r_op2 & (flat & mask)

    vqrshrn.u16 d23, q14, #3               ; r_op0

    vsubw.u8    q14, d3                    ; r_oq0 = r_op0 - p3
    vsubw.u8    q14, d6                    ; r_oq0 -= p0
    vaddw.u8    q14, d7                    ; r_oq0 += q0

    vbit        d1, d31, d20               ; op1 |= r_op1 & (flat & mask)

    vaddw.u8    q14, d18                   ; r_oq0 += q3

    vbit        d2, d23, d20               ; op0 |= r_op0 & (flat & mask)

    vqrshrn.u16 d22, q14, #3               ; r_oq0

    vsubw.u8    q14, d4                    ; r_oq1 = r_oq0 - p2
    vsubw.u8    q14, d7                    ; r_oq1 -= q0
    vaddw.u8    q14, d16                   ; r_oq1 += q1

    vbif        d3, d21, d20               ; oq0 |= f_oq0 & ~(flat & mask)

    vaddw.u8    q14, d18                   ; r_oq1 += q3

    vbif        d4, d26, d20               ; oq1 |= f_oq1 & ~(flat & mask)

    vqrshrn.u16 d6, q14, #3                ; r_oq1

    vsubw.u8    q14, d5                    ; r_oq2 = r_oq1 - p1
    vsubw.u8    q14, d16                   ; r_oq2 -= q1
    vaddw.u8    q14, d17                   ; r_oq2 += q2
    vaddw.u8    q14, d18                   ; r_oq2 += q3

    vbif        d5, d17, d20               ; oq2 |= q2 & ~(flat & mask)

    vqrshrn.u16 d7, q14, #3                ; r_oq2

    vbit        d3, d22, d20               ; oq0 |= r_oq0 & (flat & mask)
    vbit        d4, d6, d20                ; oq1 |= r_oq1 & (flat & mask)
    vbit        d5, d7, d20                ; oq2 |= r_oq2 & (flat & mask)

    bx          lr

power_branch_only
    ; Every lane is flat: each output is a rounded sum of eight taps >> 3,
    ; kept as a running sum in q14 that slides one pixel per output.
    vmov.u8     d27, #3
    vmov.u8     d21, #2
    vaddl.u8    q14, d6, d7                ; op2 = p0 + q0
    vmlal.u8    q14, d3, d27               ; op2 += p3 * 3
    vmlal.u8    q14, d4, d21               ; op2 += p2 * 2
    vaddw.u8    q14, d5                    ; op2 += p1
    vqrshrn.u16 d0, q14, #3                ; op2

    vsubw.u8    q14, d3                    ; op1 = op2 - p3
    vsubw.u8    q14, d4                    ; op1 -= p2
    vaddw.u8    q14, d5                    ; op1 += p1
    vaddw.u8    q14, d16                   ; op1 += q1
    vqrshrn.u16 d1, q14, #3                ; op1

    vsubw.u8    q14, d3                    ; op0 = op1 - p3
    vsubw.u8    q14, d5                    ; op0 -= p1
    vaddw.u8    q14, d6                    ; op0 += p0
    vaddw.u8    q14, d17                   ; op0 += q2
    vqrshrn.u16 d2, q14, #3                ; op0

    vsubw.u8    q14, d3                    ; oq0 = op0 - p3
    vsubw.u8    q14, d6                    ; oq0 -= p0
    vaddw.u8    q14, d7                    ; oq0 += q0
    vaddw.u8    q14, d18                   ; oq0 += q3
    vqrshrn.u16 d3, q14, #3                ; oq0

    vsubw.u8    q14, d4                    ; oq1 = oq0 - p2
    vsubw.u8    q14, d7                    ; oq1 -= q0
    vaddw.u8    q14, d16                   ; oq1 += q1
    vaddw.u8    q14, d18                   ; oq1 += q3
    vqrshrn.u16 d4, q14, #3                ; oq1

    vsubw.u8    q14, d5                    ; oq2 = oq1 - p1
    vsubw.u8    q14, d16                   ; oq2 -= q1
    vaddw.u8    q14, d17                   ; oq2 += q2
    vaddw.u8    q14, d18                   ; oq2 += q3
    vqrshrn.u16 d5, q14, #3                ; oq2

    bx          lr

filter_branch_only
    ; TODO(fgalligan): See if we can rearrange registers so we do not need to
    ; do the 2 vswp.
    vswp        d0, d4                     ; op2 = p2 (unchanged)
    vswp        d5, d17                    ; oq2 = q2 (unchanged)
    veor        d2, d24, d22               ; *op0 = u ^ 0x80
    veor        d3, d21, d22               ; *oq0 = u ^ 0x80
    veor        d1, d25, d22               ; *op1 = u ^ 0x80
    veor        d4, d26, d22               ; *oq1 = u ^ 0x80

    bx          lr

    ENDP        ; |vp9_mbloop_filter_neon|
|
||||
|
||||
END
|
@@ -1,603 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
|
||||
EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
;                                        const uint8_t *blimit,
;                                        const uint8_t *limit,
;                                        const uint8_t *thresh,
;                                        int count)
;
; Wide loop filter across a horizontal edge. Each iteration handles one
; 8-pixel-wide strip: the 16 rows s - 8 * p .. s + 7 * p are loaded as
; p7..p0, q0..q7, filtered by vp9_wide_mbfilter_neon, and only the rows the
; helper reports as modified (via r7) are stored back. s then advances by
; 8 pixels. A count <= 0 is a no-op.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
; sp+4  int count
;
; NOTE(review): every row access carries a @64 alignment qualifier, so s and
; p are presumably multiples of 8 -- confirm against the callers.
|vp9_mb_lpf_horizontal_edge_w_neon| PROC
    push        {r4-r8, lr}
    vpush       {d8-d15}
    ; 24 bytes of core registers and 64 bytes of NEON registers were pushed,
    ; so the stacked arguments now start at sp + 88.
    ldr         r4, [sp, #88]              ; load thresh
    ldr         r12, [sp, #92]             ; load count

    ; The loop below tests the counter only after the body has run, so
    ; without this guard count == 0 would filter one strip and then wrap.
    cmp         r12, #0
    ble         h_exit

h_count
    ; Reloaded on every iteration: the helper overwrites d16-d18.
    vld1.8      {d16[]}, [r2]              ; load *blimit
    vld1.8      {d17[]}, [r3]              ; load *limit
    vld1.8      {d18[]}, [r4]              ; load *thresh

    sub         r8, r0, r1, lsl #3         ; r8 = s - 8 * p (row p7)

    vld1.u8     {d0}, [r8@64], r1          ; p7
    vld1.u8     {d1}, [r8@64], r1          ; p6
    vld1.u8     {d2}, [r8@64], r1          ; p5
    vld1.u8     {d3}, [r8@64], r1          ; p4
    vld1.u8     {d4}, [r8@64], r1          ; p3
    vld1.u8     {d5}, [r8@64], r1          ; p2
    vld1.u8     {d6}, [r8@64], r1          ; p1
    vld1.u8     {d7}, [r8@64], r1          ; p0
    vld1.u8     {d8}, [r8@64], r1          ; q0
    vld1.u8     {d9}, [r8@64], r1          ; q1
    vld1.u8     {d10}, [r8@64], r1         ; q2
    vld1.u8     {d11}, [r8@64], r1         ; q3
    vld1.u8     {d12}, [r8@64], r1         ; q4
    vld1.u8     {d13}, [r8@64], r1         ; q5
    vld1.u8     {d14}, [r8@64], r1         ; q6
    vld1.u8     {d15}, [r8@64], r1         ; q7

    bl          vp9_wide_mbfilter_neon

    tst         r7, #1
    beq         h_mbfilter

    ; flat && mask were not set for any of the channels. Just store the values
    ; from filter.
    sub         r8, r0, r1, lsl #1         ; r8 = s - 2 * p (row p1)

    vst1.u8     {d25}, [r8@64], r1         ; store op1
    vst1.u8     {d24}, [r8@64], r1         ; store op0
    vst1.u8     {d23}, [r8@64], r1         ; store oq0
    vst1.u8     {d26}, [r8@64], r1         ; store oq1

    b           h_next

h_mbfilter
    tst         r7, #2
    beq         h_wide_mbfilter

    ; flat2 was not set for any of the channels. Just store the values from
    ; mbfilter.
    sub         r8, r0, r1, lsl #1
    sub         r8, r8, r1                 ; r8 = s - 3 * p (row p2)

    vst1.u8     {d18}, [r8@64], r1         ; store op2
    vst1.u8     {d19}, [r8@64], r1         ; store op1
    vst1.u8     {d20}, [r8@64], r1         ; store op0
    vst1.u8     {d21}, [r8@64], r1         ; store oq0
    vst1.u8     {d22}, [r8@64], r1         ; store oq1
    vst1.u8     {d23}, [r8@64], r1         ; store oq2

    b           h_next

h_wide_mbfilter
    sub         r8, r0, r1, lsl #3
    add         r8, r8, r1                 ; r8 = s - 7 * p (row p6)

    vst1.u8     {d16}, [r8@64], r1         ; store op6
    vst1.u8     {d24}, [r8@64], r1         ; store op5
    vst1.u8     {d25}, [r8@64], r1         ; store op4
    vst1.u8     {d26}, [r8@64], r1         ; store op3
    vst1.u8     {d27}, [r8@64], r1         ; store op2
    vst1.u8     {d18}, [r8@64], r1         ; store op1
    vst1.u8     {d19}, [r8@64], r1         ; store op0
    vst1.u8     {d20}, [r8@64], r1         ; store oq0
    vst1.u8     {d21}, [r8@64], r1         ; store oq1
    vst1.u8     {d22}, [r8@64], r1         ; store oq2
    vst1.u8     {d23}, [r8@64], r1         ; store oq3
    vst1.u8     {d1}, [r8@64], r1          ; store oq4
    vst1.u8     {d2}, [r8@64], r1          ; store oq5
    vst1.u8     {d3}, [r8@64], r1          ; store oq6

h_next
    add         r0, r0, #8                 ; next 8-pixel strip
    subs        r12, r12, #1
    bne         h_count

h_exit
    vpop        {d8-d15}
    pop         {r4-r8, pc}

    ENDP        ; |vp9_mb_lpf_horizontal_edge_w_neon|
|
||||
|
||||
; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
|
||||
; const uint8_t *blimit,
|
||||
; const uint8_t *limit,
|
||||
; const uint8_t *thresh)
|
||||
; r0 uint8_t *s,
|
||||
; r1 int p, /* pitch */
|
||||
; r2 const uint8_t *blimit,
|
||||
; r3 const uint8_t *limit,
|
||||
; sp const uint8_t *thresh,
|
||||
; Wide loop filter across a vertical edge, 8 rows per call (there is no
; count argument). An 8-row by 16-pixel tile centred on s is loaded and
; transposed so that d0-d7 hold p7..p0 and d8-d15 hold q0..q7, which is the
; register layout vp9_wide_mbfilter_neon documents as its input. After the
; helper returns, r7 selects how many columns are written back:
;   r7 & 1  -> 4 columns (op1..oq1) with vst4
;   r7 & 2  -> 6 columns (op2..oq2) with two vst3 per row
;   neither -> the whole tile is transposed back and stored with vst1
; r4-r8 and d8-d15 are saved and restored here; thresh is read from the stack.
; NOTE(review): the @64 qualifiers imply s and the pitch are multiples of 8
; -- confirm against the callers.
|vp9_mb_lpf_vertical_edge_w_neon| PROC
|
||||
push {r4-r8, lr}
|
||||
vpush {d8-d15}
|
||||
ldr r4, [sp, #88] ; load thresh (24 + 64 bytes were pushed above)
|
||||
|
||||
vld1.8 {d16[]}, [r2] ; load *blimit
|
||||
vld1.8 {d17[]}, [r3] ; load *limit
|
||||
vld1.8 {d18[]}, [r4] ; load *thresh
|
||||
|
||||
sub r8, r0, #8 ; r8 = s - 8: the 8 pixels left of the edge
|
||||
|
||||
; r8 walks the left halves of the rows, r0 the right halves.
vld1.8 {d0}, [r8@64], r1 ; row 0, left
|
||||
vld1.8 {d8}, [r0@64], r1 ; row 0, right
|
||||
vld1.8 {d1}, [r8@64], r1 ; row 1, left
|
||||
vld1.8 {d9}, [r0@64], r1 ; row 1, right
|
||||
vld1.8 {d2}, [r8@64], r1 ; row 2, left
|
||||
vld1.8 {d10}, [r0@64], r1 ; row 2, right
|
||||
vld1.8 {d3}, [r8@64], r1 ; row 3, left
|
||||
vld1.8 {d11}, [r0@64], r1 ; row 3, right
|
||||
vld1.8 {d4}, [r8@64], r1 ; row 4, left
|
||||
vld1.8 {d12}, [r0@64], r1 ; row 4, right
|
||||
vld1.8 {d5}, [r8@64], r1 ; row 5, left
|
||||
vld1.8 {d13}, [r0@64], r1 ; row 5, right
|
||||
vld1.8 {d6}, [r8@64], r1 ; row 6, left
|
||||
vld1.8 {d14}, [r0@64], r1 ; row 6, right
|
||||
vld1.8 {d7}, [r8@64], r1 ; row 7, left
|
||||
vld1.8 {d15}, [r0@64], r1 ; row 7, right
|
||||
|
||||
sub r0, r0, r1, lsl #3 ; rewind r0 by the 8 rows just read (back to s)
|
||||
|
||||
; Transpose both 8x8 byte tiles: 32-bit, then 16-bit, then 8-bit swaps.
vtrn.32 q0, q2
|
||||
vtrn.32 q1, q3
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
|
||||
vtrn.16 q0, q1
|
||||
vtrn.16 q2, q3
|
||||
vtrn.16 q4, q5
|
||||
vtrn.16 q6, q7
|
||||
|
||||
vtrn.8 d0, d1
|
||||
vtrn.8 d2, d3
|
||||
vtrn.8 d4, d5
|
||||
vtrn.8 d6, d7
|
||||
|
||||
vtrn.8 d8, d9
|
||||
vtrn.8 d10, d11
|
||||
vtrn.8 d12, d13
|
||||
vtrn.8 d14, d15
|
||||
|
||||
bl vp9_wide_mbfilter_neon
|
||||
|
||||
tst r7, #1
|
||||
beq v_mbfilter
|
||||
|
||||
; flat && mask were not set for any of the channels. Just store the values
|
||||
; from filter.
|
||||
sub r8, r0, #2 ; r8 = s - 2: column of p1
|
||||
|
||||
; The helper leaves op1/op0/oq0/oq1 in d25/d24/d23/d26; swapping d23 and d25
; puts them in ascending register order for the interleaving vst4.
vswp d23, d25
|
||||
|
||||
vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
|
||||
vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
|
||||
vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
|
||||
vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
|
||||
vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
|
||||
vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
|
||||
vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
|
||||
vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
|
||||
|
||||
b v_end
|
||||
|
||||
v_mbfilter
|
||||
tst r7, #2
|
||||
beq v_wide_mbfilter
|
||||
|
||||
; flat2 was not set for any of the channels. Just store the values from
|
||||
; mbfilter.
|
||||
sub r8, r0, #3 ; r8 = s - 3: column of p2
|
||||
|
||||
; Per row: three bytes op2..op0 at r8, then three bytes oq0..oq2 at r0.
vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1
|
||||
vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1
|
||||
vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
|
||||
vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
|
||||
vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
|
||||
vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
|
||||
vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
|
||||
vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
|
||||
vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
|
||||
vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
|
||||
vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
|
||||
vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
|
||||
vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
|
||||
vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
|
||||
vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
|
||||
vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
|
||||
|
||||
b v_end
|
||||
|
||||
v_wide_mbfilter
|
||||
sub r8, r0, #8 ; r8 = s - 8: left half of row 0
|
||||
|
||||
; Transpose the left tile back to row order. Its columns are held, in order,
; in d0, d16, d24, d25, d26, d27, d18, d19 (p7, op6 .. op0).
vtrn.32 d0, d26
|
||||
vtrn.32 d16, d27
|
||||
vtrn.32 d24, d18
|
||||
vtrn.32 d25, d19
|
||||
|
||||
vtrn.16 d0, d24
|
||||
vtrn.16 d16, d25
|
||||
vtrn.16 d26, d18
|
||||
vtrn.16 d27, d19
|
||||
|
||||
vtrn.8 d0, d16
|
||||
vtrn.8 d24, d25
|
||||
vtrn.8 d26, d27
|
||||
vtrn.8 d18, d19
|
||||
|
||||
; Same for the right tile: d20, d21, d22, d23, d1, d2, d3, d15
; (oq0 .. oq6, q7).
vtrn.32 d20, d1
|
||||
vtrn.32 d21, d2
|
||||
vtrn.32 d22, d3
|
||||
vtrn.32 d23, d15
|
||||
|
||||
vtrn.16 d20, d22
|
||||
vtrn.16 d21, d23
|
||||
vtrn.16 d1, d3
|
||||
vtrn.16 d2, d15
|
||||
|
||||
vtrn.8 d20, d21
|
||||
vtrn.8 d22, d23
|
||||
vtrn.8 d1, d2
|
||||
vtrn.8 d3, d15
|
||||
|
||||
; Store the rows back, alternating left half (r8) and right half (r0).
vst1.8 {d0}, [r8@64], r1
|
||||
vst1.8 {d20}, [r0@64], r1
|
||||
vst1.8 {d16}, [r8@64], r1
|
||||
vst1.8 {d21}, [r0@64], r1
|
||||
vst1.8 {d24}, [r8@64], r1
|
||||
vst1.8 {d22}, [r0@64], r1
|
||||
vst1.8 {d25}, [r8@64], r1
|
||||
vst1.8 {d23}, [r0@64], r1
|
||||
vst1.8 {d26}, [r8@64], r1
|
||||
vst1.8 {d1}, [r0@64], r1
|
||||
vst1.8 {d27}, [r8@64], r1
|
||||
vst1.8 {d2}, [r0@64], r1
|
||||
vst1.8 {d18}, [r8@64], r1
|
||||
vst1.8 {d3}, [r0@64], r1
|
||||
vst1.8 {d19}, [r8@64], r1
|
||||
vst1.8 {d15}, [r0@64], r1
|
||||
|
||||
v_end
|
||||
vpop {d8-d15}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
|
||||
|
||||
; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do
; the necessary load, transpose (if necessary) and store.
;
; Inputs:
; r0-r3 PRESERVE
; d16    blimit
; d17    limit
; d18    thresh
; d0    p7
; d1    p6
; d2    p5
; d3    p4
; d4    p3
; d5    p2
; d6    p1
; d7    p0
; d8    q0
; d9    q1
; d10   q2
; d11   q3
; d12   q4
; d13   q5
; d14   q6
; d15   q7
;
; Outputs:
; r7 tells the caller which filter results are valid and where they are:
;   r7 & 1   flat && mask is zero in every lane; only filter() ran.
;            op1 = d25, op0 = d24, oq0 = d23, oq1 = d26
;   r7 & 2   flat2 && flat && mask is zero in every lane; filter() and
;            mbfilter() ran and were merged per lane.
;            op2..op0 = d18..d20, oq0..oq2 = d21..d23
;   r7 == 0  the wide filter ran as well; all three results merged per lane.
;            op6 = d16, op5 = d24, op4 = d25, op3 = d26, op2 = d27,
;            op1 = d18, op0 = d19, oq0 = d20, oq1 = d21, oq2 = d22,
;            oq3 = d23, oq4 = d1, oq5 = d2, oq6 = d3
;            (d0 = p7 and d15 = q7 are left untouched)
;
; Clobbers r5, r6, r7 and d16-d31; the wide path also overwrites d1-d3 and
; q4 (d8/d9). The stack is not used.
|vp9_wide_mbfilter_neon| PROC
    mov         r7, #0

    ; filter_mask
    vabd.u8     d19, d4, d5                ; abs(p3 - p2)
    vabd.u8     d20, d5, d6                ; abs(p2 - p1)
    vabd.u8     d21, d6, d7                ; abs(p1 - p0)
    vabd.u8     d22, d9, d8                ; abs(q1 - q0)
    vabd.u8     d23, d10, d9               ; abs(q2 - q1)
    vabd.u8     d24, d11, d10              ; abs(q3 - q2)

    ; only compare the largest value to limit
    vmax.u8     d19, d19, d20              ; max(abs(p3 - p2), abs(p2 - p1))
    vmax.u8     d20, d21, d22              ; max(abs(p1 - p0), abs(q1 - q0))
    vmax.u8     d23, d23, d24              ; max(abs(q2 - q1), abs(q3 - q2))
    vmax.u8     d19, d19, d20

    vabd.u8     d24, d7, d8                ; abs(p0 - q0)

    vmax.u8     d19, d19, d23

    vabd.u8     d23, d6, d9                ; a = abs(p1 - q1)
    vqadd.u8    d24, d24, d24              ; b = abs(p0 - q0) * 2

    ; (limit >= largest neighbour difference) * -1
    vcge.u8     d19, d17, d19

    ; flatmask4
    vabd.u8     d25, d7, d5                ; abs(p0 - p2)
    vabd.u8     d26, d8, d10               ; abs(q0 - q2)
    vabd.u8     d27, d4, d7                ; abs(p3 - p0)
    vabd.u8     d28, d11, d8               ; abs(q3 - q0)

    ; only compare the largest value to thresh
    vmax.u8     d25, d25, d26              ; max(abs(p0 - p2), abs(q0 - q2))
    vmax.u8     d26, d27, d28              ; max(abs(p3 - p0), abs(q3 - q0))
    vmax.u8     d25, d25, d26
    vmax.u8     d20, d20, d25

    vshr.u8     d23, d23, #1               ; a = a / 2
    vqadd.u8    d24, d24, d23              ; a = b + a

    vmov.u8     d30, #1
    vcge.u8     d24, d16, d24              ; (blimit >= a) * -1

    vcge.u8     d20, d30, d20              ; flat

    vand        d19, d19, d24              ; mask

    ; hevmask
    vcgt.u8     d21, d21, d18              ; (abs(p1 - p0) > thresh)*-1
    vcgt.u8     d22, d22, d18              ; (abs(q1 - q0) > thresh)*-1
    vorr        d21, d21, d22              ; hev

    vand        d16, d20, d19              ; flat && mask
    vmov        r5, r6, d16

    ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
    vabd.u8     d22, d3, d7                ; abs(p4 - p0)
    vabd.u8     d23, d12, d8               ; abs(q4 - q0)
    vabd.u8     d24, d7, d2                ; abs(p0 - p5)
    vabd.u8     d25, d8, d13               ; abs(q0 - q5)
    vabd.u8     d26, d1, d7                ; abs(p6 - p0)
    vabd.u8     d27, d14, d8               ; abs(q6 - q0)
    vabd.u8     d28, d0, d7                ; abs(p7 - p0)
    vabd.u8     d29, d15, d8               ; abs(q7 - q0)

    ; only compare the largest value to thresh
    vmax.u8     d22, d22, d23              ; max(abs(p4 - p0), abs(q4 - q0))
    vmax.u8     d23, d24, d25              ; max(abs(p0 - p5), abs(q0 - q5))
    vmax.u8     d24, d26, d27              ; max(abs(p6 - p0), abs(q6 - q0))
    vmax.u8     d25, d28, d29              ; max(abs(p7 - p0), abs(q7 - q0))

    vmax.u8     d26, d22, d23
    vmax.u8     d27, d24, d25
    vmax.u8     d23, d26, d27

    vcge.u8     d18, d30, d23              ; flat2

    vmov.u8     d22, #0x80

    orrs        r5, r5, r6                 ; Check for 0
    orreq       r7, r7, #1                 ; Only do filter branch

    vand        d17, d18, d16              ; flat2 && flat && mask
    vmov        r5, r6, d17                ; tested after filter(), below

    ; mbfilter() function

    ; filter() function
    ; convert to signed
    veor        d23, d8, d22               ; qs0
    veor        d24, d7, d22               ; ps0
    veor        d25, d6, d22               ; ps1
    veor        d26, d9, d22               ; qs1

    vmov.u8     d27, #3

    vsub.s8     d28, d23, d24              ; ( qs0 - ps0)
    vqsub.s8    d29, d25, d26              ; filter = clamp(ps1-qs1)
    vmull.s8    q15, d28, d27              ; 3 * ( qs0 - ps0)
    vand        d29, d29, d21              ; filter &= hev
    vaddw.s8    q15, q15, d29              ; filter + 3 * (qs0 - ps0)
    vmov.u8     d29, #4

    ; filter = clamp(filter + 3 * ( qs0 - ps0))
    vqmovn.s16  d28, q15

    vand        d28, d28, d19              ; filter &= mask

    vqadd.s8    d30, d28, d27              ; filter2 = clamp(filter+3)
    vqadd.s8    d29, d28, d29              ; filter1 = clamp(filter+4)
    vshr.s8     d30, d30, #3               ; filter2 >>= 3
    vshr.s8     d29, d29, #3               ; filter1 >>= 3

    vqadd.s8    d24, d24, d30              ; op0 = clamp(ps0 + filter2)
    vqsub.s8    d23, d23, d29              ; oq0 = clamp(qs0 - filter1)

    ; outer tap adjustments: ++filter1 >> 1
    vrshr.s8    d29, d29, #1
    vbic        d29, d29, d21              ; filter &= ~hev

    vqadd.s8    d25, d25, d29              ; op1 = clamp(ps1 + filter)
    vqsub.s8    d26, d26, d29              ; oq1 = clamp(qs1 - filter)

    veor        d24, d24, d22              ; *f_op0 = u^0x80
    veor        d23, d23, d22              ; *f_oq0 = u^0x80
    veor        d25, d25, d22              ; *f_op1 = u^0x80
    veor        d26, d26, d22              ; *f_oq1 = u^0x80

    tst         r7, #1
    bxne        lr

    ; r5/r6 still hold flat2 && flat && mask from the vmov above. If it is
    ; zero in every lane the wide filter cannot change any pixel, so flag the
    ; callers' mbfilter-only store path and return after mbfilter().
    orrs        r5, r5, r6                 ; Check for 0
    orreq       r7, r7, #2                 ; Only do mbfilter branch

    ; mbfilter flat && mask branch
    ; TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
    ; and using vbit on the q's?
    vmov.u8     d29, #2
    vaddl.u8    q15, d7, d8                ; op2 = p0 + q0
    vmlal.u8    q15, d4, d27               ; op2 = p0 + q0 + p3 * 3
    vmlal.u8    q15, d5, d29               ; op2 = p0 + q0 + p3 * 3 + p2 * 2
    vaddl.u8    q10, d4, d5
    vaddw.u8    q15, d6                    ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
    vaddl.u8    q14, d6, d9
    vqrshrn.u16 d18, q15, #3               ; r_op2

    vsub.i16    q15, q10
    vaddl.u8    q10, d4, d6
    vadd.i16    q15, q14
    vaddl.u8    q14, d7, d10
    vqrshrn.u16 d19, q15, #3               ; r_op1

    vsub.i16    q15, q10
    vadd.i16    q15, q14
    vaddl.u8    q14, d8, d11
    vqrshrn.u16 d20, q15, #3               ; r_op0

    vsubw.u8    q15, d4                    ; oq0 = op0 - p3
    vsubw.u8    q15, d7                    ; oq0 -= p0
    vadd.i16    q15, q14
    vaddl.u8    q14, d9, d11
    vqrshrn.u16 d21, q15, #3               ; r_oq0

    vsubw.u8    q15, d5                    ; oq1 = oq0 - p2
    vsubw.u8    q15, d8                    ; oq1 -= q0
    vadd.i16    q15, q14
    vaddl.u8    q14, d10, d11
    vqrshrn.u16 d22, q15, #3               ; r_oq1

    vsubw.u8    q15, d6                    ; oq2 = oq1 - p1
    vsubw.u8    q15, d9                    ; oq2 -= q1
    vadd.i16    q15, q14
    vqrshrn.u16 d27, q15, #3               ; r_oq2

    ; Filter does not set op2 or oq2, so use p2 and q2.
    vbif        d18, d5, d16               ; t_op2 |= p2 & ~(flat & mask)
    vbif        d19, d25, d16              ; t_op1 |= f_op1 & ~(flat & mask)
    vbif        d20, d24, d16              ; t_op0 |= f_op0 & ~(flat & mask)
    vbif        d21, d23, d16              ; t_oq0 |= f_oq0 & ~(flat & mask)
    vbif        d22, d26, d16              ; t_oq1 |= f_oq1 & ~(flat & mask)

    vbit        d23, d27, d16              ; t_oq2 |= r_oq2 & (flat & mask)
    vbif        d23, d10, d16              ; t_oq2 |= q2 & ~(flat & mask)

    tst         r7, #2
    bxne        lr

    ; wide_mbfilter flat2 && flat && mask branch
    vmov.u8     d16, #7
    vaddl.u8    q15, d7, d8                ; op6 = p0 + q0
    vaddl.u8    q12, d2, d3
    vaddl.u8    q13, d4, d5
    vaddl.u8    q14, d1, d6
    vmlal.u8    q15, d0, d16               ; op6 += p7 * 7
    vadd.i16    q12, q13
    vadd.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vadd.i16    q15, q12
    vaddl.u8    q12, d0, d1
    vaddw.u8    q15, d1
    vaddl.u8    q13, d0, d2
    vadd.i16    q14, q15, q14
    vqrshrn.u16 d16, q15, #4               ; w_op6

    vsub.i16    q15, q14, q12
    vaddl.u8    q14, d3, d10
    vqrshrn.u16 d24, q15, #4               ; w_op5

    vsub.i16    q15, q13
    vaddl.u8    q13, d0, d3
    vadd.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vqrshrn.u16 d25, q15, #4               ; w_op4

    vadd.i16    q15, q14
    vaddl.u8    q14, d0, d4
    vsub.i16    q15, q13
    vsub.i16    q14, q15, q14
    vqrshrn.u16 d26, q15, #4               ; w_op3

    vaddw.u8    q15, q14, d5               ; op2 += p2
    vaddl.u8    q14, d0, d5
    vaddw.u8    q15, d12                   ; op2 += q4
    vbif        d26, d4, d17               ; op3 |= p3 & ~(f2 & f & m)
    vqrshrn.u16 d27, q15, #4               ; w_op2

    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d6
    vaddw.u8    q15, d6                    ; op1 += p1
    vaddw.u8    q15, d13                   ; op1 += q5
    vbif        d27, d18, d17              ; op2 |= t_op2 & ~(f2 & f & m)
    vqrshrn.u16 d18, q15, #4               ; w_op1

    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d7
    vaddw.u8    q15, d7                    ; op0 += p0
    vaddw.u8    q15, d14                   ; op0 += q6
    vbif        d18, d19, d17              ; op1 |= t_op1 & ~(f2 & f & m)
    vqrshrn.u16 d19, q15, #4               ; w_op0

    vsub.i16    q15, q14
    vaddl.u8    q14, d1, d8
    vaddw.u8    q15, d8                    ; oq0 += q0
    vaddw.u8    q15, d15                   ; oq0 += q7
    vbif        d19, d20, d17              ; op0 |= t_op0 & ~(f2 & f & m)
    vqrshrn.u16 d20, q15, #4               ; w_oq0

    vsub.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vaddw.u8    q15, d9                    ; oq1 += q1
    vaddl.u8    q4, d10, d15               ; q0/q1 (d8/d9) are dead from here
    vaddw.u8    q15, d15                   ; oq1 += q7
    vbif        d20, d21, d17              ; oq0 |= t_oq0 & ~(f2 & f & m)
    vqrshrn.u16 d21, q15, #4               ; w_oq1

    vsub.i16    q15, q14
    vaddl.u8    q14, d3, d10
    vadd.i16    q15, q4
    vaddl.u8    q4, d11, d15
    vbif        d21, d22, d17              ; oq1 |= t_oq1 & ~(f2 & f & m)
    vqrshrn.u16 d22, q15, #4               ; w_oq2

    vsub.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vadd.i16    q15, q4
    vaddl.u8    q4, d12, d15
    vbif        d22, d23, d17              ; oq2 |= t_oq2 & ~(f2 & f & m)
    vqrshrn.u16 d23, q15, #4               ; w_oq3

    vsub.i16    q15, q14
    vaddl.u8    q14, d5, d12
    vadd.i16    q15, q4
    vaddl.u8    q4, d13, d15
    vbif        d16, d1, d17               ; op6 |= p6 & ~(f2 & f & m)
    vqrshrn.u16 d1, q15, #4                ; w_oq4

    vsub.i16    q15, q14
    vaddl.u8    q14, d6, d13
    vadd.i16    q15, q4
    vaddl.u8    q4, d14, d15
    vbif        d24, d2, d17               ; op5 |= p5 & ~(f2 & f & m)
    vqrshrn.u16 d2, q15, #4                ; w_oq5

    vsub.i16    q15, q14
    vbif        d25, d3, d17               ; op4 |= p4 & ~(f2 & f & m)
    vadd.i16    q15, q4
    vbif        d23, d11, d17              ; oq3 |= q3 & ~(f2 & f & m)
    vqrshrn.u16 d3, q15, #4                ; w_oq6
    vbif        d1, d12, d17               ; oq4 |= q4 & ~(f2 & f & m)
    vbif        d2, d13, d17               ; oq5 |= q5 & ~(f2 & f & m)
    vbif        d3, d14, d17               ; oq6 |= q6 & ~(f2 & f & m)

    bx          lr
    ENDP        ; |vp9_wide_mbfilter_neon|
|
||||
|
||||
END
|
@@ -1,198 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct16x16_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
; DC-only 16x16 inverse transform: a single value a1 is derived from input[0]
; and added, with saturation to 0..255, to every pixel of the 16x16 block at
; dest. Each 16-pixel row is handled as two 8-byte halves. r1 walks the block
; for the loads while r12 (a copy of the original dest) trails it for the
; stores. Writes r0, r1, r2, r12 and q0-q3, q8-q12, q15; the stack is not used.
|vp9_short_idct16x16_1_add_neon| PROC
|
||||
ldrsh r0, [r0] ; r0 = input[0], sign-extended
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest (r12 is free once the multiply has issued)
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 6)
|
||||
add r0, r0, #32 ; + (1 <<((6) - 1))
|
||||
asr r0, r0, #6 ; >> 6
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
mov r0, #8 ; r0 = step from the left half of a row to its right half
|
||||
sub r2, #8 ; r2 = dest_stride - 8: step from a right half to the next row
|
||||
|
||||
; load destination data row0 - row3
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
; first two of the four rows just loaded
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; last two of the four rows just loaded
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row4 - row7
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row8 - row11
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
; load destination data row12 - row15
|
||||
vld1.64 {d2}, [r1], r0
|
||||
vld1.64 {d3}, [r1], r2
|
||||
vld1.64 {d4}, [r1], r0
|
||||
vld1.64 {d5}, [r1], r2
|
||||
vld1.64 {d6}, [r1], r0
|
||||
vld1.64 {d7}, [r1], r2
|
||||
vld1.64 {d16}, [r1], r0
|
||||
vld1.64 {d17}, [r1], r2
|
||||
|
||||
vaddw.u8 q9, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d3 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d4 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d5 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
vaddw.u8 q9, q0, d6 ; dest[x] + a1
|
||||
vaddw.u8 q10, q0, d7 ; dest[x] + a1
|
||||
vaddw.u8 q11, q0, d16 ; dest[x] + a1
|
||||
vaddw.u8 q12, q0, d17 ; dest[x] + a1
|
||||
vqmovun.s16 d2, q9 ; clip_pixel
|
||||
vqmovun.s16 d3, q10 ; clip_pixel
|
||||
vqmovun.s16 d30, q11 ; clip_pixel
|
||||
vqmovun.s16 d31, q12 ; clip_pixel
|
||||
vst1.64 {d2}, [r12], r0
|
||||
vst1.64 {d3}, [r12], r2
|
||||
vst1.64 {d30}, [r12], r0
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_1_add_neon|
|
||||
|
||||
END
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,68 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct4x4_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
; DC-only 4x4 inverse transform: a single value a1 is derived from input[0]
; and added, with saturation to 0..255, to each of the 16 pixels of the 4x4
; block at dest. Rows are moved as 32-bit lanes, two rows per d register.
; Writes r0, r1, r12 and q0, d2, d4, d6, d7, q8, q9; the stack is not used.
|vp9_short_idct4x4_1_add_neon| PROC
|
||||
ldrsh r0, [r0] ; r0 = input[0], sign-extended
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
mov r12, #0x2d00
|
||||
add r12, #0x41
|
||||
|
||||
; out = dct_const_round_shift(input[0] * cospi_16_64)
|
||||
mul r0, r0, r12 ; input[0] * cospi_16_64
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; out = dct_const_round_shift(out * cospi_16_64)
|
||||
mul r0, r0, r12 ; out * cospi_16_64
|
||||
mov r12, r1 ; save dest (r1 is advanced by the loads below)
|
||||
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
|
||||
asr r0, r0, #14 ; >> DCT_CONST_BITS
|
||||
|
||||
; a1 = ROUND_POWER_OF_TWO(out, 4)
|
||||
add r0, r0, #8 ; + (1 <<((4) - 1))
|
||||
asr r0, r0, #4 ; >> 4
|
||||
|
||||
vdup.s16 q0, r0 ; duplicate a1
|
||||
|
||||
vld1.32 {d2[0]}, [r1], r2 ; row 0
|
||||
vld1.32 {d2[1]}, [r1], r2 ; row 1
|
||||
vld1.32 {d4[0]}, [r1], r2 ; row 2
|
||||
vld1.32 {d4[1]}, [r1] ; row 3, no post-increment
|
||||
|
||||
vaddw.u8 q8, q0, d2 ; dest[x] + a1
|
||||
vaddw.u8 q9, q0, d4
|
||||
|
||||
vqmovun.s16 d6, q8 ; clip_pixel
|
||||
vqmovun.s16 d7, q9
|
||||
|
||||
vst1.32 {d6[0]}, [r12], r2 ; row 0
|
||||
vst1.32 {d6[1]}, [r12], r2 ; row 1
|
||||
vst1.32 {d7[0]}, [r12], r2 ; row 2
|
||||
vst1.32 {d7[1]}, [r12] ; row 3
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_1_add_neon|
|
||||
|
||||
END
|
@@ -1,190 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||
;
|
||||
; r0 int16_t *input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
; Full 4x4 inverse DCT of the 16 coefficients at input, with the rounded
; result added (saturating to 0..255) to the 4x4 pixel block at dest.
; Writes r0, r1, r2, r3, r12 and q1, q8-q15; the stack is not used.
|vp9_short_idct4x4_add_neon| PROC
|
||||
|
||||
; The 2D transform is done with two passes which are actually pretty
|
||||
; similar. We first transform the rows. This is done by transposing
|
||||
; the inputs, doing an SIMD column transform (the columns are the
|
||||
; transposed rows) and then transpose the results (so that it goes back
|
||||
; in normal/row positions). Then, we transform the columns by doing
|
||||
; another SIMD column transform.
|
||||
; So, two passes of a transpose followed by a column transform.
|
||||
|
||||
; load the inputs into q8-q9, d16-d19
|
||||
vld1.s16 {q8,q9}, [r0]! ; the written-back r0 is not used; r0 is reloaded below
|
||||
|
||||
; generate scalar constants
|
||||
; cospi_8_64 = 15137 = 0x3b21
|
||||
mov r0, #0x3b00
|
||||
add r0, #0x21
|
||||
; cospi_16_64 = 11585 = 0x2d41
|
||||
mov r3, #0x2d00
|
||||
add r3, #0x41
|
||||
; cospi_24_64 = 6270 = 0x187e
|
||||
mov r12, #0x1800
|
||||
add r12, #0x7e
|
||||
|
||||
; transpose the input data
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
|
||||
; generate constant vectors
|
||||
vdup.16 d20, r0 ; replicate cospi_8_64
|
||||
vdup.16 d21, r3 ; replicate cospi_16_64
|
||||
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
vdup.16 d22, r12 ; replicate cospi_24_64
|
||||
|
||||
; do the transform on transposed rows
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
; (rounding narrow by 14 bits; results land so that q13 = {step[0], step[1]}
; and q14 = {step[3], step[2]}, ready for the paired add/sub in stage 2)
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2]
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3]
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14 ; d16 = output[0], d17 = output[1]
|
||||
vsub.s16 q9, q13, q14 ; d18 = output[3], d19 = output[2]
|
||||
vswp d18, d19 ; restore order: d18 = output[2], d19 = output[3]
|
||||
|
||||
; transpose the results
|
||||
; 00 01 02 03 d16
|
||||
; 10 11 12 13 d17
|
||||
; 20 21 22 23 d18
|
||||
; 30 31 32 33 d19
|
||||
vtrn.16 d16, d17
|
||||
vtrn.16 d18, d19
|
||||
; 00 10 02 12 d16
|
||||
; 01 11 03 13 d17
|
||||
; 20 30 22 32 d18
|
||||
; 21 31 23 33 d19
|
||||
vtrn.32 q8, q9
|
||||
; 00 10 20 30 d16
|
||||
; 01 11 21 31 d17
|
||||
; 02 12 22 32 d18
|
||||
; 03 13 23 33 d19
|
||||
|
||||
; do the transform on columns
|
||||
|
||||
; stage 1
|
||||
vadd.s16 d23, d16, d18 ; (input[0] + input[2])
|
||||
vsub.s16 d24, d16, d18 ; (input[0] - input[2])
|
||||
|
||||
vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
|
||||
vmull.s16 q1, d17, d20 ; input[1] * cospi_8_64
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64;
|
||||
; (input[0] - input[2]) * cospi_16_64;
|
||||
vmull.s16 q13, d23, d21
|
||||
vmull.s16 q14, d24, d21
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
||||
vmlsl.s16 q15, d19, d20
|
||||
vmlal.s16 q1, d19, d22
|
||||
|
||||
; dct_const_round_shift
|
||||
vqrshrn.s32 d26, q13, #14 ; step[0]
|
||||
vqrshrn.s32 d27, q14, #14 ; step[1]
|
||||
vqrshrn.s32 d29, q15, #14 ; step[2]
|
||||
vqrshrn.s32 d28, q1, #14 ; step[3]
|
||||
|
||||
; stage 2
|
||||
; output[0] = step[0] + step[3];
|
||||
; output[1] = step[1] + step[2];
|
||||
; output[3] = step[0] - step[3];
|
||||
; output[2] = step[1] - step[2];
|
||||
vadd.s16 q8, q13, q14
|
||||
vsub.s16 q9, q13, q14 ; no vswp this time, see below
|
||||
|
||||
; The results are in two registers, one of them being swapped. This will
|
||||
; be taken care of by loading the 'dest' value in a swapped fashion and
|
||||
; also storing them in the same swapped fashion.
|
||||
; temp_out[0, 1] = d16, d17 = q8
|
||||
; temp_out[2, 3] = d19, d18 = q9 swapped
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4)
|
||||
vrshr.s16 q8, q8, #4
|
||||
vrshr.s16 q9, q9, #4
|
||||
|
||||
vld1.32 {d26[0]}, [r1], r2 ; dest row 0
|
||||
vld1.32 {d26[1]}, [r1], r2 ; dest row 1
|
||||
vld1.32 {d27[1]}, [r1], r2 ; dest row 2 into the high lane (pairs with d19)
|
||||
vld1.32 {d27[0]}, [r1] ; no post-increment; dest row 3 (pairs with d18)
|
||||
|
||||
; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]
|
||||
vaddw.u8 q8, q8, d26
|
||||
vaddw.u8 q9, q9, d27
|
||||
|
||||
; clip_pixel
|
||||
vqmovun.s16 d26, q8
|
||||
vqmovun.s16 d27, q9
|
||||
|
||||
; do the stores in reverse order with negative post-increment, by changing
|
||||
; the sign of the stride
|
||||
rsb r2, r2, #0 ; r2 = -dest_stride; r1 still points at row 3
|
||||
vst1.32 {d27[0]}, [r1], r2 ; row 3
|
||||
vst1.32 {d27[1]}, [r1], r2 ; row 2
|
||||
vst1.32 {d26[1]}, [r1], r2 ; row 1
|
||||
vst1.32 {d26[0]}, [r1] ; no post-increment; row 0
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct4x4_add_neon|
|
||||
|
||||
END
|
@@ -1,88 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct8x8_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
;                                   int dest_stride)
;
; DC-only path: a single value a1 is derived from input[0] and added, with
; unsigned saturation, to every pixel of the 8x8 block at dest.
;
;   r0  int16_t *input      (only input[0] is read; r0 is then reused as scalar)
;   r1  uint8_t *dest       (advanced while loading the eight rows)
;   r2  int      dest_stride
;
; Registers written: r0, r1, r12, q0-q3, q8-q12, q15. d8-d15 are not touched.
|vp9_short_idct8x8_1_add_neon| PROC
    ldrsh       r0, [r0]                ; r0 = input[0], sign-extended

    ; r12 = cospi_16_64 = 11585 = 0x2d41
    mov         r12, #0x2d00
    add         r12, r12, #0x41

    ; out = dct_const_round_shift(input[0] * cospi_16_64)
    mul         r0, r0, r12
    add         r0, r0, #0x2000         ; rounding term: 1 << (14 - 1)
    asr         r0, r0, #14             ; >> DCT_CONST_BITS

    ; out = dct_const_round_shift(out * cospi_16_64)
    mul         r0, r0, r12
    mov         r12, r1                 ; r12 = store pointer (r1 is the load pointer)
    add         r0, r0, #0x2000
    asr         r0, r0, #14

    ; a1 = ROUND_POWER_OF_TWO(out, 5)
    add         r0, r0, #16             ; rounding term: 1 << (5 - 1)
    asr         r0, r0, #5

    vdup.s16    q0, r0                  ; q0 = a1 in all eight 16-bit lanes

    ; Read all eight destination rows (8 bytes each) before writing any.
    vld1.64     {d2}, [r1], r2
    vld1.64     {d3}, [r1], r2
    vld1.64     {d4}, [r1], r2
    vld1.64     {d5}, [r1], r2
    vld1.64     {d6}, [r1], r2
    vld1.64     {d7}, [r1], r2
    vld1.64     {d16}, [r1], r2
    vld1.64     {d17}, [r1]             ; last row: no post-increment

    ; Rows 0-3: widen dest to 16 bit, add a1, saturate back to u8, store.
    vaddw.u8    q9, q0, d2
    vaddw.u8    q10, q0, d3
    vaddw.u8    q11, q0, d4
    vaddw.u8    q12, q0, d5
    vqmovun.s16 d2, q9                  ; clip_pixel
    vqmovun.s16 d3, q10
    vqmovun.s16 d30, q11
    vqmovun.s16 d31, q12
    vst1.64     {d2}, [r12], r2
    vst1.64     {d3}, [r12], r2
    vst1.64     {d30}, [r12], r2
    vst1.64     {d31}, [r12], r2

    ; Rows 4-7: same sequence on d6, d7, d16, d17.
    vaddw.u8    q9, q0, d6
    vaddw.u8    q10, q0, d7
    vaddw.u8    q11, q0, d16
    vaddw.u8    q12, q0, d17
    vqmovun.s16 d2, q9                  ; clip_pixel
    vqmovun.s16 d3, q10
    vqmovun.s16 d30, q11
    vqmovun.s16 d31, q12
    vst1.64     {d2}, [r12], r2
    vst1.64     {d3}, [r12], r2
    vst1.64     {d30}, [r12], r2
    vst1.64     {d31}, [r12], r2

    bx          lr
    ENDP        ; |vp9_short_idct8x8_1_add_neon|
|
||||
|
||||
END
|
@@ -1,519 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct8x8_add_neon|
|
||||
EXPORT |vp9_short_idct10_8x8_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; IDCT8x8_1D
;
; One 8-point inverse DCT pass applied in parallel to the eight 16-bit lanes
; of q8-q15, where q(8+k) holds in[k]. Results are written back to q8-q15
; (q(8+k) = out[k]).
;
; Expects the cosine constants in core registers:
;   r3 = cospi_28_64   r4 = cospi_4_64    r5 = cospi_12_64   r6 = cospi_20_64
;   r7 = cospi_16_64   r8 = cospi_24_64   r9 = cospi_8_64
;
; Uses q0-q7 as scratch. Every 32-bit product is narrowed with
; vqrshrn #14, i.e. a rounding, saturating dct_const_round_shift.
    MACRO
    IDCT8x8_1D
    ; ---- stage 1, odd inputs (q9 = in[1], q11 = in[3], q13 = in[5], q15 = in[7])
    vdup.16     d0, r3                  ; cospi_28_64
    vdup.16     d1, r4                  ; cospi_4_64
    vdup.16     d2, r5                  ; cospi_12_64
    vdup.16     d3, r6                  ; cospi_20_64

    ; q2:q3 = in[1] * cospi_28_64
    vmull.s16   q2, d18, d0
    vmull.s16   q3, d19, d0

    ; q5:q6 = in[5] * cospi_12_64
    vmull.s16   q5, d26, d2
    vmull.s16   q6, d27, d2

    ; q2:q3 -= in[7] * cospi_4_64
    vmlsl.s16   q2, d30, d1
    vmlsl.s16   q3, d31, d1

    ; q5:q6 -= in[3] * cospi_20_64
    vmlsl.s16   q5, d22, d3
    vmlsl.s16   q6, d23, d3

    ; q4 = step1[4] = round_shift(in[1]*cospi_28_64 - in[7]*cospi_4_64)
    vqrshrn.s32 d8, q2, #14
    vqrshrn.s32 d9, q3, #14

    ; q5 = step1[5] = round_shift(in[5]*cospi_12_64 - in[3]*cospi_20_64)
    vqrshrn.s32 d10, q5, #14
    vqrshrn.s32 d11, q6, #14

    ; q2:q3 = in[1] * cospi_4_64
    vmull.s16   q2, d18, d1
    vmull.s16   q3, d19, d1

    ; q9:q13 = in[5] * cospi_20_64 (in[1] and in[5] are dead after this)
    vmull.s16   q9, d26, d3
    vmull.s16   q13, d27, d3

    ; q2:q3 += in[7] * cospi_28_64
    vmlal.s16   q2, d30, d0
    vmlal.s16   q3, d31, d0

    ; q9:q13 += in[3] * cospi_12_64
    vmlal.s16   q9, d22, d2
    vmlal.s16   q13, d23, d2

    ; q7 = step1[7] = round_shift(in[1]*cospi_4_64 + in[7]*cospi_28_64)
    vqrshrn.s32 d14, q2, #14
    vqrshrn.s32 d15, q3, #14

    ; ---- stages 2 and 3, even inputs (q8 = in[0], q10 = in[2], q12 = in[4], q14 = in[6])
    vdup.16     d0, r7                  ; cospi_16_64

    ; q6 = step1[6] = round_shift(in[5]*cospi_20_64 + in[3]*cospi_12_64)
    vqrshrn.s32 d12, q9, #14
    vqrshrn.s32 d13, q13, #14

    ; q2:q3 = in[0] * cospi_16_64
    vmull.s16   q2, d16, d0
    vmull.s16   q3, d17, d0

    ; q13:q15 = in[0] * cospi_16_64
    vmull.s16   q13, d16, d0
    vmull.s16   q15, d17, d0

    ; q2:q3 = (in[0] + in[4]) * cospi_16_64
    vmlal.s16   q2, d24, d0
    vmlal.s16   q3, d25, d0

    ; q13:q15 = (in[0] - in[4]) * cospi_16_64
    vmlsl.s16   q13, d24, d0
    vmlsl.s16   q15, d25, d0

    vdup.16     d0, r8                  ; cospi_24_64
    vdup.16     d1, r9                  ; cospi_8_64

    ; q9 = round_shift((in[0] + in[4]) * cospi_16_64)
    vqrshrn.s32 d18, q2, #14
    vqrshrn.s32 d19, q3, #14

    ; q11 = round_shift((in[0] - in[4]) * cospi_16_64)
    vqrshrn.s32 d22, q13, #14
    vqrshrn.s32 d23, q15, #14

    ; q2:q3 = in[2] * cospi_24_64
    vmull.s16   q2, d20, d0
    vmull.s16   q3, d21, d0

    ; q8:q12 = in[2] * cospi_8_64
    vmull.s16   q8, d20, d1
    vmull.s16   q12, d21, d1

    ; q2:q3 -= in[6] * cospi_8_64
    vmlsl.s16   q2, d28, d1
    vmlsl.s16   q3, d29, d1

    ; q8:q12 += in[6] * cospi_24_64
    vmlal.s16   q8, d28, d0
    vmlal.s16   q12, d29, d0

    ; q13 = round_shift(in[2]*cospi_24_64 - in[6]*cospi_8_64)
    vqrshrn.s32 d26, q2, #14
    vqrshrn.s32 d27, q3, #14

    ; q15 = round_shift(in[2]*cospi_8_64 + in[6]*cospi_24_64)
    vqrshrn.s32 d30, q8, #14
    vqrshrn.s32 d31, q12, #14

    ; even-half butterfly: q0-q3 = step1[0..3]
    vadd.s16    q0, q9, q15             ; step1[0] = q9 + q15
    vadd.s16    q1, q11, q13            ; step1[1] = q11 + q13
    vsub.s16    q2, q11, q13            ; step1[2] = q11 - q13
    vsub.s16    q3, q9, q15             ; step1[3] = q9 - q15

    ; ---- stage 3, odd half (constant loaded early; q8 is free here)
    vdup.16     d16, r7                 ; cospi_16_64

    ; ---- stage 2, odd half
    vsub.s16    q13, q4, q5             ; step2[5] = step1[4] - step1[5]
    vadd.s16    q4, q4, q5              ; step2[4] = step1[4] + step1[5]
    vsub.s16    q14, q7, q6             ; step2[6] = step1[7] - step1[6]
    vadd.s16    q7, q7, q6              ; step2[7] = step1[7] + step1[6]

    ; q9:q10 = step2[6] * cospi_16_64
    vmull.s16   q9, d28, d16
    vmull.s16   q10, d29, d16

    ; q11:q12 = step2[6] * cospi_16_64
    vmull.s16   q11, d28, d16
    vmull.s16   q12, d29, d16

    ; q9:q10 = (step2[6] - step2[5]) * cospi_16_64
    vmlsl.s16   q9, d26, d16
    vmlsl.s16   q10, d27, d16

    ; q11:q12 = (step2[6] + step2[5]) * cospi_16_64
    vmlal.s16   q11, d26, d16
    vmlal.s16   q12, d27, d16

    ; q5 = step1[5] = round_shift((step2[6] - step2[5]) * cospi_16_64)
    vqrshrn.s32 d10, q9, #14
    vqrshrn.s32 d11, q10, #14

    ; q6 = step1[6] = round_shift((step2[6] + step2[5]) * cospi_16_64)
    vqrshrn.s32 d12, q11, #14
    vqrshrn.s32 d13, q12, #14

    ; ---- stage 4: final butterfly into q8-q15
    vadd.s16    q8, q0, q7              ; out[0] = step1[0] + step1[7]
    vadd.s16    q9, q1, q6              ; out[1] = step1[1] + step1[6]
    vadd.s16    q10, q2, q5             ; out[2] = step1[2] + step1[5]
    vadd.s16    q11, q3, q4             ; out[3] = step1[3] + step1[4]
    vsub.s16    q12, q3, q4             ; out[4] = step1[3] - step1[4]
    vsub.s16    q13, q2, q5             ; out[5] = step1[2] - step1[5]
    vsub.s16    q14, q1, q6             ; out[6] = step1[1] - step1[6]
    vsub.s16    q15, q0, q7             ; out[7] = step1[0] - step1[7]
    MEND
|
||||
|
||||
; TRANSPOSE8X8
;
; In-place transpose of the 8x8 matrix of 16-bit elements held in q8-q15
; (one row per q register). No registers outside q8-q15 are used.
; Three steps: swap the off-diagonal 4x4 quadrants (64-bit halves), then
; transpose 2x2 groups of 32-bit pairs, then individual 16-bit elements.
    MACRO
    TRANSPOSE8X8
    ; 64-bit swaps: high half of rows 0-3 <-> low half of rows 4-7
    vswp        d17, d24
    vswp        d19, d26
    vswp        d21, d28
    vswp        d23, d30
    ; 32-bit element pairs
    vtrn.32     q8, q10
    vtrn.32     q9, q11
    vtrn.32     q12, q14
    vtrn.32     q13, q15
    ; 16-bit elements
    vtrn.16     q8, q9
    vtrn.16     q10, q11
    vtrn.16     q12, q13
    vtrn.16     q14, q15
    MEND
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
; void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest,
;                                 int dest_stride)
;
; Full 8x8 inverse DCT: two IDCT8x8_1D passes separated by a transpose; the
; result is rounded by 5 bits, added to the 8x8 block at dest and stored back
; with unsigned saturation.
;
;   r0  int16_t *input      (64 coefficients; r0 is reused as store pointer)
;   r1  uint8_t *dest       (used as load pointer)
;   r2  int      dest_stride
;
; r4-r9 and d8-d15 are saved on entry and restored before returning.
|vp9_short_idct8x8_add_neon| PROC
    push        {r4-r9}
    vpush       {d8-d15}

    ; q8-q15 = the eight rows of coefficients
    vld1.s16    {q8,q9}, [r0]!
    vld1.s16    {q10,q11}, [r0]!
    vld1.s16    {q12,q13}, [r0]!
    vld1.s16    {q14,q15}, [r0]!

    ; transpose so that each pass works on eight lanes in parallel
    TRANSPOSE8X8

    ; Constants consumed by IDCT8x8_1D (r3-r9), each built as high byte + low byte.
    mov         r3, #0x0c00             ; cospi_28_64 = 3196
    add         r3, r3, #0x7c

    mov         r4, #0x3e00             ; cospi_4_64 = 16069
    add         r4, r4, #0xc5

    mov         r5, #0x3500             ; cospi_12_64 = 13623
    add         r5, r5, #0x37

    mov         r6, #0x2300             ; cospi_20_64 = 9102
    add         r6, r6, #0x8e

    mov         r7, #0x2d00             ; cospi_16_64 = 11585
    add         r7, r7, #0x41

    mov         r8, #0x1800             ; cospi_24_64 = 6270
    add         r8, r8, #0x7e

    mov         r9, #0x3b00             ; cospi_8_64 = 15137
    add         r9, r9, #0x21

    ; pass 1: rows
    IDCT8x8_1D

    TRANSPOSE8X8

    ; pass 2: columns
    IDCT8x8_1D

    ; ROUND_POWER_OF_TWO(temp_out[j], 5)
    vrshr.s16   q8, q8, #5
    vrshr.s16   q9, q9, #5
    vrshr.s16   q10, q10, #5
    vrshr.s16   q11, q11, #5
    vrshr.s16   q12, q12, #5
    vrshr.s16   q13, q13, #5
    vrshr.s16   q14, q14, #5
    vrshr.s16   q15, q15, #5

    ; r0 keeps the start of dest for the stores; r1 walks the rows for the loads
    mov         r0, r1

    ; d0-d7 = the eight destination rows
    vld1.64     {d0}, [r1], r2
    vld1.64     {d1}, [r1], r2
    vld1.64     {d2}, [r1], r2
    vld1.64     {d3}, [r1], r2
    vld1.64     {d4}, [r1], r2
    vld1.64     {d5}, [r1], r2
    vld1.64     {d6}, [r1], r2
    vld1.64     {d7}, [r1]              ; last row: no post-increment

    ; residual + dest[j * dest_stride + i], in 16 bit
    vaddw.u8    q8, q8, d0
    vaddw.u8    q9, q9, d1
    vaddw.u8    q10, q10, d2
    vaddw.u8    q11, q11, d3
    vaddw.u8    q12, q12, d4
    vaddw.u8    q13, q13, d5
    vaddw.u8    q14, q14, d6
    vaddw.u8    q15, q15, d7

    ; clip_pixel: saturate to unsigned 8 bit
    vqmovun.s16 d0, q8
    vqmovun.s16 d1, q9
    vqmovun.s16 d2, q10
    vqmovun.s16 d3, q11
    vqmovun.s16 d4, q12
    vqmovun.s16 d5, q13
    vqmovun.s16 d6, q14
    vqmovun.s16 d7, q15

    ; write the eight rows back
    vst1.64     {d0}, [r0], r2
    vst1.64     {d1}, [r0], r2
    vst1.64     {d2}, [r0], r2
    vst1.64     {d3}, [r0], r2
    vst1.64     {d4}, [r0], r2
    vst1.64     {d5}, [r0], r2
    vst1.64     {d6}, [r0], r2
    vst1.64     {d7}, [r0], r2

    vpop        {d8-d15}
    pop         {r4-r9}
    bx          lr
    ENDP        ; |vp9_short_idct8x8_add_neon|
|
||||
|
||||
; void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest,
;                                    int dest_stride)
;
; Reduced 8x8 inverse DCT. The first (row) pass is hand-expanded and reads
; only q8-q11 after the transpose (in[0]..in[3]); q12-q15 are not used as
; inputs there -- presumably the caller guarantees those coefficients are
; zero (TODO confirm against the C caller). The second (column) pass is the
; generic IDCT8x8_1D. The result is rounded by 5 bits, added to dest and
; stored with unsigned saturation.
;
;   r0  int16_t *input      (64 coefficients; r0 is reused as store pointer)
;   r1  uint8_t *dest       (used as load pointer)
;   r2  int      dest_stride
;
; r4-r9 and d8-d15 are saved on entry and restored before returning.
; r12 is used as scratch.
|vp9_short_idct10_8x8_add_neon| PROC
    push        {r4-r9}
    vpush       {d8-d15}

    ; q8-q15 = the eight rows of coefficients
    vld1.s16    {q8,q9}, [r0]!
    vld1.s16    {q10,q11}, [r0]!
    vld1.s16    {q12,q13}, [r0]!
    vld1.s16    {q14,q15}, [r0]!

    TRANSPOSE8X8

    ; Constants in r3-r9 (also consumed by IDCT8x8_1D below).
    mov         r3, #0x0c00             ; cospi_28_64 = 3196
    add         r3, r3, #0x7c

    mov         r4, #0x3e00             ; cospi_4_64 = 16069
    add         r4, r4, #0xc5

    mov         r5, #0x3500             ; cospi_12_64 = 13623
    add         r5, r5, #0x37

    mov         r6, #0x2300             ; cospi_20_64 = 9102
    add         r6, r6, #0x8e

    mov         r7, #0x2d00             ; cospi_16_64 = 11585
    add         r7, r7, #0x41

    mov         r8, #0x1800             ; cospi_24_64 = 6270
    add         r8, r8, #0x7e

    mov         r9, #0x3b00             ; cospi_8_64 = 15137
    add         r9, r9, #0x21

    ; ---- pass 1: rows
    ; Single-term products use vqrdmulh instead of vmull + vqrshrn #14.
    ; vqrdmulh doubles the product and keeps the rounded high 16 bits (a
    ; 16-bit shift), so each constant is doubled first to get a net shift
    ; of 14. Constant setup in r12 is interleaved with the multiplies.

    ; stage 1
    mov         r12, r3, lsl #1
    vdup.16     q0, r12                 ; 2 * cospi_28_64
    mov         r12, r4, lsl #1
    vdup.16     q1, r12                 ; 2 * cospi_4_64

    vqrdmulh.s16 q4, q9, q0             ; step1[4] = round_shift(in[1] * cospi_28_64)

    mov         r12, r6, lsl #1
    rsb         r12, r12, #0
    vdup.16     q0, r12                 ; -2 * cospi_20_64

    vqrdmulh.s16 q7, q9, q1             ; step1[7] = round_shift(in[1] * cospi_4_64)

    mov         r12, r5, lsl #1
    vdup.16     q1, r12                 ; 2 * cospi_12_64

    vqrdmulh.s16 q5, q11, q0            ; step1[5] = round_shift(-in[3] * cospi_20_64)

    mov         r12, r7, lsl #1
    vdup.16     q0, r12                 ; 2 * cospi_16_64

    vqrdmulh.s16 q6, q11, q1            ; step1[6] = round_shift(in[3] * cospi_12_64)

    ; stages 2 and 3, even half
    mov         r12, r8, lsl #1
    vdup.16     q1, r12                 ; 2 * cospi_24_64

    vqrdmulh.s16 q9, q8, q0             ; q9 = round_shift(in[0] * cospi_16_64)

    mov         r12, r9, lsl #1
    vdup.16     q0, r12                 ; 2 * cospi_8_64

    vqrdmulh.s16 q13, q10, q1           ; q13 = round_shift(in[2] * cospi_24_64)
    vqrdmulh.s16 q15, q10, q0           ; q15 = round_shift(in[2] * cospi_8_64)

    ; stage 3, odd half (constant loaded early; q8 is free here)
    vdup.16     d16, r7                 ; cospi_16_64 (not doubled: used with vmull)

    ; even-half butterfly; q9 serves as both sum and difference term
    vadd.s16    q0, q9, q15             ; step1[0] = q9 + q15
    vadd.s16    q1, q9, q13             ; step1[1] = q9 + q13
    vsub.s16    q2, q9, q13             ; step1[2] = q9 - q13
    vsub.s16    q3, q9, q15             ; step1[3] = q9 - q15

    ; stage 2, odd half
    vsub.s16    q13, q4, q5             ; step2[5] = step1[4] - step1[5]
    vadd.s16    q4, q4, q5              ; step2[4] = step1[4] + step1[5]
    vsub.s16    q14, q7, q6             ; step2[6] = step1[7] - step1[6]
    vadd.s16    q7, q7, q6              ; step2[7] = step1[7] + step1[6]

    ; q9:q10 = step2[6] * cospi_16_64
    vmull.s16   q9, d28, d16
    vmull.s16   q10, d29, d16

    ; q11:q12 = step2[6] * cospi_16_64
    vmull.s16   q11, d28, d16
    vmull.s16   q12, d29, d16

    ; q9:q10 = (step2[6] - step2[5]) * cospi_16_64
    vmlsl.s16   q9, d26, d16
    vmlsl.s16   q10, d27, d16

    ; q11:q12 = (step2[6] + step2[5]) * cospi_16_64
    vmlal.s16   q11, d26, d16
    vmlal.s16   q12, d27, d16

    ; q5 = step1[5] = round_shift((step2[6] - step2[5]) * cospi_16_64)
    vqrshrn.s32 d10, q9, #14
    vqrshrn.s32 d11, q10, #14

    ; q6 = step1[6] = round_shift((step2[6] + step2[5]) * cospi_16_64)
    vqrshrn.s32 d12, q11, #14
    vqrshrn.s32 d13, q12, #14

    ; stage 4: final butterfly into q8-q15
    vadd.s16    q8, q0, q7              ; out[0] = step1[0] + step1[7]
    vadd.s16    q9, q1, q6              ; out[1] = step1[1] + step1[6]
    vadd.s16    q10, q2, q5             ; out[2] = step1[2] + step1[5]
    vadd.s16    q11, q3, q4             ; out[3] = step1[3] + step1[4]
    vsub.s16    q12, q3, q4             ; out[4] = step1[3] - step1[4]
    vsub.s16    q13, q2, q5             ; out[5] = step1[2] - step1[5]
    vsub.s16    q14, q1, q6             ; out[6] = step1[1] - step1[6]
    vsub.s16    q15, q0, q7             ; out[7] = step1[0] - step1[7]

    TRANSPOSE8X8

    ; ---- pass 2: columns
    IDCT8x8_1D

    ; ROUND_POWER_OF_TWO(temp_out[j], 5)
    vrshr.s16   q8, q8, #5
    vrshr.s16   q9, q9, #5
    vrshr.s16   q10, q10, #5
    vrshr.s16   q11, q11, #5
    vrshr.s16   q12, q12, #5
    vrshr.s16   q13, q13, #5
    vrshr.s16   q14, q14, #5
    vrshr.s16   q15, q15, #5

    ; r0 keeps the start of dest for the stores; r1 walks the rows for the loads
    mov         r0, r1

    ; d0-d7 = the eight destination rows
    vld1.64     {d0}, [r1], r2
    vld1.64     {d1}, [r1], r2
    vld1.64     {d2}, [r1], r2
    vld1.64     {d3}, [r1], r2
    vld1.64     {d4}, [r1], r2
    vld1.64     {d5}, [r1], r2
    vld1.64     {d6}, [r1], r2
    vld1.64     {d7}, [r1]              ; last row: no post-increment

    ; residual + dest[j * dest_stride + i], in 16 bit
    vaddw.u8    q8, q8, d0
    vaddw.u8    q9, q9, d1
    vaddw.u8    q10, q10, d2
    vaddw.u8    q11, q11, d3
    vaddw.u8    q12, q12, d4
    vaddw.u8    q13, q13, d5
    vaddw.u8    q14, q14, d6
    vaddw.u8    q15, q15, d7

    ; clip_pixel: saturate to unsigned 8 bit
    vqmovun.s16 d0, q8
    vqmovun.s16 d1, q9
    vqmovun.s16 d2, q10
    vqmovun.s16 d3, q11
    vqmovun.s16 d4, q12
    vqmovun.s16 d5, q13
    vqmovun.s16 d6, q14
    vqmovun.s16 d7, q15

    ; write the eight rows back
    vst1.64     {d0}, [r0], r2
    vst1.64     {d1}, [r0], r2
    vst1.64     {d2}, [r0], r2
    vst1.64     {d3}, [r0], r2
    vst1.64     {d4}, [r0], r2
    vst1.64     {d5}, [r0], r2
    vst1.64     {d6}, [r0], r2
    vst1.64     {d7}, [r0], r2

    vpop        {d8-d15}
    pop         {r4-r9}
    bx          lr
    ENDP        ; |vp9_short_idct10_8x8_add_neon|
|
||||
|
||||
END
|
@@ -1,237 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht4x4_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; IDCT4x4_1D
;
; One 4-point inverse DCT pass applied in parallel to the four 16-bit lanes
; of d16-d19 (d16 = input[0] ... d19 = input[3]). Results are written back
; to d16-d19 in natural order.
;
; Expects: d0 = cospi_8_64, d1 = cospi_16_64, d2 = cospi_24_64.
; Scratch: q10-q15.
    MACRO
    IDCT4x4_1D
    ; stage 1
    vadd.s16    d23, d16, d18           ; input[0] + input[2]
    vsub.s16    d24, d16, d18           ; input[0] - input[2]

    vmull.s16   q15, d17, d2            ; input[1] * cospi_24_64
    vmull.s16   q10, d17, d0            ; input[1] * cospi_8_64
    vmull.s16   q13, d23, d1            ; (input[0] + input[2]) * cospi_16_64
    vmull.s16   q14, d24, d1            ; (input[0] - input[2]) * cospi_16_64
    vmlsl.s16   q15, d19, d0            ; ... - input[3] * cospi_8_64
    vmlal.s16   q10, d19, d2            ; ... + input[3] * cospi_24_64

    ; dct_const_round_shift; afterwards q13 = {step[0], step[1]} and
    ; q14 = {step[3], step[2]}
    vqrshrn.s32 d26, q13, #14
    vqrshrn.s32 d27, q14, #14
    vqrshrn.s32 d29, q15, #14
    vqrshrn.s32 d28, q10, #14

    ; stage 2
    vadd.s16    q8, q13, q14            ; d16 = step[0]+step[3], d17 = step[1]+step[2]
    vsub.s16    q9, q13, q14            ; d18 = step[0]-step[3], d19 = step[1]-step[2]
    vswp        d18, d19                ; put output[2], output[3] in natural order
    MEND
|
||||
|
||||
; IADST4x4_1D
;
; One 4-point inverse ADST pass applied in parallel to the four 16-bit lanes
; of d16-d19 (d16 = x0 ... d19 = x3). Results are written back to d16-d19.
;
; Expects: d3 = sinpi_1_9, d4 = sinpi_2_9, d5 = sinpi_4_9, d6 = sinpi_3_9,
;          and r0 = sinpi_3_9 (it is broadcast as a 32-bit value below).
; Scratch: q10-q15; q8 and q9 are also reused as temporaries once their
; inputs have been consumed.
    MACRO
    IADST4x4_1D
    vmull.s16   q10, d3, d16            ; s0 = sinpi_1_9 * x0
    vmull.s16   q11, d4, d16            ; s1 = sinpi_2_9 * x0
    vmull.s16   q12, d6, d17            ; s2 = sinpi_3_9 * x1
    vmull.s16   q13, d5, d18            ; s3 = sinpi_4_9 * x2
    vmull.s16   q14, d3, d18            ; s4 = sinpi_1_9 * x2
    vmovl.s16   q15, d16                ; widen x0 to 32 bit
    vaddw.s16   q15, q15, d19           ; x0 + x3
    vmull.s16   q8, d4, d19             ; s5 = sinpi_2_9 * x3 (overwrites x0, x1)
    vsubw.s16   q15, q15, d18           ; s7 = x0 + x3 - x2
    vmull.s16   q9, d5, d19             ; s6 = sinpi_4_9 * x3 (overwrites x2, x3)

    vadd.s32    q10, q10, q13           ; s0 + s3
    vadd.s32    q10, q10, q8            ; x0 = s0 + s3 + s5
    vsub.s32    q11, q11, q14           ; s1 - s4
    vdup.32     q8, r0                  ; sinpi_3_9 in every 32-bit lane
    vsub.s32    q11, q11, q9            ; x1 = s1 - s4 - s6
    vmul.s32    q15, q15, q8            ; x2 = sinpi_3_9 * s7

    ; x3 = s2 is q12
    vadd.s32    q13, q10, q12           ; s0 = x0 + x3
    vadd.s32    q10, q10, q11           ; x0 + x1
    vadd.s32    q14, q11, q12           ; s1 = x1 + x3
    vsub.s32    q10, q10, q12           ; s3 = x0 + x1 - x3

    ; dct_const_round_shift
    vqrshrn.s32 d16, q13, #14           ; output[0] from s0
    vqrshrn.s32 d17, q14, #14           ; output[1] from s1
    vqrshrn.s32 d18, q15, #14           ; output[2] from x2
    vqrshrn.s32 d19, q10, #14           ; output[3] from s3
    MEND
|
||||
|
||||
; GENERATE_COSINE_CONSTANTS
;
; Loads the IDCT cosine constants into d0-d2:
;   d0 = cospi_8_64, d1 = cospi_16_64, d2 = cospi_24_64 (four 16-bit lanes each).
; Overwrites r0, r3 and r12.
    MACRO
    GENERATE_COSINE_CONSTANTS
    ; each constant is built as high byte + low byte
    mov         r0, #0x3b00             ; cospi_8_64 = 15137 = 0x3b21
    add         r0, r0, #0x21
    mov         r3, #0x2d00             ; cospi_16_64 = 11585 = 0x2d41
    add         r3, r3, #0x41
    mov         r12, #0x1800            ; cospi_24_64 = 6270 = 0x187e
    add         r12, r12, #0x7e

    ; broadcast into the vector registers
    vdup.16     d0, r0
    vdup.16     d1, r3
    vdup.16     d2, r12
    MEND
|
||||
|
||||
; GENERATE_SINE_CONSTANTS
;
; Loads the IADST sine constants into d3-d7:
;   d3 = sinpi_1_9, d4 = sinpi_2_9, d5 = sinpi_4_9, q3 (d6, d7) = sinpi_3_9.
; Overwrites r0, r3 and r12; on exit r0 = sinpi_3_9.
    MACRO
    GENERATE_SINE_CONSTANTS
    ; each constant is built as high byte + low byte
    mov         r0, #0x1400             ; sinpi_1_9 = 5283 = 0x14a3
    add         r0, r0, #0xa3
    mov         r3, #0x2600             ; sinpi_2_9 = 9929 = 0x26c9
    add         r3, r3, #0xc9
    mov         r12, #0x3b00            ; sinpi_4_9 = 15212 = 0x3b6c
    add         r12, r12, #0x6c

    vdup.16     d3, r0                  ; sinpi_1_9 (must happen before r0 is reused)

    mov         r0, #0x3400             ; sinpi_3_9 = 13377 = 0x3441
    add         r0, r0, #0x41

    vdup.16     d4, r3                  ; sinpi_2_9
    vdup.16     d5, r12                 ; sinpi_4_9
    vdup.16     q3, r0                  ; sinpi_3_9
    MEND
|
||||
|
||||
; TRANSPOSE4X4
;
; In-place transpose of the 4x4 matrix of 16-bit elements held in d16-d19
; (one row per d register). No other registers are used.
    MACRO
    TRANSPOSE4X4
    vtrn.16     d16, d17                ; 16-bit elements of rows 0/1
    vtrn.16     d18, d19                ; 16-bit elements of rows 2/3
    vtrn.32     q8, q9                  ; 32-bit pairs between rows 0,1 and rows 2,3
    MEND
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
; void vp9_short_iht4x4_add_neon(int16_t *input, uint8_t *dest,
;                                int dest_stride, int tx_type)
;
; 4x4 inverse hybrid transform: a row pass and a column pass, each either
; IDCT or IADST depending on tx_type; the result is rounded by 4 bits, added
; to the 4x4 block at dest and stored with unsigned saturation.
;
;   r0  int16_t *input      (16 coefficients; r0 is reused for constants)
;   r1  uint8_t *dest
;   r2  int      dest_stride (negated before the stores)
;   r3  int      tx_type    (reused for constants after the dispatch)
;
; Dispatch: tx_type == 2 -> IADST rows, IDCT columns
;           tx_type == 3 -> IADST rows, IADST columns
;           otherwise    -> IDCT rows, IADST columns
; Only tx_type 1, 2 and 3 are meant to be handled here; any other value
; (including 0) takes the "otherwise" path.
|vp9_short_iht4x4_add_neon| PROC

    ; d16-d19 = the four rows of coefficients
    vld1.s16    {q8,q9}, [r0]!

    TRANSPOSE4X4

    ; select the transform pair
    cmp         r3, #2
    beq         idct_iadst
    cmp         r3, #3
    beq         iadst_iadst

iadst_idct
    ; fall-through path: IDCT on rows, then IADST on columns
    GENERATE_COSINE_CONSTANTS
    GENERATE_SINE_CONSTANTS

    IDCT4x4_1D
    TRANSPOSE4X4
    IADST4x4_1D

    b           end_vp9_short_iht4x4_add_neon

idct_iadst
    ; tx_type == 2: IADST on rows, then IDCT on columns
    GENERATE_COSINE_CONSTANTS
    GENERATE_SINE_CONSTANTS

    IADST4x4_1D
    TRANSPOSE4X4
    IDCT4x4_1D

    b           end_vp9_short_iht4x4_add_neon

iadst_iadst
    ; tx_type == 3: IADST on both passes, so only sine constants are needed
    GENERATE_SINE_CONSTANTS

    IADST4x4_1D
    TRANSPOSE4X4
    IADST4x4_1D

end_vp9_short_iht4x4_add_neon
    ; ROUND_POWER_OF_TWO(temp_out[j], 4)
    vrshr.s16   q8, q8, #4
    vrshr.s16   q9, q9, #4

    ; four destination rows, 4 bytes each: d26 = rows 0,1; d27 = rows 2,3
    vld1.32     {d26[0]}, [r1], r2
    vld1.32     {d26[1]}, [r1], r2
    vld1.32     {d27[0]}, [r1], r2
    vld1.32     {d27[1]}, [r1]          ; no post-increment: r1 stays on row 3

    ; residual + dest[j * dest_stride + i], in 16 bit
    vaddw.u8    q8, q8, d26
    vaddw.u8    q9, q9, d27

    ; clip_pixel: saturate to unsigned 8 bit
    vqmovun.s16 d26, q8
    vqmovun.s16 d27, q9

    ; r1 points at the last row, so store rows 3..0 walking backwards with a
    ; negated stride
    rsb         r2, r2, #0
    vst1.32     {d27[1]}, [r1], r2
    vst1.32     {d27[0]}, [r1], r2
    vst1.32     {d26[1]}, [r1], r2
    vst1.32     {d26[0]}, [r1]          ; no post-increment
    bx          lr
    ENDP        ; |vp9_short_iht4x4_add_neon|
|
||||
|
||||
END
|
@@ -1,696 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_iht8x8_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; GENERATE_IADST_CONSTANTS
;
; Loads the 8-point IADST constants into core registers r0-r10 and r12
; (r11 is not written):
;   r0 = cospi_2_64    r1 = cospi_30_64   r2 = cospi_10_64   r3 = cospi_22_64
;   r4 = cospi_18_64   r5 = cospi_14_64   r6 = cospi_26_64   r7 = cospi_6_64
;   r8 = cospi_8_64    r9 = cospi_24_64   r10 = 0            r12 = cospi_16_64
; Note that r0-r3 are overwritten, so any values still needed from them must
; be saved by the code that invokes this macro.
    MACRO
    GENERATE_IADST_CONSTANTS
    ; each constant is built as high byte + low byte
    mov         r0, #0x3f00             ; cospi_2_64 = 16305
    add         r0, r0, #0xb1

    mov         r1, #0x600              ; cospi_30_64 = 1606
    add         r1, r1, #0x46

    mov         r2, #0x3800             ; cospi_10_64 = 14449
    add         r2, r2, #0x71

    mov         r3, #0x1e00             ; cospi_22_64 = 7723
    add         r3, r3, #0x2b

    mov         r4, #0x2800             ; cospi_18_64 = 10394
    add         r4, r4, #0x9a

    mov         r5, #0x3100             ; cospi_14_64 = 12665
    add         r5, r5, #0x79

    mov         r6, #0x1200             ; cospi_26_64 = 4756
    add         r6, r6, #0x94

    mov         r7, #0x3d00             ; cospi_6_64 = 15679
    add         r7, r7, #0x3f

    mov         r8, #0x3b00             ; cospi_8_64 = 15137
    add         r8, r8, #0x21

    mov         r9, #0x1800             ; cospi_24_64 = 6270
    add         r9, r9, #0x7e

    mov         r10, #0                 ; zero

    mov         r12, #0x2d00            ; cospi_16_64 = 11585
    add         r12, r12, #0x41
    MEND
|
||||
|
||||
; GENERATE_IDCT_CONSTANTS
;
; Loads the 8-point IDCT constants into core registers r3-r9:
;   r3 = cospi_28_64   r4 = cospi_4_64    r5 = cospi_12_64   r6 = cospi_20_64
;   r7 = cospi_16_64   r8 = cospi_24_64   r9 = cospi_8_64
    MACRO
    GENERATE_IDCT_CONSTANTS
    ; each constant is built as high byte + low byte
    mov         r3, #0x0c00             ; cospi_28_64 = 3196
    add         r3, r3, #0x7c

    mov         r4, #0x3e00             ; cospi_4_64 = 16069
    add         r4, r4, #0xc5

    mov         r5, #0x3500             ; cospi_12_64 = 13623
    add         r5, r5, #0x37

    mov         r6, #0x2300             ; cospi_20_64 = 9102
    add         r6, r6, #0x8e

    mov         r7, #0x2d00             ; cospi_16_64 = 11585
    add         r7, r7, #0x41

    mov         r8, #0x1800             ; cospi_24_64 = 6270
    add         r8, r8, #0x7e

    mov         r9, #0x3b00             ; cospi_8_64 = 15137
    add         r9, r9, #0x21
    MEND
|
||||
|
||||
; TRANSPOSE8X8
;
; In-place transpose of the 8x8 matrix of 16-bit elements held in q8-q15
; (one row per q register). No registers outside q8-q15 are used.
; Three steps: swap the off-diagonal 4x4 quadrants (64-bit halves), then
; transpose 2x2 groups of 32-bit pairs, then individual 16-bit elements.
    MACRO
    TRANSPOSE8X8
    ; 64-bit swaps: high half of rows 0-3 <-> low half of rows 4-7
    vswp        d17, d24
    vswp        d19, d26
    vswp        d21, d28
    vswp        d23, d30
    ; 32-bit element pairs
    vtrn.32     q8, q10
    vtrn.32     q9, q11
    vtrn.32     q12, q14
    vtrn.32     q13, q15
    ; 16-bit elements
    vtrn.16     q8, q9
    vtrn.16     q10, q11
    vtrn.16     q12, q13
    vtrn.16     q14, q15
    MEND
|
||||
|
||||
; Parallel 1D IDCT on all the columns of a 8x8 16bits data matrix which are
|
||||
; loaded in q8-q15. The IDCT constants are loaded in r3 - r9. The output
|
||||
; will be stored back into q8-q15 registers. This macro will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
MACRO
|
||||
IDCT8x8_1D
|
||||
; stage 1
|
||||
vdup.16 d0, r3 ; duplicate cospi_28_64
|
||||
vdup.16 d1, r4 ; duplicate cospi_4_64
|
||||
vdup.16 d2, r5 ; duplicate cospi_12_64
|
||||
vdup.16 d3, r6 ; duplicate cospi_20_64
|
||||
|
||||
; input[1] * cospi_28_64
|
||||
vmull.s16 q2, d18, d0
|
||||
vmull.s16 q3, d19, d0
|
||||
|
||||
; input[5] * cospi_12_64
|
||||
vmull.s16 q5, d26, d2
|
||||
vmull.s16 q6, d27, d2
|
||||
|
||||
; input[1]*cospi_28_64-input[7]*cospi_4_64
|
||||
vmlsl.s16 q2, d30, d1
|
||||
vmlsl.s16 q3, d31, d1
|
||||
|
||||
; input[5] * cospi_12_64 - input[3] * cospi_20_64
|
||||
vmlsl.s16 q5, d22, d3
|
||||
vmlsl.s16 q6, d23, d3
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d8, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d9, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d10, q5, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q6, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_4_64
|
||||
vmull.s16 q2, d18, d1
|
||||
vmull.s16 q3, d19, d1
|
||||
|
||||
; input[5] * cospi_20_64
|
||||
vmull.s16 q9, d26, d3
|
||||
vmull.s16 q13, d27, d3
|
||||
|
||||
; input[1]*cospi_4_64+input[7]*cospi_28_64
|
||||
vmlal.s16 q2, d30, d0
|
||||
vmlal.s16 q3, d31, d0
|
||||
|
||||
; input[5] * cospi_20_64 + input[3] * cospi_12_64
|
||||
vmlal.s16 q9, d22, d2
|
||||
vmlal.s16 q13, d23, d2
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d14, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d15, q3, #14 ; >> 14
|
||||
|
||||
; stage 2 & stage 3 - even half
|
||||
vdup.16 d0, r7 ; duplicate cospi_16_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d12, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q13, #14 ; >> 14
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q2, d16, d0
|
||||
vmull.s16 q3, d17, d0
|
||||
|
||||
; input[0] * cospi_16_64
|
||||
vmull.s16 q13, d16, d0
|
||||
vmull.s16 q15, d17, d0
|
||||
|
||||
; (input[0] + input[2]) * cospi_16_64
|
||||
vmlal.s16 q2, d24, d0
|
||||
vmlal.s16 q3, d25, d0
|
||||
|
||||
; (input[0] - input[2]) * cospi_16_64
|
||||
vmlsl.s16 q13, d24, d0
|
||||
vmlsl.s16 q15, d25, d0
|
||||
|
||||
vdup.16 d0, r8 ; duplicate cospi_24_64
|
||||
vdup.16 d1, r9 ; duplicate cospi_8_64
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d18, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d19, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input_dc * cospi_16_64)
|
||||
vqrshrn.s32 d22, q13, #14 ; >> 14
|
||||
vqrshrn.s32 d23, q15, #14 ; >> 14
|
||||
|
||||
; input[1] * cospi_24_64
|
||||
vmull.s16 q2, d20, d0
|
||||
vmull.s16 q3, d21, d0
|
||||
|
||||
; input[1] * cospi_8_64
|
||||
vmull.s16 q8, d20, d1
|
||||
vmull.s16 q12, d21, d1
|
||||
|
||||
; input[1] * cospi_24_64 - input[3] * cospi_8_64
|
||||
vmlsl.s16 q2, d28, d1
|
||||
vmlsl.s16 q3, d29, d1
|
||||
|
||||
; input[1] * cospi_8_64 + input[3] * cospi_24_64
|
||||
vmlal.s16 q8, d28, d0
|
||||
vmlal.s16 q12, d29, d0
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_24_64 - input[3] * cospi_8_64)
|
||||
vqrshrn.s32 d26, q2, #14 ; >> 14
|
||||
vqrshrn.s32 d27, q3, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift(input[1] * cospi_8_64 + input[3] * cospi_24_64)
|
||||
vqrshrn.s32 d30, q8, #14 ; >> 14
|
||||
vqrshrn.s32 d31, q12, #14 ; >> 14
|
||||
|
||||
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
|
||||
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
|
||||
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
|
||||
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
|
||||
|
||||
; stage 3 -odd half
|
||||
vdup.16 d16, r7 ; duplicate cospi_16_64
|
||||
|
||||
; stage 2 - odd half
|
||||
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
|
||||
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
|
||||
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
|
||||
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q9, d28, d16
|
||||
vmull.s16 q10, d29, d16
|
||||
|
||||
; step2[6] * cospi_16_64
|
||||
vmull.s16 q11, d28, d16
|
||||
vmull.s16 q12, d29, d16
|
||||
|
||||
; (step2[6] - step2[5]) * cospi_16_64
|
||||
vmlsl.s16 q9, d26, d16
|
||||
vmlsl.s16 q10, d27, d16
|
||||
|
||||
; (step2[5] + step2[6]) * cospi_16_64
|
||||
vmlal.s16 q11, d26, d16
|
||||
vmlal.s16 q12, d27, d16
|
||||
|
||||
; dct_const_round_shift((step2[6] - step2[5]) * cospi_16_64)
|
||||
vqrshrn.s32 d10, q9, #14 ; >> 14
|
||||
vqrshrn.s32 d11, q10, #14 ; >> 14
|
||||
|
||||
; dct_const_round_shift((step2[5] + step2[6]) * cospi_16_64)
|
||||
vqrshrn.s32 d12, q11, #14 ; >> 14
|
||||
vqrshrn.s32 d13, q12, #14 ; >> 14
|
||||
|
||||
; stage 4
|
||||
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
|
||||
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
|
||||
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
|
||||
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
|
||||
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
|
||||
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
|
||||
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
|
||||
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
|
||||
MEND
|
||||
|
||||
; IADST8X8_1D
;
; One-dimensional 8-point inverse ADST applied in parallel to the eight
; 16-bit lanes of each row register of an 8x8 block.
;
; In:   q8-q15  = input[0] .. input[7] (one q register per row).
;       The code reads them in the roles
;           x0 = q15, x1 = q8,  x2 = q13, x3 = q10,
;           x4 = q11, x5 = q12, x6 = q9,  x7 = q14
;       r0  = cospi_2_64    r1 = cospi_30_64
;       r2  = cospi_10_64   r3 = cospi_22_64
;       r4  = cospi_18_64   r5 = cospi_14_64
;       r6  = cospi_26_64   r7 = cospi_6_64
;       r8  = cospi_8_64    r9 = cospi_24_64
;       r10 = 0             r12 = cospi_16_64
;       (r11 is not read by this macro.)
; Out:  q8-q15  = output[0] .. output[7]
; Clobbers: q0-q7 (used as scratch; q5 is left holding zero).
;
; Every 32-bit product is narrowed with vqrshrn #14, i.e. a rounding,
; saturating ">> 14" (dct_const_round_shift in the comments below).
; The instruction order matters: several input registers are recycled as
; scratch as soon as their last read has been issued.
    MACRO
    IADST8X8_1D

    ;----------------------------------------------------------------------
    ; stage 1, pairs (x0,x1) and (x4,x5)
    ;----------------------------------------------------------------------
    vdup.16         d14, r0                   ; d14 = cospi_2_64
    vdup.16         d15, r1                   ; d15 = cospi_30_64

    ; q1:q2 = cospi_2_64 * x0
    vmull.s16       q1, d30, d14
    vmull.s16       q2, d31, d14

    ; q3:q4 = cospi_30_64 * x0
    vmull.s16       q3, d30, d15
    vmull.s16       q4, d31, d15

    ; x0 (q15) has been consumed; reuse q15 for the next constant pair
    vdup.16         d30, r4                   ; d30 = cospi_18_64
    vdup.16         d31, r5                   ; d31 = cospi_14_64

    ; s0 = cospi_2_64 * x0 + cospi_30_64 * x1
    vmlal.s16       q1, d16, d15
    vmlal.s16       q2, d17, d15

    ; s1 = cospi_30_64 * x0 - cospi_2_64 * x1
    vmlsl.s16       q3, d16, d14
    vmlsl.s16       q4, d17, d14

    ; q5:q6 = cospi_18_64 * x4
    vmull.s16       q5, d22, d30
    vmull.s16       q6, d23, d30

    ; q7:q8 = cospi_14_64 * x4  (x1 in q8 is dead from here on)
    vmull.s16       q7, d22, d31
    vmull.s16       q8, d23, d31

    ; s4 = cospi_18_64 * x4 + cospi_14_64 * x5
    vmlal.s16       q5, d24, d31
    vmlal.s16       q6, d25, d31

    ; s5 = cospi_14_64 * x4 - cospi_18_64 * x5
    vmlsl.s16       q7, d24, d30
    vmlsl.s16       q8, d25, d30

    ; q11:q12 = s0 + s4
    vadd.s32        q11, q1, q5
    vadd.s32        q12, q2, q6

    vdup.16         d0, r2                    ; d0 = cospi_10_64
    vdup.16         d1, r3                    ; d1 = cospi_22_64

    ; q1:q2 = s0 - s4
    vsub.s32        q1, q1, q5
    vsub.s32        q2, q2, q6

    ; x0 = dct_const_round_shift(s0 + s4)                        -> q11
    vqrshrn.s32     d22, q11, #14
    vqrshrn.s32     d23, q12, #14

    ; q12:q15 = s1 + s5
    vadd.s32        q12, q3, q7
    vadd.s32        q15, q4, q8

    ; q3:q4 = s1 - s5
    vsub.s32        q3, q3, q7
    vsub.s32        q4, q4, q8

    ; x4 = dct_const_round_shift(s0 - s4)                        -> q1
    vqrshrn.s32     d2, q1, #14
    vqrshrn.s32     d3, q2, #14

    ; x1 = dct_const_round_shift(s1 + s5)                        -> q12
    vqrshrn.s32     d24, q12, #14
    vqrshrn.s32     d25, q15, #14

    ; x5 = dct_const_round_shift(s1 - s5)                        -> q3
    vqrshrn.s32     d6, q3, #14
    vqrshrn.s32     d7, q4, #14

    ;----------------------------------------------------------------------
    ; stage 1, pairs (x2,x3) and (x6,x7)
    ;----------------------------------------------------------------------
    ; q4:q5 = cospi_10_64 * x2
    vmull.s16       q4, d26, d0
    vmull.s16       q5, d27, d0

    ; q2:q6 = cospi_22_64 * x2
    vmull.s16       q2, d26, d1
    vmull.s16       q6, d27, d1

    vdup.16         d30, r6                   ; d30 = cospi_26_64
    vdup.16         d31, r7                   ; d31 = cospi_6_64

    ; s2 = cospi_10_64 * x2 + cospi_22_64 * x3
    vmlal.s16       q4, d20, d1
    vmlal.s16       q5, d21, d1

    ; s3 = cospi_22_64 * x2 - cospi_10_64 * x3
    vmlsl.s16       q2, d20, d0
    vmlsl.s16       q6, d21, d0

    ; q0:q13 = cospi_26_64 * x6
    vmull.s16       q0, d18, d30
    vmull.s16       q13, d19, d30

    ; s6 = cospi_26_64 * x6 + cospi_6_64 * x7
    vmlal.s16       q0, d28, d31
    vmlal.s16       q13, d29, d31

    ; q10:q9 = cospi_6_64 * x6  (q9 is overwritten by its own last read)
    vmull.s16       q10, d18, d31
    vmull.s16       q9, d19, d31

    ; s7 = cospi_6_64 * x6 - cospi_26_64 * x7
    vmlsl.s16       q10, d28, d30
    vmlsl.s16       q9, d29, d30

    ; q14:q15 = s3 + s7
    vadd.s32        q14, q2, q10
    vadd.s32        q15, q6, q9

    ; q2:q6 = s3 - s7
    vsub.s32        q2, q2, q10
    vsub.s32        q6, q6, q9

    ; x3 = dct_const_round_shift(s3 + s7)                        -> q14
    vqrshrn.s32     d28, q14, #14
    vqrshrn.s32     d29, q15, #14

    ; x7 = dct_const_round_shift(s3 - s7)                        -> q2
    vqrshrn.s32     d4, q2, #14
    vqrshrn.s32     d5, q6, #14

    ; q9:q10 = s2 + s6
    vadd.s32        q9, q4, q0
    vadd.s32        q10, q5, q13

    ; q4:q5 = s2 - s6
    vsub.s32        q4, q4, q0
    vsub.s32        q5, q5, q13

    vdup.16         d30, r8                   ; d30 = cospi_8_64
    vdup.16         d31, r9                   ; d31 = cospi_24_64

    ; x2 = dct_const_round_shift(s2 + s6)                        -> q9
    vqrshrn.s32     d18, q9, #14
    vqrshrn.s32     d19, q10, #14

    ; x6 = dct_const_round_shift(s2 - s6)                        -> q4
    vqrshrn.s32     d8, q4, #14
    vqrshrn.s32     d9, q5, #14

    ;----------------------------------------------------------------------
    ; stage 2
    ;   here: x0 = q11, x1 = q12, x2 = q9, x3 = q14,
    ;         x4 = q1,  x5 = q3,  x6 = q4, x7 = q2
    ;----------------------------------------------------------------------
    ; q5:q6 = cospi_8_64 * x4
    vmull.s16       q5, d2, d30
    vmull.s16       q6, d3, d30

    ; q7:q0 = cospi_24_64 * x4
    vmull.s16       q7, d2, d31
    vmull.s16       q0, d3, d31

    ; s4 = cospi_8_64 * x4 + cospi_24_64 * x5
    vmlal.s16       q5, d6, d31
    vmlal.s16       q6, d7, d31

    ; s5 = cospi_24_64 * x4 - cospi_8_64 * x5
    vmlsl.s16       q7, d6, d30
    vmlsl.s16       q0, d7, d30

    ; q1:q3 = cospi_8_64 * x7
    vmull.s16       q1, d4, d30
    vmull.s16       q3, d5, d30

    ; q10:q2 = cospi_24_64 * x7
    vmull.s16       q10, d4, d31
    vmull.s16       q2, d5, d31

    ; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7
    vmlsl.s16       q1, d8, d31
    vmlsl.s16       q3, d9, d31

    ; s7 = cospi_8_64 * x6 + cospi_24_64 * x7
    vmlal.s16       q10, d8, d30
    vmlal.s16       q2, d9, d30

    ; the first four terms are plain 16-bit butterflies (s0..s3 = x0..x3)
    vadd.s16        q8, q11, q9               ; x0 = s0 + s2   (final output[0])
    vsub.s16        q11, q11, q9              ; x2 = s0 - s2
    vadd.s16        q4, q12, q14              ; x1 = s1 + s3
    vsub.s16        q12, q12, q14             ; x3 = s1 - s3

    ; q14:q15 = s4 + s6
    vadd.s32        q14, q5, q1
    vadd.s32        q15, q6, q3

    ; q5:q6 = s4 - s6
    vsub.s32        q5, q5, q1
    vsub.s32        q6, q6, q3

    ; x4 = dct_const_round_shift(s4 + s6)                        -> q9
    vqrshrn.s32     d18, q14, #14
    vqrshrn.s32     d19, q15, #14

    ; x6 = dct_const_round_shift(s4 - s6)                        -> q5
    vqrshrn.s32     d10, q5, #14
    vqrshrn.s32     d11, q6, #14

    ; q1:q3 = s5 + s7
    vadd.s32        q1, q7, q10
    vadd.s32        q3, q0, q2

    ; q7:q0 = s5 - s7
    vsub.s32        q7, q7, q10
    vsub.s32        q0, q0, q2

    ; x5 = dct_const_round_shift(s5 + s7)                        -> q14 (final output[6])
    vqrshrn.s32     d28, q1, #14
    vqrshrn.s32     d29, q3, #14

    ; x7 = dct_const_round_shift(s5 - s7)                        -> q7
    vqrshrn.s32     d14, q7, #14
    vqrshrn.s32     d15, q0, #14

    ;----------------------------------------------------------------------
    ; stage 3: cospi_16_64 rotations of (x2,x3) and (x6,x7)
    ;----------------------------------------------------------------------
    vdup.16         d30, r12                  ; d30 = cospi_16_64

    ; q2:q3 = cospi_16_64 * x2
    vmull.s16       q2, d22, d30
    vmull.s16       q3, d23, d30

    ; q13:q1 = cospi_16_64 * x2 (second copy, for the difference)
    vmull.s16       q13, d22, d30
    vmull.s16       q1, d23, d30

    ; s2 = cospi_16_64 * x2 + cospi_16_64 * x3
    vmlal.s16       q2, d24, d30
    vmlal.s16       q3, d25, d30

    ; s3 = cospi_16_64 * x2 - cospi_16_64 * x3
    vmlsl.s16       q13, d24, d30
    vmlsl.s16       q1, d25, d30

    ; x2 = dct_const_round_shift(s2)                             -> q2
    vqrshrn.s32     d4, q2, #14
    vqrshrn.s32     d5, q3, #14

    ; x3 = dct_const_round_shift(s3)                             -> q12 (final output[4])
    vqrshrn.s32     d24, q13, #14
    vqrshrn.s32     d25, q1, #14

    ; q13:q1 = cospi_16_64 * x6
    vmull.s16       q13, d10, d30
    vmull.s16       q1, d11, d30

    ; q11:q0 = cospi_16_64 * x6 (second copy, for the difference)
    vmull.s16       q11, d10, d30
    vmull.s16       q0, d11, d30

    ; s6 = cospi_16_64 * x6 + cospi_16_64 * x7
    vmlal.s16       q13, d14, d30
    vmlal.s16       q1, d15, d30

    ; s7 = cospi_16_64 * x6 - cospi_16_64 * x7
    vmlsl.s16       q11, d14, d30
    vmlsl.s16       q0, d15, d30

    ; x6 = dct_const_round_shift(s6)                             -> q10 (final output[2])
    vqrshrn.s32     d20, q13, #14
    vqrshrn.s32     d21, q1, #14

    ; x7 = dct_const_round_shift(s7)                             -> q6
    vqrshrn.s32     d12, q11, #14
    vqrshrn.s32     d13, q0, #14

    ;----------------------------------------------------------------------
    ; odd outputs are negated by subtracting from a zero vector
    ;----------------------------------------------------------------------
    vdup.16         q5, r10                   ; q5 = 0 (r10 must hold 0)

    vsub.s16        q9, q5, q9                ; output[1] = -x4
    vsub.s16        q11, q5, q2               ; output[3] = -x2
    vsub.s16        q13, q5, q6               ; output[5] = -x7
    vsub.s16        q15, q5, q4               ; output[7] = -x1
    MEND
|
||||
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp9_short_iht8x8_add_neon(int16_t *input, uint8_t *dest,
;                               int dest_stride, int tx_type)
;
; Inverse 8x8 hybrid transform (IDCT/IADST mix selected by tx_type) of the
; 64 coefficients at input, added to the 8x8 pixel block at dest and clipped
; to 8 bits.
;
; In:   r0  int16_t *input       64 coefficients, read as 8 rows of 8
;       r1  uint8_t *dest        top-left pixel of the destination block
;       r2  int      dest_stride byte distance between destination rows
;       r3  int      tx_type     2 -> idct_iadst path, 3 -> iadst_iadst path,
;                                anything else -> iadst_idct path
; Out:  nothing (the 8x8 block at dest is updated in place)
;
; This function is only meant for tx_type 1, 2 or 3. tx_type 0 is not
; rejected: it simply falls through to the iadst_idct path.
;
; Register preservation:
;   r0-r10 are pushed and popped around the transforms because the 1-D
;   transform macros read their cosine constants from core registers, so
;   dest (r1) and dest_stride (r2) would otherwise be lost.
;   d8-d15 (q4-q7) are callee-saved under the AAPCS and are written by
;   both 1-D transform macros, so they are saved and restored here too.
;   r12 is read by IADST8X8_1D as a constant and is not preserved; the
;   AAPCS treats it as a scratch register.
;   NOTE(review): TRANSPOSE8X8 and the GENERATE_*_CONSTANTS macros are
;   defined elsewhere in the file; confirm they touch no other callee-saved
;   register.
|vp9_short_iht8x8_add_neon| PROC

    ; load the 8 input rows into q8-q15 (r0 is advanced past the block)
    vld1.s16        {q8,q9}, [r0]!
    vld1.s16        {q10,q11}, [r0]!
    vld1.s16        {q12,q13}, [r0]!
    vld1.s16        {q14,q15}, [r0]!

    push            {r0-r10}
    vpush           {d8-d15}                  ; q4-q7 are callee-saved (AAPCS)

    ; transpose the input data
    TRANSPOSE8X8

    ; decide the type of transform
    cmp             r3, #2
    beq             idct_iadst
    cmp             r3, #3
    beq             iadst_iadst

iadst_idct
    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; first transform rows
    IDCT8x8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; then transform columns
    IADST8X8_1D

    b               end_vp9_short_iht8x8_add_neon

idct_iadst
    ; generate IADST constants
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; generate IDCT constants
    GENERATE_IDCT_CONSTANTS

    ; then transform columns
    IDCT8x8_1D

    b               end_vp9_short_iht8x8_add_neon

iadst_iadst
    ; generate IADST constants (reused for both passes)
    GENERATE_IADST_CONSTANTS

    ; first transform rows
    IADST8X8_1D

    ; transpose the matrix
    TRANSPOSE8X8

    ; then transform columns
    IADST8X8_1D

end_vp9_short_iht8x8_add_neon
    ; restore in the reverse order of the saves above
    vpop            {d8-d15}
    pop             {r0-r10}                  ; r1 = dest, r2 = dest_stride again

    ; ROUND_POWER_OF_TWO(temp_out[j], 5)
    vrshr.s16       q8, q8, #5
    vrshr.s16       q9, q9, #5
    vrshr.s16       q10, q10, #5
    vrshr.s16       q11, q11, #5
    vrshr.s16       q12, q12, #5
    vrshr.s16       q13, q13, #5
    vrshr.s16       q14, q14, #5
    vrshr.s16       q15, q15, #5

    ; keep a copy of dest for the stores; r1 is advanced by the loads
    mov             r0, r1

    ; load the 8 destination rows (8 pixels each)
    vld1.64         {d0}, [r1], r2
    vld1.64         {d1}, [r1], r2
    vld1.64         {d2}, [r1], r2
    vld1.64         {d3}, [r1], r2
    vld1.64         {d4}, [r1], r2
    vld1.64         {d5}, [r1], r2
    vld1.64         {d6}, [r1], r2
    vld1.64         {d7}, [r1]

    ; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
    vaddw.u8        q8, q8, d0
    vaddw.u8        q9, q9, d1
    vaddw.u8        q10, q10, d2
    vaddw.u8        q11, q11, d3
    vaddw.u8        q12, q12, d4
    vaddw.u8        q13, q13, d5
    vaddw.u8        q14, q14, d6
    vaddw.u8        q15, q15, d7

    ; clip_pixel: saturating narrow of signed 16-bit sums to unsigned 8-bit
    vqmovun.s16     d0, q8
    vqmovun.s16     d1, q9
    vqmovun.s16     d2, q10
    vqmovun.s16     d3, q11
    vqmovun.s16     d4, q12
    vqmovun.s16     d5, q13
    vqmovun.s16     d6, q14
    vqmovun.s16     d7, q15

    ; store the 8 result rows back to dest
    vst1.64         {d0}, [r0], r2
    vst1.64         {d1}, [r0], r2
    vst1.64         {d2}, [r0], r2
    vst1.64         {d3}, [r0], r2
    vst1.64         {d4}, [r0], r2
    vst1.64         {d5}, [r0], r2
    vst1.64         {d6}, [r0], r2
    vst1.64         {d7}, [r0], r2
    bx              lr
    ENDP  ; |vp9_short_iht8x8_add_neon|
|
||||
|
||||
END
|
@@ -13,7 +13,6 @@
|
||||
#include "vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_onyxc_int.h"
|
||||
|
||||
void vp9_machine_specific_config(VP9_COMMON *cm) {
|
||||
(void)cm;
|
||||
void vp9_machine_specific_config(VP9_COMMON *ctx) {
|
||||
vp9_rtcd();
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user