Compare commits


6 Commits

Author SHA1 Message Date
Ami Fischman
28147a449a libvpx: enable building for iOS devices (armv7)
Allow output of gas syntax assembly directly from obj_int_extract

Change-Id: I33a747e87ef1c590a8766dea17f8cb2497e54591
2013-07-19 14:05:59 -07:00
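To illustrate what gas-syntax output means here (a hedged sketch; the format strings and names below are invented, not obj_int_extract's actual code): the tool reads compiled objects holding structure offsets and prints them as assembler constants, and targeting iOS (armv7, assembled with gas) means emitting .equ directives instead of RVCT-style EQU lines.

#include <stdio.h>

/* Illustrative only: print one extracted constant in either syntax. */
static void emit(const char *name, int value, int gas) {
  if (gas)
    printf(".equ %s, %d\n", name, value);  /* gas syntax, as on iOS */
  else
    printf("%s EQU %d\n", name, value);    /* RVCT ("rvds") syntax */
}

int main(void) {
  emit("VP8_COMMON_mb_rows", 8, 1);  /* hypothetical offset and value */
  return 0;
}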
Ronald S. Bultje
33149cbb4c Replace generated quant tables with static lookup tables.
This prevents possible float rounding issues between architectures.

Change-Id: I6ed260aebd49feb4cfb5596a5370c44be5f72167
2013-07-16 14:04:41 -07:00
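A minimal sketch of the motivation (the table name, formula, and values are illustrative, not libvpx's actual tables): a table computed at init time with floating-point math can round differently on different architectures, while a precomputed static table is identical everywhere.

#include <math.h>
#include <stdint.h>

/* Before (hypothetical): filled at runtime; pow()'s last-bit rounding
 * may vary across platforms and compilers. */
static int16_t ac_quant[8];
static void init_quant(void) {
  for (int q = 0; q < 8; q++)
    ac_quant[q] = (int16_t)(4.0 * pow(1.06, q) + 0.5);
}

/* After: the same values computed once and committed as a static
 * lookup, so every build agrees bit-for-bit. */
static const int16_t ac_quant_static[8] = { 4, 4, 4, 5, 5, 5, 6, 6 };

int main(void) {
  init_quant();
  return ac_quant[3] == ac_quant_static[3] ? 0 : 1;  /* both 5 */
}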
John Koleszar
3f454060bb Fix above context pointers
In the prior code, the above context pointers used for entropy
decoding were initialized on the first frame, and not updated when
the frame size changed. The per-frame code which initializes the
contexts assumes that the contexts are contiguous, leading to an
incomplete initialization when the frame is smaller. This commit
updates the pointers so that the context is contiguous whenever
the frame size changes.

Conflicts:
	vp9/common/vp9_alloccommon.c

Change-Id: I08b53e3a30c8289491212311682ff1b8028cff6c
2013-07-16 14:04:41 -07:00
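A minimal sketch of the fix (struct and field names are hypothetical, not the actual vp9 code): the per-plane above-context pointers must be re-derived from the base allocation whenever the frame width changes, so the rows they address stay contiguous; deriving them only on the first frame leaves stale pointers behind when the frame shrinks.

#include <stdint.h>

#define MAX_PLANES 3

typedef struct {
  uint8_t *above_ctx_base;         /* one allocation covering all planes */
  uint8_t *above_ctx[MAX_PLANES];  /* per-plane views into it */
} ctx_pointers_t;

/* Call on every frame-size change, not just the first frame, so the
 * per-plane contexts remain contiguous for the current width. */
static void set_above_ctx_pointers(ctx_pointers_t *c, int mi_cols) {
  for (int p = 0; p < MAX_PLANES; p++)
    c->above_ctx[p] = c->above_ctx_base + p * mi_cols;
}

int main(void) {
  uint8_t base[3 * 64];
  ctx_pointers_t c = { base, { 0 } };
  set_above_ctx_pointers(&c, 64);  /* re-run whenever mi_cols changes */
  return c.above_ctx[2] == base + 128 ? 0 : 1;
}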
Yaowu Xu
d19ed5f249 Change to extend full border only when needed
This is a short-term optimization until we work out a decoder
implementation that requires no frame border extension.

Change-Id: I02d15bfde4d926b50a4e58b393d8c4062d1be70f
2013-07-16 14:04:39 -07:00
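The shape of the optimization, as a hedged sketch (all names are invented): pay for the full border copy only when a consumer may actually read past the frame edge, rather than unconditionally on every frame.

typedef struct { int width, height; unsigned char *buf; } frame_t;

static void extend_frame_borders(frame_t *f) {
  (void)f;  /* copy edge pixels outward into the border; elided here */
}

/* Previously this ran unconditionally; now the caller passes whether
 * anything (e.g. scaled prediction) can read outside the frame. */
static void maybe_extend_borders(frame_t *f, int needs_extension) {
  if (needs_extension)
    extend_frame_borders(f);
}

int main(void) {
  frame_t f = { 352, 288, 0 };
  maybe_extend_borders(&f, 0);  /* common case: skip the copy */
  return 0;
}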
Ronald S. Bultje
a801f7a295 Increase border size from 96 to 160.
This is required because, upon downscaling, if a motion vector points
partially into the UMV (e.g. all but one of the 64+7 pixels, i.e. 70),
then we can point up to 140 pixels into the larger-resolution (2x)
reference buffer UMV, which means the UMV for reference buffers in
downscaling needs to be 140 rounded up to the nearest multiple of 32,
i.e. 160.

Longer-term, we should probably handle the UMV differently by detecting
edge coverage on-the-fly and using a temporary buffer for edge extensions
instead of adding 160 pixels on all sides of the image (which means a
CIF image uses 3x its own area size for borders).

Change-Id: I5184443e6731cd6721fc6a5d430a53e7d91b4f7e
2013-07-16 12:41:10 -07:00
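The arithmetic in the message above, checked as a small program (the macro names are illustrative): a motion vector can reach 64+7-1 = 70 pixels into the UMV; at a 2x-larger reference that becomes 140, and rounding up to the next multiple of 32 gives 160.

#include <assert.h>

#define BLOCK 64   /* largest block size */
#define TAPS 7     /* extra reach of the 8-tap interpolation filter */
#define ALIGN 32

int main(void) {
  const int umv_reach = BLOCK + TAPS - 1;               /* 70 */
  const int scaled_reach = 2 * umv_reach;               /* 140 at 2x */
  const int border = (scaled_reach + ALIGN - 1) / ALIGN * ALIGN;
  assert(border == 160);
  return 0;
}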
Dmitry Kovalev
e39bd6407f Fixing vp9_get_pred_context_comp_ref_p function.
Adding missing parentheses around boolean expressions. Bitstream is changed.
Regenerating test vectors.

Conflicts:
	vp9/common/vp9_pred_common.c

Change-Id: I4cc00b761e9473f92f180a9fc3a0c607f0aaae56
2013-07-16 12:40:48 -07:00
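An illustration of the bug class being fixed (the expression is invented, not the actual vp9_get_pred_context_comp_ref_p code): C's precedence makes + bind tighter than ==, so dropping parentheses silently changes the computed context, and with it the bitstream.

#include <assert.h>

int main(void) {
  const int above = 2, left = 1;
  /* Intended: test each neighbor, then sum the results. */
  const int with_parens = (above == 2) + (left == 2);  /* 1 + 0 = 1 */
  /* Without parentheses this parses as ((above == (2 + left)) == 2),
   * which evaluates to 0 -- a different context value. */
  const int without_parens = above == 2 + left == 2;
  assert(with_parens != without_parens);
  return 0;
}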
210 changed files with 16220 additions and 24565 deletions

.gitignore (vendored): 4 lines changed

@@ -1,8 +1,6 @@
*.a
*.asm.s
*.d
*.gcno
*.gcda
*.o
*~
/*.ivf
@@ -16,7 +14,7 @@
/.install-*
/.libs
/Makefile
/config.log
/config.err
/config.mk
/decode_to_md5
/decode_to_md5.c

README: 36 lines changed

@@ -1,7 +1,7 @@
vpx Multi-Format Codec SDK
README - 1 August 2013
README - 21 June 2012
Welcome to the WebM VP8/VP9 Codec SDK!
Welcome to the WebM VP8 Codec SDK!
COMPILING THE APPLICATIONS/LIBRARIES:
The build system used is similar to autotools. Building generally consists of
@@ -53,63 +53,33 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv5te-android-gcc
armv5te-linux-rvct
armv5te-linux-gcc
armv5te-none-rvct
armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct
armv7-android-gcc
armv7-darwin-gcc
armv7-linux-rvct
armv7-linux-gcc
armv7-none-rvct
armv7-win32-vs11
mips32-linux-gcc
ppc32-darwin8-gcc
ppc32-darwin9-gcc
ppc32-linux-gcc
ppc64-darwin8-gcc
ppc64-darwin9-gcc
ppc64-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc
x86-darwin8-icc
x86-darwin9-gcc
x86-darwin9-icc
x86-darwin10-gcc
x86-darwin11-gcc
x86-darwin12-gcc
x86-darwin13-gcc
x86-linux-gcc
x86-linux-icc
x86-os2-gcc
x86-solaris-gcc
x86-win32-gcc
x86-win32-vs7
x86-win32-vs8
x86-win32-vs9
x86-win32-vs10
x86-win32-vs11
x86_64-darwin9-gcc
x86_64-darwin10-gcc
x86_64-darwin11-gcc
x86_64-darwin12-gcc
x86_64-darwin13-gcc
x86_64-linux-gcc
x86_64-linux-icc
x86_64-solaris-gcc
x86_64-win64-gcc
x86_64-win64-vs8
x86_64-win64-vs9
x86_64-win64-vs10
x86_64-win64-vs11
universal-darwin8-gcc
universal-darwin9-gcc
universal-darwin10-gcc
universal-darwin11-gcc
universal-darwin12-gcc
universal-darwin13-gcc
generic-gnu
The generic-gnu target, in conjunction with the CROSS environment variable,
@@ -127,7 +97,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
5. Configuration errors
If the configuration step fails, the first step is to look in the error log.
This defaults to config.log. This should give a good indication of what went
This defaults to config.err. This should give a good indication of what went
wrong. If not, contact us for support.
SUPPORT


@@ -7,7 +7,18 @@ REM in the file PATENTS. All contributing project authors may
REM be found in the AUTHORS file in the root of the source tree.
echo on
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/common/vp9_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/decoder/vp9_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/encoder/vp9_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/common/vp8_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/decoder/vp8_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"


@@ -75,7 +75,7 @@ Options:
Build options:
--help print this message
--log=yes|no|FILE file configure log is written to [config.log]
--log=yes|no|FILE file configure log is written to [config.err]
--target=TARGET target platform tuple [generic-gnu]
--cpu=CPU optimize for a specific cpu rather than a family
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
@@ -653,10 +653,6 @@ process_common_toolchain() {
tgt_isa=x86_64
tgt_os=darwin12
;;
*darwin13*)
tgt_isa=x86_64
tgt_os=darwin13
;;
x86_64*mingw32*)
tgt_os=win64
;;
@@ -755,10 +751,6 @@ process_common_toolchain() {
add_cflags "-mmacosx-version-min=10.8"
add_ldflags "-mmacosx-version-min=10.8"
;;
*-darwin13-*)
add_cflags "-mmacosx-version-min=10.9"
add_ldflags "-mmacosx-version-min=10.9"
;;
esac
# Handle Solaris variants. Solaris 10 needs -lposix4
@@ -1189,12 +1181,6 @@ EOF
fi
fi
# default use_x86inc to yes if pic is no or 64bit or we are not on darwin
echo " checking here for x86inc \"${tgt_isa}\" \"$pic\" "
if [ ${tgt_isa} = x86_64 -o ! "$pic" == "yes" -o ! ${tgt_os:0:6} = darwin ]; then
soft_enable use_x86inc
fi
# Position Independent Code (PIC) support, for building relocatable
# shared objects
enabled gcc && enabled pic && check_add_cflags -fPIC
@@ -1310,7 +1296,7 @@ process_detect() {
}
enable logging
logfile="config.log"
logfile="config.err"
self=$0
process() {
cmdline_args="$@"


@@ -381,7 +381,7 @@ generate_vcproj() {
RuntimeLibrary="$debug_runtime" \
UsePrecompiledHeader="0" \
WarningLevel="3" \
DebugInformationFormat="2" \
DebugInformationFormat="1" \
$warn_64bit \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"
@@ -395,7 +395,7 @@ generate_vcproj() {
RuntimeLibrary="$debug_runtime" \
UsePrecompiledHeader="0" \
WarningLevel="3" \
DebugInformationFormat="2" \
DebugInformationFormat="1" \
$warn_64bit \
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="true"


@@ -72,21 +72,10 @@ parse_project() {
eval "${var}_name=$name"
eval "${var}_guid=$guid"
if [ "$sfx" = "vcproj" ]; then
cur_config_list=`grep -A1 '<Configuration' $file |
grep Name | cut -d\" -f2`
else
cur_config_list=`grep -B1 'Label="Configuration"' $file |
grep Condition | cut -d\' -f4`
fi
new_config_list=$(for i in $config_list $cur_config_list; do
echo $i
done | sort | uniq)
if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then
mixed_platforms=1
fi
config_list="$new_config_list"
eval "${var}_config_list=\"$cur_config_list\""
# assume that all projects have the same list of possible configurations,
# so overwriting old config_lists is not a problem
config_list=`grep -A1 '<Configuration' $file |
grep Name | cut -d\" -f2`
proj_list="${proj_list} ${var}"
}
@@ -136,11 +125,6 @@ process_global() {
indent_push
IFS_bak=${IFS}
IFS=$'\r'$'\n'
if [ "$mixed_platforms" != "" ]; then
config_list="
Release|Mixed Platforms
Debug|Mixed Platforms"
fi
for config in ${config_list}; do
echo "${indent}$config = $config"
done
@@ -155,17 +139,10 @@ Debug|Mixed Platforms"
indent_push
for proj in ${proj_list}; do
eval "local proj_guid=\${${proj}_guid}"
eval "local proj_config_list=\${${proj}_config_list}"
IFS=$'\r'$'\n'
for config in ${proj_config_list}; do
if [ "$mixed_platforms" != "" ]; then
local c=${config%%|*}
echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}"
else
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
fi
for config in ${config_list}; do
echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
done
IFS=${IFS_bak}
@@ -191,14 +168,9 @@ process_makefile() {
IFS=$'\r'$'\n'
local TAB=$'\t'
cat <<EOF
ifeq (\$(CONFIG_VS_VERSION),7)
MSBUILD_TOOL := devenv.com
else
MSBUILD_TOOL := msbuild.exe
endif
found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
found_devenv := \$(shell which devenv.com >/dev/null 2>&1 && echo yes)
.nodevenv.once:
${TAB}@echo " * \$(MSBUILD_TOOL) not found in path."
${TAB}@echo " * devenv.com not found in path."
${TAB}@echo " * "
${TAB}@echo " * You will have to build all configurations manually using the"
${TAB}@echo " * Visual Studio IDE. To allow make to build them automatically,"
@@ -223,17 +195,16 @@ ${TAB}rm -rf "$platform"/"$config"
ifneq (\$(found_devenv),)
ifeq (\$(CONFIG_VS_VERSION),7)
$nows_sln_config: $outfile
${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
${TAB}devenv.com $outfile -build "$config"
else
$nows_sln_config: $outfile
${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
${TAB}devenv.com $outfile -build "$sln_config"
endif
else
$nows_sln_config: $outfile .nodevenv.once
${TAB}@echo " * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
${TAB}@echo " * Skipping build of $sln_config (devenv.com not in path)."
${TAB}@echo " * "
endif


@@ -7,6 +7,17 @@ REM in the file PATENTS. All contributing project authors may
REM be found in the AUTHORS file in the root of the source tree.
echo on
cl /I "./" /I "%1" /nologo /c "%1/vp9/common/vp9_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp9/decoder/vp9_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp9/encoder/vp9_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/vp8_asm_com_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/vp8_asm_dec_offsets.c"
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

configure (vendored): 18 lines changed

@@ -115,7 +115,6 @@ all_platforms="${all_platforms} x86-darwin9-icc"
all_platforms="${all_platforms} x86-darwin10-gcc"
all_platforms="${all_platforms} x86-darwin11-gcc"
all_platforms="${all_platforms} x86-darwin12-gcc"
all_platforms="${all_platforms} x86-darwin13-gcc"
all_platforms="${all_platforms} x86-linux-gcc"
all_platforms="${all_platforms} x86-linux-icc"
all_platforms="${all_platforms} x86-os2-gcc"
@@ -130,7 +129,6 @@ all_platforms="${all_platforms} x86_64-darwin9-gcc"
all_platforms="${all_platforms} x86_64-darwin10-gcc"
all_platforms="${all_platforms} x86_64-darwin11-gcc"
all_platforms="${all_platforms} x86_64-darwin12-gcc"
all_platforms="${all_platforms} x86_64-darwin13-gcc"
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} x86_64-solaris-gcc"
@@ -144,7 +142,6 @@ all_platforms="${all_platforms} universal-darwin9-gcc"
all_platforms="${all_platforms} universal-darwin10-gcc"
all_platforms="${all_platforms} universal-darwin11-gcc"
all_platforms="${all_platforms} universal-darwin12-gcc"
all_platforms="${all_platforms} universal-darwin13-gcc"
all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
@@ -250,10 +247,7 @@ EXPERIMENT_LIST="
multiple_arf
non420
alpha
interintra
filterintra
masked_interintra
masked_interinter
balanced_coeftree
"
CONFIG_LIST="
external_build
@@ -261,7 +255,6 @@ CONFIG_LIST="
install_bins
install_libs
install_srcs
use_x86inc
debug
gprof
gcov
@@ -318,7 +311,6 @@ CMDLINE_SELECT="
gprof
gcov
pic
use_x86inc
optimizations
ccache
runtime_cpu_detect
@@ -690,14 +682,6 @@ process_toolchain() {
# iOS/ARM builds do not work with gtest. This does not match
# x86 targets.
;;
*-win*)
# Some mingw toolchains don't have pthread available by default.
# Treat these more like visual studio where threading in gtest
# would be disabled for the same reason.
check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
;;
*)
enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
int z;

libs.mk: 18 lines changed

@@ -57,13 +57,6 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
RTCD += $$(BUILD_PFX)$(1).h
endef
# x86inc.asm is not compatible with pic 32bit builds. Restrict
# files which use it to 64bit builds or 32bit without pic
USE_X86INC = no
ifeq ($(CONFIG_USE_X86INC),yes)
USE_X86INC = yes
endif
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
@@ -390,11 +383,6 @@ LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
$(call enabled,LIBVPX_TEST_DATA))
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
CLEAN-OBJS += libvpx_test_srcs.txt
$(LIBVPX_TEST_DATA):
@echo " [DOWNLOAD] $@"
$(qexec)trap 'rm -f $@' INT TERM &&\
@@ -455,10 +443,6 @@ else
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
GTEST_OBJS=$(call objs,$(GTEST_SRCS))
ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
endif
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
@@ -483,7 +467,7 @@ $(foreach bin,$(LIBVPX_TEST_BINS),\
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm)\
-L. -lvpx -lgtest -lpthread -lm)\
)))\
$(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\


@@ -38,7 +38,7 @@ class ACMRandom {
// Returns a random value near 0 or near 255, to better exercise
// saturation behavior.
const uint8_t r = Rand8();
return r <= 128 ? 255 - (r >> 4) : r >> 4;
return r < 128 ? r << 4 : r >> 4;
}
int PseudoUniform(int range) {


@@ -33,6 +33,10 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
altref_count_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {


@@ -27,6 +27,10 @@ class BordersTest : public ::libvpx_test::EncoderTest,
SetMode(GET_PARAM(1));
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if ( video->frame() == 1) {


@@ -134,14 +134,14 @@ class VP8CodecFactory : public CodecFactory {
const libvpx_test::VP8CodecFactory kVP8;
#define VP8_INSTANTIATE_TEST_CASE(test, ...)\
#define VP8_INSTANTIATE_TEST_CASE(test, params)\
INSTANTIATE_TEST_CASE_P(VP8, test, \
::testing::Combine( \
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
&libvpx_test::kVP8)), \
__VA_ARGS__))
params))
#else
#define VP8_INSTANTIATE_TEST_CASE(test, ...)
#define VP8_INSTANTIATE_TEST_CASE(test, params)
#endif // CONFIG_VP8
@@ -216,14 +216,14 @@ class VP9CodecFactory : public CodecFactory {
const libvpx_test::VP9CodecFactory kVP9;
#define VP9_INSTANTIATE_TEST_CASE(test, ...)\
#define VP9_INSTANTIATE_TEST_CASE(test, params)\
INSTANTIATE_TEST_CASE_P(VP9, test, \
::testing::Combine( \
::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
&libvpx_test::kVP9)), \
__VA_ARGS__))
params))
#else
#define VP9_INSTANTIATE_TEST_CASE(test, ...)
#define VP9_INSTANTIATE_TEST_CASE(test, params)
#endif // CONFIG_VP9


@@ -40,6 +40,10 @@ class ConfigTest : public ::libvpx_test::EncoderTest,
++frame_count_out_;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
unsigned int frame_count_in_;
unsigned int frame_count_out_;
unsigned int frame_count_max_;


@@ -22,8 +22,8 @@ extern "C" {
}
namespace {
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h);
@@ -211,7 +211,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
virtual void SetUp() {
UUT_ = GET_PARAM(2);
/* Set up guard blocks for an inner block centered in the outer block */
/* Set up guard blocks for an inner block cetered in the outer block */
for (int i = 0; i < kOutputBufferSize; ++i) {
if (IsIndexInBorder(i))
output_[i] = 255;
@@ -527,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
vp9_convolve8_ssse3, vp9_convolve8_avg_c);
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_ssse3),
@@ -546,26 +546,4 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(32, 64, &convolve8_ssse3),
make_tuple(64, 64, &convolve8_ssse3)));
#endif
#if HAVE_NEON
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
vp9_convolve8_neon, vp9_convolve8_avg_neon);
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_neon),
make_tuple(8, 4, &convolve8_neon),
make_tuple(4, 8, &convolve8_neon),
make_tuple(8, 8, &convolve8_neon),
make_tuple(16, 8, &convolve8_neon),
make_tuple(8, 16, &convolve8_neon),
make_tuple(16, 16, &convolve8_neon),
make_tuple(32, 16, &convolve8_neon),
make_tuple(16, 32, &convolve8_neon),
make_tuple(32, 32, &convolve8_neon),
make_tuple(64, 32, &convolve8_neon),
make_tuple(32, 64, &convolve8_neon),
make_tuple(64, 64, &convolve8_neon)));
#endif
} // namespace


@@ -1,112 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <climits>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
class CpuSpeedTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<
libvpx_test::TestMode, int> {
protected:
CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
set_cpu_used_ = GET_PARAM(2);
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
}
}
int set_cpu_used_;
};
TEST_P(CpuSpeedTest, TestQ0) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
20);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 12000;
cfg_.rc_max_quantizer = 10;
cfg_.rc_min_quantizer = 0;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
40);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
TEST_P(CpuSpeedTest, TestLowBitrate) {
// Validate that this clip encodes and decodes without a mismatch
// when passing in a very high min q. This pushes the encoder to producing
// lots of small partitions which might will test the other condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 200;
cfg_.rc_min_quantizer = 40;
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
40);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
using std::tr1::make_tuple;
#define VP9_FACTORY \
static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9)
VP9_INSTANTIATE_TEST_CASE(
CpuSpeedTest,
::testing::Values(::libvpx_test::kTwoPassGood),
::testing::Range(0, 3));
} // namespace


@@ -42,6 +42,10 @@ class CQTest : public ::libvpx_test::EncoderTest,
n_frames_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {


@@ -36,6 +36,10 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
duration_ = 0.0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
const vpx_rational_t tb = video->timebase();


@@ -13,7 +13,6 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_ports/mem.h"
extern "C" {
#include "vp9/common/vp9_entropy.h"
@@ -265,131 +264,6 @@ void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
}
}
void fdct16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
vp9_short_fdct16x16_c(in, out, stride);
}
void idct16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
vp9_short_idct16x16_add_c(out, dst, stride >> 1);
}
void fht16x16(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
// FIXME(jingning): need to test both SSE2 and c
#if HAVE_SSE2
vp9_short_fht16x16_sse2(in, out, stride >> 1, tx_type);
#else
vp9_short_fht16x16_c(in, out, stride >> 1, tx_type);
#endif
}
void iht16x16_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht16x16_add_c(out, dst, stride >> 1, tx_type);
}
class FwdTrans16x16Test : public ::testing::TestWithParam<int> {
public:
virtual ~FwdTrans16x16Test() {}
virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm = fdct16x16;
inv_txfm = idct16x16_add;
} else {
fwd_txfm = fht16x16;
inv_txfm = iht16x16_add;
}
}
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*fwd_txfm)(in, out, dst, stride, tx_type);
}
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*inv_txfm)(in, out, dst, stride, tx_type);
}
int tx_type_;
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
};
TEST_P(FwdTrans16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 256);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 256);
for (int j = 0; j < 256; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
// Initialize a test block with input range [-255, 255].
test_input_block[j] = src[j] - dst[j];
}
const int pitch = 32;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
for (int j = 0; j < 256; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
EXPECT_GE(1, max_error)
<< "Error: 16x16 FHT/IHT has an individual round trip error > 1";
EXPECT_GE(count_test_block , total_error)
<< "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
}
TEST_P(FwdTrans16x16Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, 256);
DECLARE_ALIGNED_ARRAY(16, int16_t, output_extreme_block, 256);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 256);
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < 256; ++j)
input_extreme_block[j] = 255;
const int pitch = 32;
RunFwdTxfm(input_block, output_block, dst, pitch, tx_type_);
RunFwdTxfm(input_extreme_block, output_extreme_block, dst, pitch, tx_type_);
// The minimum quant value is 4.
for (int j = 0; j < 256; ++j) {
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_extreme_block[j]))
<< "Error: 16x16 FDCT extreme has coefficient larger "
<< "than 4*DCT_MAX_VALUE";
}
}
}
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans16x16Test, ::testing::Range(0, 4));
TEST(VP9Idct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -421,4 +295,72 @@ TEST(VP9Idct16x16Test, AccuracyCheck) {
}
}
// we need enable fdct test once we re-do the 16 point fdct.
TEST(VP9Fdct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t test_input_block[256];
int16_t test_temp_block[256];
uint8_t dst[256], src[256];
for (int j = 0; j < 256; ++j) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
}
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j)
test_input_block[j] = src[j] - dst[j];
const int pitch = 32;
vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
vp9_short_idct16x16_add_c(test_temp_block, dst, 16);
for (int j = 0; j < 256; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
if (max_error < error)
max_error = error;
total_error += error;
}
}
EXPECT_GE(1, max_error)
<< "Error: 16x16 FDCT/IDCT has an individual round trip error > 1";
EXPECT_GE(count_test_block , total_error)
<< "Error: 16x16 FDCT/IDCT has average round trip error > 1 per block";
}
TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
int16_t input_block[256], input_extreme_block[256];
int16_t output_block[256], output_extreme_block[256];
// Initialize a test block with input range [-255, 255].
for (int j = 0; j < 256; ++j) {
input_block[j] = rnd.Rand8() - rnd.Rand8();
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
if (i == 0)
for (int j = 0; j < 256; ++j)
input_extreme_block[j] = 255;
const int pitch = 32;
vp9_short_fdct16x16_c(input_block, output_block, pitch);
vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
// The minimum quant value is 4.
for (int j = 0; j < 256; ++j) {
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
<< "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
<< "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE";
}
}
}
} // namespace


@@ -190,9 +190,7 @@ class EncoderTest {
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
// Hook to determine whether the encode loop should continue.
virtual bool Continue() const {
return !(::testing::Test::HasFatalFailure() || abort_);
}
virtual bool Continue() const { return !abort_; }
const CodecFactory *codec_;
// Hook to determine whether to decode frame after encoding


@@ -50,6 +50,10 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
mismatch_nframes_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
psnr_ += pkt->data.psnr.psnr[0];
nframes_++;


@@ -20,64 +20,63 @@ extern "C" {
#include "acm_random.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
namespace {
void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
void fdct4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
vp9_short_fdct4x4_c(in, out, stride);
}
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
void idct4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_idct4x4_add_c(out, dst, stride >> 1);
}
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
void fht4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
}
void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
void iht4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type);
}
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
public:
virtual ~FwdTrans4x4Test() {}
virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm_ = fdct4x4;
inv_txfm_ = idct4x4_add;
FwdTrans4x4Test() {SetUpTestTxfm();}
~FwdTrans4x4Test() {}
void SetUpTestTxfm() {
tx_type = GetParam();
if (tx_type == 0) {
fwd_txfm = fdct4x4;
inv_txfm = idct4x4_add;
} else {
fwd_txfm_ = fht4x4;
inv_txfm_ = iht4x4_add;
fwd_txfm = fht4x4;
inv_txfm = iht4x4_add;
}
}
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*fwd_txfm_)(in, out, dst, stride, tx_type);
(*fwd_txfm)(in, out, dst, stride, tx_type);
}
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*inv_txfm_)(in, out, dst, stride, tx_type);
(*inv_txfm)(in, out, dst, stride, tx_type);
}
int tx_type_;
void (*fwd_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
int tx_type;
void (*fwd_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type);
void (*inv_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
void (*inv_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type);
};
TEST_P(FwdTrans4x4Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
int16_t test_input_block[16];
int16_t test_output_block[16];
const int pitch = 8;
int count_sign_block[16][2];
const int count_test_block = 1000000;
@@ -88,7 +87,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
for (int j = 0; j < 16; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -104,7 +103,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
EXPECT_TRUE(bias_acceptable)
<< "Error: 4x4 FDCT/FHT has a sign bias > 1%"
<< " for input range [-255, 255] at index " << j
<< " tx_type " << tx_type_;
<< " tx_type " << tx_type;
}
memset(count_sign_block, 0, sizeof(count_sign_block));
@@ -113,7 +112,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
for (int j = 0; j < 16; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
@@ -139,10 +138,9 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
double total_error = 0;
const int count_test_block = 1000000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
int16_t test_input_block[16];
int16_t test_temp_block[16];
uint8_t dst[16], src[16];
for (int j = 0; j < 16; ++j) {
src[j] = rnd.Rand8();
@@ -153,7 +151,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 8;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
if(test_temp_block[j] > 0) {
@@ -168,7 +166,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
}
// inverse transform and reconstruct the pixel block
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);
for (int j = 0; j < 16; ++j) {
const int diff = dst[j] - src[j];
@@ -183,7 +181,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
EXPECT_GE(count_test_block, total_error)
<< "Error: FDCT/IDCT or FHT/IHT has average "
<< "roundtrip error > 1 per block";
"roundtrip error > 1 per block";
}
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans4x4Test, ::testing::Range(0, 4));


@@ -13,7 +13,6 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_ports/mem.h"
extern "C" {
#include "vp9_rtcd.h"
@@ -26,62 +25,11 @@ void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
using libvpx_test::ACMRandom;
namespace {
void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
vp9_short_fdct8x8_c(in, out, stride);
}
void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
}
void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
// when we have all inverse dct functions done sse2.
#if HAVE_SSE2
vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
#else
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
#endif
}
void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
}
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
public:
virtual ~FwdTrans8x8Test() {}
virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm = fdct8x8;
inv_txfm = idct8x8_add;
} else {
fwd_txfm = fht8x8;
inv_txfm = iht8x8_add;
}
}
protected:
void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*fwd_txfm)(in, out, dst, stride, tx_type);
}
void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
(*inv_txfm)(in, out, dst, stride, tx_type);
}
int tx_type_;
void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
};
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
TEST(VP9Fdct8x8Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
int16_t test_input_block[64];
int16_t test_output_block[64];
const int pitch = 16;
int count_sign_block[64][2];
const int count_test_block = 100000;
@@ -93,7 +41,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
for (int j = 0; j < 64; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
@@ -107,7 +55,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = 1125;
EXPECT_LT(diff, max_diff)
<< "Error: 8x8 FDCT/FHT has a sign bias > "
<< "Error: 8x8 FDCT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-255, 255] at index " << j
<< " count0: " << count_sign_block[j][0]
@@ -122,7 +70,7 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
for (int j = 0; j < 64; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
@@ -136,25 +84,24 @@ TEST_P(FwdTrans8x8Test, SignBiasCheck) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = 10000;
EXPECT_LT(diff, max_diff)
<< "Error: 4x4 FDCT/FHT has a sign bias > "
<< "Error: 4x4 FDCT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-15, 15] at index " << j
<< " count0: " << count_sign_block[j][0]
<< " count1: " << count_sign_block[j][1]
<< " diff: " << diff;
}
}
};
TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
int16_t test_input_block[64];
int16_t test_temp_block[64];
uint8_t dst[64], src[64];
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8();
@@ -165,7 +112,7 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 16;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
for (int j = 0; j < 64; ++j){
if(test_temp_block[j] > 0) {
test_temp_block[j] += 2;
@@ -177,7 +124,7 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
test_temp_block[j] *= 4;
}
}
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
@@ -189,23 +136,21 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
}
EXPECT_GE(1, max_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
<< "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1";
EXPECT_GE(count_test_block/5, total_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
"error > 1/5 per block";
}
<< "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
};
TEST_P(FwdTrans8x8Test, ExtremalCheck) {
TEST(VP9Fdct8x8Test, ExtremalCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
int16_t test_input_block[64];
int16_t test_temp_block[64];
uint8_t dst[64], src[64];
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8() % 2 ? 255 : 0;
@@ -216,8 +161,8 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
test_input_block[j] = src[j] - dst[j];
const int pitch = 16;
RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
@@ -228,14 +173,13 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
}
EXPECT_GE(1, max_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
<< "Error: Extremal 8x8 FDCT/IDCT has an"
<< " individual roundtrip error > 1";
EXPECT_GE(count_test_block/5, total_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
<< "Error: Extremal 8x8 FDCT/IDCT has average"
<< " roundtrip error > 1/5 per block";
}
}
};
INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
} // namespace


@@ -49,7 +49,7 @@ class I420VideoSource : public VideoSource {
if (input_file_)
fclose(input_file_);
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
if (start_) {
fseek(input_file_, raw_sz_ * start_, SEEK_SET);
@@ -92,7 +92,6 @@ class I420VideoSource : public VideoSource {
}
virtual void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
// Read a frame from input_file.
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
@@ -109,8 +108,8 @@ class I420VideoSource : public VideoSource {
unsigned int frame_;
unsigned int width_;
unsigned int height_;
int framerate_numerator_;
int framerate_denominator_;
unsigned int framerate_numerator_;
unsigned int framerate_denominator_;
};
} // namespace libvpx_test


@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
extern "C" {
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
@@ -21,94 +22,100 @@ typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
int dst_stride);
namespace {
class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
protected:
virtual void SetUp() {
int i;
protected:
virtual void SetUp() {
int i;
UUT = GetParam();
memset(input, 0, sizeof(input));
/* Set up guard blocks */
for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
}
UUT = GetParam();
memset(input, 0, sizeof(input));
/* Set up guard blocks */
for (i = 0; i < 256; i++)
output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
idct_fn_t UUT;
short input[16];
unsigned char output[256];
unsigned char predict[256];
idct_fn_t UUT;
short input[16];
unsigned char output[256];
unsigned char predict[256];
};
TEST_P(IDCTTest, TestGuardBlocks) {
int i;
int i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << i;
else
EXPECT_EQ(255, output[i]);
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << i;
else
EXPECT_EQ(255, output[i]);
}
TEST_P(IDCTTest, TestAllZeros) {
int i;
int i;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(0, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestAllOnes) {
int i;
int i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestAddOne) {
int i;
int i;
for (i = 0; i < 256; i++) predict[i] = i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
for (i = 0; i < 256; i++)
predict[i] = i;
input[0] = 4;
REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(i + 1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) < 4 && i < 64)
EXPECT_EQ(i+1, output[i]) << "i==" << i;
else
EXPECT_EQ(255, output[i]) << "i==" << i;
}
TEST_P(IDCTTest, TestWithData) {
int i;
int i;
for (i = 0; i < 16; i++) input[i] = i;
for (i = 0; i < 16; i++)
input[i] = i;
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
for (i = 0; i < 256; i++)
if ((i & 0xF) > 3 || i > 63)
EXPECT_EQ(255, output[i]) << "i==" << i;
else if (i == 0)
EXPECT_EQ(11, output[i]) << "i==" << i;
else if (i == 34)
EXPECT_EQ(1, output[i]) << "i==" << i;
else if (i == 2 || i == 17 || i == 32)
EXPECT_EQ(3, output[i]) << "i==" << i;
else
EXPECT_EQ(0, output[i]) << "i==" << i;
for (i = 0; i < 256; i++)
if ((i & 0xF) > 3 || i > 63)
EXPECT_EQ(255, output[i]) << "i==" << i;
else if (i == 0)
EXPECT_EQ(11, output[i]) << "i==" << i;
else if (i == 34)
EXPECT_EQ(1, output[i]) << "i==" << i;
else if (i == 2 || i == 17 || i == 32)
EXPECT_EQ(3, output[i]) << "i==" << i;
else
EXPECT_EQ(0, output[i]) << "i==" << i;
}
INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
INSTANTIATE_TEST_CASE_P(C, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_c));
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
::testing::Values(vp8_short_idct4x4llm_mmx));


@@ -27,8 +27,6 @@ using libvpx_test::ACMRandom;
class IntraPredBase {
public:
virtual ~IntraPredBase() {}
virtual void TearDown() {
libvpx_test::ClearSystemState();
}


@@ -47,13 +47,12 @@ class IVFVideoSource : public CompressedVideoSource {
virtual void Init() {
// Allocate a buffer for read in the compressed video frame.
compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
ASSERT_TRUE(compressed_frame_buf_ != NULL)
<< "Allocate frame buffer failed";
ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
}
virtual void Begin() {
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
// Read file header
@@ -73,7 +72,6 @@ class IVFVideoSource : public CompressedVideoSource {
}
void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
uint8_t frame_hdr[kIvfFrameHdrSize];
// Check frame header and read a frame from input_file.
if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)


@@ -31,6 +31,10 @@ class KeyframeTest : public ::libvpx_test::EncoderTest,
set_cpu_used_ = 0;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (kf_do_force_kf_)


@@ -70,6 +70,10 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
SetMode(GET_PARAM(1));
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void DecompressedFrameHook(const vpx_image_t &img,
vpx_codec_pts_t pts) {
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));


@@ -428,7 +428,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#if HAVE_SSE
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
@@ -442,7 +441,6 @@ INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
make_tuple(4, 4, sad_4x4x4d_sse)));
#endif
#endif
#endif
#if HAVE_SSE2
#if CONFIG_VP8_ENCODER
@@ -453,20 +451,14 @@ const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
#endif
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
#endif
#endif
const sad_m_by_n_test_param_t sse2_tests[] = {
#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_wmt),
@@ -476,25 +468,18 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
make_tuple(4, 4, sad_4x4_wmt),
#endif
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
make_tuple(64, 64, sad_64x64_sse2_vp9),
make_tuple(64, 32, sad_64x32_sse2_vp9),
make_tuple(32, 64, sad_32x64_sse2_vp9),
make_tuple(32, 32, sad_32x32_sse2_vp9),
make_tuple(32, 16, sad_32x16_sse2_vp9),
make_tuple(16, 32, sad_16x32_sse2_vp9),
make_tuple(16, 16, sad_16x16_sse2_vp9),
make_tuple(16, 8, sad_16x8_sse2_vp9),
make_tuple(8, 16, sad_8x16_sse2_vp9),
make_tuple(16, 8, sad_16x8_sse2_vp9),
make_tuple(8, 8, sad_8x8_sse2_vp9),
make_tuple(8, 4, sad_8x4_sse2_vp9),
#endif
#endif
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
#if CONFIG_VP9_ENCODER
#if CONFIG_USE_X86INC
const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
@@ -520,7 +505,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
make_tuple(8, 4, sad_8x4x4d_sse2)));
#endif
#endif
#endif
#if HAVE_SSE3
#if CONFIG_VP8_ENCODER
@@ -539,11 +523,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
#endif
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
make_tuple(16, 16, sad_16x16_sse3)));
#endif
#endif
} // namespace


@@ -61,7 +61,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
int16_t *src_diff = be.src_diff;
for (int r = 0; r < kBlockHeight; ++r) {
for (int c = 0; c < kBlockWidth; ++c) {
src_diff[c] = static_cast<int16_t>(0xa5a5);
src_diff[c] = 0xa5a5;
}
src_diff += kDiffPredStride;
}


@@ -33,6 +33,10 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
delete[] modified_buf_;
}
virtual bool Continue() const {
return !HasFatalFailure() && !abort_;
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {


@@ -25,8 +25,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
@@ -89,7 +87,6 @@ LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc


@@ -181,7 +181,6 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
virtual void DecompressedFrameHook(const vpx_image_t& img,
const unsigned int frame_number) {
ASSERT_TRUE(md5_file_ != NULL);
char expected_md5[33];
char junk[128];


@@ -23,13 +23,10 @@ extern "C" {
namespace {
class TileIndependenceTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<int> {
public ::libvpx_test::CodecTestWithParam<int> {
protected:
TileIndependenceTest()
: EncoderTest(GET_PARAM(0)),
md5_fw_order_(),
md5_inv_order_(),
n_tiles_(GET_PARAM(1)) {
TileIndependenceTest() : EncoderTest(GET_PARAM(0)), n_tiles_(GET_PARAM(1)),
md5_fw_order_(), md5_inv_order_() {
init_flags_ = VPX_CODEC_USE_PSNR;
vpx_codec_dec_cfg_t cfg;
cfg.w = 704;
@@ -59,8 +56,9 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
::libvpx_test::MD5 *md5) {
const vpx_codec_err_t res = dec->DecodeFrame(
reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
const vpx_codec_err_t res =
dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
pkt->data.frame.sz);
if (res != VPX_CODEC_OK) {
abort_ = true;
ASSERT_EQ(VPX_CODEC_OK, res);
@@ -74,11 +72,11 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
}
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
private:
int n_tiles_;
protected:
::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
::libvpx_test::Decoder *fw_dec_, *inv_dec_;
};
// run an encode with 2 or 4 tiles, and do the decode both in normal and
@@ -95,7 +93,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
timebase.den, timebase.num, 0, 30);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const char *md5_fw_str = md5_fw_order_.Get();
const char *md5_fw_str = md5_fw_order_.Get();
const char *md5_inv_str = md5_inv_order_.Get();
// could use ASSERT_EQ(!memcmp(.., .., 16) here, but this gives nicer
@@ -104,6 +102,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
ASSERT_STREQ(md5_fw_str, md5_inv_str);
}
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest,
::testing::Range(0, 2, 1));
} // namespace


@@ -37,7 +37,7 @@ static double compute_psnr(const vpx_image_t *img1,
img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j];
sqrerr += d * d;
}
double mse = static_cast<double>(sqrerr) / (width_y * height_y);
double mse = sqrerr / (width_y * height_y);
double psnr = 100.0;
if (mse > 0.0) {
psnr = 10 * log10(255.0 * 255.0 / mse);


@@ -218,7 +218,6 @@ class SubpelVarianceTest :
vpx_free(src_);
delete[] ref_;
vpx_free(sec_);
libvpx_test::ClearSystemState();
}
protected:
@@ -483,7 +482,6 @@ INSTANTIATE_TEST_CASE_P(
#endif
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
@@ -597,11 +595,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 5, subpel_avg_variance64x32_sse2),
make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
#endif
#endif
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
vp9_sub_pixel_variance4x4_ssse3;
const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
@@ -686,7 +681,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
#endif
#endif
#endif // CONFIG_VP9_ENCODER
} // namespace vp9

View File

@@ -1,75 +0,0 @@
/*
Copyright (c) 2012 The WebM project authors. All Rights Reserved.
Use of this source code is governed by a BSD-style license
that can be found in the LICENSE file in the root of the source
tree. An additional intellectual property rights grant can be found
in the file PATENTS. All contributing project authors may
be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
const int kMaxPsnr = 100;
class LossLessTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
LossLessTest() : EncoderTest(GET_PARAM(0)),
psnr_(kMaxPsnr),
nframes_(0),
encoding_mode_(GET_PARAM(1)) {
}
virtual ~LossLessTest() {}
virtual void SetUp() {
InitializeConfig();
SetMode(encoding_mode_);
}
virtual void BeginPassHook(unsigned int /*pass*/) {
psnr_ = 0.0;
nframes_ = 0;
}
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
if (pkt->data.psnr.psnr[0] < psnr_)
psnr_= pkt->data.psnr.psnr[0];
}
double GetMinPsnr() const {
return psnr_;
}
private:
double psnr_;
unsigned int nframes_;
libvpx_test::TestMode encoding_mode_;
};
TEST_P(LossLessTest, TestLossLessEncoding) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 2000;
cfg_.g_lag_in_frames = 25;
cfg_.rc_min_quantizer = 0;
cfg_.rc_max_quantizer = 0;
init_flags_ = VPX_CODEC_USE_PSNR;
// intentionally changed the dimension for better testing coverage
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
timebase.den, timebase.num, 0, 30);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const double psnr_lossless = GetMinPsnr();
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
} // namespace
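
The lossless setup above hinges on pinning both rate-control quantizer bounds to zero. A hedged sketch of the same configuration through the public encoder API (error handling omitted; in this era of the codebase q == 0 is what selects the lossless path, there is no separate control yet):

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

static void configure_lossless(vpx_codec_ctx_t *codec, unsigned int w,
                               unsigned int h) {
  vpx_codec_enc_cfg_t cfg;
  vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0);
  cfg.g_w = w;
  cfg.g_h = h;
  cfg.rc_min_quantizer = 0;  /* force q = 0 on every frame */
  cfg.rc_max_quantizer = 0;
  vpx_codec_enc_init(codec, vpx_codec_vp9_cx(), &cfg, 0);
}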

View File

@@ -39,7 +39,7 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
// FIXME(rbultje) split in its own file
for (BLOCK_SIZE_TYPE bsize = BLOCK_4X4; bsize < BLOCK_SIZE_TYPES;
for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
const int block_width = 4 << b_width_log2(bsize);
const int block_height = 4 << b_height_log2(bsize);
@@ -93,8 +93,9 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_c));
#if HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
::testing::Values(vp9_subtract_block_sse2));
#endif
} // namespace vp9

View File

@@ -1,109 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/decoder/vp9_thread.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/md5_helper.h"
#include "test/webm_video_source.h"
namespace {
class VP9WorkerThreadTest : public ::testing::Test {
protected:
virtual ~VP9WorkerThreadTest() {}
virtual void SetUp() {
vp9_worker_init(&worker_);
}
virtual void TearDown() {
vp9_worker_end(&worker_);
}
VP9Worker worker_;
};
int ThreadHook(void* data, void* return_value) {
int* const hook_data = reinterpret_cast<int*>(data);
*hook_data = 5;
return *reinterpret_cast<int*>(return_value);
}
TEST_F(VP9WorkerThreadTest, HookSuccess) {
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
for (int i = 0; i < 2; ++i) {
EXPECT_TRUE(vp9_worker_reset(&worker_));
int hook_data = 0;
int return_value = 1; // return successfully from the hook
worker_.hook = ThreadHook;
worker_.data1 = &hook_data;
worker_.data2 = &return_value;
vp9_worker_launch(&worker_);
EXPECT_TRUE(vp9_worker_sync(&worker_));
EXPECT_FALSE(worker_.had_error);
EXPECT_EQ(5, hook_data);
EXPECT_TRUE(vp9_worker_sync(&worker_)); // should be a no-op.
}
}
TEST_F(VP9WorkerThreadTest, HookFailure) {
EXPECT_TRUE(vp9_worker_reset(&worker_));
int hook_data = 0;
int return_value = 0; // return failure from the hook
worker_.hook = ThreadHook;
worker_.data1 = &hook_data;
worker_.data2 = &return_value;
vp9_worker_launch(&worker_);
EXPECT_FALSE(vp9_worker_sync(&worker_));
EXPECT_TRUE(worker_.had_error);
// Ensure _reset() clears the error and _launch() can be called again.
return_value = 1;
EXPECT_TRUE(vp9_worker_reset(&worker_));
EXPECT_FALSE(worker_.had_error);
vp9_worker_launch(&worker_);
EXPECT_TRUE(vp9_worker_sync(&worker_));
EXPECT_FALSE(worker_.had_error);
}
TEST(VP9DecodeMTTest, MTDecode) {
libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
video.Init();
vpx_codec_dec_cfg_t cfg = {0};
cfg.threads = 2;
libvpx_test::VP9Decoder decoder(cfg, 0);
libvpx_test::MD5 md5;
for (video.Begin(); video.cxdata(); video.Next()) {
const vpx_codec_err_t res =
decoder.DecodeFrame(video.cxdata(), video.frame_size());
ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
const vpx_image_t *img = NULL;
// Get decompressed data
while ((img = dec_iter.Next())) {
md5.Add(img);
}
}
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
}
} // namespace

View File

@@ -99,7 +99,7 @@ class WebMVideoSource : public CompressedVideoSource {
virtual void Begin() {
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
<< file_name_;
nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
@@ -130,7 +130,6 @@ class WebMVideoSource : public CompressedVideoSource {
}
void FillFrame() {
ASSERT_TRUE(input_file_ != NULL);
if (chunk_ >= chunks_) {
unsigned int track;

View File

@@ -1370,12 +1370,12 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
shr eax, 1
cmp eax, 0
je xloop1
cmp eax, 64
cmp eax, 128
je xloop2
shr eax, 1
mov ah,al
neg al
add al, 128
@@ -2132,11 +2132,11 @@ void ScaleFilterRows_SSSE3(uint8* dst_ptr,
"mov 0x14(%esp),%edx \n"
"mov 0x18(%esp),%ecx \n"
"mov 0x1c(%esp),%eax \n"
"shr %eax \n"
"cmp $0x0,%eax \n"
"je 2f \n"
"cmp $0x40,%eax \n"
"cmp $0x80,%eax \n"
"je 3f \n"
"shr %eax \n"
"mov %al,%ah \n"
"neg %al \n"
"add $0x80,%al \n"
@@ -2662,7 +2662,6 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
const uint8* src_ptr, int src_stride,
int dst_width, int source_y_fraction) {
source_y_fraction >>= 1;
if (source_y_fraction == 0) {
asm volatile (
"1:"
@@ -2681,7 +2680,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
: "memory", "cc", "rax"
);
return;
} else if (source_y_fraction == 64) {
} else if (source_y_fraction == 128) {
asm volatile (
"1:"
"movdqa (%1),%%xmm0 \n"
@@ -2704,6 +2703,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
} else {
asm volatile (
"mov %3,%%eax \n"
"shr %%eax \n"
"mov %%al,%%ah \n"
"neg %%al \n"
"add $0x80,%%al \n"

View File

@@ -173,6 +173,7 @@ void vp8_create_common(VP8_COMMON *oci)
oci->use_bilinear_mc_filter = 0;
oci->full_pixel = 0;
oci->multi_token_partition = ONE_PARTITION;
oci->clr_type = REG_YUV;
oci->clamp_type = RECON_CLAMP_REQUIRED;
/* Initialize reference frame sign bias structure to defaults */

View File

@@ -72,6 +72,7 @@ typedef struct VP8Common
int horiz_scale;
int vert_scale;
YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG *frame_to_show;
@@ -114,6 +115,9 @@ typedef struct VP8Common
int uvdc_delta_q;
int uvac_delta_q;
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
@@ -153,6 +157,7 @@ typedef struct VP8Common
unsigned int current_video_frame;
int near_boffset[3];
int version;
TOKEN_PARTITION multi_token_partition;
@@ -160,10 +165,8 @@ typedef struct VP8Common
#ifdef PACKET_TESTING
VP8_HEADER oh;
#endif
#if CONFIG_POSTPROC_VISUALIZER
double bitrate;
double framerate;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;

View File

@@ -923,7 +923,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
{
char message[512];
sprintf(message, "Bitrate: %10.2f framerate: %10.2f ", oci->bitrate, oci->framerate);
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
#include "vp8/common/blockd.h"
#if CONFIG_POSTPROC
#include "postproc.h"
#endif /* CONFIG_POSTPROC */
BEGIN
#if CONFIG_POSTPROC
/* mfqe.c / filter_by_weight */
DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION);
#endif /* CONFIG_POSTPROC */
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
#if HAVE_MEDIA
/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */
ct_assert(B_DC_PRED, B_DC_PRED == 0);
ct_assert(B_TM_PRED, B_TM_PRED == 1);
ct_assert(B_VE_PRED, B_VE_PRED == 2);
ct_assert(B_HE_PRED, B_HE_PRED == 3);
ct_assert(B_LD_PRED, B_LD_PRED == 4);
ct_assert(B_RD_PRED, B_RD_PRED == 5);
ct_assert(B_VR_PRED, B_VR_PRED == 6);
ct_assert(B_VL_PRED, B_VL_PRED == 7);
ct_assert(B_HD_PRED, B_HD_PRED == 8);
ct_assert(B_HU_PRED, B_HU_PRED == 9);
#endif
#if HAVE_SSE2
#if CONFIG_POSTPROC
/* vp8_filter_by_weight16x16 and 8x8 */
ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)
#endif /* CONFIG_POSTPROC */
#endif /* HAVE_SSE2 */

View File

@@ -1095,7 +1095,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
if (pc->frame_type == KEY_FRAME) {
(void)vp8_read_bit(bc); // colorspace
pc->clr_type = (YUV_TYPE)vp8_read_bit(bc);
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
}

View File

@@ -430,6 +430,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
*time_stamp = pbi->last_time_stamp;
*time_end_stamp = 0;
sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
#else

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/asm_offsets.h"
#include "onyxd_int.h"
BEGIN
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
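
Both offset files above follow the same idiom: compile (but never link) a C file whose macros bake offsetof() results into the object file, then extract them into an assembly include so the asm can address struct fields symbolically. A hedged sketch of the kernel-style variant of the trick; libvpx's BEGIN/DEFINE/END macros from vpx_ports/asm_offsets.h differ in detail, and the struct here is illustrative:

#include <stddef.h>

/* Emits a marker like "->bd_value 8" into the generated assembly;
 * a build script greps these markers out into an .asm include file. */
#define DEFINE(sym, val) \
  __asm__ volatile("\n->" #sym " %0" : : "i"((int)(val)))

struct bool_decoder_sketch {        /* illustrative, not the real layout */
  const unsigned char *user_buffer;
  unsigned int value;
  int count;
};

void emit_offsets(void) {
  DEFINE(bd_user_buffer, offsetof(struct bool_decoder_sketch, user_buffer));
  DEFINE(bd_value, offsetof(struct bool_decoder_sketch, value));
  DEFINE(bd_count, offsetof(struct bool_decoder_sketch, count));
}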

View File

@@ -1322,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_start_encode(bc, cx_data, cx_data_end);
/* signal clr type */
vp8_write_bit(bc, 0);
vp8_write_bit(bc, pc->clr_type);
vp8_write_bit(bc, pc->clamp_type);
}

View File

@@ -1325,7 +1325,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
{
@@ -1349,9 +1349,9 @@ void vp8_init_second_pass(VP8_COMP *cpi)
* sum duration is not. Its calculated based on the actual durations of
* all frames from the first pass.
*/
vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
cpi->output_framerate = cpi->framerate;
cpi->output_frame_rate = cpi->frame_rate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
@@ -2398,7 +2398,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
target_frame_size += cpi->min_frame_bandwidth;
/* Every other frame gets a few extra bits */
if ( (cpi->frames_since_golden & 0x01) &&
if ( (cpi->common.frames_since_golden & 0x01) &&
(cpi->frames_till_gf_update_due > 0) )
{
target_frame_size += cpi->twopass.alt_extra_bits;
@@ -2529,7 +2529,7 @@ void vp8_second_pass(VP8_COMP *cpi)
/* Set nominal per second bandwidth for this frame */
cpi->target_bandwidth = (int)
(cpi->per_frame_bandwidth * cpi->output_framerate);
(cpi->per_frame_bandwidth * cpi->output_frame_rate);
if (cpi->target_bandwidth < 0)
cpi->target_bandwidth = 0;
@@ -3185,7 +3185,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
/* Convert to a per second bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
cpi->output_framerate);
cpi->output_frame_rate);
}
/* Note the total error score of the kf group minus the key frame itself */
@@ -3224,7 +3224,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->common.vert_scale = NORMAL;
/* Calculate Average bits per frame. */
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate);
av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
/* CBR... Use the clip average as the target for deciding resample */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -3299,7 +3299,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
{
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate));
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
/* If triggered last time the threshold for triggering again is

View File

@@ -301,11 +301,11 @@ static int rescale(int val, int num, int denom)
static void init_temporal_layer_context(VP8_COMP *cpi,
VP8_CONFIG *oxcf,
const int layer,
double prev_layer_framerate)
double prev_layer_frame_rate)
{
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
lc->frame_rate = cpi->output_frame_rate / cpi->oxcf.rate_decimator[layer];
lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000;
lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
@@ -335,7 +335,7 @@ static void init_temporal_layer_context(VP8_COMP *cpi,
lc->avg_frame_size_for_layer =
(int)((cpi->oxcf.target_bitrate[layer] -
cpi->oxcf.target_bitrate[layer-1]) * 1000 /
(lc->framerate - prev_layer_framerate));
(lc->frame_rate - prev_layer_frame_rate));
lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
lc->active_best_quality = cpi->oxcf.best_allowed_q;
@@ -363,7 +363,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
const int prev_num_layers)
{
int i;
double prev_layer_framerate = 0;
double prev_layer_frame_rate = 0;
const int curr_num_layers = cpi->oxcf.number_of_layers;
// If the previous state was 1 layer, get current layer context from cpi.
// We need this to set the layer context for the new layers below.
@@ -377,7 +377,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
LAYER_CONTEXT *lc = &cpi->layer_context[i];
if (i >= prev_num_layers)
{
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
}
// The initial buffer levels are set based on their starting levels.
// We could set the buffer levels based on the previous state (normalized
@@ -403,8 +403,8 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
lc->bits_off_target = lc->buffer_level;
restore_layer_context(cpi, 0);
}
prev_layer_framerate = cpi->output_framerate /
cpi->oxcf.rate_decimator[i];
prev_layer_frame_rate = cpi->output_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
@@ -1282,21 +1282,21 @@ int vp8_reverse_trans(int x)
return 63;
}
void vp8_new_framerate(VP8_COMP *cpi, double framerate)
void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
{
if(framerate < .1)
framerate = 30;
cpi->framerate = framerate;
cpi->output_framerate = framerate;
cpi->frame_rate = framerate;
cpi->output_frame_rate = framerate;
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth /
cpi->output_framerate);
cpi->output_frame_rate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
/* Set Maximum gf/arf interval */
cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2);
cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
if(cpi->max_gf_interval < 12)
cpi->max_gf_interval = 12;
@@ -1337,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
* seems like a reasonable framerate, then use that as a guess, otherwise
* use 30.
*/
cpi->framerate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
cpi->frame_rate = (double)(oxcf->timebase.den) /
(double)(oxcf->timebase.num);
if (cpi->framerate > 180)
cpi->framerate = 30;
if (cpi->frame_rate > 180)
cpi->frame_rate = 30;
cpi->ref_framerate = cpi->framerate;
cpi->ref_frame_rate = cpi->frame_rate;
/* change includes all joint functionality */
vp8_change_config(cpi, oxcf);
@@ -1369,13 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
double prev_layer_framerate=0;
double prev_layer_frame_rate=0;
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
prev_layer_framerate = cpi->output_framerate /
cpi->oxcf.rate_decimator[i];
init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
prev_layer_frame_rate = cpi->output_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
@@ -1399,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi)
if (oxcf->number_of_layers > 1)
{
unsigned int i;
double prev_layer_framerate=0;
double prev_layer_frame_rate=0;
for (i=0; i<oxcf->number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
lc->framerate =
cpi->ref_framerate / oxcf->rate_decimator[i];
lc->frame_rate =
cpi->ref_frame_rate / oxcf->rate_decimator[i];
lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
lc->starting_buffer_level = rescale(
@@ -1432,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi)
lc->avg_frame_size_for_layer =
(int)((oxcf->target_bitrate[i] -
oxcf->target_bitrate[i-1]) * 1000 /
(lc->framerate - prev_layer_framerate));
(lc->frame_rate - prev_layer_frame_rate));
prev_layer_framerate = lc->framerate;
prev_layer_frame_rate = lc->frame_rate;
}
}
}
@@ -1625,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->oxcf.target_bandwidth, 1000);
/* Set up frame rate and related parameters rate control values. */
vp8_new_framerate(cpi, cpi->framerate);
vp8_new_frame_rate(cpi, cpi->frame_rate);
/* Set absolute upper and lower quality limits */
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
@@ -1945,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate;
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
}
#ifdef OUTPUT_YUV_SRC
@@ -2273,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
{
extern int count_mb_seg[4];
FILE *f = fopen("modes.stt", "a");
double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ;
double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
fprintf(f, "intra_mode in Intra Frames:\n");
fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
@@ -2750,7 +2750,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
/* this frame refreshes means next frames don't unless specified by user */
cpi->frames_since_golden = 0;
cpi->common.frames_since_golden = 0;
/* Clear the alternate reference update pending flag. */
cpi->source_alt_ref_pending = 0;
@@ -2802,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
* user
*/
cm->refresh_golden_frame = 0;
cpi->frames_since_golden = 0;
cpi->common.frames_since_golden = 0;
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
@@ -2834,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
if (cpi->frames_till_alt_ref_frame)
cpi->frames_till_alt_ref_frame --;
if (cpi->common.frames_till_alt_ref_frame)
cpi->common.frames_till_alt_ref_frame --;
cpi->frames_since_golden ++;
cpi->common.frames_since_golden ++;
if (cpi->frames_since_golden > 1)
if (cpi->common.frames_since_golden > 1)
{
cpi->recent_ref_frame_usage[INTRA_FRAME] +=
cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
@@ -2890,11 +2890,11 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
cpi->prob_last_coded = 200;
cpi->prob_gf_coded = 1;
}
else if (cpi->frames_since_golden == 0)
else if (cpi->common.frames_since_golden == 0)
{
cpi->prob_last_coded = 214;
}
else if (cpi->frames_since_golden == 1)
else if (cpi->common.frames_since_golden == 1)
{
cpi->prob_last_coded = 192;
cpi->prob_gf_coded = 220;
@@ -3368,12 +3368,12 @@ static void encode_frame_to_data_rate
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
/* per second target bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
cpi->output_framerate);
cpi->output_frame_rate);
}
}
else
#endif
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate);
cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate);
/* Default turn off buffer to buffer copying */
cm->copy_buffer_to_gf = 0;
@@ -4557,7 +4557,7 @@ static void encode_frame_to_data_rate
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
int bits_off_for_this_layer =
(int)(lc->target_bandwidth / lc->framerate -
(int)(lc->target_bandwidth / lc->frame_rate -
cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
@@ -4805,7 +4805,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
{
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
*cpi->oxcf.two_pass_vbrmin_section / 100);
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate);
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
}
}
#endif
@@ -4821,10 +4821,8 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
{
#if HAVE_NEON
int64_t store_reg[8];
#if CONFIG_RUNTIME_CPU_DETECT
#endif
VP8_COMMON *cm = &cpi->common;
#endif
#endif
struct vpx_usec_timer timer;
int res = 0;
@@ -4850,6 +4848,7 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
res = -1;
cm->clr_type = sd->clrtype;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -4934,7 +4933,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->frames_till_gf_update_due);
force_src_buffer = &cpi->alt_ref_buffer;
}
cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->refresh_alt_ref_frame = 1;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 0;
@@ -5039,7 +5038,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (this_duration)
{
if (step)
cpi->ref_framerate = 10000000.0 / this_duration;
cpi->ref_frame_rate = 10000000.0 / this_duration;
else
{
double avg_duration, interval;
@@ -5053,11 +5052,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if(interval > 10000000.0)
interval = 10000000;
avg_duration = 10000000.0 / cpi->ref_framerate;
avg_duration = 10000000.0 / cpi->ref_frame_rate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
cpi->ref_framerate = 10000000.0 / avg_duration;
cpi->ref_frame_rate = 10000000.0 / avg_duration;
}
if (cpi->oxcf.number_of_layers > 1)
@@ -5068,12 +5067,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
lc->framerate = cpi->ref_framerate /
cpi->oxcf.rate_decimator[i];
lc->frame_rate = cpi->ref_frame_rate /
cpi->oxcf.rate_decimator[i];
}
}
else
vp8_new_framerate(cpi, cpi->ref_framerate);
vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
}
cpi->last_time_stamp_seen = cpi->source->ts_start;
@@ -5090,7 +5089,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
layer = cpi->oxcf.layer_id[
cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
restore_layer_context (cpi, layer);
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
}
if (cpi->compressor_speed == 2)
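
The renamed frame-rate fields feed directly into simple per-frame budget arithmetic. A worked example under assumed inputs (30 fps, 600 kbps), mirroring vp8_new_frame_rate above:

#include <stdio.h>

int main(void) {
  const double frame_rate = 30.0;          /* cpi->frame_rate */
  const int target_bandwidth = 600000;     /* bits per second */
  /* per-frame budget, as in vp8_new_frame_rate */
  const int per_frame_bandwidth = (int)(target_bandwidth / frame_rate);
  /* maximum golden-frame interval, floored at 12 elsewhere in vp8 */
  int max_gf_interval = (int)(frame_rate / 2.0) + 2;
  if (max_gf_interval < 12)
    max_gf_interval = 12;
  printf("%d bits/frame, max gf interval %d\n",
         per_frame_bandwidth, max_gf_interval);   /* 20000, 17 */
  return 0;
}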

View File

@@ -232,7 +232,7 @@ enum
typedef struct
{
/* Layer configuration */
double framerate;
double frame_rate;
int target_bandwidth;
/* Layer specific coding parameters */
@@ -320,7 +320,6 @@ typedef struct VP8_COMP
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *last_frame_unscaled_source;
unsigned int frames_till_alt_ref_frame;
/* frame in src_buffers has been identified to be encoded as an alt ref */
int source_alt_ref_pending;
/* an alt ref frame has been encoded and is usable */
@@ -370,7 +369,6 @@ typedef struct VP8_COMP
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
unsigned int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;
@@ -403,7 +401,7 @@ typedef struct VP8_COMP
/* Minimum allocation that should be used for any frame */
int min_frame_bandwidth;
int inter_frame_target;
double output_framerate;
double output_frame_rate;
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
@@ -417,8 +415,8 @@ typedef struct VP8_COMP
int buffered_mode;
double framerate;
double ref_framerate;
double frame_rate;
double ref_frame_rate;
int64_t buffer_level;
int64_t bits_off_target;

View File

@@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_key = cpi->frames_since_key;
cc->filter_level = cpi->common.filter_level;
cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due;
cc->frames_since_golden = cpi->frames_since_golden;
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
@@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
cpi->frames_since_key = cc->frames_since_key;
cpi->common.filter_level = cc->filter_level;
cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due;
cpi->frames_since_golden = cc->frames_since_golden;
cpi->common.frames_since_golden = cc->frames_since_golden;
vp8_copy(cpi->common.fc.mvc, cc->mvc);
@@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
}
else {
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
@@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
/* frame separation adjustment ( down) */
if (cpi->frames_since_key < cpi->output_framerate / 2)
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
kf_boost = (int)(kf_boost
* cpi->frames_since_key / (cpi->output_framerate / 2));
* cpi->frames_since_key / (cpi->output_frame_rate / 2));
/* Minimal target size is |2* per_frame_bandwidth|. */
if (kf_boost < 16)
@@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (Adjustment > (cpi->this_frame_target - min_frame_target))
Adjustment = (cpi->this_frame_target - min_frame_target);
if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1))
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
else
cpi->this_frame_target -= Adjustment;
@@ -1360,7 +1360,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
* whichever is smaller.
*/
int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2;
av_key_frame_frequency = 1 + (int)cpi->output_frame_rate * 2;
if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
av_key_frame_frequency = key_freq;
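
The keyframe boost logic above (single-layer case) is mostly small integer arithmetic. A simplified sketch, assuming the Q-dependent kf_boost_qadjustment step between the two branches is skipped:

#define MAX(a, b) ((a) > (b) ? (a) : (b))

static int iframe_boost_sketch(double output_frame_rate,
                               int frames_since_key) {
  const int initial_boost = 32;   /* ~ |3.0 * per_frame_bandwidth| */
  int kf_boost = MAX(initial_boost, (int)(2 * output_frame_rate - 16));
  /* frame separation adjustment (down) when the last keyframe is
   * less than half a second away */
  if (frames_since_key < output_frame_rate / 2)
    kf_boost = (int)(kf_boost * frames_since_key /
                     (output_frame_rate / 2));
  return MAX(kf_boost, 16);       /* minimal target: |2 * per_frame_bandwidth| */
}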

View File

@@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
void vp8_auto_select_speed(VP8_COMP *cpi)
{
int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;

View File

@@ -66,6 +66,7 @@ VP8_COMMON_SRCS-yes += common/setupintrarecon.c
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-yes += common/variance_c.c
VP8_COMMON_SRCS-yes += common/variance.h
VP8_COMMON_SRCS-yes += common/vp8_asm_com_offsets.c
VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h
@@ -191,4 +192,7 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(A
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
$(eval $(call asm_offsets_template,\
vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/vp8_asm_com_offsets.c))
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))

View File

@@ -695,6 +695,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}
@@ -1078,7 +1079,11 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
if (sd.clrtype == REG_YUV)
ctx->preview_img.fmt = VPX_IMG_FMT_I420;
else
ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
ctx->preview_img.x_chroma_shift = 1;
ctx->preview_img.y_chroma_shift = 1;

View File

@@ -41,6 +41,15 @@ typedef enum
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
typedef struct
{
unsigned int id;
unsigned long sz;
unsigned int align;
unsigned int flags;
unsigned long(*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);
} mem_req_t;
static const mem_req_t vp8_mem_req_segs[] =
{
{VP8_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz},
@@ -84,6 +93,65 @@ static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_
return sizeof(vpx_codec_alg_priv_t);
}
static void vp8_mmap_dtor(vpx_codec_mmap_t *mmap)
{
free(mmap->priv);
}
static vpx_codec_err_t vp8_mmap_alloc(vpx_codec_mmap_t *mmap)
{
vpx_codec_err_t res;
unsigned int align;
align = mmap->align ? mmap->align - 1 : 0;
if (mmap->flags & VPX_CODEC_MEM_ZERO)
mmap->priv = calloc(1, mmap->sz + align);
else
mmap->priv = malloc(mmap->sz + align);
res = (mmap->priv) ? VPX_CODEC_OK : VPX_CODEC_MEM_ERROR;
mmap->base = (void *)((((uintptr_t)mmap->priv) + align) & ~(uintptr_t)align);
mmap->dtor = vp8_mmap_dtor;
return res;
}
static vpx_codec_err_t vp8_validate_mmaps(const vp8_stream_info_t *si,
const vpx_codec_mmap_t *mmaps,
vpx_codec_flags_t init_flags)
{
int i;
vpx_codec_err_t res = VPX_CODEC_OK;
for (i = 0; i < NELEMENTS(vp8_mem_req_segs) - 1; i++)
{
/* Ensure the segment has been allocated */
if (!mmaps[i].base)
{
res = VPX_CODEC_MEM_ERROR;
break;
}
/* Verify variable size segment is big enough for the current si. */
if (vp8_mem_req_segs[i].calc_sz)
{
vpx_codec_dec_cfg_t cfg;
cfg.w = si->w;
cfg.h = si->h;
if (mmaps[i].sz < vp8_mem_req_segs[i].calc_sz(&cfg, init_flags))
{
res = VPX_CODEC_MEM_ERROR;
break;
}
}
}
return res;
}
static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
{
int i;
@@ -110,6 +178,16 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
}
}
static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
{
int i;
for (i = 0; i < NELEMENTS(ctx->mmaps); i++)
if (ctx->mmaps[i].id == id)
return ctx->mmaps[i].base;
return NULL;
}
static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
{
/* nothing to clean up */
@@ -136,7 +214,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
mmap.align = vp8_mem_req_segs[0].align;
mmap.flags = vp8_mem_req_segs[0].flags;
res = vpx_mmap_alloc(&mmap);
res = vp8_mmap_alloc(&mmap);
if (res != VPX_CODEC_OK) return res;
vp8_init_ctx(ctx, &mmap);
@@ -288,7 +366,8 @@ static void yuvconfig2image(vpx_image_t *img,
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
img->fmt = VPX_IMG_FMT_I420;
img->fmt = yv12->clrtype == REG_YUV ?
VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
img->w = yv12->y_stride;
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
img->d_w = yv12->y_width;
@@ -409,7 +488,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
ctx->base.init_flags);
res = vpx_mmap_alloc(&ctx->mmaps[i]);
res = vp8_mmap_alloc(&ctx->mmaps[i]);
}
if (!res)
@@ -421,9 +500,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
/* Initialize the decoder instance on the first frame*/
if (!res && !ctx->decoder_init)
{
res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
vp8_mem_req_segs, NELEMENTS(vp8_mem_req_segs),
ctx->base.init_flags);
res = vp8_validate_mmaps(&ctx->si, ctx->mmaps, ctx->base.init_flags);
if (!res)
{
@@ -720,6 +797,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}
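
The vp8_mmap_alloc routine added above uses the standard over-allocate-and-round trick to honor an alignment without posix_memalign. A self-contained sketch, assuming align is a power of two (as the mem_req table guarantees):

#include <stdint.h>
#include <stdlib.h>

static void *mmap_alloc_sketch(size_t sz, unsigned int align,
                               void **raw_out) {
  const uintptr_t mask = align ? align - 1 : 0;  /* align must be 2^n */
  void *raw = malloc(sz + mask);                 /* slack for rounding up */
  if (raw == NULL)
    return NULL;
  *raw_out = raw;                                /* keep for free() later */
  return (void *)(((uintptr_t)raw + mask) & ~mask);
}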

View File

@@ -35,5 +35,9 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h
VP8_DX_SRCS-yes += decoder/treereader.h
VP8_DX_SRCS-yes += decoder/onyxd_if.c
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
VP8_DX_SRCS-yes += decoder/vp8_asm_dec_offsets.c
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
$(eval $(call asm_offsets_template,\
vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/vp8_asm_dec_offsets.c))

View File

@@ -18,7 +18,6 @@
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
@@ -138,8 +137,6 @@ int main(int argc, char **argv) {
int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
int flag_periodicity;
int max_intra_size_pct;
clock_t before;
clock_t after;
/* Check usage and arguments */
if (argc < 9)
@@ -642,7 +639,6 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
max_intra_size_pct);
before = clock();
frame_avail = 1;
while (frame_avail || got_data) {
vpx_codec_iter_t iter = NULL;
@@ -664,8 +660,8 @@ int main(int argc, char **argv) {
got_data = 1;
switch (pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT:
for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
i < cfg.ts_number_layers; i++)
for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
i<cfg.ts_number_layers; i++)
{
write_ivf_frame_header(outfile[i], pkt);
(void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
@@ -680,13 +676,9 @@ int main(int argc, char **argv) {
frame_cnt++;
pts += frame_duration;
}
after = clock();
printf("Processed %d frames in %ld ms.\n", frame_cnt-1,
(int) (after - before) / (CLOCKS_PER_SEC / 1000));
fclose (infile);
printf ("Processed %d frames.\n",frame_cnt-1);
if (vpx_codec_destroy(&codec))
die_codec (&codec, "Failed to destroy codec");

View File

@@ -1,258 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; These functions are only valid when:
; x_step_q4 == 16
; w%4 == 0
; h%4 == 0
; taps == 8
; VP9_FILTER_WEIGHT == 128
; VP9_FILTER_SHIFT == 7
EXPORT |vp9_convolve8_avg_horiz_neon|
EXPORT |vp9_convolve8_avg_vert_neon|
IMPORT |vp9_convolve8_avg_horiz_c|
IMPORT |vp9_convolve8_avg_vert_c|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Multiply and accumulate by q0
MACRO
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
vmull.s16 $dst, $src0, d0[0]
vmlal.s16 $dst, $src1, d0[1]
vmlal.s16 $dst, $src2, d0[2]
vmlal.s16 $dst, $src3, d0[3]
vmlal.s16 $dst, $src4, d1[0]
vmlal.s16 $dst, $src5, d1[1]
vmlal.s16 $dst, $src6, d1[2]
vmlal.s16 $dst, $src7, d1[3]
MEND
; r0 const uint8_t *src
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
; sp[]const int16_t *filter_x
; sp[]int x_step_q4
; sp[]const int16_t *filter_y ; unused
; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
|vp9_convolve8_avg_horiz_neon| PROC
ldr r12, [sp, #4] ; x_step_q4
cmp r12, #16
bne vp9_convolve8_avg_horiz_c
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
ldr r5, [sp, #32] ; filter_x
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
vld1.s16 {q0}, [r5] ; filter_x
add r8, r1, r1, lsl #1 ; src_stride * 3
add r8, r8, #4 ; src_stride * 3 + 4
rsb r8, r8, #0 ; reset for src
add r4, r3, r3, lsl #1 ; dst_stride * 3
sub r4, r4, #4 ; dst_stride * 3 - 4
rsb r4, r4, #0 ; reset for dst
sub r9, r1, #8 ; post increment for src load
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
mov r10, r6 ; w loop counter
loop_horiz
vld1.8 {d24}, [r0]!
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
vld1.8 {d25}, [r0]!
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
vld1.8 {d26}, [r0]!
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
vld1.8 {d27}, [r0]!
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
vtrn.16 q12, q13
vtrn.8 d24, d25
vtrn.8 d26, d27
; extract to s16
vmovl.u8 q8, d24
vmovl.u8 q9, d25
vmovl.u8 q10, d26
vmovl.u8 q11, d27
vtrn.32 d28, d29 ; only the first half is populated
vmovl.u8 q12, d28
vmovl.u8 q13, d30
; slightly out of order load to match the existing data
vld1.u32 {d6[0]}, [r2], r3
vld1.u32 {d7[0]}, [r2], r3
vld1.u32 {d6[1]}, [r2], r3
vld1.u32 {d7[1]}, [r2], r3
sub r2, r2, r3, lsl #2 ; reset for store
; src[] * filter_x
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
vtrn.32 d2, d3
vtrn.8 d2, d3
; average the new value and the dst value
vrhadd.u8 q1, q1, q3
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d3[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
vst1.u32 {d3[1]}, [r2], r4
subs r6, r6, #4 ; w -= 4
bgt loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r1 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
bgt loop_horiz
pop {r4-r10, pc}
ENDP
|vp9_convolve8_avg_vert_neon| PROC
ldr r12, [sp, #12]
cmp r12, #16
bne vp9_convolve8_avg_vert_c
push {r4-r10, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
ldr r7, [sp, #40] ; filter_y
ldr r8, [sp, #48] ; w
ldr r9, [sp, #52] ; h
vld1.s16 {q0}, [r7] ; filter_y
mov r5, r1, lsl #1 ; src_stride * 2
add r5, r5, r1, lsl #3 ; src_stride * 10
sub r5, r5, #4 ; src_stride * 10 + 4
rsb r5, r5, #0 ; reset for src
add r6, r3, r3, lsl #1 ; dst_stride * 3
sub r6, r6, #4 ; dst_stride * 3 - 4
rsb r6, r6, #0 ; reset for dst
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
mov r10, r8 ; w loop counter
loop_vert
; always process a 4x4 block at a time
vld1.u32 {d16[0]}, [r0], r1
vld1.u32 {d16[1]}, [r0], r1
vld1.u32 {d18[0]}, [r0], r1
vld1.u32 {d18[1]}, [r0], r1
vld1.u32 {d20[0]}, [r0], r1
vld1.u32 {d20[1]}, [r0], r1
vld1.u32 {d22[0]}, [r0], r1
vld1.u32 {d22[1]}, [r0], r1
vld1.u32 {d24[0]}, [r0], r1
vld1.u32 {d24[1]}, [r0], r1
vld1.u32 {d26[0]}, [r0], r5
; extract to s16
vmovl.u8 q8, d16
vmovl.u8 q9, d18
vmovl.u8 q10, d20
vmovl.u8 q11, d22
vmovl.u8 q12, d24
vmovl.u8 q13, d26
vld1.u32 {d6[0]}, [r2], r3
vld1.u32 {d6[1]}, [r2], r3
vld1.u32 {d7[0]}, [r2], r3
vld1.u32 {d7[1]}, [r2], r3
sub r2, r2, r3, lsl #2 ; reset for store
; src[] * filter_y
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; average the new value and the dst value
vrhadd.u8 q1, q1, q3
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
vst1.u32 {d3[0]}, [r2], r3
vst1.u32 {d3[1]}, [r2], r6
subs r8, r8, #4 ; w -= 4
bgt loop_vert
; outer loop
mov r8, r10 ; restore w counter
add r0, r0, r7 ; src += 4 * src_stride - w
add r2, r2, r12 ; dst += 4 * dst_stride - w
subs r9, r9, #4 ; h -= 4
bgt loop_vert
pop {r4-r10, pc}
ENDP
END

View File

@@ -1,237 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
; These functions are only valid when:
; x_step_q4 == 16
; w%4 == 0
; h%4 == 0
; taps == 8
; VP9_FILTER_WEIGHT == 128
; VP9_FILTER_SHIFT == 7
EXPORT |vp9_convolve8_horiz_neon|
EXPORT |vp9_convolve8_vert_neon|
IMPORT |vp9_convolve8_horiz_c|
IMPORT |vp9_convolve8_vert_c|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Multiply and accumulate by q0
MACRO
MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
vmull.s16 $dst, $src0, d0[0]
vmlal.s16 $dst, $src1, d0[1]
vmlal.s16 $dst, $src2, d0[2]
vmlal.s16 $dst, $src3, d0[3]
vmlal.s16 $dst, $src4, d1[0]
vmlal.s16 $dst, $src5, d1[1]
vmlal.s16 $dst, $src6, d1[2]
vmlal.s16 $dst, $src7, d1[3]
MEND
; r0 const uint8_t *src
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
; sp[]const int16_t *filter_x
; sp[]int x_step_q4
; sp[]const int16_t *filter_y ; unused
; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
|vp9_convolve8_horiz_neon| PROC
ldr r12, [sp, #4] ; x_step_q4
cmp r12, #16
bne vp9_convolve8_horiz_c
push {r4-r10, lr}
sub r0, r0, #3 ; adjust for taps
ldr r5, [sp, #32] ; filter_x
ldr r6, [sp, #48] ; w
ldr r7, [sp, #52] ; h
vld1.s16 {q0}, [r5] ; filter_x
add r8, r1, r1, lsl #1 ; src_stride * 3
add r8, r8, #4 ; src_stride * 3 + 4
rsb r8, r8, #0 ; reset for src
add r4, r3, r3, lsl #1 ; dst_stride * 3
sub r4, r4, #4 ; dst_stride * 3 - 4
rsb r4, r4, #0 ; reset for dst
sub r9, r1, #8 ; post increment for src load
rsb r1, r6, r1, lsl #2 ; reset src for outer loop
rsb r12, r6, r3, lsl #2 ; reset dst for outer loop
mov r10, r6 ; w loop counter
loop_horiz
vld1.8 {d24}, [r0]!
vld3.u8 {d28[0], d29[0], d30[0]}, [r0], r9
vld1.8 {d25}, [r0]!
vld3.u8 {d28[1], d29[1], d30[1]}, [r0], r9
vld1.8 {d26}, [r0]!
vld3.u8 {d28[2], d29[2], d30[2]}, [r0], r9
vld1.8 {d27}, [r0]!
vld3.u8 {d28[3], d29[3], d30[3]}, [r0], r8
vtrn.16 q12, q13
vtrn.8 d24, d25
vtrn.8 d26, d27
; extract to s16
vmovl.u8 q8, d24
vmovl.u8 q9, d25
vmovl.u8 q10, d26
vmovl.u8 q11, d27
vtrn.32 d28, d29 ; only the first half is populated
vmovl.u8 q12, d28
vmovl.u8 q13, d30
; src[] * filter_x
MULTIPLY_BY_Q0 q1, d16, d18, d20, d22, d17, d19, d21, d23
MULTIPLY_BY_Q0 q2, d18, d20, d22, d17, d19, d21, d23, d24
MULTIPLY_BY_Q0 q14, d20, d22, d17, d19, d21, d23, d24, d25
MULTIPLY_BY_Q0 q15, d22, d17, d19, d21, d23, d24, d25, d26
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
; transpose
vtrn.16 d2, d3
vtrn.32 d2, d3
vtrn.8 d2, d3
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d3[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
vst1.u32 {d3[1]}, [r2], r4
subs r6, r6, #4 ; w -= 4
bgt loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r1 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
bgt loop_horiz
pop {r4-r10, pc}
ENDP
|vp9_convolve8_vert_neon| PROC
ldr r12, [sp, #12]
cmp r12, #16
bne vp9_convolve8_vert_c
push {r4-r10, lr}
; adjust for taps
sub r0, r0, r1
sub r0, r0, r1, lsl #1
ldr r7, [sp, #40] ; filter_y
ldr r8, [sp, #48] ; w
ldr r9, [sp, #52] ; h
vld1.s16 {q0}, [r7] ; filter_y
mov r5, r1, lsl #1 ; src_stride * 2
add r5, r5, r1, lsl #3 ; src_stride * 10
sub r5, r5, #4 ; src_stride * 10 + 4
rsb r5, r5, #0 ; reset for src
add r6, r3, r3, lsl #1 ; dst_stride * 3
sub r6, r6, #4 ; dst_stride * 3 - 4
rsb r6, r6, #0 ; reset for dst
rsb r7, r8, r1, lsl #2 ; reset src for outer loop
rsb r12, r8, r3, lsl #2 ; reset dst for outer loop
mov r10, r8 ; w loop counter
loop_vert
; always process a 4x4 block at a time
vld1.u32 {d16[0]}, [r0], r1
vld1.u32 {d16[1]}, [r0], r1
vld1.u32 {d18[0]}, [r0], r1
vld1.u32 {d18[1]}, [r0], r1
vld1.u32 {d20[0]}, [r0], r1
vld1.u32 {d20[1]}, [r0], r1
vld1.u32 {d22[0]}, [r0], r1
vld1.u32 {d22[1]}, [r0], r1
vld1.u32 {d24[0]}, [r0], r1
vld1.u32 {d24[1]}, [r0], r1
vld1.u32 {d26[0]}, [r0], r5
; extract to s16
vmovl.u8 q8, d16
vmovl.u8 q9, d18
vmovl.u8 q10, d20
vmovl.u8 q11, d22
vmovl.u8 q12, d24
vmovl.u8 q13, d26
; src[] * filter_y
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d23
MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d23, d24
MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d23, d24, d25
MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d23, d24, d25, d26
; += 64 >> 7
vqrshrun.s32 d2, q1, #7
vqrshrun.s32 d3, q2, #7
vqrshrun.s32 d4, q14, #7
vqrshrun.s32 d5, q15, #7
; saturate
vqmovn.u16 d2, q1
vqmovn.u16 d3, q2
vst1.u32 {d2[0]}, [r2], r3
vst1.u32 {d2[1]}, [r2], r3
vst1.u32 {d3[0]}, [r2], r3
vst1.u32 {d3[1]}, [r2], r6
subs r8, r8, #4 ; w -= 4
bgt loop_vert
; outer loop
mov r8, r10 ; restore w counter
add r0, r0, r7 ; src += 4 * src_stride - w
add r2, r2, r12 ; dst += 4 * dst_stride - w
subs r9, r9, #4 ; h -= 4
bgt loop_vert
pop {r4-r10, pc}
ENDP
END

View File

@@ -1,77 +0,0 @@
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
*/
uint8_t temp[64 * 72];
// Account for the vertical phase needing 3 lines prior and 4 lines post
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* Filter starting 3 lines back. The neon implementation will ignore the
* given height and filter a multiple of 4 lines. Since this goes in to
* the temp buffer which has lots of extra room and is subsequently discarded
* this is safe if somewhat less than ideal.
*/
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height);
/* Step into the temp buffer 3 lines to get the actual frame data */
vp9_convolve8_vert_neon(temp + 64 * 3, 64,
dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h);
}
void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
uint8_t temp[64 * 72];
int intermediate_height = h + 7;
if (x_step_q4 != 16 || y_step_q4 != 16)
return vp9_convolve8_avg_c(src, src_stride,
dst, dst_stride,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
*/
vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height);
vp9_convolve8_avg_vert_neon(temp + 64 * 3,
64, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h);
}
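
The wrappers above are the standard separable two-pass scheme: with 8 taps the vertical stage needs 3 rows above and 4 rows below each output row, hence the h + 7 intermediate rows. A scalar sketch under the same constraints (w <= 64, h <= 64, fixed phase, 7-bit filter weights); the names and clipping helper are illustrative:

#include <stdint.h>

#define TAPS 8
#define TEMP_STRIDE 64

static uint8_t clip8(int v) { return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v); }

static void convolve8_two_pass_sketch(const uint8_t *src, int src_stride,
                                      uint8_t *dst, int dst_stride,
                                      const int16_t *fx, const int16_t *fy,
                                      int w, int h) {
  uint8_t temp[64 * 72];              /* w <= 64 wide, h + 7 <= 71 rows */
  const int ih = h + 7;               /* 3 lead-in rows, 4 tail rows */
  const uint8_t *s = src - 3 * src_stride - 3;  /* back up 3 rows and cols */
  int x, y, k;
  for (y = 0; y < ih; ++y)            /* pass 1: horizontal into temp */
    for (x = 0; x < w; ++x) {
      int sum = 0;
      for (k = 0; k < TAPS; ++k)
        sum += s[y * src_stride + x + k] * fx[k];
      temp[y * TEMP_STRIDE + x] = clip8((sum + 64) >> 7);
    }
  for (y = 0; y < h; ++y)             /* pass 2: vertical, skip 3 lead-in rows */
    for (x = 0; x < w; ++x) {
      int sum = 0;
      for (k = 0; k < TAPS; ++k)
        sum += temp[(y + k) * TEMP_STRIDE + x] * fy[k];
      dst[y * dst_stride + x] = clip8((sum + 64) >> 7);
    }
}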

View File

@@ -1,69 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;
EXPORT |vp9_dc_only_idct_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp9_dc_only_idct_add_neon(int input_dc, uint8_t *pred_ptr,
; uint8_t *dst_ptr, int pitch, int stride)
;
; r0 int input_dc
; r1 uint8_t *pred_ptr
; r2 uint8_t *dst_ptr
; r3 int pitch
; sp int stride
|vp9_dc_only_idct_add_neon| PROC
; generate cospi_16_64 = 11585
mov r12, #0x2d00
add r12, #0x41
; dct_const_round_shift(input_dc * cospi_16_64)
mul r0, r0, r12 ; input_dc * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; dct_const_round_shift(out * cospi_16_64)
mul r0, r0, r12 ; out * cospi_16_64
add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
asr r0, r0, #14 ; >> DCT_CONST_BITS
; ROUND_POWER_OF_TWO(out, 4)
add r0, r0, #8 ; + (1 <<((4) - 1))
asr r0, r0, #4 ; >> 4
vdup.16 q0, r0; ; duplicate a1
ldr r12, [sp] ; load stride
vld1.32 {d2[0]}, [r1], r3
vld1.32 {d2[1]}, [r1], r3
vld1.32 {d4[0]}, [r1], r3
vld1.32 {d4[1]}, [r1]
vaddw.u8 q1, q0, d2 ; a1 + pred_ptr[c]
vaddw.u8 q2, q0, d4
vqmovun.s16 d2, q1 ; clip_pixel
vqmovun.s16 d4, q2
vst1.32 {d2[0]}, [r2], r12
vst1.32 {d2[1]}, [r2], r12
vst1.32 {d4[0]}, [r2], r12
vst1.32 {d4[1]}, [r2]
bx lr
ENDP ; |vp9_dc_only_idct_add_neon|
END
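
The deleted NEON routine above has a compact scalar equivalent: the DC coefficient passes through cospi_16_64 (11585 ~= round(2^14 * cos(pi/4))) with rounding twice, is scaled down to the 4x4 output range, then added to every predictor pixel with clamping. A hedged C reference:

#include <stdint.h>

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(v, n) (((v) + (1 << ((n) - 1))) >> (n))

static uint8_t clip_pixel(int v) {
  return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v);
}

static void dc_only_idct_add_sketch(int input_dc, const uint8_t *pred,
                                    int pitch, uint8_t *dst, int stride) {
  const int cospi_16_64 = 11585;   /* 0x2d41, built in the asm as 0x2d00 + 0x41 */
  int out, a1, r, c;
  out = ROUND_POWER_OF_TWO(input_dc * cospi_16_64, DCT_CONST_BITS);
  out = ROUND_POWER_OF_TWO(out * cospi_16_64, DCT_CONST_BITS);
  a1 = ROUND_POWER_OF_TWO(out, 4); /* final inverse-transform scaling */
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      dst[r * stride + c] = clip_pixel(pred[r * pitch + c] + a1);
}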

View File

@@ -1,708 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_loop_filter_horizontal_edge_neon|
EXPORT |vp9_loop_filter_vertical_edge_neon|
EXPORT |vp9_mbloop_filter_horizontal_edge_neon|
EXPORT |vp9_mbloop_filter_vertical_edge_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_loop_filter_horizontal_edge_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #8] ; load count
ldr r2, [sp, #4] ; load thresh
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
count_lf_h_loop
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r3, r2, r1, lsr #1 ; set to 3 lines down
vld1.u8 {d3}, [r2@64], r1 ; p3
vld1.u8 {d4}, [r3@64], r1 ; p2
vld1.u8 {d5}, [r2@64], r1 ; p1
vld1.u8 {d6}, [r3@64], r1 ; p0
vld1.u8 {d7}, [r2@64], r1 ; q0
vld1.u8 {d16}, [r3@64], r1 ; q1
vld1.u8 {d17}, [r2@64] ; q2
vld1.u8 {d18}, [r3@64] ; q3
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
bl vp9_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
vst1.u8 {d6}, [r2@64], r1 ; store oq0
vst1.u8 {d7}, [r3@64], r1 ; store oq1
add r0, r0, #8
subs r12, r12, #1
bne count_lf_h_loop
end_vp9_lf_h_edge
pop {pc}
ENDP ; |vp9_loop_filter_horizontal_edge_neon|
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_loop_filter_vertical_edge_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #8] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_lf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
vld1.u8 {d6}, [r2], r1
vld1.u8 {d7}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d18}, [r2]
; transpose to 8x8 matrix
vtrn.32 d3, d7
vtrn.32 d4, d16
vtrn.32 d5, d17
vtrn.32 d6, d18
vtrn.16 d3, d5
vtrn.16 d4, d6
vtrn.16 d7, d17
vtrn.16 d16, d18
vtrn.8 d3, d4
vtrn.8 d5, d6
vtrn.8 d7, d16
vtrn.8 d17, d18
bl vp9_loop_filter_neon
sub r0, r0, #2
;store op1, op0, oq0, oq1
vst4.8 {d4[0], d5[0], d6[0], d7[0]}, [r0], r1
vst4.8 {d4[1], d5[1], d6[1], d7[1]}, [r0], r1
vst4.8 {d4[2], d5[2], d6[2], d7[2]}, [r0], r1
vst4.8 {d4[3], d5[3], d6[3], d7[3]}, [r0], r1
vst4.8 {d4[4], d5[4], d6[4], d7[4]}, [r0], r1
vst4.8 {d4[5], d5[5], d6[5], d7[5]}, [r0], r1
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
end_vp9_lf_v_edge
pop {pc}
ENDP ; |vp9_loop_filter_vertical_edge_neon|
; void vp9_loop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
; d2 thresh
; d3 p3
; d4 p2
; d5 p1
; d6 p0
; d7 q0
; d16 q1
; d17 q2
; d18 q3
;
; Outputs:
; d4 op1
; d5 op0
; d6 oq0
; d7 oq1
|vp9_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
vabd.u8 d3, d17, d16 ; m5 = abs(q2 - q1)
vabd.u8 d4, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
vabd.u8 d17, d6, d7 ; abs(p0 - q0)
vmax.u8 d3, d3, d4 ; m3 = max(m5, m6)
vmov.u8 d18, #0x80
vmax.u8 d23, d19, d20 ; m1 = max(m1, m2)
; hevmask
vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d2 ; (abs(q1 - q0) > thresh)*-1
vmax.u8 d23, d23, d3 ; m1 = max(m1, m3)
vabd.u8 d28, d5, d16 ; a = abs(p1 - q1)
vqadd.u8 d17, d17, d17 ; b = abs(p0 - q0) * 2
veor d7, d7, d18 ; qs0
vcge.u8 d23, d1, d23 ; (m1 <= limit) * -1
; filter() function
; convert to signed
vshr.u8 d28, d28, #1 ; a = a / 2
veor d6, d6, d18 ; ps0
veor d5, d5, d18 ; ps1
vqadd.u8 d17, d17, d28 ; a = b + a
veor d16, d16, d18 ; qs1
vmov.u8 d19, #3
vsub.s8 d28, d7, d6 ; ( qs0 - ps0)
vcge.u8 d17, d0, d17 ; (a <= blimit) * -1
vqsub.s8 d27, d5, d16 ; filter = clamp(ps1-qs1)
vorr d22, d21, d22 ; hevmask
vmull.s8 q12, d28, d19 ; 3 * ( qs0 - ps0)
vand d27, d27, d22 ; filter &= hev
vand d23, d23, d17 ; filter_mask
vaddw.s8 q12, q12, d27 ; filter + 3 * (qs0 - ps0)
vmov.u8 d17, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d27, q12
vand d27, d27, d23 ; filter &= mask
vqadd.s8 d28, d27, d19 ; filter2 = clamp(filter+3)
vqadd.s8 d27, d27, d17 ; filter1 = clamp(filter+4)
vshr.s8 d28, d28, #3 ; filter2 >>= 3
vshr.s8 d27, d27, #3 ; filter1 >>= 3
vqadd.s8 d19, d6, d28 ; u = clamp(ps0 + filter2)
vqsub.s8 d26, d7, d27 ; u = clamp(qs0 - filter1)
; outer tap adjustments
vrshr.s8 d27, d27, #1 ; filter = ++filter1 >> 1
veor d6, d26, d18 ; *oq0 = u^0x80
vbic d27, d27, d22 ; filter &= ~hev
vqadd.s8 d21, d5, d27 ; u = clamp(ps1 + filter)
vqsub.s8 d20, d16, d27 ; u = clamp(qs1 - filter)
veor d5, d19, d18 ; *op0 = u^0x80
veor d4, d21, d18 ; *op1 = u^0x80
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_loop_filter_neon|
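The comments above correspond line-for-line to VP9's scalar 4-tap filter. A minimal per-pixel C sketch of the same math, assuming descriptive names (the NEON version evaluates eight such lanes at once, with mask and hev arriving as 0/-1 compare masks):

#include <stdint.h>

static int8_t signed_char_clamp(int t) {
  return (int8_t)(t < -128 ? -128 : (t > 127 ? 127 : t));
}

/* 4-tap loop filter for one pixel position across the edge. op1/op0 are the
 * two pixels on one side of the edge, oq0/oq1 the two on the other side. */
static void filter4(int8_t mask, int8_t hev,
                    uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
  /* flip the sign bit to work in signed space (the veor with 0x80) */
  int8_t ps1 = (int8_t)(*op1 ^ 0x80), ps0 = (int8_t)(*op0 ^ 0x80);
  int8_t qs0 = (int8_t)(*oq0 ^ 0x80), qs1 = (int8_t)(*oq1 ^ 0x80);
  int8_t filter, filter1, filter2;

  filter = (int8_t)(signed_char_clamp(ps1 - qs1) & hev);
  filter = (int8_t)(signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask);

  filter1 = (int8_t)(signed_char_clamp(filter + 4) >> 3);
  filter2 = (int8_t)(signed_char_clamp(filter + 3) >> 3);

  *oq0 = (uint8_t)(signed_char_clamp(qs0 - filter1) ^ 0x80);
  *op0 = (uint8_t)(signed_char_clamp(ps0 + filter2) ^ 0x80);

  /* outer taps take half of filter1, rounded, only where hev is off */
  filter = (int8_t)(((filter1 + 1) >> 1) & ~hev);
  *op1 = (uint8_t)(signed_char_clamp(ps1 + filter) ^ 0x80);
  *oq1 = (uint8_t)(signed_char_clamp(qs1 - filter) ^ 0x80);
}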
; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_mbloop_filter_horizontal_edge_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #16] ; load count
ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vp9_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
count_mblf_h_loop
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r2, r3, r1, lsr #1 ; set to 3 lines down
vld1.u8 {d3}, [r3@64], r1 ; p3
vld1.u8 {d4}, [r2@64], r1 ; p2
vld1.u8 {d5}, [r3@64], r1 ; p1
vld1.u8 {d6}, [r2@64], r1 ; p0
vld1.u8 {d7}, [r3@64], r1 ; q0
vld1.u8 {d16}, [r2@64], r1 ; q1
vld1.u8 {d17}, [r3@64] ; q2
vld1.u8 {d18}, [r2@64], r1 ; q3
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
bl vp9_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
vst1.u8 {d2}, [r2@64], r1 ; store op0
vst1.u8 {d3}, [r3@64], r1 ; store oq0
vst1.u8 {d4}, [r2@64], r1 ; store oq1
vst1.u8 {d5}, [r3@64], r1 ; store oq2
add r0, r0, #8
subs r12, r12, #1
bne count_mblf_h_loop
end_vp9_mblf_h_edge
pop {r4-r5, pc}
ENDP ; |vp9_mbloop_filter_horizontal_edge_neon|
; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
;
; r0 uint8_t *s,
; r1 int pitch,
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_mbloop_filter_vertical_edge_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #16] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vp9_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_mblf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
vld1.u8 {d6}, [r2], r1
vld1.u8 {d7}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d18}, [r2]
; transpose to 8x8 matrix
vtrn.32 d3, d7
vtrn.32 d4, d16
vtrn.32 d5, d17
vtrn.32 d6, d18
vtrn.16 d3, d5
vtrn.16 d4, d6
vtrn.16 d7, d17
vtrn.16 d16, d18
vtrn.8 d3, d4
vtrn.8 d5, d6
vtrn.8 d7, d16
vtrn.8 d17, d18
sub r2, r0, #3
add r3, r0, #1
bl vp9_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r2], r1
vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r2], r1
vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r2], r1
vst4.8 {d0[4], d1[4], d2[4], d3[4]}, [r2], r1
vst4.8 {d0[5], d1[5], d2[5], d3[5]}, [r2], r1
vst4.8 {d0[6], d1[6], d2[6], d3[6]}, [r2], r1
vst4.8 {d0[7], d1[7], d2[7], d3[7]}, [r2]
;store oq1, oq2
vst2.8 {d4[0], d5[0]}, [r3], r1
vst2.8 {d4[1], d5[1]}, [r3], r1
vst2.8 {d4[2], d5[2]}, [r3], r1
vst2.8 {d4[3], d5[3]}, [r3], r1
vst2.8 {d4[4], d5[4]}, [r3], r1
vst2.8 {d4[5], d5[5]}, [r3], r1
vst2.8 {d4[6], d5[6]}, [r3], r1
vst2.8 {d4[7], d5[7]}, [r3]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
end_vp9_mblf_v_edge
pop {r4-r5, pc}
ENDP ; |vp9_mbloop_filter_vertical_edge_neon|
; void vp9_mbloop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
; Inputs:
; r0-r3, r12 PRESERVE
; d0 blimit
; d1 limit
; d2 thresh
; d3 p3
; d4 p2
; d5 p1
; d6 p0
; d7 q0
; d16 q1
; d17 q2
; d18 q3
;
; Outputs:
; d0 op2
; d1 op1
; d2 op0
; d3 oq0
; d4 oq1
; d5 oq2
|vp9_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0)
vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0)
vabd.u8 d23, d17, d16 ; m5 = abs(q2 - q1)
vabd.u8 d24, d18, d17 ; m6 = abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; m1 = max(m1, m2)
vmax.u8 d20, d21, d22 ; m2 = max(m3, m4)
vabd.u8 d25, d6, d4 ; m7 = abs(p0 - p2)
vmax.u8 d23, d23, d24 ; m3 = max(m5, m6)
vabd.u8 d26, d7, d17 ; m8 = abs(q0 - q2)
vmax.u8 d19, d19, d20
vabd.u8 d24, d6, d7 ; m9 = abs(p0 - q0)
vabd.u8 d27, d3, d6 ; m10 = abs(p3 - p0)
vabd.u8 d28, d18, d7 ; m11 = abs(q3 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d5, d16 ; a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2
; mask: largest abs difference <= limit
vcge.u8 d19, d1, d19
; only compare the largest value to thresh
vmax.u8 d25, d25, d26 ; m4 = max(m7, m8)
vmax.u8 d26, d27, d28 ; m5 = max(m10, m11)
vshr.u8 d23, d23, #1 ; a = a / 2
vmax.u8 d25, d25, d26 ; m4 = max(m4, m5)
vqadd.u8 d24, d24, d23 ; a = b + a
vmax.u8 d20, d20, d25 ; m2 = max(m2, m4)
vmov.u8 d23, #1
vcge.u8 d24, d0, d24 ; (a <= blimit) * -1
vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1
vcge.u8 d20, d23, d20 ; flat
vand d19, d19, d24 ; mask
vcgt.u8 d23, d22, d2 ; (abs(q1 - q0) > thresh)*-1
vand d20, d20, d19 ; flat & mask
vmov.u8 d22, #0x80
vorr d23, d21, d23 ; hev
; This instruction will truncate the "flat & mask" masks down to 4 bits
; each to fit into one 32-bit ARM register. The values are stored in
; q10.64[0].
vshrn.u16 d30, q10, #4
vmov.u32 r4, d30[0] ; flat & mask 4bits
adds r5, r4, #1 ; Check for all 1's
; If mask and flat are 1's for all vectors, then we only need to execute
; the power branch for all vectors.
beq power_branch_only
cmp r4, #0 ; Check for 0, set flag for later
; mbfilter() function
; filter() function
; convert to signed
veor d21, d7, d22 ; qs0
veor d24, d6, d22 ; ps0
veor d25, d5, d22 ; ps1
veor d26, d16, d22 ; qs1
vmov.u8 d27, #3
vsub.s8 d28, d21, d24 ; ( qs0 - ps0)
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
vand d29, d29, d23 ; filter &= hev
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 ; filter &= mask
vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 ; filter2 >>= 3
vshr.s8 d29, d29, #3 ; filter1 >>= 3
vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
vqsub.s8 d21, d21, d29 ; oq0 = clamp(qs0 - filter1)
; outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d23 ; filter &= ~hev
vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
; If mask and flat are 0's for all vectors, then we only need to execute
; the filter branch for all vectors.
beq filter_branch_only
; If mask and flat are mixed then we must perform both branches and
; combine the data.
veor d24, d24, d22 ; *f_op0 = u^0x80
veor d21, d21, d22 ; *f_oq0 = u^0x80
veor d25, d25, d22 ; *f_op1 = u^0x80
veor d26, d26, d22 ; *f_oq1 = u^0x80
; At this point we have already executed the filter branch. The filter
; branch does not set op2 or oq2, so use p2 and q2. Execute the power
; branch and combine the data.
vmov.u8 d23, #2
vaddl.u8 q14, d6, d7 ; r_op2 = p0 + q0
vmlal.u8 q14, d3, d27 ; r_op2 += p3 * 3
vmlal.u8 q14, d4, d23 ; r_op2 += p2 * 2
vbif d0, d4, d20 ; op2 |= p2 & ~(flat & mask)
vaddw.u8 q14, d5 ; r_op2 += p1
vbif d1, d25, d20 ; op1 |= f_op1 & ~(flat & mask)
vqrshrn.u16 d30, q14, #3 ; r_op2
vsubw.u8 q14, d3 ; r_op1 = r_op2 - p3
vsubw.u8 q14, d4 ; r_op1 -= p2
vaddw.u8 q14, d5 ; r_op1 += p1
vaddw.u8 q14, d16 ; r_op1 += q1
vbif d2, d24, d20 ; op0 |= f_op0 & ~(flat & mask)
vqrshrn.u16 d31, q14, #3 ; r_op1
vsubw.u8 q14, d3 ; r_op0 = r_op1 - p3
vsubw.u8 q14, d5 ; r_op0 -= p1
vaddw.u8 q14, d6 ; r_op0 += p0
vaddw.u8 q14, d17 ; r_op0 += q2
vbit d0, d30, d20 ; op2 |= r_op2 & (flat & mask)
vqrshrn.u16 d23, q14, #3 ; r_op0
vsubw.u8 q14, d3 ; r_oq0 = r_op0 - p3
vsubw.u8 q14, d6 ; r_oq0 -= p0
vaddw.u8 q14, d7 ; r_oq0 += q0
vbit d1, d31, d20 ; op1 |= r_op1 & (flat & mask)
vaddw.u8 q14, d18 ; oq0 += q3
vbit d2, d23, d20 ; op0 |= r_op0 & (flat & mask)
vqrshrn.u16 d22, q14, #3 ; r_oq0
vsubw.u8 q14, d4 ; r_oq1 = r_oq0 - p2
vsubw.u8 q14, d7 ; r_oq1 -= q0
vaddw.u8 q14, d16 ; r_oq1 += q1
vbif d3, d21, d20 ; oq0 |= f_oq0 & ~(flat & mask)
vaddw.u8 q14, d18 ; r_oq1 += q3
vbif d4, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask)
vqrshrn.u16 d6, q14, #3 ; r_oq1
vsubw.u8 q14, d5 ; r_oq2 = r_oq1 - p1
vsubw.u8 q14, d16 ; r_oq2 -= q1
vaddw.u8 q14, d17 ; r_oq2 += q2
vaddw.u8 q14, d18 ; r_oq2 += q3
vbif d5, d17, d20 ; oq2 |= q2 & ~(flat & mask)
vqrshrn.u16 d7, q14, #3 ; r_oq2
vbit d3, d22, d20 ; oq0 |= r_oq0 & (flat & mask)
vbit d4, d6, d20 ; oq1 |= r_oq1 & (flat & mask)
vbit d5, d7, d20 ; oq2 |= r_oq2 & (flat & mask)
bx lr
power_branch_only
vmov.u8 d27, #3
vmov.u8 d21, #2
vaddl.u8 q14, d6, d7 ; op2 = p0 + q0
vmlal.u8 q14, d3, d27 ; op2 += p3 * 3
vmlal.u8 q14, d4, d21 ; op2 += p2 * 2
vaddw.u8 q14, d5 ; op2 += p1
vqrshrn.u16 d0, q14, #3 ; op2
vsubw.u8 q14, d3 ; op1 = op2 - p3
vsubw.u8 q14, d4 ; op1 -= p2
vaddw.u8 q14, d5 ; op1 += p1
vaddw.u8 q14, d16 ; op1 += q1
vqrshrn.u16 d1, q14, #3 ; op1
vsubw.u8 q14, d3 ; op0 = op1 - p3
vsubw.u8 q14, d5 ; op0 -= p1
vaddw.u8 q14, d6 ; op0 += p0
vaddw.u8 q14, d17 ; op0 += q2
vqrshrn.u16 d2, q14, #3 ; op0
vsubw.u8 q14, d3 ; oq0 = op0 - p3
vsubw.u8 q14, d6 ; oq0 -= p0
vaddw.u8 q14, d7 ; oq0 += q0
vaddw.u8 q14, d18 ; oq0 += q3
vqrshrn.u16 d3, q14, #3 ; oq0
vsubw.u8 q14, d4 ; oq1 = oq0 - p2
vsubw.u8 q14, d7 ; oq1 -= q0
vaddw.u8 q14, d16 ; oq1 += q1
vaddw.u8 q14, d18 ; oq1 += q3
vqrshrn.u16 d4, q14, #3 ; oq1
vsubw.u8 q14, d5 ; oq2 = oq1 - p1
vsubw.u8 q14, d16 ; oq2 -= q1
vaddw.u8 q14, d17 ; oq2 += q2
vaddw.u8 q14, d18 ; oq2 += q3
vqrshrn.u16 d5, q14, #3 ; oq2
bx lr
filter_branch_only
; TODO(fgalligan): See if we can rearrange registers so we do not need to
; do the two vswp instructions.
vswp d0, d4 ; op2
vswp d5, d17 ; oq2
veor d2, d24, d22 ; *op0 = u^0x80
veor d3, d21, d22 ; *oq0 = u^0x80
veor d1, d25, d22 ; *op1 = u^0x80
veor d4, d26, d22 ; *oq1 = u^0x80
bx lr
ENDP ; |vp9_mbloop_filter_neon|
END
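Where flat && mask holds, the power branch above replaces the 4-tap result with 8-tap rounded averages, maintained as a running sum (each vsubw/vaddw pair turns one output's sum into the next); elsewhere the vbif/vbit selects keep the 4-tap output. A per-pixel C sketch of those averages, with descriptive names assumed:

#include <stdint.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* 8-tap averages applied across the edge; matches the vqrshrn.u16 #3 steps. */
static void filter8(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                    uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3,
                    uint8_t *op2, uint8_t *op1, uint8_t *op0,
                    uint8_t *oq0, uint8_t *oq1, uint8_t *oq2) {
  *op2 = ROUND_POWER_OF_TWO(3 * p3 + 2 * p2 + p1 + p0 + q0, 3);
  *op1 = ROUND_POWER_OF_TWO(2 * p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
  *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
  *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
  *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3, 3);
  *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + 3 * q3, 3);
}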


@@ -1,618 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vp9_mb_lpf_horizontal_edge_w_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
ldr r12, [sp, #92] ; load count
h_count
vld1.8 {d16[]}, [r2] ; load *blimit
vld1.8 {d17[]}, [r3] ; load *limit
vld1.8 {d18[]}, [r4] ; load *thresh
sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines
vld1.u8 {d0}, [r8@64], r1 ; p7
vld1.u8 {d1}, [r8@64], r1 ; p6
vld1.u8 {d2}, [r8@64], r1 ; p5
vld1.u8 {d3}, [r8@64], r1 ; p4
vld1.u8 {d4}, [r8@64], r1 ; p3
vld1.u8 {d5}, [r8@64], r1 ; p2
vld1.u8 {d6}, [r8@64], r1 ; p1
vld1.u8 {d7}, [r8@64], r1 ; p0
vld1.u8 {d8}, [r8@64], r1 ; q0
vld1.u8 {d9}, [r8@64], r1 ; q1
vld1.u8 {d10}, [r8@64], r1 ; q2
vld1.u8 {d11}, [r8@64], r1 ; q3
vld1.u8 {d12}, [r8@64], r1 ; q4
vld1.u8 {d13}, [r8@64], r1 ; q5
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
bl vp9_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
; flat && mask were not set for any of the channels. Just store the values
; from filter.
sub r8, r0, r1, lsl #1
vst1.u8 {d25}, [r8@64], r1 ; store op1
vst1.u8 {d24}, [r8@64], r1 ; store op0
vst1.u8 {d23}, [r8@64], r1 ; store oq0
vst1.u8 {d26}, [r8@64], r1 ; store oq1
b h_next
h_mbfilter
tst r7, #2
beq h_wide_mbfilter
; flat2 was not set for any of the channels. Just store the values from
; mbfilter.
sub r8, r0, r1, lsl #1
sub r8, r8, r1
vst1.u8 {d18}, [r8@64], r1 ; store op2
vst1.u8 {d19}, [r8@64], r1 ; store op1
vst1.u8 {d20}, [r8@64], r1 ; store op0
vst1.u8 {d21}, [r8@64], r1 ; store oq0
vst1.u8 {d22}, [r8@64], r1 ; store oq1
vst1.u8 {d23}, [r8@64], r1 ; store oq2
b h_next
h_wide_mbfilter
sub r8, r0, r1, lsl #3
add r8, r8, r1
vst1.u8 {d16}, [r8@64], r1 ; store op6
vst1.u8 {d24}, [r8@64], r1 ; store op5
vst1.u8 {d25}, [r8@64], r1 ; store op4
vst1.u8 {d26}, [r8@64], r1 ; store op3
vst1.u8 {d27}, [r8@64], r1 ; store op2
vst1.u8 {d18}, [r8@64], r1 ; store op1
vst1.u8 {d19}, [r8@64], r1 ; store op0
vst1.u8 {d20}, [r8@64], r1 ; store oq0
vst1.u8 {d21}, [r8@64], r1 ; store oq1
vst1.u8 {d22}, [r8@64], r1 ; store oq2
vst1.u8 {d23}, [r8@64], r1 ; store oq3
vst1.u8 {d1}, [r8@64], r1 ; store oq4
vst1.u8 {d2}, [r8@64], r1 ; store oq5
vst1.u8 {d3}, [r8@64], r1 ; store oq6
h_next
add r0, r0, #8
subs r12, r12, #1
bne h_count
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon|
; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
|vp9_mb_lpf_vertical_edge_w_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
vld1.8 {d16[]}, [r2] ; load *blimit
vld1.8 {d17[]}, [r3] ; load *limit
vld1.8 {d18[]}, [r4] ; load *thresh
sub r8, r0, #8
vld1.8 {d0}, [r8@64], r1
vld1.8 {d8}, [r0@64], r1
vld1.8 {d1}, [r8@64], r1
vld1.8 {d9}, [r0@64], r1
vld1.8 {d2}, [r8@64], r1
vld1.8 {d10}, [r0@64], r1
vld1.8 {d3}, [r8@64], r1
vld1.8 {d11}, [r0@64], r1
vld1.8 {d4}, [r8@64], r1
vld1.8 {d12}, [r0@64], r1
vld1.8 {d5}, [r8@64], r1
vld1.8 {d13}, [r0@64], r1
vld1.8 {d6}, [r8@64], r1
vld1.8 {d14}, [r0@64], r1
vld1.8 {d7}, [r8@64], r1
vld1.8 {d15}, [r0@64], r1
sub r0, r0, r1, lsl #3
vtrn.32 q0, q2
vtrn.32 q1, q3
vtrn.32 q4, q6
vtrn.32 q5, q7
vtrn.16 q0, q1
vtrn.16 q2, q3
vtrn.16 q4, q5
vtrn.16 q6, q7
vtrn.8 d0, d1
vtrn.8 d2, d3
vtrn.8 d4, d5
vtrn.8 d6, d7
vtrn.8 d8, d9
vtrn.8 d10, d11
vtrn.8 d12, d13
vtrn.8 d14, d15
bl vp9_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
; flat && mask were not set for any of the channels. Just store the values
; from filter.
sub r8, r0, #2
vswp d23, d25
vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
b v_end
v_mbfilter
tst r7, #2
beq v_wide_mbfilter
; flat2 was not set for any of the channels. Just store the values from
; mbfilter.
sub r8, r0, #3
vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1
vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1
vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
b v_end
v_wide_mbfilter
sub r8, r0, #8
vtrn.32 d0, d26
vtrn.32 d16, d27
vtrn.32 d24, d18
vtrn.32 d25, d19
vtrn.16 d0, d24
vtrn.16 d16, d25
vtrn.16 d26, d18
vtrn.16 d27, d19
vtrn.8 d0, d16
vtrn.8 d24, d25
vtrn.8 d26, d27
vtrn.8 d18, d19
vtrn.32 d20, d1
vtrn.32 d21, d2
vtrn.32 d22, d3
vtrn.32 d23, d15
vtrn.16 d20, d22
vtrn.16 d21, d23
vtrn.16 d1, d3
vtrn.16 d2, d15
vtrn.8 d20, d21
vtrn.8 d22, d23
vtrn.8 d1, d2
vtrn.8 d3, d15
vst1.8 {d0}, [r8@64], r1
vst1.8 {d20}, [r0@64], r1
vst1.8 {d16}, [r8@64], r1
vst1.8 {d21}, [r0@64], r1
vst1.8 {d24}, [r8@64], r1
vst1.8 {d22}, [r0@64], r1
vst1.8 {d25}, [r8@64], r1
vst1.8 {d23}, [r0@64], r1
vst1.8 {d26}, [r8@64], r1
vst1.8 {d1}, [r0@64], r1
vst1.8 {d27}, [r8@64], r1
vst1.8 {d2}, [r0@64], r1
vst1.8 {d18}, [r8@64], r1
vst1.8 {d3}, [r0@64], r1
vst1.8 {d19}, [r8@64], r1
vst1.8 {d15}, [r0@64], r1
v_end
vpop {d8-d15}
pop {r4-r8, pc}
ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
;
; r0-r3 PRESERVE
; d16 blimit
; d17 limit
; d18 thresh
; d0 p7
; d1 p6
; d2 p5
; d3 p4
; d4 p3
; d5 p2
; d6 p1
; d7 p0
; d8 q0
; d9 q1
; d10 q2
; d11 q3
; d12 q4
; d13 q5
; d14 q6
; d15 q7
|vp9_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
vabd.u8 d19, d4, d5 ; abs(p3 - p2)
vabd.u8 d20, d5, d6 ; abs(p2 - p1)
vabd.u8 d21, d6, d7 ; abs(p1 - p0)
vabd.u8 d22, d9, d8 ; abs(q1 - q0)
vabd.u8 d23, d10, d9 ; abs(q2 - q1)
vabd.u8 d24, d11, d10 ; abs(q3 - q2)
; only compare the largest value to limit
vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1))
vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0))
vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2))
vmax.u8 d19, d19, d20
vabd.u8 d24, d7, d8 ; abs(p0 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d6, d9 ; a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2
; mask: largest abs difference <= limit
vcge.u8 d19, d17, d19
; flatmask4
vabd.u8 d25, d7, d5 ; abs(p0 - p2)
vabd.u8 d26, d8, d10 ; abs(q0 - q2)
vabd.u8 d27, d4, d7 ; abs(p3 - p0)
vabd.u8 d28, d11, d8 ; abs(q3 - q0)
; only compare the largest value to thresh
vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2))
vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0))
vmax.u8 d25, d25, d26
vmax.u8 d20, d20, d25
vshr.u8 d23, d23, #1 ; a = a / 2
vqadd.u8 d24, d24, d23 ; a = b + a
vmov.u8 d30, #1
vcge.u8 d24, d16, d24 ; (a <= blimit) * -1
vcge.u8 d20, d30, d20 ; flat
vand d19, d19, d24 ; mask
; hevmask
vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1
vorr d21, d21, d22 ; hev
vand d16, d20, d19 ; flat && mask
vmov r5, r6, d16
orrs r5, r5, r6 ; Check for 0
orreq r7, r7, #1 ; Only do filter branch
; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
vabd.u8 d22, d3, d7 ; abs(p4 - p0)
vabd.u8 d23, d12, d8 ; abs(q4 - q0)
vabd.u8 d24, d7, d2 ; abs(p0 - p5)
vabd.u8 d25, d8, d13 ; abs(q0 - q5)
vabd.u8 d26, d1, d7 ; abs(p6 - p0)
vabd.u8 d27, d14, d8 ; abs(q6 - q0)
vabd.u8 d28, d0, d7 ; abs(p7 - p0)
vabd.u8 d29, d15, d8 ; abs(q7 - q0)
; only compare the largest value to thresh
vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0))
vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5))
vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0))
vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0))
vmax.u8 d26, d22, d23
vmax.u8 d27, d24, d25
vmax.u8 d23, d26, d27
vcge.u8 d18, d30, d23 ; flat2
vmov.u8 d22, #0x80
vand d17, d18, d16 ; flat2 && flat && mask
vmov r5, r6, d17
orrs r5, r5, r6 ; Check for 0
orreq r7, r7, #2 ; Only do mbfilter branch
; mbfilter() function
; filter() function
; convert to signed
veor d23, d8, d22 ; qs0
veor d24, d7, d22 ; ps0
veor d25, d6, d22 ; ps1
veor d26, d9, d22 ; qs1
vmov.u8 d27, #3
vsub.s8 d28, d23, d24 ; ( qs0 - ps0)
vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0)
vand d29, d29, d21 ; filter &= hev
vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
; filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 ; filter &= mask
vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 ; filter2 >>= 3
vshr.s8 d29, d29, #3 ; filter1 >>= 3
vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2)
vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1)
; outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d21 ; filter &= ~hev
vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter)
veor d24, d24, d22 ; *f_op0 = u^0x80
veor d23, d23, d22 ; *f_oq0 = u^0x80
veor d25, d25, d22 ; *f_op1 = u^0x80
veor d26, d26, d22 ; *f_oq1 = u^0x80
tst r7, #1
bxne lr
; mbfilter flat && mask branch
; TODO(fgalligan): Can I decrease the cycles by shifting to consecutive d's
; and using vbit on the q's?
vmov.u8 d29, #2
vaddl.u8 q15, d7, d8 ; op2 = p0 + q0
vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3
vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2
vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
vqrshrn.u16 d18, q15, #3 ; r_op2
vsubw.u8 q15, d4 ; op1 = op2 - p3
vsubw.u8 q15, d5 ; op1 -= p2
vaddw.u8 q15, d6 ; op1 += p1
vaddw.u8 q15, d9 ; op1 += q1
vqrshrn.u16 d19, q15, #3 ; r_op1
vsubw.u8 q15, d4 ; op0 = op1 - p3
vsubw.u8 q15, d6 ; op0 -= p1
vaddw.u8 q15, d7 ; op0 += p0
vaddw.u8 q15, d10 ; op0 += q2
vqrshrn.u16 d20, q15, #3 ; r_op0
vsubw.u8 q15, d4 ; oq0 = op0 - p3
vsubw.u8 q15, d7 ; oq0 -= p0
vaddw.u8 q15, d8 ; oq0 += q0
vaddw.u8 q15, d11 ; oq0 += q3
vqrshrn.u16 d21, q15, #3 ; r_oq0
vsubw.u8 q15, d5 ; oq1 = oq0 - p2
vsubw.u8 q15, d8 ; oq1 -= q0
vaddw.u8 q15, d9 ; oq1 += q1
vaddw.u8 q15, d11 ; oq1 += q3
vqrshrn.u16 d22, q15, #3 ; r_oq1
vsubw.u8 q15, d6 ; oq2 = oq0 - p1
vsubw.u8 q15, d9 ; oq2 -= q1
vaddw.u8 q15, d10 ; oq2 += q2
vaddw.u8 q15, d11 ; oq2 += q3
vqrshrn.u16 d27, q15, #3 ; r_oq2
; Filter does not set op2 or oq2, so use p2 and q2.
vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask)
vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask)
vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask)
vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask)
vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask)
vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask)
vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask)
tst r7, #2
bxne lr
; wide_mbfilter flat2 && flat && mask branch
vmov.u8 d16, #7
vaddl.u8 q15, d7, d8 ; op6 = p0 + q0
vmlal.u8 q15, d0, d16 ; op6 += p7 * 7
vmlal.u8 q15, d1, d29 ; op6 += p6 * 2
vaddw.u8 q15, d2 ; op6 += p5
vaddw.u8 q15, d3 ; op6 += p4
vaddw.u8 q15, d4 ; op6 += p3
vaddw.u8 q15, d5 ; op6 += p2
vaddw.u8 q15, d6 ; op6 += p1
vqrshrn.u16 d16, q15, #4 ; w_op6
vsubw.u8 q15, d0 ; op5 = op6 - p7
vsubw.u8 q15, d1 ; op5 -= p6
vaddw.u8 q15, d2 ; op5 += p5
vaddw.u8 q15, d9 ; op5 += q1
vqrshrn.u16 d24, q15, #4 ; w_op5
vsubw.u8 q15, d0 ; op4 = op5 - p7
vsubw.u8 q15, d2 ; op4 -= p5
vaddw.u8 q15, d3 ; op4 += p4
vaddw.u8 q15, d10 ; op4 += q2
vqrshrn.u16 d25, q15, #4 ; w_op4
vsubw.u8 q15, d0 ; op3 = op4 - p7
vsubw.u8 q15, d3 ; op3 -= p4
vaddw.u8 q15, d4 ; op3 += p3
vaddw.u8 q15, d11 ; op3 += q3
vqrshrn.u16 d26, q15, #4 ; w_op3
vsubw.u8 q15, d0 ; op2 = op3 - p7
vsubw.u8 q15, d4 ; op2 -= p3
vaddw.u8 q15, d5 ; op2 += p2
vaddw.u8 q15, d12 ; op2 += q4
vqrshrn.u16 d27, q15, #4 ; w_op2
vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m)
vsubw.u8 q15, d0 ; op1 = op2 - p7
vsubw.u8 q15, d5 ; op1 -= p2
vaddw.u8 q15, d6 ; op1 += p1
vaddw.u8 q15, d13 ; op1 += q5
vqrshrn.u16 d18, q15, #4 ; w_op1
vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m)
vsubw.u8 q15, d0 ; op0 = op1 - p7
vsubw.u8 q15, d6 ; op0 -= p1
vaddw.u8 q15, d7 ; op0 += p0
vaddw.u8 q15, d14 ; op0 += q6
vqrshrn.u16 d19, q15, #4 ; w_op0
vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m)
vsubw.u8 q15, d0 ; oq0 = op0 - p7
vsubw.u8 q15, d7 ; oq0 -= p0
vaddw.u8 q15, d8 ; oq0 += q0
vaddw.u8 q15, d15 ; oq0 += q7
vqrshrn.u16 d20, q15, #4 ; w_oq0
vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m)
vsubw.u8 q15, d1 ; oq1 = oq0 - p6
vsubw.u8 q15, d8 ; oq1 -= q0
vaddw.u8 q15, d9 ; oq1 += q1
vaddw.u8 q15, d15 ; oq1 += q7
vqrshrn.u16 d21, q15, #4 ; w_oq1
vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m)
vsubw.u8 q15, d2 ; oq2 = oq1 - p5
vsubw.u8 q15, d9 ; oq2 -= q1
vaddw.u8 q15, d10 ; oq2 += q2
vaddw.u8 q15, d15 ; oq2 += q7
vqrshrn.u16 d22, q15, #4 ; w_oq2
vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m)
vsubw.u8 q15, d3 ; oq3 = oq2 - p4
vsubw.u8 q15, d10 ; oq3 -= q2
vaddw.u8 q15, d11 ; oq3 += q3
vaddw.u8 q15, d15 ; oq3 += q7
vqrshrn.u16 d23, q15, #4 ; w_oq3
vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m)
vsubw.u8 q15, d4 ; oq4 = oq3 - p3
vsubw.u8 q15, d11 ; oq4 -= q3
vaddw.u8 q15, d12 ; oq4 += q4
vaddw.u8 q15, d15 ; oq4 += q7
vqrshrn.u16 d1, q15, #4 ; w_oq4
vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m)
vsubw.u8 q15, d5 ; oq5 = oq4 - p2
vsubw.u8 q15, d12 ; oq5 -= q4
vaddw.u8 q15, d13 ; oq5 += q5
vaddw.u8 q15, d15 ; oq5 += q7
vqrshrn.u16 d2, q15, #4 ; w_oq5
vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m)
vsubw.u8 q15, d6 ; oq6 = oq5 - p1
vsubw.u8 q15, d13 ; oq6 -= q5
vaddw.u8 q15, d14 ; oq6 += q6
vaddw.u8 q15, d15 ; oq6 += q7
vqrshrn.u16 d3, q15, #4 ; w_oq6
vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m)
vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m)
vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m)
vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m)
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
ENDP ; |vp9_wide_mbfilter_neon|
END
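For reference, the three per-pixel decisions the mask code above vectorizes, written as boolean C with descriptive names assumed (the NEON code produces 0/-1 lane masks instead of booleans): mask gates any filtering, flat selects the 8-tap branch, and flat2 the full 16-tap branch.

#include <stdlib.h> /* abs */

static int max2(int a, int b) { return a > b ? a : b; }

/* mask: may any filtering happen at this pixel position? */
static int filter_mask(int limit, int blimit,
                       int p3, int p2, int p1, int p0,
                       int q0, int q1, int q2, int q3) {
  int m = max2(abs(p3 - p2), abs(p2 - p1));
  m = max2(m, max2(abs(p1 - p0), abs(q1 - q0)));
  m = max2(m, max2(abs(q2 - q1), abs(q3 - q2)));
  return m <= limit && abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit;
}

/* flat: is the 8-pixel neighborhood smooth enough for the 8-tap branch? */
static int flat_mask(int p3, int p2, int p1, int p0,
                     int q0, int q1, int q2, int q3) {
  int m = max2(abs(p1 - p0), abs(q1 - q0));
  m = max2(m, max2(abs(p0 - p2), abs(q0 - q2)));
  m = max2(m, max2(abs(p3 - p0), abs(q3 - q0)));
  return m <= 1; /* the vcge.u8 against #1 */
}

/* flat2: is the full 16-pixel span smooth enough for the wide branch? */
static int flat2_mask(int p7, int p6, int p5, int p4, int p0,
                      int q0, int q4, int q5, int q6, int q7) {
  int m = max2(abs(p4 - p0), abs(q4 - q0));
  m = max2(m, max2(abs(p0 - p5), abs(q0 - q5)));
  m = max2(m, max2(abs(p6 - p0), abs(q6 - q0)));
  m = max2(m, max2(abs(p7 - p0), abs(q7 - q0)));
  return m <= 1;
}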


@@ -1,356 +0,0 @@
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp9_short_idct8x8_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; Parallel 1D IDCT on all the columns of an 8x8 16-bit data matrix which are
; loaded in q8-q15. The output will be stored back into q8-q15 registers.
; This macro will touch q0-q7 registers and use them as buffer during
; calculation.
MACRO
IDCT8x8_1D
; stage 1
vdup.16 d0, r3 ; duplicate cospi_28_64
vdup.16 d1, r4 ; duplicate cospi_4_64
; input[1] * cospi_28_64
vmull.s16 q2, d18, d0
vmull.s16 q3, d19, d0
; input[7] * cospi_4_64
vmull.s16 q4, d30, d1
vmull.s16 q5, d31, d1
; input[1]*cospi_28_64-input[7]*cospi_4_64
vsub.s32 q6, q2, q4
vsub.s32 q7, q3, q5
; dct_const_round_shift(...)
vqrshrn.s32 d8, q6, #14 ; >> 14
vqrshrn.s32 d9, q7, #14 ; >> 14
; input[1] * cospi_4_64
vmull.s16 q2, d18, d1
vmull.s16 q3, d19, d1
; input[7] * cospi_28_64
vmull.s16 q1, d30, d0
vmull.s16 q5, d31, d0
; input[1]*cospi_4_64+input[7]*cospi_28_64
vadd.s32 q2, q2, q1
vadd.s32 q3, q3, q5
; dct_const_round_shift(...)
vqrshrn.s32 d14, q2, #14 ; >> 14
vqrshrn.s32 d15, q3, #14 ; >> 14
vdup.16 d0, r5 ; duplicate cospi_12_64
vdup.16 d1, r6 ; duplicate cospi_20_64
; input[5] * cospi_12_64
vmull.s16 q2, d26, d0
vmull.s16 q3, d27, d0
; input[3] * cospi_20_64
vmull.s16 q5, d22, d1
vmull.s16 q6, d23, d1
; input[5] * cospi_12_64 - input[3] * cospi_20_64
vsub.s32 q2, q2, q5
vsub.s32 q3, q3, q6
; dct_const_round_shift(...)
vqrshrn.s32 d10, q2, #14 ; >> 14
vqrshrn.s32 d11, q3, #14 ; >> 14
; input[5] * cospi_20_64
vmull.s16 q2, d26, d1
vmull.s16 q3, d27, d1
; input[3] * cospi_12_64
vmull.s16 q9, d22, d0
vmull.s16 q15, d23, d0
; input[5] * cospi_20_64 + input[3] * cospi_12_64
vadd.s32 q0, q2, q9
vadd.s32 q1, q3, q15
; dct_const_round_shift(...)
vqrshrn.s32 d12, q0, #14 ; >> 14
vqrshrn.s32 d13, q1, #14 ; >> 14
; stage 2 & stage 3 - even half
vdup.16 d0, r7 ; duplicate cospi_16_64
; input[0] * cospi_16_64
vmull.s16 q2, d16, d0
vmull.s16 q3, d17, d0
; input[2] * cospi_16_64
vmull.s16 q9, d24, d0
vmull.s16 q11, d25, d0
; (input[0] + input[2]) * cospi_16_64
vadd.s32 q9, q2, q9
vadd.s32 q11, q3, q11
; dct_const_round_shift(...)
vqrshrn.s32 d18, q9, #14 ; >> 14
vqrshrn.s32 d19, q11, #14 ; >> 14
; input[0] * cospi_16_64
vmull.s16 q2, d16, d0
vmull.s16 q3, d17, d0
; input[2] * cospi_16_64
vmull.s16 q13, d24, d0
vmull.s16 q15, d25, d0
; (input[0] - input[2]) * cospi_16_64
vsub.s32 q2, q2, q13
vsub.s32 q3, q3, q15
; dct_const_round_shift(...)
vqrshrn.s32 d22, q2, #14 ; >> 14
vqrshrn.s32 d23, q3, #14 ; >> 14
; input[1] * cospi_24_64 - input[3] * cospi_8_64
vdup.16 d0, r8 ; duplicate cospi_24_64
vdup.16 d1, r9 ; duplicate cospi_8_64
; input[1] * cospi_24_64
vmull.s16 q2, d20, d0
vmull.s16 q3, d21, d0
; input[3] * cospi_8_64
vmull.s16 q13, d28, d1
vmull.s16 q15, d29, d1
; input[1] * cospi_24_64 - input[3] * cospi_8_64
vsub.s32 q2, q2, q13
vsub.s32 q3, q3, q15
; dct_const_round_shift(...)
vqrshrn.s32 d26, q2, #14 ; >> 14
vqrshrn.s32 d27, q3, #14 ; >> 14
; input[1] * cospi_8_64
vmull.s16 q2, d20, d1
vmull.s16 q3, d21, d1
; input[3] * cospi_24_64
vmull.s16 q8, d28, d0
vmull.s16 q10, d29, d0
; input[1] * cospi_8_64 + input[3] * cospi_24_64
vadd.s32 q0, q2, q8
vadd.s32 q1, q3, q10
; dct_const_round_shift(...)
vqrshrn.s32 d30, q0, #14 ; >> 14
vqrshrn.s32 d31, q1, #14 ; >> 14
vadd.s16 q0, q9, q15 ; output[0] = step[0] + step[3]
vadd.s16 q1, q11, q13 ; output[1] = step[1] + step[2]
vsub.s16 q2, q11, q13 ; output[2] = step[1] - step[2]
vsub.s16 q3, q9, q15 ; output[3] = step[0] - step[3]
; stage 2 - odd half
vsub.s16 q13, q4, q5 ; step2[5] = step1[4] - step1[5]
vadd.s16 q4, q4, q5 ; step2[4] = step1[4] + step1[5]
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7]
vadd.s16 q7, q7, q6 ; step2[7] = step1[6] + step1[7]
; stage 3 -odd half
vdup.16 d16, r7 ; duplicate cospi_16_64
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
vmull.s16 q10, d29, d16
; step2[5] * cospi_16_64
vmull.s16 q11, d26, d16
vmull.s16 q12, d27, d16
; (step2[6] - step2[5]) * cospi_16_64
vsub.s32 q9, q9, q11
vsub.s32 q10, q10, q12
; dct_const_round_shift(...)
vqrshrn.s32 d10, q9, #14 ; >> 14
vqrshrn.s32 d11, q10, #14 ; >> 14
; step2[6] * cospi_16_64
vmull.s16 q9, d28, d16
vmull.s16 q10, d29, d16
; step2[5] * cospi_16_64
vmull.s16 q11, d26, d16
vmull.s16 q12, d27, d16
; (step2[5] + step2[6]) * cospi_16_64
vadd.s32 q9, q9, q11
vadd.s32 q10, q10, q12
; dct_const_round_shift(...)
vqrshrn.s32 d12, q9, #14 ; >> 14
vqrshrn.s32 d13, q10, #14 ; >> 14
; stage 4
vadd.s16 q8, q0, q7 ; output[0] = step1[0] + step1[7];
vadd.s16 q9, q1, q6 ; output[1] = step1[1] + step1[6];
vadd.s16 q10, q2, q5 ; output[2] = step1[2] + step1[5];
vadd.s16 q11, q3, q4 ; output[3] = step1[3] + step1[4];
vsub.s16 q12, q3, q4 ; output[4] = step1[3] - step1[4];
vsub.s16 q13, q2, q5 ; output[5] = step1[2] - step1[5];
vsub.s16 q14, q1, q6 ; output[6] = step1[1] - step1[6];
vsub.s16 q15, q0, q7 ; output[7] = step1[0] - step1[7];
MEND
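Each vmull/vadd (or vsub)/vqrshrn #14 group in the macro above is one fixed-point butterfly. A scalar C sketch of the pattern, with helper names assumed (the NEON narrowing shift additionally saturates to 16 bits):

#include <stdint.h>

/* Q14 rounding shift: the scalar counterpart of vqrshrn.s32 #14. */
static int16_t dct_const_round_shift(int32_t input) {
  return (int16_t)((input + (1 << 13)) >> 14);
}

/* One IDCT butterfly; e.g. stage 1 forms step1[4] and step1[7] from
 * input[1] and input[7] with c0 = cospi_28_64 and c1 = cospi_4_64. */
static void idct_butterfly(int16_t a, int16_t b, int c0, int c1,
                           int16_t *out0, int16_t *out1) {
  *out0 = dct_const_round_shift(a * c0 - b * c1);
  *out1 = dct_const_round_shift(a * c1 + b * c0);
}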
; Transpose an 8x8 16-bit data matrix. Data is loaded in q8-q15.
MACRO
TRANSPOSE8X8
vswp d17, d24
vswp d23, d30
vswp d21, d28
vswp d19, d26
vtrn.32 q8, q10
vtrn.32 q9, q11
vtrn.32 q12, q14
vtrn.32 q13, q15
vtrn.16 q8, q9
vtrn.16 q10, q11
vtrn.16 q12, q13
vtrn.16 q14, q15
MEND
AREA Block, CODE, READONLY ; name this block of code
;void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t *input
; r1 uint8_t *dest
; r2 int dest_stride
|vp9_short_idct8x8_add_neon| PROC
push {r4-r9}
vld1.s16 {q8}, [r0]!
vld1.s16 {q9}, [r0]!
vld1.s16 {q10}, [r0]!
vld1.s16 {q11}, [r0]!
vld1.s16 {q12}, [r0]!
vld1.s16 {q13}, [r0]!
vld1.s16 {q14}, [r0]!
vld1.s16 {q15}, [r0]!
; transpose the input data
TRANSPOSE8X8
; generate cospi_28_64 = 3196
mov r3, #0x0c00
add r3, #0x7c
; generate cospi_4_64 = 16069
mov r4, #0x3e00
add r4, #0xc5
; generate cospi_12_64 = 13623
mov r5, #0x3500
add r5, #0x37
; generate cospi_20_64 = 9102
mov r6, #0x2300
add r6, #0x8e
; generate cospi_16_64 = 11585
mov r7, #0x2d00
add r7, #0x41
; generate cospi_24_64 = 6270
mov r8, #0x1800
add r8, #0x7e
; generate cospi_8_64 = 15137
mov r9, #0x3b00
add r9, #0x21
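; Note: each constant above is cospi_k_64 = round(16384 * cos(k * pi / 64)),
; e.g. cospi_16_64 = round(16384 * cos(pi / 4)) = 11585 = 0x2d41. The values
; are built with mov/add pairs because an ARM data-processing immediate is
; limited to an 8-bit value rotated by an even amount.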
; First transform rows
IDCT8x8_1D
; Transpose the matrix
TRANSPOSE8X8
; Then transform columns
IDCT8x8_1D
; ROUND_POWER_OF_TWO(temp_out[j], 5)
vrshr.s16 q8, q8, #5
vrshr.s16 q9, q9, #5
vrshr.s16 q10, q10, #5
vrshr.s16 q11, q11, #5
vrshr.s16 q12, q12, #5
vrshr.s16 q13, q13, #5
vrshr.s16 q14, q14, #5
vrshr.s16 q15, q15, #5
; save dest pointer
mov r0, r1
; load destination data
vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2
vld1.64 {d3}, [r1], r2
vld1.64 {d4}, [r1], r2
vld1.64 {d5}, [r1], r2
vld1.64 {d6}, [r1], r2
vld1.64 {d7}, [r1]
; ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]
vaddw.u8 q8, q8, d0
vaddw.u8 q9, q9, d1
vaddw.u8 q10, q10, d2
vaddw.u8 q11, q11, d3
vaddw.u8 q12, q12, d4
vaddw.u8 q13, q13, d5
vaddw.u8 q14, q14, d6
vaddw.u8 q15, q15, d7
; clip_pixel
vqmovun.s16 d0, q8
vqmovun.s16 d1, q9
vqmovun.s16 d2, q10
vqmovun.s16 d3, q11
vqmovun.s16 d4, q12
vqmovun.s16 d5, q13
vqmovun.s16 d6, q14
vqmovun.s16 d7, q15
; store the data
vst1.64 {d0}, [r0], r2
vst1.64 {d1}, [r0], r2
vst1.64 {d2}, [r0], r2
vst1.64 {d3}, [r0], r2
vst1.64 {d4}, [r0], r2
vst1.64 {d5}, [r0], r2
vst1.64 {d6}, [r0], r2
vst1.64 {d7}, [r0], r2
pop {r4-r9}
bx lr
ENDP ; |vp9_short_idct8x8_add_neon|
END


@@ -11,7 +11,6 @@
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
@@ -53,6 +52,7 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) {
for (i = 0; i < NUM_YV12_BUFFERS; i++)
vp9_free_frame_buffer(&oci->yv12_fb[i]);
vp9_free_frame_buffer(&oci->temp_scale_frame);
vp9_free_frame_buffer(&oci->post_proc_buffer);
vpx_free(oci->mip);
@@ -62,9 +62,9 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) {
vpx_free(oci->above_context[0]);
for (i = 0; i < MAX_MB_PLANE; i++)
oci->above_context[i] = 0;
oci->mip = NULL;
oci->prev_mip = NULL;
oci->above_seg_context = NULL;
oci->mip = 0;
oci->prev_mip = 0;
oci->above_seg_context = 0;
}
static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
@@ -74,7 +74,7 @@ static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) {
cm->mi_cols = aligned_width >> LOG2_MI_SIZE;
cm->mi_rows = aligned_height >> LOG2_MI_SIZE;
cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE;
cm->mode_info_stride = cm->mi_cols + 64 / MI_SIZE;
}
static void setup_mi(VP9_COMMON *cm) {
@@ -94,11 +94,11 @@ static void setup_mi(VP9_COMMON *cm) {
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
int i, mi_cols;
const int aligned_width = ALIGN_POWER_OF_TWO(width, LOG2_MI_SIZE);
const int aligned_height = ALIGN_POWER_OF_TWO(height, LOG2_MI_SIZE);
// Our internal buffers are always multiples of 8
const int aligned_width = multiple8(width);
const int aligned_height = multiple8(height);
const int ss_x = oci->subsampling_x;
const int ss_y = oci->subsampling_y;
int mi_size;
vp9_free_frame_buffers(oci);
@@ -120,6 +120,10 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
oci->fb_idx_ref_cnt[i] = 1;
}
if (vp9_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, ss_x, ss_y,
VP9BORDERINPIXELS) < 0)
goto fail;
if (vp9_alloc_frame_buffer(&oci->post_proc_buffer, width, height, ss_x, ss_y,
VP9BORDERINPIXELS) < 0)
goto fail;
@@ -127,13 +131,14 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
set_mb_mi(oci, aligned_width, aligned_height);
// Allocation
mi_size = oci->mode_info_stride * (oci->mi_rows + MI_BLOCK_SIZE);
oci->mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
oci->mip = vpx_calloc(oci->mode_info_stride * (oci->mi_rows + 64 / MI_SIZE),
sizeof(MODE_INFO));
if (!oci->mip)
goto fail;
oci->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO));
oci->prev_mip = vpx_calloc(oci->mode_info_stride *
(oci->mi_rows + 64 / MI_SIZE),
sizeof(MODE_INFO));
if (!oci->prev_mip)
goto fail;
@@ -141,7 +146,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
// FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling
// information is exposed at this level
mi_cols = mi_cols_aligned_to_sb(oci->mi_cols);
mi_cols = mi_cols_aligned_to_sb(oci);
// 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
// block where mi unit size is 8x8.
@@ -169,8 +174,9 @@ void vp9_create_common(VP9_COMMON *oci) {
vp9_init_mbmode_probs(oci);
oci->tx_mode = ONLY_4X4;
oci->txfm_mode = ONLY_4X4;
oci->comp_pred_mode = HYBRID_PREDICTION;
oci->clr_type = REG_YUV;
// Initialize reference frame sign bias structure to defaults
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
@@ -188,13 +194,13 @@ void vp9_initialize_common() {
void vp9_update_frame_size(VP9_COMMON *cm) {
int i, mi_cols;
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
const int aligned_width = multiple8(cm->width);
const int aligned_height = multiple8(cm->height);
set_mb_mi(cm, aligned_width, aligned_height);
setup_mi(cm);
mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
mi_cols = mi_cols_aligned_to_sb(cm);
for (i = 1; i < MAX_MB_PLANE; i++)
cm->above_context[i] =
cm->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;


@@ -0,0 +1,21 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
BEGIN
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
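The body above is an empty scaffold between BEGIN and END. If an offset were ever needed here, it would presumably be exported with the DEFINE macro from vpx_ports/asm_offsets.h, roughly as in this hypothetical sketch (the struct and field are illustrative assumptions, not taken from this file):

/* hypothetical: export a struct offset so assembly code can address it */
#include <stddef.h>
#include "vpx_ports/asm_offsets.h"
#include "vpx_scale/yv12config.h"

BEGIN
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
END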


@@ -13,22 +13,28 @@
#define VP9_COMMON_VP9_BLOCKD_H_
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_common_data.h"
#include "vp9/common/vp9_convolve.h"
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_treecoder.h"
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
#define BLOCK_SIZE_GROUPS 4
#define MAX_MB_SEGMENTS 8
#define MB_SEG_TREE_PROBS (MAX_MB_SEGMENTS-1)
#define PREDICTION_PROBS 3
#define MBSKIP_CONTEXTS 3
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 2
/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
#define MAX_MV_REF_CANDIDATES 2
#define INTRA_INTER_CONTEXTS 4
@@ -56,11 +62,11 @@ typedef enum {
} FRAME_TYPE;
typedef enum {
EIGHTTAP = 0,
EIGHTTAP_SMOOTH = 1,
EIGHTTAP_SHARP = 2,
BILINEAR = 3,
SWITCHABLE = 4 /* should be the last one */
EIGHTTAP_SMOOTH,
EIGHTTAP,
EIGHTTAP_SHARP,
BILINEAR,
SWITCHABLE /* should be the last one */
} INTERPOLATIONFILTERTYPE;
typedef enum {
@@ -81,37 +87,56 @@ typedef enum {
MB_MODE_COUNT
} MB_PREDICTION_MODE;
static INLINE int is_intra_mode(MB_PREDICTION_MODE mode) {
return mode <= TM_PRED;
}
static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
#if CONFIG_FILTERINTRA
static INLINE int is_filter_allowed(MB_PREDICTION_MODE mode) {
return mode != DC_PRED &&
mode != D45_PRED &&
mode != D27_PRED &&
mode != D63_PRED;
}
#endif
// Segment level features.
typedef enum {
SEG_LVL_ALT_Q = 0, // Use alternate Quantizer ....
SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
SEG_LVL_MAX = 4 // Number of MB level features supported
} SEG_LVL_FEATURES;
// Supported transform sizes.
typedef enum {
TX_4X4 = 0, // 4x4 dct transform
TX_8X8 = 1, // 8x8 dct transform
TX_16X16 = 2, // 16x16 dct transform
TX_32X32 = 3, // 32x32 dct transform
TX_SIZE_MAX_SB, // Number of transforms available to SBs
} TX_SIZE;
typedef enum {
DCT_DCT = 0, // DCT in both horizontal and vertical
ADST_DCT = 1, // ADST in vertical, DCT in horizontal
DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3 // ADST in both directions
} TX_TYPE;
#define VP9_INTRA_MODES (TM_PRED + 1)
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
return (mode - NEARESTMV);
}
#define WHT_UPSCALE_FACTOR 2
#define TX_SIZE_PROBS 6 // (TX_SIZE_MAX_SB * (TX_SIZE_MAX_SB - 1) / 2)
#define get_tx_probs(c, b) ((b) < BLOCK_SIZE_MB16X16 ? \
(c)->fc.tx_probs_8x8p : \
(b) < BLOCK_SIZE_SB32X32 ? \
(c)->fc.tx_probs_16x16p : (c)->fc.tx_probs_32x32p)
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
union b_mode_info {
MB_PREDICTION_MODE as_mode;
struct {
MB_PREDICTION_MODE first;
} as_mode;
int_mv as_mv[2]; // first, second inter predictor motion vectors
};
@@ -125,101 +150,71 @@ typedef enum {
} MV_REFERENCE_FRAME;
static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
return b_width_log2_lookup[sb_type];
}
static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
return b_height_log2_lookup[sb_type];
}
static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
return mi_width_log2_lookup[sb_type];
}
static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
return mi_height_log2_lookup[sb_type];
}
#if CONFIG_INTERINTRA
static INLINE TX_SIZE intra_size_log2_for_interintra(int bs) {
switch (bs) {
case 4:
return TX_4X4;
break;
case 8:
return TX_8X8;
break;
case 16:
return TX_16X16;
break;
case 32:
return TX_32X32;
break;
default:
return TX_32X32;
break;
switch (sb_type) {
case BLOCK_SIZE_SB4X8:
case BLOCK_SIZE_AB4X4: return 0;
case BLOCK_SIZE_SB8X4:
case BLOCK_SIZE_SB8X8:
case BLOCK_SIZE_SB8X16: return 1;
case BLOCK_SIZE_SB16X8:
case BLOCK_SIZE_MB16X16:
case BLOCK_SIZE_SB16X32: return 2;
case BLOCK_SIZE_SB32X16:
case BLOCK_SIZE_SB32X32:
case BLOCK_SIZE_SB32X64: return 3;
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB64X64: return 4;
default: assert(0);
return -1;
}
}
static INLINE int is_interintra_allowed(BLOCK_SIZE_TYPE sb_type) {
return ((sb_type >= BLOCK_8X8) && (sb_type < BLOCK_64X64));
static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
switch (sb_type) {
case BLOCK_SIZE_SB8X4:
case BLOCK_SIZE_AB4X4: return 0;
case BLOCK_SIZE_SB4X8:
case BLOCK_SIZE_SB8X8:
case BLOCK_SIZE_SB16X8: return 1;
case BLOCK_SIZE_SB8X16:
case BLOCK_SIZE_MB16X16:
case BLOCK_SIZE_SB32X16: return 2;
case BLOCK_SIZE_SB16X32:
case BLOCK_SIZE_SB32X32:
case BLOCK_SIZE_SB64X32: return 3;
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB64X64: return 4;
default: assert(0);
return -1;
}
}
#if CONFIG_MASKED_INTERINTRA
#define MASK_BITS_SML_INTERINTRA 3
#define MASK_BITS_MED_INTERINTRA 4
#define MASK_BITS_BIG_INTERINTRA 5
#define MASK_NONE_INTERINTRA -1
static INLINE int get_mask_bits_interintra(BLOCK_SIZE_TYPE sb_type) {
if (sb_type == BLOCK_4X4)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML_INTERINTRA;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED_INTERINTRA;
else
return MASK_BITS_BIG_INTERINTRA;
static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
int a = b_width_log2(sb_type) - 1;
// align 4x4 block to mode_info
if (a < 0)
a = 0;
assert(a >= 0);
return a;
}
#endif
#endif
#if CONFIG_MASKED_INTERINTER
#define MASK_BITS_SML 3
#define MASK_BITS_MED 4
#define MASK_BITS_BIG 5
#define MASK_NONE -1
static inline int get_mask_bits(BLOCK_SIZE_TYPE sb_type) {
if (sb_type == BLOCK_4X4)
return 0;
if (sb_type <= BLOCK_8X8)
return MASK_BITS_SML;
else if (sb_type <= BLOCK_32X32)
return MASK_BITS_MED;
else
return MASK_BITS_BIG;
static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
int a = b_height_log2(sb_type) - 1;
if (a < 0)
a = 0;
assert(a >= 0);
return a;
}
#endif
typedef struct {
MB_PREDICTION_MODE mode, uv_mode;
#if CONFIG_INTERINTRA
MB_PREDICTION_MODE interintra_mode, interintra_uv_mode;
#if CONFIG_MASKED_INTERINTRA
int interintra_mask_index;
int interintra_uv_mask_index;
int use_masked_interintra;
#endif
#endif
#if CONFIG_FILTERINTRA
int filterbit, uv_filterbit;
#endif
MV_REFERENCE_FRAME ref_frame[2];
TX_SIZE txfm_size;
int_mv mv[2]; // for each reference frame used
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
int_mv best_mv, best_second_mv;
uint8_t mb_mode_context[MAX_REF_FRAMES];
int mb_mode_context[MAX_REF_FRAMES];
unsigned char mb_skip_coeff; /* does this mb have coefficients at all; 1 = no coefficients, 0 = need to decode tokens */
unsigned char segment_id; // Segment id for current frame
@@ -235,33 +230,15 @@ typedef struct {
INTERPOLATIONFILTERTYPE interp_filter;
BLOCK_SIZE_TYPE sb_type;
#if CONFIG_MASKED_INTERINTER
int use_masked_compound;
int mask_index;
#endif
} MB_MODE_INFO;
typedef struct {
MB_MODE_INFO mbmi;
#if CONFIG_FILTERINTRA
int b_filter_info[4];
#endif
union b_mode_info bmi[4];
} MODE_INFO;
static int is_inter_block(const MB_MODE_INFO *mbmi) {
return mbmi->ref_frame[0] > INTRA_FRAME;
}
enum mv_precision {
MV_PRECISION_Q3,
MV_PRECISION_Q4
};
#define VP9_REF_SCALE_SHIFT 14
#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
#define VP9_REF_NO_SCALE 16384
struct scale_factors {
int x_scale_fp; // horizontal fixed point scale factor
@@ -274,8 +251,9 @@ struct scale_factors {
int (*scale_value_x)(int val, const struct scale_factors *scale);
int (*scale_value_y)(int val, const struct scale_factors *scale);
void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col);
MV32 (*scale_mv_q3_to_q4)(const MV *mv, const struct scale_factors *scale);
MV32 (*scale_mv_q4)(const MV *mv, const struct scale_factors *scale);
int_mv32 (*scale_mv_q3_to_q4)(const int_mv *src_mv,
const struct scale_factors *scale);
int32_t (*scale_mv_component_q4)(int mv_q4, int scale_fp, int offset_q4);
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
};
@@ -307,53 +285,71 @@ struct macroblockd_plane {
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 2
struct loopfilter {
int filter_level;
int sharpness_level;
int last_sharpness_level;
uint8_t mode_ref_delta_enabled;
uint8_t mode_ref_delta_update;
// 0 = Intra, Last, GF, ARF
signed char ref_deltas[MAX_REF_LF_DELTAS];
signed char last_ref_deltas[MAX_REF_LF_DELTAS];
// 0 = ZERO_MV, MV
signed char mode_deltas[MAX_MODE_LF_DELTAS];
signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
};
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
struct scale_factors scale_factor[2];
struct scale_factors scale_factor_uv[2];
MODE_INFO *prev_mode_info_context;
MODE_INFO *mode_info_context;
int mode_info_stride;
FRAME_TYPE frame_type;
int up_available;
int left_available;
int right_available;
struct segmentation seg;
struct loopfilter lf;
// partition contexts
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT *left_seg_context;
/* 0 (disable) 1 (enable) segmentation */
unsigned char segmentation_enabled;
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
unsigned char update_mb_segmentation_map;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char update_mb_segmentation_data;
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
unsigned char mb_segment_abs_delta;
/* Per-frame flags that define which MB-level features (such as quantizer or loop filter level) */
/* are enabled, and, when enabled, the probabilities used to decode the per-MB flags in MB_MODE_INFO */
// Probability Tree used to code Segment number
vp9_prob mb_segment_tree_probs[MB_SEG_TREE_PROBS];
// Segment features
int16_t segment_feature_data[MAX_MB_SEGMENTS][SEG_LVL_MAX];
unsigned int segment_feature_mask[MAX_MB_SEGMENTS];
/* mode_based Loop filter adjustment */
unsigned char mode_ref_lf_delta_enabled;
unsigned char mode_ref_lf_delta_update;
/* Delta values have the range +/- MAX_LOOP_FILTER */
/* 0 = Intra, Last, GF, ARF */
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
/* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
/* 0 = ZERO_MV, MV */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
/* 0 = ZERO_MV, MV */
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
int lossless;
/* Inverse transform function pointers. */
void (*inv_txm4x4_1_add)(int16_t *input, uint8_t *dest, int stride);
@@ -366,33 +362,32 @@ typedef struct macroblockd {
int corrupted;
unsigned char sb_index; // index of 32x32 block inside the 64x64 block
unsigned char mb_index; // index of 16x16 block inside the 32x32 block
unsigned char b_index; // index of 8x8 block inside the 16x16 block
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
int sb_index; // index of 32x32 block inside the 64x64 block
int mb_index; // index of 16x16 block inside the 32x32 block
int b_index; // index of 8x8 block inside the 16x16 block
int ab_index; // index of 4x4 block inside the 8x8 block
int q_index;
} MACROBLOCKD;
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
switch (subsize) {
case BLOCK_64X64:
case BLOCK_64X32:
case BLOCK_32X64:
case BLOCK_32X32:
case BLOCK_SIZE_SB64X64:
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB32X32:
return &xd->sb_index;
case BLOCK_32X16:
case BLOCK_16X32:
case BLOCK_16X16:
case BLOCK_SIZE_SB32X16:
case BLOCK_SIZE_SB16X32:
case BLOCK_SIZE_MB16X16:
return &xd->mb_index;
case BLOCK_16X8:
case BLOCK_8X16:
case BLOCK_8X8:
case BLOCK_SIZE_SB16X8:
case BLOCK_SIZE_SB8X16:
case BLOCK_SIZE_SB8X8:
return &xd->b_index;
case BLOCK_8X4:
case BLOCK_4X8:
case BLOCK_4X4:
case BLOCK_SIZE_SB8X4:
case BLOCK_SIZE_SB4X8:
case BLOCK_SIZE_AB4X4:
return &xd->ab_index;
default:
assert(0);
@@ -403,28 +398,45 @@ static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsi
static INLINE void update_partition_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type,
BLOCK_SIZE_TYPE sb_size) {
const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
const int bwl = b_width_log2(sb_type);
const int bhl = b_height_log2(sb_type);
const int boffset = b_width_log2(BLOCK_64X64) - bsl;
const char pcval0 = ~(0xe << boffset);
const char pcval1 = ~(0xf << boffset);
const char pcvalue[2] = {pcval0, pcval1};
assert(MAX(bwl, bhl) <= bsl);
int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
int bwl = b_width_log2(sb_type);
int bhl = b_height_log2(sb_type);
int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
int i;
// Update the partition context at the end node: set partition bits of
// block sizes larger than the current one to one, and partition bits of
// smaller block sizes to zero.
vpx_memset(xd->above_seg_context, pcvalue[bwl == bsl], bs);
vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
if ((bwl == bsl) && (bhl == bsl)) {
for (i = 0; i < bs; i++)
xd->left_seg_context[i] = ~(0xf << boffset);
for (i = 0; i < bs; i++)
xd->above_seg_context[i] = ~(0xf << boffset);
} else if ((bwl == bsl) && (bhl < bsl)) {
for (i = 0; i < bs; i++)
xd->left_seg_context[i] = ~(0xe << boffset);
for (i = 0; i < bs; i++)
xd->above_seg_context[i] = ~(0xf << boffset);
} else if ((bwl < bsl) && (bhl == bsl)) {
for (i = 0; i < bs; i++)
xd->left_seg_context[i] = ~(0xf << boffset);
for (i = 0; i < bs; i++)
xd->above_seg_context[i] = ~(0xe << boffset);
} else if ((bwl < bsl) && (bhl < bsl)) {
for (i = 0; i < bs; i++)
xd->left_seg_context[i] = ~(0xe << boffset);
for (i = 0; i < bs; i++)
xd->above_seg_context[i] = ~(0xe << boffset);
} else {
assert(0);
}
}
static INLINE int partition_plane_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type) {
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
int above = 0, left = 0, i;
int boffset = mi_width_log2(BLOCK_64X64) - bsl;
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
assert(bsl >= 0);
@@ -443,57 +455,134 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
PARTITION_TYPE partition) {
BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize];
assert(subsize != BLOCK_SIZE_TYPES);
BLOCK_SIZE_TYPE subsize;
switch (partition) {
case PARTITION_NONE:
subsize = bsize;
break;
case PARTITION_HORZ:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB64X32;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_SB32X16;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB16X8;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_SB8X4;
else
assert(0);
break;
case PARTITION_VERT:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB32X64;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_SB16X32;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB8X16;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_SB4X8;
else
assert(0);
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB32X32;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_MB16X16;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB8X8;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_AB4X4;
else
assert(0);
break;
default:
assert(0);
}
return subsize;
}
extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT];
// transform mapping
static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) {
switch (bmode) {
case TM_PRED :
case D135_PRED :
return ADST_ADST;
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int ib) {
const MODE_INFO *const mi = xd->mode_info_context;
const MB_MODE_INFO *const mbmi = &mi->mbmi;
case V_PRED :
case D117_PRED :
case D63_PRED:
return ADST_DCT;
if (plane_type != PLANE_TYPE_Y_WITH_DC ||
xd->lossless ||
is_inter_block(mbmi))
return DCT_DCT;
case H_PRED :
case D153_PRED :
case D27_PRED :
return DCT_ADST;
return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ?
mi->bmi[ib].as_mode : mbmi->mode];
}
static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
const MACROBLOCKD *xd) {
return plane_type == PLANE_TYPE_Y_WITH_DC ?
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
}
static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
const MACROBLOCKD *xd) {
return plane_type == PLANE_TYPE_Y_WITH_DC ?
mode2txfm_map[xd->mode_info_context->mbmi.mode] : DCT_DCT;
}
static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
xd->plane[i].subsampling_x = i ? ss_x : 0;
xd->plane[i].subsampling_y = i ? ss_y : 0;
default:
return DCT_DCT;
}
#if CONFIG_ALPHA
// TODO(jkoleszar): Using the Y w/h for now
xd->plane[3].subsampling_x = 0;
xd->plane[3].subsampling_y = 0;
#endif
}
static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type;
MODE_INFO *mi = xd->mode_info_context;
MB_MODE_INFO *const mbmi = &mi->mbmi;
if (xd->lossless || mbmi->ref_frame[0] != INTRA_FRAME)
return DCT_DCT;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
tx_type = txfm_map(mi->bmi[ib].as_mode.first);
} else {
assert(mbmi->mode <= TM_PRED);
tx_type = txfm_map(mbmi->mode);
}
return tx_type;
}
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
return MIN(mbmi->txfm_size, max_uv_txsize_lookup[mbmi->sb_type]);
static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode <= TM_PRED) {
tx_type = txfm_map(xd->mode_info_context->mbmi.mode);
}
return tx_type;
}
static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode <= TM_PRED) {
tx_type = txfm_map(xd->mode_info_context->mbmi.mode);
}
return tx_type;
}
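
In the lookup-based replacement, mode2txfm_map plays the role of txfm_map(); its contents can be inferred from the switch above (a sketch, not a quote of the table):

/* Sketch of mode2txfm_map, inferred from the switch in txfm_map():
 *   TM_PRED, D135_PRED          -> ADST_ADST
 *   V_PRED, D117_PRED, D63_PRED -> ADST_DCT
 *   H_PRED, D153_PRED, D27_PRED -> DCT_ADST
 *   all remaining modes         -> DCT_DCT */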
void vp9_setup_block_dptrs(MACROBLOCKD *xd,
int subsampling_x, int subsampling_y);
static TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
const TX_SIZE size = mbmi->txfm_size;
switch (mbmi->sb_type) {
case BLOCK_SIZE_SB64X64:
return size;
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB32X32:
if (size == TX_32X32)
return TX_16X16;
else
return size;
case BLOCK_SIZE_SB32X16:
case BLOCK_SIZE_SB16X32:
case BLOCK_SIZE_MB16X16:
if (size == TX_16X16)
return TX_8X8;
else
return size;
default:
return TX_4X4;
}
return size;
}
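
The same reduction applies here: the switch collapses to a clamp against the per-block-size chroma maximum, which is the one-line form at the top of this hunk. A sketch, using max_uv_txsize_lookup from vp9_common_data.c below:

/* Sketch: equivalent lookup form of get_uv_tx_size(). For example, a
 * BLOCK_32X32 block with txfm_size == TX_32X32 yields TX_16X16 for
 * chroma, since max_uv_txsize_lookup[BLOCK_32X32] == TX_16X16. */
static INLINE TX_SIZE get_uv_tx_size_sketch(const MB_MODE_INFO *mbmi) {
  return MIN(mbmi->txfm_size, max_uv_txsize_lookup[mbmi->sb_type]);
}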
struct plane_block_idx {
@@ -532,16 +621,6 @@ static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize,
return 4 << (b_height_log2(bsize) - plane->subsampling_y);
}
static INLINE int plane_block_width_log2by4(
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
return (b_width_log2(bsize) - plane->subsampling_x);
}
static INLINE int plane_block_height_log2by4(
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
return (b_height_log2(bsize) - plane->subsampling_y);
}
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
BLOCK_SIZE_TYPE bsize,
int ss_txfrm_size,
@@ -587,16 +666,16 @@ static INLINE void foreach_transformed_block_in_plane(
// it to 4x4 block sizes.
if (xd->mb_to_right_edge < 0)
max_blocks_wide +=
(xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
+ (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
if (xd->mb_to_bottom_edge < 0)
max_blocks_high +=
(xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
+ (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
i = 0;
// Unlike the normal case, here we have to keep track of the
// row and column of the blocks we use so that we know if we are in
// the unrestricted motion border.
// the unrestricted motion border..
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
if (r < max_blocks_high && c < max_blocks_wide)
@@ -654,8 +733,8 @@ static INLINE void foreach_predicted_block_in_plane(
// size of the predictor to use.
int pred_w, pred_h;
if (xd->mode_info_context->mbmi.sb_type < BLOCK_8X8) {
assert(bsize == BLOCK_8X8);
if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
assert(bsize == BLOCK_SIZE_SB8X8);
pred_w = 0;
pred_h = 0;
} else {
@@ -718,11 +797,11 @@ static int txfrm_block_to_raster_block(MACROBLOCKD *xd,
int ss_txfrm_size) {
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
const int txwl = ss_txfrm_size / 2;
const int tx_cols_log2 = bwl - txwl;
const int tx_cols = 1 << tx_cols_log2;
const int tx_cols_lg2 = bwl - txwl;
const int tx_cols = 1 << tx_cols_lg2;
const int raster_mb = block >> ss_txfrm_size;
const int x = (raster_mb & (tx_cols - 1)) << (txwl);
const int y = raster_mb >> tx_cols_log2 << (txwl);
const int y = raster_mb >> tx_cols_lg2 << (txwl);
return x + (y << bwl);
}
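
A worked example makes the index arithmetic concrete (all values hypothetical):

/* Example: a 32x32 luma plane (bwl == 3) with 8x8 transforms
 * (ss_txfrm_size == 2, so txwl == 1, tx_cols == 1 << (3 - 1) == 4).
 * For transform block index 6, block == 24 and raster_mb == 24 >> 2 == 6:
 *   x = (6 & 3) << 1 == 4,  y = (6 >> 2) << 1 == 2,
 * giving a raster 4x4-block offset of x + (y << bwl) == 4 + 16 == 20. */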
@@ -733,50 +812,13 @@ static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
int *x, int *y) {
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
const int txwl = ss_txfrm_size / 2;
const int tx_cols_log2 = bwl - txwl;
const int tx_cols = 1 << tx_cols_log2;
const int tx_cols_lg2 = bwl - txwl;
const int tx_cols = 1 << tx_cols_lg2;
const int raster_mb = block >> ss_txfrm_size;
*x = (raster_mb & (tx_cols - 1)) << (txwl);
*y = raster_mb >> tx_cols_log2 << (txwl);
*y = raster_mb >> tx_cols_lg2 << (txwl);
}
#if CONFIG_INTERINTRA
static void extend_for_interintra(MACROBLOCKD* const xd,
BLOCK_SIZE_TYPE bsize) {
int bh = 4 << b_height_log2(bsize), bw = 4 << b_width_log2(bsize);
int ystride = xd->plane[0].dst.stride, uvstride = xd->plane[1].dst.stride;
uint8_t *pixel_y, *pixel_u, *pixel_v;
int ymargin, uvmargin;
if (xd->mb_to_bottom_edge < 0) {
int r;
ymargin = 0 - xd->mb_to_bottom_edge / 8;
uvmargin = 0 - xd->mb_to_bottom_edge / 16;
pixel_y = xd->plane[0].dst.buf - 1 + (bh - ymargin -1) * ystride;
pixel_u = xd->plane[1].dst.buf - 1 + (bh / 2 - uvmargin - 1) * uvstride;
pixel_v = xd->plane[2].dst.buf - 1 + (bh / 2 - uvmargin - 1) * uvstride;
for (r = 0; r < ymargin; r++)
xd->plane[0].dst.buf[-1 + (bh - r -1) * ystride] = *pixel_y;
for (r = 0; r < uvmargin; r++) {
xd->plane[1].dst.buf[-1 + (bh / 2 - r -1) * uvstride] = *pixel_u;
xd->plane[2].dst.buf[-1 + (bh / 2 - r -1) * uvstride] = *pixel_v;
}
}
if (xd->mb_to_right_edge < 0) {
ymargin = 0 - xd->mb_to_right_edge / 8;
uvmargin = 0 - xd->mb_to_right_edge / 16;
pixel_y = xd->plane[0].dst.buf + bw - ymargin - 1 - ystride;
pixel_u = xd->plane[1].dst.buf + bw / 2 - uvmargin - 1 - uvstride;
pixel_v = xd->plane[2].dst.buf + bw / 2 - uvmargin - 1 - uvstride;
vpx_memset(xd->plane[0].dst.buf + bw - ymargin - ystride,
*pixel_y, ymargin);
vpx_memset(xd->plane[1].dst.buf + bw / 2 - uvmargin - uvstride,
*pixel_u, uvmargin);
vpx_memset(xd->plane[2].dst.buf + bw / 2 - uvmargin - uvstride,
*pixel_v, uvmargin);
}
}
#endif
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
const int bw = plane_block_width(bsize, &xd->plane[plane]);
@@ -817,39 +859,46 @@ static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
}
}
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
int plane, int tx_size_in_blocks,
int eob, int aoff, int loff,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
struct macroblockd_plane *pd = &xd->plane[plane];
int plane, int ss_tx_size, int eob, int aoff,
int loff, ENTROPY_CONTEXT *A,
ENTROPY_CONTEXT *L) {
const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
const int sw = bw - xd->plane[plane].subsampling_x;
const int sh = bh - xd->plane[plane].subsampling_y;
int mi_blocks_wide = 1 << sw;
int mi_blocks_high = 1 << sh;
int tx_size_in_blocks = (1 << ss_tx_size);
int above_contexts = tx_size_in_blocks;
int left_contexts = tx_size_in_blocks;
int mi_blocks_wide = 1 << plane_block_width_log2by4(bsize, pd);
int mi_blocks_high = 1 << plane_block_height_log2by4(bsize, pd);
int pt;
// xd->mb_to_right_edge is in units of pixels * 8. This converts
// it to 4x4 block sizes.
if (xd->mb_to_right_edge < 0)
mi_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
if (xd->mb_to_right_edge < 0) {
mi_blocks_wide += (xd->mb_to_right_edge
>> (5 + xd->plane[plane].subsampling_x));
}
// this code attempts to avoid copying into contexts that are outside
// our border. Any blocks that do are set to 0...
if (above_contexts + aoff > mi_blocks_wide)
above_contexts = mi_blocks_wide - aoff;
if (xd->mb_to_bottom_edge < 0)
mi_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
if (left_contexts + loff > mi_blocks_high)
if (xd->mb_to_bottom_edge < 0) {
mi_blocks_high += (xd->mb_to_bottom_edge
>> (5 + xd->plane[plane].subsampling_y));
}
if (left_contexts + loff > mi_blocks_high) {
left_contexts = mi_blocks_high - loff;
}
for (pt = 0; pt < above_contexts; pt++)
A[pt] = eob > 0;
for (pt = above_contexts; pt < tx_size_in_blocks; pt++)
for (pt = above_contexts; pt < (1 << ss_tx_size); pt++)
A[pt] = 0;
for (pt = 0; pt < left_contexts; pt++)
L[pt] = eob > 0;
for (pt = left_contexts; pt < tx_size_in_blocks; pt++)
for (pt = left_contexts; pt < (1 << ss_tx_size); pt++)
L[pt] = 0;
}
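
The clipping is easiest to verify on a concrete edge case (numbers hypothetical):

/* Example: a 32x32 transform (tx_size_in_blocks == 8) starting at
 * aoff == 4 in a plane only mi_blocks_wide == 8 4x4-columns wide:
 * above_contexts = 8 - 4 == 4, so A[0..3] record eob > 0 while
 * A[4..7] are forced to 0 because they lie outside the frame. */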


@@ -22,11 +22,12 @@
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define ROUND_POWER_OF_TWO(value, n) \
(((value) + (1 << ((n) - 1))) >> (n))
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
/* If we don't want to use ROUND_POWER_OF_TWO macro
static INLINE int16_t round_power_of_two(int16_t value, int n) {
return (value + (1 << (n - 1))) >> n;
}*/
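
Concretely, the two macros behave as follows (arbitrary sample values):

/* ROUND_POWER_OF_TWO(9, 2)  == (9 + 2)  >> 2 == 2    (9/4  -> 2.25 -> 2)
 * ROUND_POWER_OF_TWO(10, 2) == (10 + 2) >> 2 == 3    (10/4 -> 2.5  -> 3)
 * ALIGN_POWER_OF_TWO(37, 4) == (37 + 15) & ~15 == 48 (round up to 16) */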
// Only needed for fixed-size arrays; for structs, just assign.
#define vp9_copy(dest, src) { \
@@ -55,35 +56,10 @@ static INLINE double fclamp(double value, double low, double high) {
return value < low ? low : (value > high ? high : value);
}
static int get_unsigned_bits(unsigned int num_values) {
int cat = 0;
if (num_values <= 1)
return 0;
num_values--;
while (num_values > 0) {
cat++;
num_values >>= 1;
}
return cat;
static INLINE int multiple8(int value) {
return (value + 7) & ~7;
}
#if CONFIG_DEBUG
#define CHECK_MEM_ERROR(cm, lval, expr) do { \
lval = (expr); \
if (!lval) \
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate "#lval" at %s:%d", \
__FILE__, __LINE__); \
} while (0)
#else
#define CHECK_MEM_ERROR(cm, lval, expr) do { \
lval = (expr); \
if (!lval) \
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate "#lval); \
} while (0)
#endif
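
A typical call shape for the macro (a sketch; the destination field and element count are illustrative, not taken from this diff):

/* Allocate a row of contexts, reporting failure through the common
 * error handler; vpx_calloc(num, size) follows vpx_mem.h. */
CHECK_MEM_ERROR(cm, cm->above_seg_context,
                vpx_calloc(mi_cols, sizeof(*cm->above_seg_context)));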
#define SYNC_CODE_0 0x49
#define SYNC_CODE_1 0x83
#define SYNC_CODE_2 0x42


@@ -1,124 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common_data.h"
// Log 2 conversion lookup tables for block width and height
const int b_width_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4};
const int b_height_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4};
const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES] =
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
// Log 2 conversion lookup tables for modeinfo width and height
const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES] =
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
const int size_group_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
const int num_pels_log2_lookup[BLOCK_SIZE_TYPES] =
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12};
const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES] = {
{ // 4X4
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID
}, { // 8X8
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID
}, { // 16X16
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 32X32
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT,
PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID
}, { // 64X64
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ,
PARTITION_NONE
}
};
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
{ // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64,
}, { // PARTITION_HORZ
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_8X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_16X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_32X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_64X32,
}, { // PARTITION_VERT
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_4X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_8X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_16X32, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_32X64,
}, { // PARTITION_SPLIT
BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_4X4, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_8X8, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_16X16, BLOCK_SIZE_TYPES, BLOCK_SIZE_TYPES,
BLOCK_32X32,
}
};
const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = {
TX_4X4, TX_4X4, TX_4X4,
TX_8X8, TX_8X8, TX_8X8,
TX_16X16, TX_16X16, TX_16X16,
TX_32X32, TX_32X32, TX_32X32, TX_32X32
};
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
TX_4X4, TX_4X4, TX_4X4,
TX_4X4, TX_4X4, TX_4X4,
TX_8X8, TX_8X8, TX_8X8,
TX_16X16, TX_16X16, TX_16X16, TX_32X32
};
const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
{ BLOCK_4X4, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8, BLOCK_4X8 },
{ BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X16, BLOCK_8X16 },
{ BLOCK_16X8, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_16X32 },
{ BLOCK_32X16, BLOCK_32X16, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64 },
{ BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X32, BLOCK_64X64 }
};
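
This hunk does not show how the 5x5 table is indexed; a hedged helper, assuming the first index is log2(width/4) and the second log2(height/4), each clamped to 4:

static BLOCK_SIZE_TYPE bsize_from_dim_sketch(int w, int h) {
  int wl = 0, hl = 0;
  while ((4 << wl) < w && wl < 4) wl++;  /* log2(w / 4), clamped */
  while ((4 << hl) < h && hl < 4) hl++;  /* log2(h / 4), clamped */
  return bsize_from_dim_lookup[wl][hl];  /* e.g. (16, 8) -> BLOCK_16X8 */
}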


@@ -1,32 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_VP9_COMMON_DATA_H_
#define VP9_COMMON_VP9_COMMON_DATA_H_
#include "vp9/common/vp9_enums.h"
extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES];
extern const int num_8x8_blocks_wide_lookup[BLOCK_SIZE_TYPES];
extern const int num_8x8_blocks_high_lookup[BLOCK_SIZE_TYPES];
extern const int num_4x4_blocks_high_lookup[BLOCK_SIZE_TYPES];
extern const int num_4x4_blocks_wide_lookup[BLOCK_SIZE_TYPES];
extern const int size_group_lookup[BLOCK_SIZE_TYPES];
extern const int num_pels_log2_lookup[BLOCK_SIZE_TYPES];
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZE_TYPES];
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5];
#endif  // VP9_COMMON_VP9_COMMON_DATA_H_


@@ -38,8 +38,8 @@
*/
#define ALIGN_FILTERS_256 1
static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_horiz_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x0, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
@@ -80,8 +80,8 @@ static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
}
}
static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x0, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
@@ -122,8 +122,8 @@ static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
}
}
static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_vert_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y0, int y_step_q4,
int w, int h, int taps) {
@@ -164,8 +164,8 @@ static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
}
}
static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y0, int y_step_q4,
int w, int h, int taps) {
@@ -207,8 +207,8 @@ static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
}
}
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
@@ -217,13 +217,12 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
* h == 64, taps == 8.
*/
uint8_t temp[64 * 135];
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
assert(w <= 64);
assert(h <= 64);
assert(taps <= 8);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
if (intermediate_height < h)
intermediate_height = h;
@@ -237,8 +236,8 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, taps);
}
static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
static void convolve_avg_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
@@ -247,13 +246,12 @@ static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
* h == 64, taps == 8.
*/
uint8_t temp[64 * 135];
int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;
int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
assert(w <= 64);
assert(h <= 64);
assert(taps <= 8);
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
if (intermediate_height < h)
intermediate_height = h;
@@ -267,8 +265,8 @@ static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, taps);
}
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_horiz_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -277,8 +275,8 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, 8);
}
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -287,8 +285,8 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, 8);
}
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_vert_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -297,8 +295,8 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, 8);
}
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -307,8 +305,8 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, 8);
}
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -317,8 +315,8 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
w, h, 8);
}
void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
@@ -339,25 +337,33 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
w, h);
}
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h) {
int r;
void vp9_convolve_copy(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h) {
if (w == 16 && h == 16) {
vp9_copy_mem16x16(src, src_stride, dst, dst_stride);
} else if (w == 8 && h == 8) {
vp9_copy_mem8x8(src, src_stride, dst, dst_stride);
} else if (w == 8 && h == 4) {
vp9_copy_mem8x4(src, src_stride, dst, dst_stride);
} else {
int r;
for (r = h; r > 0; --r) {
memcpy(dst, src, w);
src += src_stride;
dst += dst_stride;
for (r = h; r > 0; --r) {
memcpy(dst, src, w);
src += src_stride;
dst += dst_stride;
}
}
}
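
The vp9_convolve_avg body below is cut short by the hunk; in sketch form (a hedged reconstruction, not a verbatim quote), its inner loop is a rounded per-pixel mean:

int x, y;
for (y = 0; y < h; ++y) {
  for (x = 0; x < w; ++x)
    dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);  /* (a + b + 1) >> 1 */
  src += src_stride;
  dst += dst_stride;
}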
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h) {
void vp9_convolve_avg(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h) {
int x, y;
for (y = 0; y < h; ++y) {


@@ -13,12 +13,26 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
// Not a convolution, a block copy conforming to the convolution prototype
void vp9_convolve_copy(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
// Not a convolution, a block average conforming to the convolution prototype
void vp9_convolve_avg(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h);
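
Call shape for any convolve_fn_t implementation (a sketch; buffers and the 8-tap filter kernels are assumed to be set up elsewhere, e.g. from the subpix_fn_table declared just below):

/* 8-tap filtering of a 16x16 block with no scaling: the step
 * parameters are 4.4 fixed point, so 16 advances one source pixel
 * per output pixel. */
vp9_convolve8_c(src, src_stride, dst, dst_stride,
                filter_x, 16, filter_y, 16, 16, 16);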
struct subpix_fn_table {
const int16_t (*filter_x)[8];
const int16_t (*filter_y)[8];


@@ -8,7 +8,695 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/* Generated file, included by vp9_entropy.c */
#if CONFIG_BALANCED_COEFTREE
static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 6, 213, 178 },
{ 26, 113, 132 },
{ 34, 17, 68 }
}, { /* Coeff Band 1 */
{ 66, 96, 178 },
{ 63, 96, 174 },
{ 67, 54, 154 },
{ 62, 28, 126 },
{ 48, 9, 84 },
{ 20, 1, 32 }
}, { /* Coeff Band 2 */
{ 64, 144, 206 },
{ 70, 99, 191 },
{ 69, 36, 152 },
{ 55, 9, 106 },
{ 35, 1, 60 },
{ 14, 1, 22 }
}, { /* Coeff Band 3 */
{ 82, 154, 222 },
{ 83, 112, 205 },
{ 81, 31, 164 },
{ 62, 7, 118 },
{ 42, 1, 74 },
{ 18, 1, 30 }
}, { /* Coeff Band 4 */
{ 52, 179, 233 },
{ 64, 132, 214 },
{ 73, 36, 170 },
{ 59, 8, 116 },
{ 38, 1, 65 },
{ 15, 1, 26 }
}, { /* Coeff Band 5 */
{ 29, 175, 238 },
{ 26, 169, 223 },
{ 41, 80, 182 },
{ 39, 32, 127 },
{ 26, 10, 69 },
{ 11, 2, 28 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 21, 226, 234 },
{ 52, 182, 212 },
{ 80, 112, 177 }
}, { /* Coeff Band 1 */
{ 111, 164, 243 },
{ 88, 152, 231 },
{ 90, 43, 186 },
{ 70, 12, 132 },
{ 44, 2, 76 },
{ 19, 1, 33 }
}, { /* Coeff Band 2 */
{ 96, 185, 246 },
{ 99, 127, 231 },
{ 88, 21, 177 },
{ 64, 5, 122 },
{ 38, 1, 69 },
{ 18, 1, 30 }
}, { /* Coeff Band 3 */
{ 84, 206, 249 },
{ 94, 147, 237 },
{ 95, 33, 187 },
{ 71, 8, 131 },
{ 47, 1, 83 },
{ 26, 1, 44 }
}, { /* Coeff Band 4 */
{ 38, 221, 252 },
{ 58, 177, 241 },
{ 78, 46, 188 },
{ 59, 9, 122 },
{ 34, 1, 66 },
{ 18, 1, 34 }
}, { /* Coeff Band 5 */
{ 21, 216, 253 },
{ 21, 206, 244 },
{ 42, 93, 200 },
{ 43, 41, 146 },
{ 36, 13, 93 },
{ 31, 1, 55 }
}
}
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 7, 213, 219 },
{ 23, 139, 182 },
{ 38, 60, 125 }
}, { /* Coeff Band 1 */
{ 69, 156, 220 },
{ 52, 178, 213 },
{ 69, 111, 190 },
{ 69, 58, 155 },
{ 58, 21, 104 },
{ 39, 7, 60 }
}, { /* Coeff Band 2 */
{ 68, 189, 228 },
{ 70, 158, 221 },
{ 83, 64, 189 },
{ 73, 18, 141 },
{ 48, 4, 88 },
{ 23, 1, 41 }
}, { /* Coeff Band 3 */
{ 99, 194, 236 },
{ 91, 138, 224 },
{ 91, 53, 189 },
{ 74, 20, 142 },
{ 48, 6, 90 },
{ 22, 1, 41 }
}, { /* Coeff Band 4 */
{ 52, 203, 244 },
{ 60, 168, 231 },
{ 75, 62, 189 },
{ 61, 18, 132 },
{ 38, 4, 72 },
{ 17, 1, 39 }
}, { /* Coeff Band 5 */
{ 33, 192, 247 },
{ 31, 185, 234 },
{ 46, 85, 185 },
{ 39, 35, 132 },
{ 28, 15, 80 },
{ 13, 5, 38 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 5, 247, 246 },
{ 28, 209, 228 },
{ 65, 137, 203 }
}, { /* Coeff Band 1 */
{ 69, 208, 250 },
{ 54, 207, 242 },
{ 81, 92, 204 },
{ 70, 54, 153 },
{ 58, 40, 108 },
{ 58, 35, 71 }
}, { /* Coeff Band 2 */
{ 65, 215, 250 },
{ 72, 185, 239 },
{ 92, 50, 197 },
{ 75, 14, 147 },
{ 49, 2, 99 },
{ 26, 1, 53 }
}, { /* Coeff Band 3 */
{ 70, 220, 251 },
{ 76, 186, 241 },
{ 90, 65, 198 },
{ 75, 26, 151 },
{ 58, 12, 112 },
{ 34, 6, 49 }
}, { /* Coeff Band 4 */
{ 34, 224, 253 },
{ 44, 204, 245 },
{ 69, 85, 204 },
{ 64, 31, 150 },
{ 44, 2, 78 },
{ 1, 1, 128 }
}, { /* Coeff Band 5 */
{ 25, 216, 253 },
{ 21, 215, 248 },
{ 47, 108, 214 },
{ 47, 48, 160 },
{ 26, 20, 90 },
{ 64, 171, 128 }
}
}
}
};
static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 9, 203, 199 },
{ 26, 92, 128 },
{ 28, 11, 55 }
}, { /* Coeff Band 1 */
{ 99, 54, 160 },
{ 78, 99, 155 },
{ 80, 44, 138 },
{ 71, 17, 115 },
{ 51, 5, 80 },
{ 27, 1, 40 }
}, { /* Coeff Band 2 */
{ 135, 81, 190 },
{ 113, 61, 182 },
{ 93, 16, 153 },
{ 70, 4, 115 },
{ 41, 1, 68 },
{ 16, 1, 27 }
}, { /* Coeff Band 3 */
{ 155, 103, 214 },
{ 129, 48, 199 },
{ 95, 10, 159 },
{ 63, 1, 110 },
{ 32, 1, 58 },
{ 12, 1, 21 }
}, { /* Coeff Band 4 */
{ 163, 149, 231 },
{ 137, 69, 213 },
{ 95, 11, 164 },
{ 62, 3, 108 },
{ 32, 1, 57 },
{ 13, 1, 22 }
}, { /* Coeff Band 5 */
{ 136, 189, 239 },
{ 123, 102, 223 },
{ 97, 19, 170 },
{ 66, 4, 111 },
{ 38, 1, 60 },
{ 18, 1, 26 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 24, 226, 244 },
{ 54, 178, 211 },
{ 80, 74, 152 }
}, { /* Coeff Band 1 */
{ 145, 153, 236 },
{ 101, 163, 223 },
{ 108, 50, 187 },
{ 90, 22, 145 },
{ 66, 8, 97 },
{ 42, 4, 50 }
}, { /* Coeff Band 2 */
{ 150, 159, 238 },
{ 128, 90, 218 },
{ 94, 9, 163 },
{ 64, 3, 110 },
{ 34, 1, 61 },
{ 13, 1, 24 }
}, { /* Coeff Band 3 */
{ 151, 162, 242 },
{ 135, 80, 222 },
{ 93, 9, 166 },
{ 61, 3, 111 },
{ 31, 1, 59 },
{ 12, 1, 22 }
}, { /* Coeff Band 4 */
{ 161, 170, 245 },
{ 140, 84, 228 },
{ 99, 8, 174 },
{ 64, 1, 116 },
{ 34, 1, 63 },
{ 14, 1, 26 }
}, { /* Coeff Band 5 */
{ 138, 197, 246 },
{ 127, 109, 233 },
{ 100, 16, 179 },
{ 66, 3, 119 },
{ 37, 1, 66 },
{ 16, 1, 30 }
}
}
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 6, 216, 212 },
{ 25, 134, 171 },
{ 43, 48, 118 }
}, { /* Coeff Band 1 */
{ 93, 112, 209 },
{ 66, 159, 206 },
{ 82, 78, 184 },
{ 75, 28, 148 },
{ 46, 4, 82 },
{ 18, 1, 28 }
}, { /* Coeff Band 2 */
{ 108, 148, 220 },
{ 90, 130, 216 },
{ 92, 40, 186 },
{ 73, 10, 135 },
{ 46, 1, 79 },
{ 20, 1, 35 }
}, { /* Coeff Band 3 */
{ 125, 173, 232 },
{ 109, 117, 223 },
{ 97, 31, 183 },
{ 71, 7, 127 },
{ 44, 1, 76 },
{ 21, 1, 36 }
}, { /* Coeff Band 4 */
{ 133, 195, 236 },
{ 112, 121, 224 },
{ 97, 23, 178 },
{ 69, 3, 122 },
{ 42, 1, 72 },
{ 19, 1, 34 }
}, { /* Coeff Band 5 */
{ 132, 180, 238 },
{ 119, 102, 225 },
{ 101, 18, 179 },
{ 71, 3, 124 },
{ 42, 1, 70 },
{ 17, 1, 28 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 5, 242, 250 },
{ 26, 198, 226 },
{ 58, 98, 168 }
}, { /* Coeff Band 1 */
{ 82, 201, 246 },
{ 50, 219, 237 },
{ 94, 107, 205 },
{ 89, 61, 167 },
{ 77, 31, 131 },
{ 57, 14, 91 }
}, { /* Coeff Band 2 */
{ 99, 202, 247 },
{ 96, 165, 234 },
{ 100, 31, 190 },
{ 72, 8, 131 },
{ 41, 1, 72 },
{ 14, 1, 24 }
}, { /* Coeff Band 3 */
{ 108, 204, 248 },
{ 107, 156, 235 },
{ 103, 27, 186 },
{ 71, 4, 124 },
{ 39, 1, 66 },
{ 14, 1, 19 }
}, { /* Coeff Band 4 */
{ 120, 211, 248 },
{ 118, 149, 234 },
{ 107, 19, 182 },
{ 72, 3, 126 },
{ 40, 1, 69 },
{ 16, 1, 24 }
}, { /* Coeff Band 5 */
{ 127, 199, 245 },
{ 122, 125, 232 },
{ 112, 20, 186 },
{ 82, 3, 136 },
{ 55, 1, 88 },
{ 10, 1, 38 }
}
}
}
};
static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 25, 9, 101 },
{ 25, 2, 67 },
{ 15, 1, 28 }
}, { /* Coeff Band 1 */
{ 67, 30, 118 },
{ 61, 56, 116 },
{ 60, 31, 105 },
{ 52, 11, 85 },
{ 34, 2, 54 },
{ 14, 1, 22 }
}, { /* Coeff Band 2 */
{ 107, 58, 149 },
{ 92, 53, 147 },
{ 78, 14, 123 },
{ 56, 3, 87 },
{ 35, 1, 56 },
{ 17, 1, 27 }
}, { /* Coeff Band 3 */
{ 142, 61, 171 },
{ 111, 30, 162 },
{ 80, 4, 128 },
{ 53, 1, 87 },
{ 31, 1, 52 },
{ 14, 1, 24 }
}, { /* Coeff Band 4 */
{ 171, 73, 200 },
{ 129, 28, 184 },
{ 86, 3, 140 },
{ 54, 1, 90 },
{ 28, 1, 49 },
{ 12, 1, 21 }
}, { /* Coeff Band 5 */
{ 193, 129, 227 },
{ 148, 28, 200 },
{ 90, 2, 144 },
{ 53, 1, 90 },
{ 28, 1, 50 },
{ 13, 1, 22 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 60, 7, 234 },
{ 64, 4, 184 },
{ 56, 1, 104 }
}, { /* Coeff Band 1 */
{ 150, 111, 210 },
{ 87, 185, 202 },
{ 101, 81, 177 },
{ 90, 34, 142 },
{ 67, 11, 95 },
{ 38, 2, 51 }
}, { /* Coeff Band 2 */
{ 153, 139, 218 },
{ 120, 72, 195 },
{ 90, 11, 147 },
{ 63, 3, 101 },
{ 39, 1, 61 },
{ 20, 1, 33 }
}, { /* Coeff Band 3 */
{ 171, 132, 223 },
{ 131, 56, 200 },
{ 92, 6, 147 },
{ 58, 1, 95 },
{ 32, 1, 52 },
{ 14, 1, 23 }
}, { /* Coeff Band 4 */
{ 183, 137, 227 },
{ 139, 48, 204 },
{ 91, 3, 148 },
{ 55, 1, 91 },
{ 28, 1, 47 },
{ 13, 1, 21 }
}, { /* Coeff Band 5 */
{ 198, 149, 234 },
{ 153, 32, 208 },
{ 95, 2, 148 },
{ 55, 1, 90 },
{ 30, 1, 51 },
{ 16, 1, 25 }
}
}
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 7, 209, 217 },
{ 31, 106, 151 },
{ 40, 21, 86 }
}, { /* Coeff Band 1 */
{ 101, 71, 184 },
{ 74, 131, 177 },
{ 88, 50, 158 },
{ 78, 16, 129 },
{ 51, 2, 82 },
{ 18, 1, 29 }
}, { /* Coeff Band 2 */
{ 116, 115, 199 },
{ 102, 88, 191 },
{ 94, 22, 160 },
{ 74, 6, 122 },
{ 47, 1, 77 },
{ 18, 1, 30 }
}, { /* Coeff Band 3 */
{ 157, 124, 210 },
{ 130, 53, 201 },
{ 102, 10, 165 },
{ 73, 1, 120 },
{ 42, 1, 69 },
{ 16, 1, 27 }
}, { /* Coeff Band 4 */
{ 174, 147, 225 },
{ 134, 67, 212 },
{ 100, 10, 168 },
{ 66, 1, 111 },
{ 36, 1, 60 },
{ 16, 1, 27 }
}, { /* Coeff Band 5 */
{ 185, 165, 232 },
{ 147, 56, 214 },
{ 105, 5, 165 },
{ 66, 1, 108 },
{ 35, 1, 59 },
{ 16, 1, 27 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 3, 232, 245 },
{ 18, 162, 210 },
{ 38, 64, 131 }
}, { /* Coeff Band 1 */
{ 84, 187, 239 },
{ 35, 231, 231 },
{ 82, 150, 209 },
{ 87, 97, 181 },
{ 81, 64, 151 },
{ 67, 60, 119 }
}, { /* Coeff Band 2 */
{ 107, 185, 239 },
{ 100, 149, 224 },
{ 107, 34, 185 },
{ 83, 12, 141 },
{ 49, 4, 92 },
{ 21, 1, 40 }
}, { /* Coeff Band 3 */
{ 125, 184, 243 },
{ 121, 127, 228 },
{ 113, 25, 185 },
{ 82, 6, 134 },
{ 48, 1, 82 },
{ 26, 1, 38 }
}, { /* Coeff Band 4 */
{ 143, 185, 245 },
{ 133, 115, 231 },
{ 114, 14, 184 },
{ 77, 3, 126 },
{ 43, 1, 68 },
{ 34, 1, 40 }
}, { /* Coeff Band 5 */
{ 170, 194, 241 },
{ 151, 80, 226 },
{ 118, 9, 180 },
{ 81, 1, 130 },
{ 51, 1, 78 },
{ 18, 1, 49 }
}
}
}
};
static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 29, 42, 137 },
{ 26, 3, 60 },
{ 13, 1, 23 }
}, { /* Coeff Band 1 */
{ 69, 36, 122 },
{ 63, 57, 123 },
{ 60, 33, 112 },
{ 52, 11, 90 },
{ 32, 2, 52 },
{ 10, 1, 15 }
}, { /* Coeff Band 2 */
{ 107, 55, 143 },
{ 86, 69, 143 },
{ 74, 24, 116 },
{ 52, 5, 78 },
{ 29, 1, 44 },
{ 12, 1, 18 }
}, { /* Coeff Band 3 */
{ 137, 71, 160 },
{ 107, 34, 152 },
{ 73, 6, 114 },
{ 44, 1, 69 },
{ 25, 1, 40 },
{ 12, 1, 18 }
}, { /* Coeff Band 4 */
{ 165, 70, 174 },
{ 118, 24, 159 },
{ 74, 3, 117 },
{ 45, 1, 73 },
{ 26, 1, 43 },
{ 12, 1, 19 }
}, { /* Coeff Band 5 */
{ 220, 93, 223 },
{ 153, 10, 187 },
{ 86, 2, 131 },
{ 49, 1, 79 },
{ 26, 1, 43 },
{ 12, 1, 20 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 30, 58, 227 },
{ 35, 10, 172 },
{ 24, 23, 112 }
}, { /* Coeff Band 1 */
{ 117, 145, 219 },
{ 51, 221, 216 },
{ 75, 169, 196 },
{ 88, 96, 165 },
{ 77, 43, 117 },
{ 53, 18, 60 }
}, { /* Coeff Band 2 */
{ 128, 176, 225 },
{ 108, 114, 202 },
{ 92, 19, 152 },
{ 65, 4, 103 },
{ 38, 1, 61 },
{ 19, 1, 30 }
}, { /* Coeff Band 3 */
{ 146, 184, 228 },
{ 122, 95, 205 },
{ 92, 11, 149 },
{ 62, 1, 98 },
{ 35, 1, 57 },
{ 17, 1, 26 }
}, { /* Coeff Band 4 */
{ 165, 192, 230 },
{ 132, 81, 206 },
{ 93, 6, 147 },
{ 58, 1, 94 },
{ 32, 1, 52 },
{ 15, 1, 24 }
}, { /* Coeff Band 5 */
{ 204, 223, 234 },
{ 156, 49, 204 },
{ 97, 3, 145 },
{ 59, 1, 92 },
{ 33, 1, 52 },
{ 15, 1, 24 }
}
}
}, { /* block Type 1 */
{ /* Intra */
{ /* Coeff Band 0 */
{ 7, 184, 200 },
{ 25, 67, 113 },
{ 30, 9, 59 }
}, { /* Coeff Band 1 */
{ 92, 42, 158 },
{ 65, 121, 159 },
{ 77, 56, 146 },
{ 70, 22, 120 },
{ 47, 4, 76 },
{ 18, 1, 26 }
}, { /* Coeff Band 2 */
{ 113, 81, 177 },
{ 96, 75, 167 },
{ 84, 24, 136 },
{ 63, 8, 100 },
{ 37, 1, 58 },
{ 13, 1, 19 }
}, { /* Coeff Band 3 */
{ 147, 85, 194 },
{ 119, 36, 178 },
{ 88, 8, 139 },
{ 59, 1, 93 },
{ 31, 1, 49 },
{ 10, 1, 18 }
}, { /* Coeff Band 4 */
{ 169, 108, 210 },
{ 131, 41, 191 },
{ 92, 5, 144 },
{ 56, 1, 88 },
{ 29, 1, 47 },
{ 14, 1, 22 }
}, { /* Coeff Band 5 */
{ 210, 106, 223 },
{ 148, 14, 192 },
{ 89, 2, 138 },
{ 52, 1, 84 },
{ 29, 1, 47 },
{ 14, 1, 23 }
}
}, { /* Inter */
{ /* Coeff Band 0 */
{ 3, 207, 245 },
{ 12, 102, 213 },
{ 18, 33, 144 }
}, { /* Coeff Band 1 */
{ 85, 205, 245 },
{ 18, 249, 242 },
{ 59, 221, 229 },
{ 91, 166, 213 },
{ 88, 117, 183 },
{ 70, 95, 149 }
}, { /* Coeff Band 2 */
{ 114, 193, 241 },
{ 104, 155, 221 },
{ 100, 33, 181 },
{ 78, 10, 132 },
{ 43, 2, 75 },
{ 15, 1, 48 }
}, { /* Coeff Band 3 */
{ 118, 198, 244 },
{ 117, 142, 224 },
{ 111, 25, 179 },
{ 83, 4, 134 },
{ 57, 1, 84 },
{ 1, 1, 1 }
}, { /* Coeff Band 4 */
{ 144, 201, 248 },
{ 136, 130, 234 },
{ 124, 12, 188 },
{ 83, 1, 130 },
{ 61, 1, 66 },
{ 64, 171, 128 }
}, { /* Coeff Band 5 */
{ 174, 227, 250 },
{ 165, 118, 242 },
{ 132, 21, 197 },
{ 84, 3, 134 },
{ 70, 1, 69 },
{ 1, 1, 1 }
}
}
}
};
#else
static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
{ /* block Type 0 */
{ /* Intra */
@@ -693,4 +1381,4 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
}
}
};
#endif
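
Each innermost triple in the tables above holds only the UNCONSTRAINED_NODES probabilities; the remaining tree nodes are derived from the Pareto model. In sketch form:

/* Expand one 3-probability model entry into the full tree
 * distribution; vp9_model_to_full_probs() is defined in
 * vp9_entropy.c below. */
vp9_prob full[ENTROPY_NODES];
vp9_model_to_full_probs(default_coef_probs_4x4[0][0][1][0], full);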


@@ -15,8 +15,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -52,28 +50,28 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = {
0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]) = {
DECLARE_ALIGNED(16, const int, vp9_default_scan_4x4[16]) = {
0, 4, 1, 5,
8, 2, 12, 9,
3, 6, 13, 10,
7, 14, 11, 15,
};
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]) = {
DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = {
0, 4, 8, 1,
12, 5, 9, 2,
13, 6, 10, 3,
7, 14, 11, 15,
};
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]) = {
DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = {
0, 1, 4, 2,
5, 3, 6, 8,
9, 7, 12, 10,
13, 11, 14, 15,
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]) = {
DECLARE_ALIGNED(64, const int, vp9_default_scan_8x8[64]) = {
0, 8, 1, 16, 9, 2, 17, 24,
10, 3, 18, 25, 32, 11, 4, 26,
33, 19, 40, 12, 34, 27, 5, 41,
@@ -84,7 +82,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]) = {
46, 39, 61, 54, 47, 62, 55, 63,
};
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_8x8[64]) = {
DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = {
0, 8, 16, 1, 24, 9, 32, 17,
2, 40, 25, 10, 33, 18, 48, 3,
26, 41, 11, 56, 19, 34, 4, 49,
@@ -95,7 +93,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_8x8[64]) = {
31, 61, 39, 54, 47, 62, 55, 63,
};
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]) = {
DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = {
0, 1, 2, 8, 9, 3, 16, 10,
4, 17, 11, 24, 5, 18, 25, 12,
19, 26, 32, 6, 13, 20, 33, 27,
@@ -106,7 +104,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]) = {
60, 39, 61, 47, 54, 55, 62, 63,
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]) = {
DECLARE_ALIGNED(16, const int, vp9_default_scan_16x16[256]) = {
0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
@@ -125,7 +123,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]) = {
190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]) = {
DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = {
0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
@@ -144,7 +142,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]) = {
159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]) = {
DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = {
0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
@@ -163,7 +161,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]) = {
190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]) = {
DECLARE_ALIGNED(16, const int, vp9_default_scan_32x32[1024]) = {
0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100,
225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197,
71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136,
@@ -202,8 +200,13 @@ DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]) = {
const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
{
#if CONFIG_BALANCED_COEFTREE
-ZERO_TOKEN, 2, /* 0 = ZERO */
-DCT_EOB_TOKEN, 4, /* 1 = EOB */
#else
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
#endif
-ONE_TOKEN, 6, /* 2 = ONE */
8, 12, /* 3 = LOW_VAL */
-TWO_TOKEN, 10, /* 4 = TWO */
@@ -230,8 +233,13 @@ static const vp9_prob Pcat6[] = {
};
const vp9_tree_index vp9_coefmodel_tree[6] = {
#if CONFIG_BALANCED_COEFTREE
-ZERO_TOKEN, 2,
-DCT_EOB_MODEL_TOKEN, 4,
#else
-DCT_EOB_MODEL_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
#endif
-ONE_TOKEN, -TWO_TOKEN,
};
@@ -244,7 +252,7 @@ const vp9_tree_index vp9_coefmodel_tree[6] = {
// the probabilities for the rest of the nodes.
// beta = 8
static const vp9_prob modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
const vp9_prob vp9_modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
{ 3, 86, 128, 6, 86, 23, 88, 29},
{ 9, 86, 129, 17, 88, 61, 94, 76},
{ 15, 87, 129, 28, 89, 93, 100, 110},
@@ -378,7 +386,8 @@ static const vp9_prob modelcoefprobs_pareto8[COEFPROB_MODELS][MODEL_NODES] = {
static void extend_model_to_full_distribution(vp9_prob p,
vp9_prob *tree_probs) {
const int l = ((p - 1) / 2);
const vp9_prob (*model)[MODEL_NODES] = modelcoefprobs_pareto8;
const vp9_prob (*model)[MODEL_NODES];
model = vp9_modelcoefprobs_pareto8;
if (p & 1) {
vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
model[l], MODEL_NODES * sizeof(vp9_prob));
@@ -397,6 +406,16 @@ void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) {
extend_model_to_full_distribution(model[PIVOT_NODE], full);
}
void vp9_model_to_full_probs_sb(
vp9_prob model[COEF_BANDS][PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES],
vp9_prob full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) {
int c, p;
for (c = 0; c < COEF_BANDS; ++c)
for (p = 0; p < PREV_COEF_CONTEXTS; ++p) {
vp9_model_to_full_probs(model[c][p], full[c][p]);
}
}
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28];
static void init_bit_tree(vp9_tree_index *p, int n) {
@@ -419,7 +438,7 @@ static void init_bit_trees() {
init_bit_tree(cat6, 14);
}
const vp9_extra_bit vp9_extra_bits[12] = {
vp9_extra_bit vp9_extra_bits[12] = {
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
@@ -436,50 +455,69 @@ const vp9_extra_bit vp9_extra_bits[12] = {
#include "vp9/common/vp9_default_coef_probs.h"
// This function updates and then returns the AC coefficient context.
// This is currently a placeholder function to allow experimentation
// using various context models based on the energy of earlier tokens
// within the current block.
//
// For now it just returns the previously used context.
#define MAX_NEIGHBORS 2
int vp9_get_coef_context(const int *scan, const int *neighbors,
int nb_pad, uint8_t *token_cache, int c, int l) {
int eob = l;
assert(nb_pad == MAX_NEIGHBORS);
if (c == eob) {
return 0;
} else {
int ctx;
assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
} else {
ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
}
return ctx;
}
}
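
A worked example of the context rule (cache contents hypothetical):

/* With both neighbors valid and cached energy classes 2 and 3, the
 * context is (1 + 2 + 3) >> 1 == 3; with a single valid neighbor of
 * class 2, the context is simply 2; at c == eob it is always 0. */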
void vp9_default_coef_probs(VP9_COMMON *pc) {
vp9_copy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
vp9_copy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
vp9_copy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
vp9_copy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4,
sizeof(pc->fc.coef_probs[TX_4X4]));
vpx_memcpy(pc->fc.coef_probs[TX_8X8], default_coef_probs_8x8,
sizeof(pc->fc.coef_probs[TX_8X8]));
vpx_memcpy(pc->fc.coef_probs[TX_16X16], default_coef_probs_16x16,
sizeof(pc->fc.coef_probs[TX_16X16]));
vpx_memcpy(pc->fc.coef_probs[TX_32X32], default_coef_probs_32x32,
sizeof(pc->fc.coef_probs[TX_32X32]));
}
// Neighborhood 2-tuples for various scans and block sizes,
// in {top, left} order
// for each position in raster scan order.
// -1 indicates the neighbor does not exist.
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_default_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_default_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_default_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_default_scan_32x32_neighbors[1024 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]);
DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]);
DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_4x4[16]);
DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_8x8[64]);
DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_8x8[64]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_8x8[64]);
DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_16x16[256]);
DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_16x16[256]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_16x16[256]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_32x32[1024]);
static int find_in_scan(const int16_t *scan, int l, int idx) {
static int find_in_scan(const int *scan, int l, int idx) {
int n, l2 = l * l;
for (n = 0; n < l2; n++) {
int rc = scan[n];
@@ -489,19 +527,14 @@ static int find_in_scan(const int16_t *scan, int l, int idx) {
assert(0);
return -1;
}
static void init_scan_neighbors(const int16_t *scan,
int16_t *iscan,
int l, int16_t *neighbors) {
static void init_scan_neighbors(const int *scan, int l, int *neighbors,
int max_neighbors) {
int l2 = l * l;
int n, i, j;
// dc doesn't use this type of prediction
neighbors[MAX_NEIGHBORS * 0 + 0] = 0;
neighbors[MAX_NEIGHBORS * 0 + 1] = 0;
iscan[0] = find_in_scan(scan, l, 0);
for (n = 1; n < l2; n++) {
for (n = 0; n < l2; n++) {
int rc = scan[n];
iscan[n] = find_in_scan(scan, l, n);
assert(max_neighbors == MAX_NEIGHBORS);
i = rc / l;
j = rc % l;
if (i > 0 && j > 0) {
@@ -513,84 +546,93 @@ static void init_scan_neighbors(const int16_t *scan,
// Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff
// as a context. If ADST or DCT is used in both directions, we
// use the combination of the two as a context.
int a = (i - 1) * l + j;
int b = i * l + j - 1;
int a = find_in_scan(scan, l, (i - 1) * l + j);
int b = find_in_scan(scan, l, i * l + j - 1);
if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 ||
scan == vp9_col_scan_16x16) {
// in the col/row scan cases (as well as left/top edge cases), we set
// both contexts to the same value, so we can branchlessly do a+b+1>>1
// which automatically becomes a if a == b
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = a;
neighbors[max_neighbors * n + 0] = a;
neighbors[max_neighbors * n + 1] = -1;
} else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 ||
scan == vp9_row_scan_16x16) {
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = b;
neighbors[max_neighbors * n + 0] = b;
neighbors[max_neighbors * n + 1] = -1;
} else {
neighbors[MAX_NEIGHBORS * n + 0] = a;
neighbors[MAX_NEIGHBORS * n + 1] = b;
neighbors[max_neighbors * n + 0] = a;
neighbors[max_neighbors * n + 1] = b;
}
} else if (i > 0) {
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = (i - 1) * l + j;
neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j);
neighbors[max_neighbors * n + 1] = -1;
} else if (j > 0) {
neighbors[max_neighbors * n + 0] =
find_in_scan(scan, l, i * l + j - 1);
neighbors[max_neighbors * n + 1] = -1;
} else {
assert(j > 0);
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = i * l + j - 1;
assert(n == 0);
// dc predictor doesn't use previous tokens
neighbors[max_neighbors * n + 0] = -1;
}
assert(iscan[neighbors[MAX_NEIGHBORS * n + 0]] < n);
assert(neighbors[max_neighbors * n + 0] < n);
}
// one padding item so we don't have to add branches in code to handle
// calls to get_coef_context() for the token after the final dc token
neighbors[MAX_NEIGHBORS * l2 + 0] = 0;
neighbors[MAX_NEIGHBORS * l2 + 1] = 0;
}
void vp9_init_neighbors() {
init_scan_neighbors(vp9_default_scan_4x4, vp9_default_iscan_4x4, 4,
vp9_default_scan_4x4_neighbors);
init_scan_neighbors(vp9_row_scan_4x4, vp9_row_iscan_4x4, 4,
vp9_row_scan_4x4_neighbors);
init_scan_neighbors(vp9_col_scan_4x4, vp9_col_iscan_4x4, 4,
vp9_col_scan_4x4_neighbors);
init_scan_neighbors(vp9_default_scan_8x8, vp9_default_iscan_8x8, 8,
vp9_default_scan_8x8_neighbors);
init_scan_neighbors(vp9_row_scan_8x8, vp9_row_iscan_8x8, 8,
vp9_row_scan_8x8_neighbors);
init_scan_neighbors(vp9_col_scan_8x8, vp9_col_iscan_8x8, 8,
vp9_col_scan_8x8_neighbors);
init_scan_neighbors(vp9_default_scan_16x16, vp9_default_iscan_16x16, 16,
vp9_default_scan_16x16_neighbors);
init_scan_neighbors(vp9_row_scan_16x16, vp9_row_iscan_16x16, 16,
vp9_row_scan_16x16_neighbors);
init_scan_neighbors(vp9_col_scan_16x16, vp9_col_iscan_16x16, 16,
vp9_col_scan_16x16_neighbors);
init_scan_neighbors(vp9_default_scan_32x32, vp9_default_iscan_32x32, 32,
vp9_default_scan_32x32_neighbors);
init_scan_neighbors(vp9_default_scan_4x4, 4,
vp9_default_scan_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_row_scan_4x4, 4,
vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_col_scan_4x4, 4,
vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_scan_8x8, 8,
vp9_default_scan_8x8_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_row_scan_8x8, 8,
vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_col_scan_8x8, 8,
vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_scan_16x16, 16,
vp9_default_scan_16x16_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_row_scan_16x16, 16,
vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_col_scan_16x16, 16,
vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_scan_32x32, 32,
vp9_default_scan_32x32_neighbors, MAX_NEIGHBORS);
}
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad) {
if (scan == vp9_default_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_4x4_neighbors;
} else if (scan == vp9_row_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_row_scan_4x4_neighbors;
} else if (scan == vp9_col_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_col_scan_4x4_neighbors;
} else if (scan == vp9_default_scan_8x8) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_8x8_neighbors;
} else if (scan == vp9_row_scan_8x8) {
*pad = 2;
return vp9_row_scan_8x8_neighbors;
} else if (scan == vp9_col_scan_8x8) {
*pad = 2;
return vp9_col_scan_8x8_neighbors;
} else if (scan == vp9_default_scan_16x16) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_16x16_neighbors;
} else if (scan == vp9_row_scan_16x16) {
*pad = 2;
return vp9_row_scan_16x16_neighbors;
} else if (scan == vp9_col_scan_16x16) {
*pad = 2;
return vp9_col_scan_16x16_neighbors;
} else {
assert(scan == vp9_default_scan_32x32);
} else if (scan == vp9_default_scan_32x32) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_32x32_neighbors;
} else {
assert(0);
return NULL;
}
}
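
Usage shape (sketch): callers recover both the neighbor table and its pad for a given scan order:

int pad;
const int *nb = vp9_get_coef_neighbors_handle(vp9_default_scan_8x8, &pad);
/* pad == MAX_NEIGHBORS (2) here; the row/col 8x8 and 16x16 scans
 * likewise report 2. */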
@@ -609,17 +651,40 @@ void vp9_coef_tree_initialize() {
#define COEF_COUNT_SAT_AFTER_KEY 24
#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
unsigned int count_sat,
unsigned int update_factor) {
FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
void vp9_full_to_model_count(unsigned int *model_count,
unsigned int *full_count) {
int n;
model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n)
model_count[TWO_TOKEN] += full_count[n];
model_count[DCT_EOB_MODEL_TOKEN] = full_count[DCT_EOB_TOKEN];
}
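
A small worked example of the folding (counts hypothetical):

/* full: ZERO=10, ONE=5, TWO=3, THREE=2, FOUR=1, all CAT tokens 0,
 * EOB=7  =>  model: ZERO=10, ONE=5, TWO=3+2+1=6, EOB=7; every token
 * >= THREE_TOKEN lands in the TWO_TOKEN bin. */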
vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[tx_size];
vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[tx_size];
vp9_coeff_count_model *coef_counts = cm->counts.coef[tx_size];
void vp9_full_to_model_counts(
vp9_coeff_count_model *model_count, vp9_coeff_count *full_count) {
int i, j, k, l;
for (i = 0; i < BLOCK_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
if (l >= 3 && k == 0)
continue;
vp9_full_to_model_count(model_count[i][j][k][l],
full_count[i][j][k][l]);
}
}
static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE txfm_size,
int count_sat, int update_factor) {
vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[txfm_size];
vp9_coeff_probs_model *pre_coef_probs = cm->fc.pre_coef_probs[txfm_size];
vp9_coeff_count_model *coef_counts = cm->fc.coef_counts[txfm_size];
unsigned int (*eob_branch_count)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] =
cm->counts.eob_branch[tx_size];
int t, i, j, k, l;
cm->fc.eob_branch_counts[txfm_size];
int t, i, j, k, l, count;
int factor;
unsigned int branch_ct[UNCONSTRAINED_NODES][2];
vp9_prob coef_probs[UNCONSTRAINED_NODES];
int entropy_nodes_adapt = UNCONSTRAINED_NODES;
@@ -630,23 +695,34 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
if (l >= 3 && k == 0)
continue;
vp9_tree_probs_from_distribution(vp9_coefmodel_tree, coef_probs,
branch_ct, coef_counts[i][j][k][l],
0);
vp9_tree_probs_from_distribution(
vp9_coefmodel_tree,
coef_probs, branch_ct,
coef_counts[i][j][k][l], 0);
#if CONFIG_BALANCED_COEFTREE
branch_ct[1][1] = eob_branch_count[i][j][k][l] - branch_ct[1][0];
coef_probs[1] = get_binary_prob(branch_ct[1][0], branch_ct[1][1]);
#else
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
for (t = 0; t < entropy_nodes_adapt; ++t)
dst_coef_probs[i][j][k][l][t] = merge_probs(
pre_coef_probs[i][j][k][l][t], coef_probs[t],
branch_ct[t], count_sat, update_factor);
#endif
for (t = 0; t < entropy_nodes_adapt; ++t) {
count = branch_ct[t][0] + branch_ct[t][1];
count = count > count_sat ? count_sat : count;
factor = (update_factor * count / count_sat);
dst_coef_probs[i][j][k][l][t] =
weighted_prob(pre_coef_probs[i][j][k][l][t],
coef_probs[t], factor);
}
}
}
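
Both variants above funnel into the same update rule: saturate the branch count at count_sat, scale update_factor proportionally, then blend the previous and freshly measured probabilities in 1/256 units. A self-contained sketch, assuming the helpers behave as in libvpx (8-bit probabilities, rounded blend); the constants and counts are illustrative:

#include <stdio.h>

typedef unsigned char vp9_prob;

static vp9_prob binary_prob(unsigned n0, unsigned n1) {
  const unsigned den = n0 + n1;
  unsigned p = den ? (n0 * 256 + (den >> 1)) / den : 128;
  if (p < 1) p = 1;
  if (p > 255) p = 255;
  return (vp9_prob)p;
}

static vp9_prob blend_prob(vp9_prob pre, vp9_prob cur, int factor) {
  /* Assumed to match weighted_prob(): rounded 8-bit interpolation. */
  return (vp9_prob)((pre * (256 - factor) + cur * factor + 128) >> 8);
}

int main(void) {
  const unsigned ct[2] = { 30, 10 };   /* branch counts, illustrative   */
  const unsigned count_sat = 24;       /* cf. COEF_COUNT_SAT_AFTER_KEY  */
  const unsigned update_factor = 128;  /* cf. ..._UPDATE_FACTOR_AFTER_KEY */
  unsigned count = ct[0] + ct[1];
  int factor;
  if (count > count_sat)
    count = count_sat;                 /* saturate */
  factor = (int)(update_factor * count / count_sat);
  printf("adapted = %u\n", blend_prob(200, binary_prob(ct[0], ct[1]), factor));
  return 0;
}
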
void vp9_adapt_coef_probs(VP9_COMMON *cm) {
TX_SIZE t;
unsigned int count_sat, update_factor;
int count_sat;
int update_factor; /* denominator 256 */
if (cm->frame_type == KEY_FRAME || cm->intra_only) {
if ((cm->frame_type == KEY_FRAME) || cm->intra_only) {
update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
count_sat = COEF_COUNT_SAT_KEY;
} else if (cm->last_frame_type == KEY_FRAME) {


@@ -50,7 +50,9 @@ typedef struct {
int base_val;
} vp9_extra_bit;
extern const vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
extern vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
#define PROB_UPDATE_BASELINE_COST 7
#define MAX_PROB 255
#define DCT_MAX_VALUE 16384
@@ -80,6 +82,7 @@ extern const vp9_extra_bit vp9_extra_bits[12]; /* indexed by token value */
coefficient band (and since zigzag positions 0, 1, and 2 are in
distinct bands). */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
#define PREV_COEF_CONTEXTS 6
// #define ENTROPY_STATS
@@ -96,62 +99,22 @@ typedef vp9_prob vp9_coeff_probs[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
struct VP9Common;
void vp9_default_coef_probs(struct VP9Common *);
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int, vp9_default_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_8x8[64]);
extern DECLARE_ALIGNED(64, const int, vp9_default_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int, vp9_default_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]);
extern DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]);
extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]);
extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]);
extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_4x4[16]);
extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_8x8[64]);
extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_8x8[64]);
extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_8x8[64]);
extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_16x16[256]);
extern DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_16x16[256]);
extern DECLARE_ALIGNED(16, int16_t, vp9_row_iscan_16x16[256]);
extern DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_32x32[1024]);
#define MAX_NEIGHBORS 2
extern DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
extern DECLARE_ALIGNED(16, const int, vp9_default_scan_32x32[1024]);
void vp9_coef_tree_initialize(void);
void vp9_adapt_coef_probs(struct VP9Common *);
@@ -185,14 +148,9 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) {
? (COEF_BANDS-1) : band_translate[coef_index];
}
static INLINE int get_coef_context(const int16_t *neighbors,
uint8_t *token_cache,
int c) {
return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
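
The replacement context derivation is tiny: average the token_cache values of the two already-coded neighbors of scan position c, with a +1 bias before the halving. A runnable restatement (the neighbor table and cache contents below are made up):

#include <stdio.h>
#include <stdint.h>

#define MAX_NEIGHBORS 2

/* Mirrors the inline above: ctx = (1 + cache[nb0] + cache[nb1]) >> 1. */
static int coef_context(const int16_t *neighbors,
                        const uint8_t *token_cache, int c) {
  return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
          token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}

int main(void) {
  /* Position 2 looks at the cached token magnitudes of positions 0 and 1. */
  const int16_t neighbors[] = { 0, 0,  0, 0,  0, 1 };
  const uint8_t token_cache[] = { 2, 1 };
  printf("ctx = %d\n", coef_context(neighbors, token_cache, 2));
  /* (1 + 2 + 1) >> 1 = 2 */
  return 0;
}
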
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
extern int vp9_get_coef_context(const int *scan, const int *neighbors,
int nb_pad, uint8_t *token_cache, int c, int l);
const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);
// 128 lists of probabilities are stored for the following ONE node probs:
@@ -202,6 +160,7 @@ const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
#define COEFPROB_MODELS 128
#define UNCONSTRAINED_NODES 3
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
#define PIVOT_NODE 2 // which node is pivot
@@ -215,10 +174,20 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
typedef unsigned int vp9_coeff_stats_model[REF_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS]
[UNCONSTRAINED_NODES][2];
extern void vp9_full_to_model_count(unsigned int *model_count,
unsigned int *full_count);
extern void vp9_full_to_model_counts(
vp9_coeff_count_model *model_count, vp9_coeff_count *full_count);
void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
static INLINE const int16_t* get_scan_4x4(TX_TYPE tx_type) {
void vp9_model_to_full_probs_sb(
vp9_prob model[COEF_BANDS][PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES],
vp9_prob full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]);
extern const vp9_prob vp9_modelcoefprobs[COEFPROB_MODELS][ENTROPY_NODES - 1];
static INLINE const int* get_scan_4x4(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_scan_4x4;
@@ -229,36 +198,7 @@ static INLINE const int16_t* get_scan_4x4(TX_TYPE tx_type) {
}
}
static INLINE void get_scan_nb_4x4(TX_TYPE tx_type,
const int16_t **scan, const int16_t **nb) {
switch (tx_type) {
case ADST_DCT:
*scan = vp9_row_scan_4x4;
*nb = vp9_row_scan_4x4_neighbors;
break;
case DCT_ADST:
*scan = vp9_col_scan_4x4;
*nb = vp9_col_scan_4x4_neighbors;
break;
default:
*scan = vp9_default_scan_4x4;
*nb = vp9_default_scan_4x4_neighbors;
break;
}
}
static INLINE const int16_t* get_iscan_4x4(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_iscan_4x4;
case DCT_ADST:
return vp9_col_iscan_4x4;
default:
return vp9_default_iscan_4x4;
}
}
static INLINE const int16_t* get_scan_8x8(TX_TYPE tx_type) {
static INLINE const int* get_scan_8x8(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_scan_8x8;
@@ -269,36 +209,7 @@ static INLINE const int16_t* get_scan_8x8(TX_TYPE tx_type) {
}
}
static INLINE void get_scan_nb_8x8(TX_TYPE tx_type,
const int16_t **scan, const int16_t **nb) {
switch (tx_type) {
case ADST_DCT:
*scan = vp9_row_scan_8x8;
*nb = vp9_row_scan_8x8_neighbors;
break;
case DCT_ADST:
*scan = vp9_col_scan_8x8;
*nb = vp9_col_scan_8x8_neighbors;
break;
default:
*scan = vp9_default_scan_8x8;
*nb = vp9_default_scan_8x8_neighbors;
break;
}
}
static INLINE const int16_t* get_iscan_8x8(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_iscan_8x8;
case DCT_ADST:
return vp9_col_iscan_8x8;
default:
return vp9_default_iscan_8x8;
}
}
static INLINE const int16_t* get_scan_16x16(TX_TYPE tx_type) {
static INLINE const int* get_scan_16x16(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_scan_16x16;
@@ -309,35 +220,6 @@ static INLINE const int16_t* get_scan_16x16(TX_TYPE tx_type) {
}
}
static INLINE void get_scan_nb_16x16(TX_TYPE tx_type,
const int16_t **scan, const int16_t **nb) {
switch (tx_type) {
case ADST_DCT:
*scan = vp9_row_scan_16x16;
*nb = vp9_row_scan_16x16_neighbors;
break;
case DCT_ADST:
*scan = vp9_col_scan_16x16;
*nb = vp9_col_scan_16x16_neighbors;
break;
default:
*scan = vp9_default_scan_16x16;
*nb = vp9_default_scan_16x16_neighbors;
break;
}
}
static INLINE const int16_t* get_iscan_16x16(TX_TYPE tx_type) {
switch (tx_type) {
case ADST_DCT:
return vp9_row_iscan_16x16;
case DCT_ADST:
return vp9_col_iscan_16x16;
default:
return vp9_default_iscan_16x16;
}
}
enum { VP9_COEF_UPDATE_PROB = 252 };
#endif // VP9_COMMON_VP9_ENTROPY_H_



@@ -8,14 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_modecont.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_seg_common.h"
const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1] = {
static const vp9_prob default_kf_uv_probs[VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1] = {
{ 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */,
{ 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */,
{ 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */,
@@ -50,9 +51,8 @@ static const vp9_prob default_if_uv_probs[VP9_INTRA_MODES]
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */
};
static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
[NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
{ /* frame_type = keyframe */
/* 8x8 -> 4x4 */
{ 158, 97, 94 } /* a/l both not split */,
@@ -98,143 +98,6 @@ static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
}
};
const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES]
[VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1] = {
{ /* above = dc */
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
{ 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */,
{ 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */,
{ 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */,
{ 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */,
{ 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */,
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */,
{ 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */,
{ 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */
}, { /* above = v */
{ 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */,
{ 43, 46, 168, 134, 107, 128, 69, 142, 92 } /* left = v */,
{ 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */,
{ 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */,
{ 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */,
{ 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */,
{ 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */,
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */,
{ 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */,
{ 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */
}, { /* above = h */
{ 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */,
{ 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */,
{ 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */,
{ 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */,
{ 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */,
{ 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */,
{ 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */,
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */,
{ 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */,
{ 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */
}, { /* above = d45 */
{ 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */,
{ 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */,
{ 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */,
{ 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */,
{ 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */,
{ 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */,
{ 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */,
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */,
{ 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */,
{ 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */
}, { /* above = d135 */
{ 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */,
{ 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */,
{ 40, 29, 19, 161, 180, 207, 43, 24, 91 } /* left = h */,
{ 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */,
{ 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */,
{ 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */,
{ 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */,
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */,
{ 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */,
{ 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */
}, { /* above = d117 */
{ 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */,
{ 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */,
{ 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */,
{ 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */,
{ 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */,
{ 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */,
{ 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */,
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */,
{ 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */,
{ 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */
}, { /* above = d153 */
{ 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */,
{ 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */,
{ 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */,
{ 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */,
{ 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */,
{ 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */,
{ 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */,
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */,
{ 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */,
{ 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */
}, { /* above = d27 */
{ 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */,
{ 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */,
{ 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */,
{ 68, 36, 17, 106, 102, 206, 59, 74, 74 } /* left = d45 */,
{ 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */,
{ 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */,
{ 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */,
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */,
{ 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */,
{ 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */
}, { /* above = d63 */
{ 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */,
{ 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */,
{ 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */,
{ 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */,
{ 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */,
{ 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */,
{ 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */,
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */,
{ 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */,
{ 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */
}, { /* above = tm */
{ 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */,
{ 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */,
{ 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */,
{ 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */,
{ 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */,
{ 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */,
{ 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */,
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */,
{ 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */,
{ 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */
}
};
#if CONFIG_FILTERINTRA
const vp9_prob vp9_default_filterintra_prob[TX_SIZES][VP9_INTRA_MODES] = {
// DC V H D45 D135 D117 D153 D27 D63 TM
{160, 153, 171, 160, 140, 117, 115, 160, 160, 116}, // TX_4X4
{180, 151, 191, 180, 118, 66, 97, 180, 180, 120}, // TX_8X8
{200, 200, 200, 200, 200, 200, 200, 200, 200, 200}, // TX_16X16
{220, 220, 220, 220, 220, 220, 220, 220, 220, 220}, // TX_32X32
};
#endif
static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
[VP9_INTER_MODES - 1] = {
{2, 173, 34}, // 0 = both zero mv
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
{7, 166, 63}, // 2 = two predicted mvs
{7, 94, 66}, // 3 = one predicted/zero and one new mv
{8, 64, 46}, // 4 = two new mvs
{17, 81, 31}, // 5 = one intra neighbour + x
{25, 29, 30}, // 6 = two intra neighbours
};
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
-DC_PRED, 2, /* 0 = DC_NODE */
@@ -248,7 +111,7 @@ const vp9_tree_index vp9_intra_mode_tree[VP9_INTRA_MODES * 2 - 2] = {
-D153_PRED, -D27_PRED /* 8 = D153_NODE */
};
const vp9_tree_index vp9_inter_mode_tree[6] = {
const vp9_tree_index vp9_sb_mv_ref_tree[6] = {
-ZEROMV, 2,
-NEARESTMV, 4,
-NEARMV, -NEWMV
@@ -261,7 +124,8 @@ const vp9_tree_index vp9_partition_tree[6] = {
};
struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_INTER_MODES];
struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
@@ -285,15 +149,20 @@ static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = {
{ 238, 247 }
};
static const struct tx_probs default_tx_probs = {
{ { 3, 136, 37 },
{ 5, 52, 13 } },
{ { 20, 152 },
{ 15, 101 } },
{ { 100 },
{ 66 } }
const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 1] = {
{ 3, 136, 37, },
{ 5, 52, 13, },
};
const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 2] = {
{ 20, 152, },
{ 15, 101, },
};
const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 3] = {
{ 100, },
{ 66, },
};
void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
@@ -312,96 +181,144 @@ void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
unsigned int (*ct_16x16p)[2]) {
ct_16x16p[0][0] = tx_count_16x16p[TX_4X4];
ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16];
ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] +
tx_count_16x16p[TX_16X16];
ct_16x16p[1][0] = tx_count_16x16p[TX_8X8];
ct_16x16p[1][1] = tx_count_16x16p[TX_16X16];
}
void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]) {
ct_8x8p[0][0] = tx_count_8x8p[TX_4X4];
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
ct_8x8p[0][0] = tx_count_8x8p[TX_4X4];
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
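
The tx_counts_to_branch_counts_* helpers reshape a histogram over the usable transform sizes into one binary count pair per node of the tx-size tree; for the 16x16 case, node 0 is "4x4 vs. larger" and node 1 is "8x8 vs. 16x16". A sketch with made-up counts:

#include <stdio.h>

int main(void) {
  /* Illustrative per-block counts of the chosen transform size. */
  const unsigned tx_count[3] = { 50, 30, 20 }; /* 4x4, 8x8, 16x16 */
  unsigned ct[2][2];
  ct[0][0] = tx_count[0];               /* node 0: stopped at 4x4 */
  ct[0][1] = tx_count[1] + tx_count[2]; /* node 0: went larger    */
  ct[1][0] = tx_count[1];               /* node 1: stopped at 8x8 */
  ct[1][1] = tx_count[2];               /* node 1: chose 16x16    */
  printf("%u/%u  %u/%u\n", ct[0][0], ct[0][1], ct[1][0], ct[1][1]);
  return 0;
}
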
static const vp9_prob default_mbskip_probs[MBSKIP_CONTEXTS] = {
const vp9_prob vp9_default_mbskip_probs[MBSKIP_CONTEXTS] = {
192, 128, 64
};
static const vp9_prob default_switchable_interp_prob[VP9_SWITCHABLE_FILTERS+1]
[VP9_SWITCHABLE_FILTERS-1] = {
void vp9_init_mbmode_probs(VP9_COMMON *x) {
vpx_memcpy(x->fc.uv_mode_prob, default_if_uv_probs,
sizeof(default_if_uv_probs));
vpx_memcpy(x->kf_uv_mode_prob, default_kf_uv_probs,
sizeof(default_kf_uv_probs));
vpx_memcpy(x->fc.y_mode_prob, default_if_y_probs,
sizeof(default_if_y_probs));
vpx_memcpy(x->fc.switchable_interp_prob, vp9_switchable_interp_prob,
sizeof(vp9_switchable_interp_prob));
vpx_memcpy(x->fc.partition_prob, vp9_partition_probs,
sizeof(vp9_partition_probs));
vpx_memcpy(x->fc.intra_inter_prob, default_intra_inter_p,
sizeof(default_intra_inter_p));
vpx_memcpy(x->fc.comp_inter_prob, default_comp_inter_p,
sizeof(default_comp_inter_p));
vpx_memcpy(x->fc.comp_ref_prob, default_comp_ref_p,
sizeof(default_comp_ref_p));
vpx_memcpy(x->fc.single_ref_prob, default_single_ref_p,
sizeof(default_single_ref_p));
vpx_memcpy(x->fc.tx_probs_32x32p, vp9_default_tx_probs_32x32p,
sizeof(vp9_default_tx_probs_32x32p));
vpx_memcpy(x->fc.tx_probs_16x16p, vp9_default_tx_probs_16x16p,
sizeof(vp9_default_tx_probs_16x16p));
vpx_memcpy(x->fc.tx_probs_8x8p, vp9_default_tx_probs_8x8p,
sizeof(vp9_default_tx_probs_8x8p));
vpx_memcpy(x->fc.mbskip_probs, vp9_default_mbskip_probs,
sizeof(vp9_default_mbskip_probs));
}
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
-0, 2,
-1, -2
};
struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = {
EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP};
const int vp9_switchable_interp_map[SWITCHABLE+1] = {1, 0, 2, -1, -1};
const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1]
[VP9_SWITCHABLE_FILTERS-1] = {
{ 235, 162, },
{ 36, 255, },
{ 34, 3, },
{ 149, 144, },
};
#if CONFIG_INTERINTRA
static const vp9_prob default_interintra_prob[BLOCK_SIZE_TYPES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#if CONFIG_MASKED_INTERINTRA
static const vp9_prob default_masked_interintra_prob[BLOCK_SIZE_TYPES] = {
// 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
#endif
#if CONFIG_MASKED_INTERINTER
static const vp9_prob default_masked_interinter_prob[BLOCK_SIZE_TYPES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
#endif
void vp9_init_mbmode_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.uv_mode_prob, default_if_uv_probs);
vp9_copy(cm->fc.y_mode_prob, default_if_y_probs);
vp9_copy(cm->fc.switchable_interp_prob, default_switchable_interp_prob);
vp9_copy(cm->fc.partition_prob, default_partition_probs);
vp9_copy(cm->fc.intra_inter_prob, default_intra_inter_p);
vp9_copy(cm->fc.comp_inter_prob, default_comp_inter_p);
vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p);
vp9_copy(cm->fc.single_ref_prob, default_single_ref_p);
cm->fc.tx_probs = default_tx_probs;
vp9_copy(cm->fc.mbskip_probs, default_mbskip_probs);
#if CONFIG_INTERINTRA
vp9_copy(cm->fc.interintra_prob, default_interintra_prob);
#if CONFIG_MASKED_INTERINTRA
vp9_copy(cm->fc.masked_interintra_prob, default_masked_interintra_prob);
#endif
#endif
#if CONFIG_FILTERINTRA
vp9_copy(cm->fc.filterintra_prob, vp9_default_filterintra_prob);
#endif
#if CONFIG_MASKED_INTERINTER
vp9_copy(cm->fc.masked_compound_prob, default_masked_interinter_prob);
#endif
}
const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
};
struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
// Indicates if the filter is interpolating or non-interpolating
const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {1, 1, 1, 1, -1};
void vp9_entropy_mode_init() {
vp9_tokens_from_tree(vp9_intra_mode_encodings, vp9_intra_mode_tree);
vp9_tokens_from_tree(vp9_switchable_interp_encodings,
vp9_switchable_interp_tree);
vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree);
vp9_tokens_from_tree_offset(vp9_inter_mode_encodings,
vp9_inter_mode_tree, NEARESTMV);
vp9_tokens_from_tree_offset(vp9_sb_mv_ref_encoding_array,
vp9_sb_mv_ref_tree, NEARESTMV);
}
#define COUNT_SAT 20
#define MAX_UPDATE_FACTOR 128
static int update_ct(vp9_prob pre_prob, vp9_prob prob, unsigned int ct[2]) {
return merge_probs(pre_prob, prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
void vp9_init_mode_contexts(VP9_COMMON *pc) {
vpx_memset(pc->fc.inter_mode_counts, 0, sizeof(pc->fc.inter_mode_counts));
vpx_memcpy(pc->fc.inter_mode_probs,
vp9_default_inter_mode_probs,
sizeof(vp9_default_inter_mode_probs));
}
static int update_ct2(vp9_prob pre_prob, unsigned int ct[2]) {
return merge_probs2(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
void vp9_accum_mv_refs(VP9_COMMON *pc,
MB_PREDICTION_MODE m,
const int context) {
unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
pc->fc.inter_mode_counts;
if (m == ZEROMV) {
++inter_mode_counts[context][0][0];
} else {
++inter_mode_counts[context][0][1];
if (m == NEARESTMV) {
++inter_mode_counts[context][1][0];
} else {
++inter_mode_counts[context][1][1];
if (m == NEARMV) {
++inter_mode_counts[context][2][0];
} else {
++inter_mode_counts[context][2][1];
}
}
}
}
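
The nest above is a manual walk down the inter-mode tree: each level records one binary event, stopping at the leaf of the coded mode (ZEROMV first, then NEARESTMV, then NEARMV vs. NEWMV). The same walk as a loop, as a sketch with local mode indices 0..3 standing in for the real enum values:

/* cnt[node][0] = took the leaf at this node, cnt[node][1] = went deeper. */
static void accum_mode(unsigned cnt[3][2], int mode /* 0..3 */) {
  int node;
  for (node = 0; node < 3; ++node) {
    if (mode == node) {
      ++cnt[node][0];
      return;
    }
    ++cnt[node][1];
  }
}
/* accum_mode(c, 2) for NEARMV bumps c[0][1], c[1][1], c[2][0];
   mode 3 (NEWMV) ends at c[2][1], matching the function above. */
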
#define MVREF_COUNT_SAT 20
#define MVREF_MAX_UPDATE_FACTOR 128
void vp9_adapt_mode_context(VP9_COMMON *pc) {
int i, j;
unsigned int (*inter_mode_counts)[VP9_INTER_MODES - 1][2] =
pc->fc.inter_mode_counts;
vp9_prob (*mode_context)[VP9_INTER_MODES - 1] = pc->fc.inter_mode_probs;
for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
for (i = 0; i < VP9_INTER_MODES - 1; i++) {
int count = inter_mode_counts[j][i][0] + inter_mode_counts[j][i][1];
int factor;
count = count > MVREF_COUNT_SAT ? MVREF_COUNT_SAT : count;
factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT);
mode_context[j][i] = weighted_prob(
pc->fc.pre_inter_mode_probs[j][i],
get_binary_prob(inter_mode_counts[j][i][0],
inter_mode_counts[j][i][1]),
factor);
}
}
}
#define MODE_COUNT_SAT 20
#define MODE_MAX_UPDATE_FACTOR 128
static int update_mode_ct(vp9_prob pre_prob, vp9_prob prob,
unsigned int branch_ct[2]) {
int factor, count = branch_ct[0] + branch_ct[1];
count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
return weighted_prob(pre_prob, prob, factor);
}
static void update_mode_probs(int n_modes,
@@ -416,170 +333,189 @@ static void update_mode_probs(int n_modes,
assert(n_modes - 1 < MAX_PROBS);
vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset);
for (t = 0; t < n_modes - 1; ++t)
dst_probs[t] = update_ct(pre_probs[t], probs[t], branch_ct[t]);
dst_probs[t] = update_mode_ct(pre_probs[t], probs[t], branch_ct[t]);
}
static int update_mode_ct2(vp9_prob pre_prob, unsigned int branch_ct[2]) {
return update_mode_ct(pre_prob, get_binary_prob(branch_ct[0],
branch_ct[1]), branch_ct);
}
// #define MODE_COUNT_TESTING
void vp9_adapt_mode_probs(VP9_COMMON *cm) {
int i, j;
FRAME_CONTEXT *fc = &cm->fc;
FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
FRAME_COUNTS *counts = &cm->counts;
#ifdef MODE_COUNT_TESTING
int t;
printf("static const unsigned int\nymode_counts"
"[VP9_INTRA_MODES] = {\n");
for (t = 0; t < VP9_INTRA_MODES; ++t)
printf("%d, ", fc->ymode_counts[t]);
printf("};\n");
printf("static const unsigned int\nuv_mode_counts"
"[VP9_INTRA_MODES] [VP9_INTRA_MODES] = {\n");
for (i = 0; i < VP9_INTRA_MODES; ++i) {
printf(" {");
for (t = 0; t < VP9_INTRA_MODES; ++t)
printf("%d, ", fc->uv_mode_counts[i][t]);
printf("},\n");
}
printf("};\n");
printf("static const unsigned int\nbmode_counts"
"[VP9_NKF_BINTRAMODES] = {\n");
for (t = 0; t < VP9_NKF_BINTRAMODES; ++t)
printf("%d, ", fc->bmode_counts[t]);
printf("};\n");
printf("static const unsigned int\ni8x8_mode_counts"
"[VP9_I8X8_MODES] = {\n");
for (t = 0; t < VP9_I8X8_MODES; ++t)
printf("%d, ", fc->i8x8_mode_counts[t]);
printf("};\n");
printf("static const unsigned int\nmbsplit_counts"
"[VP9_NUMMBSPLITS] = {\n");
for (t = 0; t < VP9_NUMMBSPLITS; ++t)
printf("%d, ", fc->mbsplit_counts[t]);
printf("};\n");
#endif
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
fc->intra_inter_prob[i] = update_ct2(pre_fc->intra_inter_prob[i],
counts->intra_inter[i]);
fc->intra_inter_prob[i] = update_mode_ct2(fc->pre_intra_inter_prob[i],
fc->intra_inter_count[i]);
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
fc->comp_inter_prob[i] = update_ct2(pre_fc->comp_inter_prob[i],
counts->comp_inter[i]);
fc->comp_inter_prob[i] = update_mode_ct2(fc->pre_comp_inter_prob[i],
fc->comp_inter_count[i]);
for (i = 0; i < REF_CONTEXTS; i++)
fc->comp_ref_prob[i] = update_ct2(pre_fc->comp_ref_prob[i],
counts->comp_ref[i]);
fc->comp_ref_prob[i] = update_mode_ct2(fc->pre_comp_ref_prob[i],
fc->comp_ref_count[i]);
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
fc->single_ref_prob[i][j] = update_ct2(pre_fc->single_ref_prob[i][j],
counts->single_ref[i][j]);
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
update_mode_probs(VP9_INTER_MODES, vp9_inter_mode_tree,
counts->inter_mode[i], pre_fc->inter_mode_probs[i],
fc->inter_mode_probs[i], NEARESTMV);
fc->single_ref_prob[i][j] = update_mode_ct2(fc->pre_single_ref_prob[i][j],
fc->single_ref_count[i][j]);
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
counts->y_mode[i], pre_fc->y_mode_prob[i],
fc->y_mode_counts[i], fc->pre_y_mode_prob[i],
fc->y_mode_prob[i], 0);
for (i = 0; i < VP9_INTRA_MODES; ++i)
update_mode_probs(VP9_INTRA_MODES, vp9_intra_mode_tree,
counts->uv_mode[i], pre_fc->uv_mode_prob[i],
fc->uv_mode_counts[i], fc->pre_uv_mode_prob[i],
fc->uv_mode_prob[i], 0);
for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
update_mode_probs(PARTITION_TYPES, vp9_partition_tree,
counts->partition[i],
pre_fc->partition_prob[INTER_FRAME][i],
fc->partition_counts[i], fc->pre_partition_prob[i],
fc->partition_prob[INTER_FRAME][i], 0);
if (cm->mcomp_filter_type == SWITCHABLE) {
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
update_mode_probs(VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
counts->switchable_interp[i],
pre_fc->switchable_interp_prob[i],
fc->switchable_interp_count[i],
fc->pre_switchable_interp_prob[i],
fc->switchable_interp_prob[i], 0);
}
}
if (cm->tx_mode == TX_MODE_SELECT) {
if (cm->txfm_mode == TX_MODE_SELECT) {
int j;
unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
unsigned int branch_ct_32x32p[TX_SIZES - 1][2];
unsigned int branch_ct_8x8p[TX_SIZE_MAX_SB - 3][2];
unsigned int branch_ct_16x16p[TX_SIZE_MAX_SB - 2][2];
unsigned int branch_ct_32x32p[TX_SIZE_MAX_SB - 1][2];
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
for (j = 0; j < TX_SIZES - 3; ++j)
fc->tx_probs.p8x8[i][j] = update_ct2(pre_fc->tx_probs.p8x8[i][j],
branch_ct_8x8p[j]);
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i],
branch_ct_16x16p);
for (j = 0; j < TX_SIZES - 2; ++j)
fc->tx_probs.p16x16[i][j] = update_ct2(pre_fc->tx_probs.p16x16[i][j],
branch_ct_16x16p[j]);
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i],
branch_ct_32x32p);
for (j = 0; j < TX_SIZES - 1; ++j)
fc->tx_probs.p32x32[i][j] = update_ct2(pre_fc->tx_probs.p32x32[i][j],
branch_ct_32x32p[j]);
}
}
for (i = 0; i < MBSKIP_CONTEXTS; ++i)
fc->mbskip_probs[i] = update_ct2(pre_fc->mbskip_probs[i],
counts->mbskip[i]);
#if CONFIG_INTERINTRA
if (cm->use_interintra) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (is_interintra_allowed(i))
fc->interintra_prob[i] = update_ct2(pre_fc->interintra_prob[i],
counts->interintra[i]);
}
#if CONFIG_MASKED_INTERINTRA
if (cm->use_masked_interintra) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (is_interintra_allowed(i) && get_mask_bits_interintra(i))
fc->masked_interintra_prob[i] = update_ct2(
pre_fc->masked_interintra_prob[i],
counts->masked_interintra[i]);
tx_counts_to_branch_counts_8x8(cm->fc.tx_count_8x8p[i],
branch_ct_8x8p);
for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j) {
int factor;
int count = branch_ct_8x8p[j][0] + branch_ct_8x8p[j][1];
vp9_prob prob = get_binary_prob(branch_ct_8x8p[j][0],
branch_ct_8x8p[j][1]);
count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
cm->fc.tx_probs_8x8p[i][j] = weighted_prob(
cm->fc.pre_tx_probs_8x8p[i][j], prob, factor);
}
}
#endif
}
#endif
#if CONFIG_FILTERINTRA
for (i = 0; i < TX_SIZES; ++i)
for (j = 0; j < VP9_INTRA_MODES; ++j)
fc->filterintra_prob[i][j] = update_ct2(pre_fc->filterintra_prob[i][j],
counts->filterintra[i][j]);
#endif
#if CONFIG_MASKED_INTERINTER
if (cm->use_masked_compound) {
for (i = 0; i < BLOCK_SIZE_TYPES; ++i) {
if (get_mask_bits(i))
fc->masked_compound_prob[i] = update_ct2
(pre_fc->masked_compound_prob[i],
counts->masked_compound[i]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_16x16(cm->fc.tx_count_16x16p[i],
branch_ct_16x16p);
for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j) {
int factor;
int count = branch_ct_16x16p[j][0] + branch_ct_16x16p[j][1];
vp9_prob prob = get_binary_prob(branch_ct_16x16p[j][0],
branch_ct_16x16p[j][1]);
count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
cm->fc.tx_probs_16x16p[i][j] = weighted_prob(
cm->fc.pre_tx_probs_16x16p[i][j], prob, factor);
}
}
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_32x32(cm->fc.tx_count_32x32p[i],
branch_ct_32x32p);
for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j) {
int factor;
int count = branch_ct_32x32p[j][0] + branch_ct_32x32p[j][1];
vp9_prob prob = get_binary_prob(branch_ct_32x32p[j][0],
branch_ct_32x32p[j][1]);
count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT);
cm->fc.tx_probs_32x32p[i][j] = weighted_prob(
cm->fc.pre_tx_probs_32x32p[i][j], prob, factor);
}
}
}
#endif
for (i = 0; i < MBSKIP_CONTEXTS; ++i)
fc->mbskip_probs[i] = update_mode_ct2(fc->pre_mbskip_probs[i],
fc->mbskip_count[i]);
}
static void set_default_lf_deltas(struct loopfilter *lf) {
lf->mode_ref_delta_enabled = 1;
lf->mode_ref_delta_update = 1;
static void set_default_lf_deltas(MACROBLOCKD *xd) {
xd->mode_ref_lf_delta_enabled = 1;
xd->mode_ref_lf_delta_update = 1;
lf->ref_deltas[INTRA_FRAME] = 1;
lf->ref_deltas[LAST_FRAME] = 0;
lf->ref_deltas[GOLDEN_FRAME] = -1;
lf->ref_deltas[ALTREF_FRAME] = -1;
xd->ref_lf_deltas[INTRA_FRAME] = 1;
xd->ref_lf_deltas[LAST_FRAME] = 0;
xd->ref_lf_deltas[GOLDEN_FRAME] = -1;
xd->ref_lf_deltas[ALTREF_FRAME] = -1;
lf->mode_deltas[0] = 0;
lf->mode_deltas[1] = 0;
xd->mode_lf_deltas[0] = 0; // Zero
xd->mode_lf_deltas[1] = 0; // New mv
}
void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
// Reset the segment feature data to the default stats:
// Features disabled, 0, with delta coding (Default state).
struct loopfilter *const lf = &xd->lf;
int i;
vp9_clearall_segfeatures(&xd->seg);
xd->seg.abs_delta = SEGMENT_DELTADATA;
vp9_clearall_segfeatures(xd);
xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
if (cm->last_frame_seg_map)
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
// Reset the mode ref deltas for loop filter
vp9_zero(lf->last_ref_deltas);
vp9_zero(lf->last_mode_deltas);
set_default_lf_deltas(lf);
// To force update of the sharpness
lf->last_sharpness_level = -1;
vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->last_ref_lf_deltas));
vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->last_mode_lf_deltas));
set_default_lf_deltas(xd);
vp9_default_coef_probs(cm);
vp9_init_mbmode_probs(cm);
vpx_memcpy(cm->kf_y_mode_prob, vp9_kf_default_bmode_probs,
sizeof(vp9_kf_default_bmode_probs));
vp9_init_mv_probs(cm);
vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs);
if (cm->frame_type == KEY_FRAME ||
cm->error_resilient_mode || cm->reset_frame_context == 3) {
// To force update of the sharpness
cm->last_sharpness_level = -1;
vp9_init_mode_contexts(cm);
if ((cm->frame_type == KEY_FRAME) ||
cm->error_resilient_mode || (cm->reset_frame_context == 3)) {
// Reset all frame contexts.
for (i = 0; i < NUM_FRAME_CONTEXTS; ++i)
cm->frame_contexts[i] = cm->fc;
vpx_memcpy(&cm->frame_contexts[i], &cm->fc, sizeof(cm->fc));
} else if (cm->reset_frame_context == 2) {
// Reset only the frame context specified in the frame header.
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
vpx_memcpy(&cm->frame_contexts[cm->frame_context_idx], &cm->fc,
sizeof(cm->fc));
}
vpx_memset(cm->prev_mip, 0,
@@ -593,7 +529,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
vp9_update_mode_info_border(cm, cm->prev_mip);
vp9_update_mode_info_in_image(cm, cm->prev_mi);
vp9_zero(cm->ref_frame_sign_bias);
vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias));
cm->frame_context_idx = 0;
}


@@ -16,69 +16,81 @@
#define SUBMVREF_COUNT 5
#define TX_SIZE_CONTEXTS 2
#define VP9_MODE_UPDATE_PROB 252
#define VP9_SWITCHABLE_FILTERS 3 // number of switchable filters
#if CONFIG_INTERINTRA
#define VP9_UPD_INTERINTRA_PROB 248
#define SEPARATE_INTERINTRA_UV 0
#if CONFIG_MASKED_INTERINTRA
#define VP9_UPD_MASKED_INTERINTRA_PROB 248
#endif
#endif
#if CONFIG_MASKED_INTERINTER
#define VP9_UPD_MASKED_COMPOUND_PROB 248
#endif
// #define MODE_STATS
struct VP9Common;
extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
struct tx_probs {
vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
};
struct tx_counts {
unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
};
extern const vp9_prob vp9_kf_uv_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES - 1];
extern const vp9_prob vp9_kf_y_mode_prob[VP9_INTRA_MODES][VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1];
extern const vp9_prob vp9_kf_default_bmode_probs[VP9_INTRA_MODES]
[VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1];
extern const vp9_tree_index vp9_intra_mode_tree[];
extern const vp9_tree_index vp9_inter_mode_tree[];
extern const vp9_tree_index vp9_sb_mv_ref_tree[];
extern struct vp9_token vp9_intra_mode_encodings[VP9_INTRA_MODES];
extern struct vp9_token vp9_inter_mode_encodings[VP9_INTER_MODES];
/* Inter mode values do not start at zero */
extern struct vp9_token vp9_sb_mv_ref_encoding_array[VP9_INTER_MODES];
// probability models for partition information
extern const vp9_tree_index vp9_partition_tree[];
extern const vp9_tree_index vp9_partition_tree[];
extern struct vp9_token vp9_partition_encodings[PARTITION_TYPES];
extern const vp9_prob vp9_partition_probs[NUM_FRAME_TYPES]
[NUM_PARTITION_CONTEXTS]
[PARTITION_TYPES - 1];
extern const vp9_tree_index vp9_switchable_interp_tree
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
void vp9_entropy_mode_init(void);
extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
void vp9_entropy_mode_init();
struct VP9Common;
/* sets up common features to forget past dependence */
void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd);
void vp9_init_mbmode_probs(struct VP9Common *x);
extern void vp9_init_mode_contexts(struct VP9Common *pc);
extern void vp9_adapt_mode_context(struct VP9Common *pc);
extern void vp9_accum_mv_refs(struct VP9Common *pc,
MB_PREDICTION_MODE m,
const int context);
void vp9_adapt_mode_probs(struct VP9Common *);
void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]);
void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
unsigned int (*ct_16x16p)[2]);
void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */
extern const INTERPOLATIONFILTERTYPE vp9_switchable_interp
[VP9_SWITCHABLE_FILTERS];
extern const int vp9_switchable_interp_map[SWITCHABLE + 1];
extern const int vp9_is_interpolating_filter[SWITCHABLE + 1];
extern const vp9_tree_index vp9_switchable_interp_tree
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
extern struct vp9_token vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS];
extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
extern const vp9_prob vp9_default_tx_probs_32x32p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 1];
extern const vp9_prob vp9_default_tx_probs_16x16p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 2];
extern const vp9_prob vp9_default_tx_probs_8x8p[TX_SIZE_CONTEXTS]
[TX_SIZE_MAX_SB - 3];
extern void tx_counts_to_branch_counts_32x32(unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]);
extern void tx_counts_to_branch_counts_16x16(unsigned int *tx_count_16x16p,
unsigned int (*ct_16x16p)[2]);
extern void tx_counts_to_branch_counts_8x8(unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
#endif // VP9_COMMON_VP9_ENTROPYMODE_H_


@@ -12,11 +12,16 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_entropymv.h"
//#define MV_COUNT_TESTING
#define MV_COUNT_SAT 20
#define MV_MAX_UPDATE_FACTOR 128
/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
#define COMPANDED_MVREF_THRESH 8
#define COMPANDED_MVREF_THRESH 8
/* Smooth or bias the mv-counts before prob computation */
/* #define SMOOTH_MV_COUNTS */
const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
-MV_JOINT_ZERO, 2,
@@ -51,7 +56,7 @@ const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
};
struct vp9_token vp9_mv_fp_encodings[4];
static const nmv_context default_nmv_context = {
const nmv_context vp9_default_nmv_context = {
{32, 64, 96},
{
{ /* vert component */
@@ -77,10 +82,21 @@ static const nmv_context default_nmv_context = {
},
};
MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) {
if (mv->row == 0 && mv->col == 0)
return MV_JOINT_ZERO;
else if (mv->row == 0 && mv->col != 0)
return MV_JOINT_HNZVZ;
else if (mv->row != 0 && mv->col == 0)
return MV_JOINT_HZVNZ;
else
return MV_JOINT_HNZVNZ;
}
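
The if-chain classifies which motion-vector components are nonzero. Assuming the joint enum is laid out in the order shown (ZERO, HNZVZ, HZVNZ, HNZVNZ), it collapses to two bit tests; a sketch with stand-in type names:

typedef enum { JOINT_ZERO, JOINT_HNZVZ,
               JOINT_HZVNZ, JOINT_HNZVNZ } JOINT;
typedef struct { short row, col; } XMV;  /* stand-in for MV */

static JOINT mv_joint(const XMV *mv) {
  return (JOINT)(((mv->row != 0) << 1) | (mv->col != 0));
}
/* (0,0)->ZERO, (0,5)->HNZVZ, (3,0)->HZVNZ, (3,5)->HNZVNZ */
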
#define mv_class_base(c) ((c) ? (CLASS0_SIZE << (c + 2)) : 0)
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
MV_CLASS_TYPE c = MV_CLASS_0;
MV_CLASS_TYPE c;
if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
@@ -107,6 +123,12 @@ int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
return mv_class_base(c) + offset;
}
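
mv_class_base(c) gives the first magnitude in class c: 0, 16, 32, 64, ... when CLASS0_SIZE is 2, so the if-chain above amounts to finding the last class whose base does not exceed z. A sketch under that assumption (the upper bound of 10 classes is illustrative):

#include <stdio.h>

#define CLASS0_SIZE 2  /* assumed; gives boundaries 16, 32, 64, ... */
#define mv_class_base(c) ((c) ? (CLASS0_SIZE << ((c) + 2)) : 0)

static int mv_class(int z, int *offset) {
  int c = 0;
  while (c < 10 && z >= mv_class_base(c + 1))
    ++c;
  *offset = z - mv_class_base(c);
  return c;
}

int main(void) {
  int off;
  const int c = mv_class(40, &off);
  printf("class %d, offset %d\n", c, off);  /* class 2, offset 8 */
  return 0;
}
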
static void inc_mv_component_count(int v, nmv_component_counts *comp_counts,
int incr) {
assert (v != 0);
comp_counts->mvcount[MV_MAX + v] += incr;
}
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
int incr, int usehp) {
int s, z, c, o, d, e, f;
@@ -149,6 +171,24 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts,
}
}
#ifdef SMOOTH_MV_COUNTS
static void smooth_counts(nmv_component_counts *mvcomp) {
static const int flen = 3; // (filter_length + 1) / 2
static const int fval[] = {8, 3, 1};
static const int fvalbits = 4;
int i;
unsigned int smvcount[MV_VALS];
vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount));
smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1;
for (i = flen - 1; i <= MV_VALS - flen; ++i) {
int j, s = smvcount[i] * fval[0];
for (j = 1; j < flen; ++j)
s += (smvcount[i - j] + smvcount[i + j]) * fval[j];
mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits;
}
}
#endif
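
Under SMOOTH_MV_COUNTS the magnitude histogram is convolved with a symmetric 5-tap kernel {1, 3, 8, 3, 1}/16 (fval with fvalbits = 4) before probabilities are computed; the original additionally replaces the zero-magnitude bin with the average of its two neighbours first. The filter itself, restated as a sketch (n <= 1024 assumed):

#include <string.h>

static void smooth_hist(unsigned *h, int n) {
  unsigned tmp[1024];
  int i;
  memcpy(tmp, h, n * sizeof(*h));    /* read from a copy, write in place */
  for (i = 2; i <= n - 3; ++i)
    h[i] = (8 * tmp[i] + 3 * (tmp[i - 1] + tmp[i + 1]) +
            (tmp[i - 2] + tmp[i + 2]) + 8) >> 4;
}
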
static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
int v;
vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount));
@@ -158,19 +198,27 @@ static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
}
}
void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) {
void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
int usehp) {
const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
++counts->joints[j];
mvctx->joints[j]++;
usehp = usehp && vp9_use_mv_hp(ref);
if (mv_joint_vertical(j))
++counts->comps[0].mvcount[MV_MAX + mv->row];
inc_mv_component_count(mv->row, &mvctx->comps[0], 1);
if (mv_joint_horizontal(j))
++counts->comps[1].mvcount[MV_MAX + mv->col];
inc_mv_component_count(mv->col, &mvctx->comps[1], 1);
}
static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
return merge_probs2(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
static void adapt_prob(vp9_prob *dest, vp9_prob prep, unsigned int ct[2]) {
const int count = MIN(ct[0] + ct[1], MV_COUNT_SAT);
if (count) {
const vp9_prob newp = get_binary_prob(ct[0], ct[1]);
const int factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;
*dest = weighted_prob(prep, newp, factor);
} else {
*dest = prep;
}
}
void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
@@ -178,61 +226,212 @@ void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
counts_to_context(&nmv_count->comps[1], usehp);
}
void vp9_counts_to_nmv_context(
nmv_context_counts *nmv_count,
nmv_context *prob,
int usehp,
unsigned int (*branch_ct_joint)[2],
unsigned int (*branch_ct_sign)[2],
unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
unsigned int (*branch_ct_fp)[4 - 1][2],
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]) {
int i, j, k;
vp9_counts_process(nmv_count, usehp);
vp9_tree_probs_from_distribution(vp9_mv_joint_tree,
prob->joints,
branch_ct_joint,
nmv_count->joints, 0);
for (i = 0; i < 2; ++i) {
const uint32_t s0 = nmv_count->comps[i].sign[0];
const uint32_t s1 = nmv_count->comps[i].sign[1];
prob->comps[i].sign = get_binary_prob(s0, s1);
branch_ct_sign[i][0] = s0;
branch_ct_sign[i][1] = s1;
vp9_tree_probs_from_distribution(vp9_mv_class_tree,
prob->comps[i].classes,
branch_ct_classes[i],
nmv_count->comps[i].classes, 0);
vp9_tree_probs_from_distribution(vp9_mv_class0_tree,
prob->comps[i].class0,
branch_ct_class0[i],
nmv_count->comps[i].class0, 0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
const uint32_t b0 = nmv_count->comps[i].bits[j][0];
const uint32_t b1 = nmv_count->comps[i].bits[j][1];
prob->comps[i].bits[j] = get_binary_prob(b0, b1);
branch_ct_bits[i][j][0] = b0;
branch_ct_bits[i][j][1] = b1;
}
}
for (i = 0; i < 2; ++i) {
for (k = 0; k < CLASS0_SIZE; ++k) {
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
prob->comps[i].class0_fp[k],
branch_ct_class0_fp[i][k],
nmv_count->comps[i].class0_fp[k], 0);
}
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
prob->comps[i].fp,
branch_ct_fp[i],
nmv_count->comps[i].fp, 0);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
const uint32_t c0_hp0 = nmv_count->comps[i].class0_hp[0];
const uint32_t c0_hp1 = nmv_count->comps[i].class0_hp[1];
const uint32_t hp0 = nmv_count->comps[i].hp[0];
const uint32_t hp1 = nmv_count->comps[i].hp[1];
prob->comps[i].class0_hp = get_binary_prob(c0_hp0, c0_hp1);
branch_ct_class0_hp[i][0] = c0_hp0;
branch_ct_class0_hp[i][1] = c0_hp1;
prob->comps[i].hp = get_binary_prob(hp0, hp1);
branch_ct_hp[i][0] = hp0;
branch_ct_hp[i][1] = hp1;
}
}
}
static unsigned int adapt_probs(unsigned int i,
vp9_tree tree,
vp9_prob this_probs[],
const vp9_prob last_probs[],
const unsigned int num_events[]) {
vp9_prob this_prob;
const unsigned int left = tree[i] <= 0
const uint32_t left = tree[i] <= 0
? num_events[-tree[i]]
: adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
const unsigned int right = tree[i + 1] <= 0
const uint32_t right = tree[i + 1] <= 0
? num_events[-tree[i + 1]]
: adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events);
const unsigned int ct[2] = { left, right };
this_probs[i >> 1] = adapt_prob(last_probs[i >> 1], ct);
uint32_t weight = left + right;
if (weight) {
this_prob = get_binary_prob(left, right);
weight = weight > MV_COUNT_SAT ? MV_COUNT_SAT : weight;
this_prob = weighted_prob(last_probs[i >> 1], this_prob,
MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT);
} else {
this_prob = last_probs[i >> 1];
}
this_probs[i >> 1] = this_prob;
return left + right;
}
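
The recursion above is a bottom-up fold over a vp9-style tree: leaves contribute raw event counts, each internal node receives its two subtree totals, updates one probability, and passes the sum upward. A minimal sketch of just the counting part (tree encoding assumed: entries <= 0 are negated leaf tokens, positive entries index child nodes):

typedef signed char tree_index;  /* stand-in for vp9_tree_index */

static unsigned subtree_events(const tree_index *tree, int i,
                               const unsigned *num_events) {
  const tree_index l = tree[i], r = tree[i + 1];
  const unsigned left  = l <= 0 ? num_events[-l]
                                : subtree_events(tree, l, num_events);
  const unsigned right = r <= 0 ? num_events[-r]
                                : subtree_events(tree, r, num_events);
  /* adapt_probs() would update probs[i >> 1] from {left, right} here. */
  return left + right;
}
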
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) {
int i, j;
#ifdef MV_COUNT_TESTING
printf("joints count: ");
for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]);
printf("\n"); fflush(stdout);
printf("signs count:\n");
for (i = 0; i < 2; ++i)
printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]);
printf("\n"); fflush(stdout);
printf("classes count:\n");
for (i = 0; i < 2; ++i) {
for (j = 0; j < MV_CLASSES; ++j)
printf("%d ", cm->fc.NMVcount.comps[i].classes[j]);
printf("\n"); fflush(stdout);
}
printf("class0 count:\n");
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j)
printf("%d ", cm->fc.NMVcount.comps[i].class0[j]);
printf("\n"); fflush(stdout);
}
printf("bits count:\n");
for (i = 0; i < 2; ++i) {
for (j = 0; j < MV_OFFSET_BITS; ++j)
printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0],
cm->fc.NMVcount.comps[i].bits[j][1]);
printf("\n"); fflush(stdout);
}
printf("class0_fp count:\n");
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j) {
printf("{");
for (k = 0; k < 4; ++k)
printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]);
printf("}, ");
}
printf("\n"); fflush(stdout);
}
printf("fp count:\n");
for (i = 0; i < 2; ++i) {
for (j = 0; j < 4; ++j)
printf("%d ", cm->fc.NMVcount.comps[i].fp[j]);
printf("\n"); fflush(stdout);
}
if (usehp) {
printf("class0_hp count:\n");
for (i = 0; i < 2; ++i)
printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0],
cm->fc.NMVcount.comps[i].class0_hp[1]);
printf("\n"); fflush(stdout);
printf("hp count:\n");
for (i = 0; i < 2; ++i)
printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0],
cm->fc.NMVcount.comps[i].hp[1]);
printf("\n"); fflush(stdout);
}
#endif
#ifdef SMOOTH_MV_COUNTS
smooth_counts(&cm->fc.NMVcount.comps[0]);
smooth_counts(&cm->fc.NMVcount.comps[1]);
#endif
vp9_counts_process(&cm->fc.NMVcount, usehp);
FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
nmv_context *ctx = &cm->fc.nmvc;
nmv_context *pre_ctx = &pre_fc->nmvc;
nmv_context_counts *cts = &cm->counts.mv;
vp9_counts_process(cts, allow_hp);
adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
adapt_probs(0, vp9_mv_joint_tree,
cm->fc.nmvc.joints, cm->fc.pre_nmvc.joints,
cm->fc.NMVcount.joints);
for (i = 0; i < 2; ++i) {
ctx->comps[i].sign = adapt_prob(pre_ctx->comps[i].sign, cts->comps[i].sign);
adapt_probs(0, vp9_mv_class_tree, ctx->comps[i].classes,
pre_ctx->comps[i].classes, cts->comps[i].classes);
adapt_probs(0, vp9_mv_class0_tree, ctx->comps[i].class0,
pre_ctx->comps[i].class0, cts->comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j)
ctx->comps[i].bits[j] = adapt_prob(pre_ctx->comps[i].bits[j],
cts->comps[i].bits[j]);
for (j = 0; j < CLASS0_SIZE; ++j)
adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].class0_fp[j],
pre_ctx->comps[i].class0_fp[j], cts->comps[i].class0_fp[j]);
adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp,
cts->comps[i].fp);
if (allow_hp) {
ctx->comps[i].class0_hp = adapt_prob(pre_ctx->comps[i].class0_hp,
cts->comps[i].class0_hp);
ctx->comps[i].hp = adapt_prob(pre_ctx->comps[i].hp, cts->comps[i].hp);
adapt_prob(&cm->fc.nmvc.comps[i].sign,
cm->fc.pre_nmvc.comps[i].sign,
cm->fc.NMVcount.comps[i].sign);
adapt_probs(0, vp9_mv_class_tree,
cm->fc.nmvc.comps[i].classes, cm->fc.pre_nmvc.comps[i].classes,
cm->fc.NMVcount.comps[i].classes);
adapt_probs(0, vp9_mv_class0_tree,
cm->fc.nmvc.comps[i].class0, cm->fc.pre_nmvc.comps[i].class0,
cm->fc.NMVcount.comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
adapt_prob(&cm->fc.nmvc.comps[i].bits[j],
cm->fc.pre_nmvc.comps[i].bits[j],
cm->fc.NMVcount.comps[i].bits[j]);
}
}
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j) {
adapt_probs(0, vp9_mv_fp_tree,
cm->fc.nmvc.comps[i].class0_fp[j],
cm->fc.pre_nmvc.comps[i].class0_fp[j],
cm->fc.NMVcount.comps[i].class0_fp[j]);
}
adapt_probs(0, vp9_mv_fp_tree,
cm->fc.nmvc.comps[i].fp,
cm->fc.pre_nmvc.comps[i].fp,
cm->fc.NMVcount.comps[i].fp);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
adapt_prob(&cm->fc.nmvc.comps[i].class0_hp,
cm->fc.pre_nmvc.comps[i].class0_hp,
cm->fc.NMVcount.comps[i].class0_hp);
adapt_prob(&cm->fc.nmvc.comps[i].hp,
cm->fc.pre_nmvc.comps[i].hp,
cm->fc.NMVcount.comps[i].hp);
}
}
}
@@ -245,5 +444,5 @@ void vp9_entropy_mv_init() {
}
void vp9_init_mv_probs(VP9_COMMON *cm) {
cm->fc.nmvc = default_nmv_context;
vpx_memcpy(&cm->fc.nmvc, &vp9_default_nmv_context, sizeof(nmv_context));
}


@@ -26,6 +26,10 @@ int vp9_use_mv_hp(const MV *ref);
#define VP9_NMV_UPDATE_PROB 252
//#define MV_GROUP_UPDATE
#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */
/* Symbols for coding which components are zero jointly */
#define MV_JOINTS 4
typedef enum {
@@ -95,14 +99,7 @@ typedef struct {
nmv_component comps[2];
} nmv_context;
static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) {
if (mv->row == 0) {
return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
} else {
return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
}
}
MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv);
MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset);
int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset);
@@ -124,8 +121,22 @@ typedef struct {
nmv_component_counts comps[2];
} nmv_context_counts;
void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
int usehp);
extern const nmv_context vp9_default_nmv_context;
void vp9_counts_to_nmv_context(
nmv_context_counts *NMVcount,
nmv_context *prob,
int usehp,
unsigned int (*branch_ct_joint)[2],
unsigned int (*branch_ct_sign)[2],
unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
unsigned int (*branch_ct_fp)[4 - 1][2],
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]);
void vp9_counts_process(nmv_context_counts *NMVcount, int usehp);
#endif // VP9_COMMON_VP9_ENTROPYMV_H_


@@ -14,28 +14,24 @@
#include "./vpx_config.h"
#define LOG2_MI_SIZE 3
#define LOG2_MI_BLOCK_SIZE (6 - LOG2_MI_SIZE) // 64 = 2^6
#define MAX_BLOCK_SIZE (1 << 6) // max block size in pixel
#define MI_SIZE (1 << LOG2_MI_SIZE) // pixels per mi-unit
#define MI_BLOCK_SIZE (1 << LOG2_MI_BLOCK_SIZE) // mi-units per max block
#define MI_MASK (MI_BLOCK_SIZE - 1)
#define MI_SIZE (1 << LOG2_MI_SIZE)
#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1)
typedef enum BLOCK_SIZE_TYPE {
BLOCK_4X4,
BLOCK_4X8,
BLOCK_8X4,
BLOCK_8X8,
BLOCK_8X16,
BLOCK_16X8,
BLOCK_16X16,
BLOCK_16X32,
BLOCK_32X16,
BLOCK_32X32,
BLOCK_32X64,
BLOCK_64X32,
BLOCK_64X64,
BLOCK_SIZE_AB4X4,
BLOCK_SIZE_SB4X8,
BLOCK_SIZE_SB8X4,
BLOCK_SIZE_SB8X8,
BLOCK_SIZE_SB8X16,
BLOCK_SIZE_SB16X8,
BLOCK_SIZE_MB16X16,
BLOCK_SIZE_SB16X32,
BLOCK_SIZE_SB32X16,
BLOCK_SIZE_SB32X32,
BLOCK_SIZE_SB32X64,
BLOCK_SIZE_SB64X32,
BLOCK_SIZE_SB64X64,
BLOCK_SIZE_TYPES
} BLOCK_SIZE_TYPE;
@@ -44,34 +40,10 @@ typedef enum PARTITION_TYPE {
PARTITION_HORZ,
PARTITION_VERT,
PARTITION_SPLIT,
PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES
PARTITION_TYPES
} PARTITION_TYPE;
#define PARTITION_PLOFFSET 4 // number of probability models per block size
#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
typedef enum {
TX_4X4 = 0, // 4x4 dct transform
TX_8X8 = 1, // 8x8 dct transform
TX_16X16 = 2, // 16x16 dct transform
TX_32X32 = 3, // 32x32 dct transform
TX_SIZES
} TX_SIZE;
typedef enum {
ONLY_4X4 = 0,
ALLOW_8X8 = 1,
ALLOW_16X16 = 2,
ALLOW_32X32 = 3,
TX_MODE_SELECT = 4,
TX_MODES = 5,
} TX_MODE;
typedef enum {
DCT_DCT = 0, // DCT in both horizontal and vertical
ADST_DCT = 1, // ADST in vertical, DCT in horizontal
DCT_ADST = 2, // DCT in vertical, ADST in horizontal
ADST_ADST = 3 // ADST in both directions
} TX_TYPE;
#endif // VP9_COMMON_VP9_ENUMS_H_


@@ -8,10 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_extend.h"
#include "vpx_mem/vpx_mem.h"
static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
uint8_t *dst, int dst_pitch,
@@ -109,14 +107,14 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
const int src_y_offset = srcy * src->y_stride + srcx;
const int dst_y_offset = srcy * dst->y_stride + srcx;
const int et_uv = ROUND_POWER_OF_TWO(et_y, 1);
const int el_uv = ROUND_POWER_OF_TWO(el_y, 1);
const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1);
const int er_uv = ROUND_POWER_OF_TWO(er_y, 1);
const int et_uv = (et_y + 1) >> 1;
const int el_uv = (el_y + 1) >> 1;
const int eb_uv = (eb_y + 1) >> 1;
const int er_uv = (er_y + 1) >> 1;
const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
const int srch_uv = (srch + 1) >> 1;
const int srcw_uv = (srcw + 1) >> 1;
copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
dst->y_buffer + dst_y_offset, dst->y_stride,


@@ -14,13 +14,12 @@
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_sadmxn.h"
static void lower_mv_precision(MV *mv, int allow_hp) {
const int use_hp = allow_hp && vp9_use_mv_hp(mv);
if (!use_hp) {
if (mv->row & 1)
mv->row += (mv->row > 0 ? -1 : 1);
if (mv->col & 1)
mv->col += (mv->col > 0 ? -1 : 1);
static void lower_mv_precision(int_mv *mv, int usehp) {
if (!usehp || !vp9_use_mv_hp(&mv->as_mv)) {
if (mv->as_mv.row & 1)
mv->as_mv.row += (mv->as_mv.row > 0 ? -1 : 1);
if (mv->as_mv.col & 1)
mv->as_mv.col += (mv->as_mv.col > 0 ? -1 : 1);
}
}
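
With high precision off, an odd (1/8-pel) component is nudged one step toward zero so it lands on a 1/4-pel position and its magnitude never grows. A tiny sketch:

/* to_quarter_pel(5) == 4, to_quarter_pel(-5) == -4, to_quarter_pel(6) == 6 */
static short to_quarter_pel(short v) {
  return (short)((v & 1) ? v + (v > 0 ? -1 : 1) : v);
}
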
@@ -32,8 +31,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
int i;
// Make sure all the candidates are properly clamped etc
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
lower_mv_precision(&mvlist[i].as_mv, xd->allow_high_precision_mv);
clamp_mv2(&mvlist[i].as_mv, xd);
lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv);
clamp_mv2(&mvlist[i], xd);
}
*nearest = mvlist[0];
*near = mvlist[1];
@@ -42,8 +41,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
int_mv *dst_nearest,
int_mv *dst_near,
int block_idx, int ref_idx,
int mi_row, int mi_col) {
int block_idx, int ref_idx) {
int_mv dst_list[MAX_MV_REF_CANDIDATES];
int_mv mv_list[MAX_MV_REF_CANDIDATES];
MODE_INFO *mi = xd->mode_info_context;
@@ -55,8 +53,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_find_mv_refs_idx(cm, xd, xd->mode_info_context,
xd->prev_mode_info_context,
mbmi->ref_frame[ref_idx],
mv_list, cm->ref_frame_sign_bias, block_idx,
mi_row, mi_col);
mv_list, cm->ref_frame_sign_bias, block_idx);
dst_list[1].as_int = 0;
if (block_idx == 0) {


@@ -28,20 +28,53 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
int_mv *nearest,
int_mv *near);
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
int_mv *mvp, const int *ref_frame_sign_bias) {
MV xmv = mvp->as_mv;
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
xmv.row *= -1;
xmv.col *= -1;
}
mvp->as_mv = xmv;
}
// TODO(jingning): this mv clamping function should be block size dependent.
static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
static void clamp_mv(int_mv *mv,
int mb_to_left_edge,
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge) {
mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge);
mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge);
}
static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
int_mv tmp_mv;
tmp_mv.as_int = mv->as_int;
clamp_mv(mv,
xd->mb_to_left_edge - LEFT_TOP_MARGIN,
xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
return tmp_mv.as_int != mv->as_int;
}
static int check_mv_bounds(int_mv *mv,
int mb_to_left_edge, int mb_to_right_edge,
int mb_to_top_edge, int mb_to_bottom_edge) {
return mv->as_mv.col < mb_to_left_edge ||
mv->as_mv.col > mb_to_right_edge ||
mv->as_mv.row < mb_to_top_edge ||
mv->as_mv.row > mb_to_bottom_edge;
}
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *pc,
MACROBLOCKD *xd,
int_mv *dst_nearest,
int_mv *dst_near,
int block_idx, int ref_idx,
int mi_row, int mi_col);
int block_idx, int ref_idx);
static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
// FIXME(rbultje, jingning): temporary hack because jenkins doesn't
@@ -50,16 +83,16 @@ static MB_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
/* On L edge, get from MB to left of us */
--cur_mb;
if (is_inter_block(&cur_mb->mbmi)) {
if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) {
return DC_PRED;
} else if (cur_mb->mbmi.sb_type < BLOCK_8X8) {
return (cur_mb->bmi + 1 + b)->as_mode;
} else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
return ((cur_mb->bmi + 1 + b)->as_mode.first);
} else {
return cur_mb->mbmi.mode;
}
}
assert(b == 1 || b == 3);
return (cur_mb->bmi + b - 1)->as_mode;
return (cur_mb->bmi + b - 1)->as_mode.first;
}
static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
@@ -68,16 +101,16 @@ static MB_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if (is_inter_block(&cur_mb->mbmi)) {
if (cur_mb->mbmi.ref_frame[0] != INTRA_FRAME) {
return DC_PRED;
} else if (cur_mb->mbmi.sb_type < BLOCK_8X8) {
return (cur_mb->bmi + 2 + b)->as_mode;
} else if (cur_mb->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
return ((cur_mb->bmi + 2 + b)->as_mode.first);
} else {
return cur_mb->mbmi.mode;
}
}
return (cur_mb->bmi + b - 2)->as_mode;
return (cur_mb->bmi + b - 2)->as_mode.first;
}
#endif // VP9_COMMON_VP9_FINDNEARMV_H_


@@ -124,7 +124,9 @@ void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
// Rows
for (i = 0; i < 4; ++i) {
vp9_idct4_1d(input, outptr);
for (j = 0; j < 4; ++j)
temp_in[j] = input[j];
vp9_idct4_1d(temp_in, outptr);
input += 4;
outptr += 4;
}
@@ -156,6 +158,23 @@ void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
}
}
void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr,
uint8_t *dst_ptr, int pitch, int stride) {
int a1;
int r, c;
int16_t out = dct_const_round_shift(input_dc * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 4);
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++)
dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);
dst_ptr += stride;
pred_ptr += pitch;
}
}
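For the DC-only path above: cospi_16_64 = round(16384 * cos(pi/4)) = 11585, so each dct_const_round_shift(x * cospi_16_64) is roughly x / sqrt(2); applying it twice plus ROUND_POWER_OF_TWO(out, 4) mirrors the row pass, column pass, and final shift of the full 4x4 inverse for a DC-only block. For example, input_dc = 64 gives 45, then 32, then a1 = (32 + 8) >> 4 = 2.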
static void idct8_1d(int16_t *input, int16_t *output) {
int16_t step1[8], step2[8];
int temp1, temp2;
@@ -225,19 +244,6 @@ void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
}
}
void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 5);
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i)
dest[i] = clip_pixel(dest[i] + a1);
dest += dest_stride;
}
}
static void iadst4_1d(int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -422,11 +428,12 @@ void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[8 * 8] = { 0 };
int16_t out[8 * 8];
int16_t *outptr = out;
int i, j;
int16_t temp_in[8], temp_out[8];
vpx_memset(out, 0, sizeof(out));
// First transform rows
// only the first 4 rows have non-zero coefs
for (i = 0; i < 4; ++i) {
@@ -446,6 +453,12 @@ void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
}
}
void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
output[0] = ROUND_POWER_OF_TWO(out, 5);
}
static void idct16_1d(int16_t *input, int16_t *output) {
int16_t step1[16], step2[16];
int temp1, temp2;
@@ -522,7 +535,6 @@ static void idct16_1d(int16_t *input, int16_t *output) {
step1[14] = -step2[14] + step2[15];
step1[15] = step2[14] + step2[15];
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
step2[0] = dct_const_round_shift(temp1);
@@ -840,13 +852,15 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[16 * 16] = { 0 };
int16_t out[16 * 16];
int16_t *outptr = out;
int i, j;
int16_t temp_in[16], temp_out[16];
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
/* First transform rows. Since all non-zero dct coefficients are in
* upper-left 4x4 area, we only need to calculate first 4 rows here.
*/
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct16_1d(input, outptr);
input += 16;
@@ -864,18 +878,10 @@ void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
}
}
void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int i, j;
int a1;
void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i)
dest[i] = clip_pixel(dest[i] + a1);
dest += dest_stride;
}
output[0] = ROUND_POWER_OF_TWO(out, 6);
}
static void idct32_1d(int16_t *input, int16_t *output) {
@@ -1274,3 +1280,31 @@ void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
out = dct_const_round_shift(out * cospi_16_64);
output[0] = ROUND_POWER_OF_TWO(out, 6);
}
void vp9_short_idct10_32x32_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[32 * 32];
int16_t *outptr = out;
int i, j;
int16_t temp_in[32], temp_out[32];
/* First transform rows. Since all non-zero dct coefficients are in
* upper-left 4x4 area, we only need to calculate first 4 rows here.
*/
vpx_memset(out, 0, sizeof(out));
for (i = 0; i < 4; ++i) {
idct32_1d(input, outptr);
input += 32;
outptr += 32;
}
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * dest_stride + i]);
}
}


@@ -22,15 +22,10 @@
#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
#define WHT_UPSCALE_FACTOR 2
#define pair_set_epi16(a, b) \
_mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
// Constants:
// for (int i = 1; i< 32; ++i)
// printf("static const int cospi_%d_64 = %.0f;\n", i,
// round(16384 * cos(i*M_PI/64)));
// Constants are round(16384 * cos(k*Pi/64)) where k = 1 to 31.
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
static const int cospi_1_64 = 16364;
static const int cospi_2_64 = 16305;


@@ -0,0 +1,253 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_onyxc_int.h"
#define MAX_REGIONS 24000
#ifndef NULL
#define NULL 0
#endif
#define min_mbs_in_region 3
// this linked list structure holds equivalences for connected
// component labeling
struct list_el {
int label;
int seg_value;
int count;
struct list_el *next;
};
typedef struct list_el item;
// connected color segments
typedef struct {
int min_x;
int min_y;
int max_x;
int max_y;
int64_t sum_x;
int64_t sum_y;
int pixels;
int seg_value;
int label;
} segment_info;
typedef enum {
SEGMENT_MODE,
SEGMENT_MV,
SEGMENT_REFFRAME,
SEGMENT_SKIPPED
} SEGMENT_TYPE;
// this merges the two equivalence lists and
// then makes sure that every label points to the same
// equivalence list
void merge(item *labels, int u, int v) {
item *a = labels[u].next;
item *b = labels[v].next;
item c;
item *it = &c;
int count;
// check if they are already merged
if (u == v || a == b)
return;
count = a->count + b->count;
// merge 2 sorted linked lists.
while (a != NULL && b != NULL) {
if (a->label < b->label) {
it->next = a;
a = a->next;
} else {
it->next = b;
b = b->next;
}
it = it->next;
}
if (a == NULL)
it->next = b;
else
it->next = a;
it = c.next;
// make sure every equivalence in the linked list points to this new ll
while (it != NULL) {
labels[it->label].next = c.next;
it = it->next;
}
c.next->count = count;
}
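A hedged trace of merge() on hypothetical labels: suppose labels 2 and 5 were found equivalent, holding sorted equivalence lists {2} and {5, 7}.
// merge(labels, 2, 5):
//   splices the two sorted lists into {2, 5, 7}, then walks the merged
//   list so labels[2].next, labels[5].next and labels[7].next all point
//   at the same head, whose count becomes the sum of both counts.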
void segment_via_mode_info(VP9_COMMON *oci, int how) {
MODE_INFO *mi = oci->mi;
int i, j;
int mb_index = 0;
int label = 1;
int pitch = oci->mb_cols;
// holds linked list equivalences
// the max should probably be allocated at a higher level in oci
item equivalences[MAX_REGIONS];
int eq_ptr = 0;
item labels[MAX_REGIONS];
segment_info segments[MAX_REGIONS];
int label_count = 1;
int labeling[400 * 300];
int *lp = labeling;
label_count = 1;
memset(labels, 0, sizeof(labels));
memset(segments, 0, sizeof(segments));
/* Go through each macroblock first pass labelling */
for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
for (j = 0; j < oci->mb_cols; j++) {
// above seg_value (a), left seg_value (l), this block's seg_value (n)
int a = -1, l = -1, n = -1;
// above label, left label
int al = -1, ll = -1;
if (i) {
al = lp[j - pitch];
a = labels[al].next->seg_value;
}
if (j) {
ll = lp[j - 1];
l = labels[ll].next->seg_value;
}
// select which mode-info field the implicit segmentation keys on
switch (how) {
case SEGMENT_MODE:
n = mi[mb_index].mbmi.mode;
break;
case SEGMENT_MV:
n = mi[mb_index].mbmi.mv[0].as_int;
if (mi[mb_index].mbmi.ref_frame[0] == INTRA_FRAME)
n = -9999999;
break;
case SEGMENT_REFFRAME:
n = mi[mb_index].mbmi.ref_frame[0];
break;
case SEGMENT_SKIPPED:
n = mi[mb_index].mbmi.mb_skip_coeff;
break;
}
// above and left both have the same seg_value
if (n == a && n == l) {
// pick the lowest label
lp[j] = (al < ll ? al : ll);
labels[lp[j]].next->count++;
// merge the above and left equivalencies
merge(labels, al, ll);
}
// this matches above seg_value
else if (n == a) {
// give it the same label as above
lp[j] = al;
labels[al].next->count++;
}
// this matches left seg_value
else if (n == l) {
// give it the same label as left
lp[j] = ll;
labels[ll].next->count++;
} else {
// new label doesn't match either
item *e = &labels[label];
item *nl = &equivalences[eq_ptr++];
lp[j] = label;
nl->label = label;
nl->next = 0;
nl->seg_value = n;
nl->count = 1;
e->next = nl;
label++;
}
mb_index++;
}
mb_index++;
}
lp = labeling;
// give new labels to regions
for (i = 1; i < label; i++)
if (labels[i].next->count > min_mbs_in_region &&
labels[labels[i].next->label].label == 0) {
segment_info *cs = &segments[label_count];
cs->label = label_count;
labels[labels[i].next->label].label = label_count++;
labels[labels[i].next->label].seg_value = labels[i].next->seg_value;
cs->seg_value = labels[labels[i].next->label].seg_value;
cs->min_x = oci->mb_cols;
cs->min_y = oci->mb_rows;
cs->max_x = 0;
cs->max_y = 0;
cs->sum_x = 0;
cs->sum_y = 0;
cs->pixels = 0;
}
lp = labeling;
// this is just to gather stats...
for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
for (j = 0; j < oci->mb_cols; j++) {
const int old_lab = labels[lp[j]].next->label;
const int lab = labels[old_lab].label;
segment_info *cs = &segments[lab];
cs->min_x = MIN(cs->min_x, j);
cs->max_x = MAX(cs->max_x, j);
cs->min_y = MIN(cs->min_y, i);
cs->max_y = MAX(cs->max_y, i);
cs->sum_x += j;
cs->sum_y += i;
cs->pixels++;
lp[j] = lab;
mb_index++;
}
mb_index++;
}
{
lp = labeling;
printf("labelling \n");
mb_index = 0;
for (i = 0; i < oci->mb_rows; i++, lp += pitch) {
for (j = 0; j < oci->mb_cols; j++) {
printf("%4d", lp[j]);
}
printf(" ");
for (j = 0; j < oci->mb_cols; j++, mb_index++) {
// printf("%3d",mi[mb_index].mbmi.mode );
printf("%4d:%4d", mi[mb_index].mbmi.mv[0].as_mv.row,
mi[mb_index].mbmi.mv[0].as_mv.col);
}
printf("\n");
++mb_index;
}
printf("\n");
}
}


@@ -16,12 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
struct loop_filter_info {
const uint8_t *mblim;
const uint8_t *lim;
const uint8_t *hev_thr;
};
static void lf_init_lut(loop_filter_info_n *lfi) {
lfi->mode_lf_lut[DC_PRED] = 0;
lfi->mode_lf_lut[D45_PRED] = 0;
@@ -39,13 +33,18 @@ static void lf_init_lut(loop_filter_info_n *lfi) {
lfi->mode_lf_lut[NEWMV] = 1;
}
static void update_sharpness(loop_filter_info_n *const lfi, int sharpness_lvl) {
int lvl;
void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl) {
int i;
// For each possible value for the loop filter fill out limits
for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
// Set loop filter parameters that control sharpness.
int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
/* For each possible value for the loop filter fill out limits */
for (i = 0; i <= MAX_LOOP_FILTER; i++) {
int filt_lvl = i;
int block_inside_limit = 0;
/* Set loop filter parameters that control sharpness. */
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
if (sharpness_lvl > 0) {
if (block_inside_limit > (9 - sharpness_lvl))
@@ -55,19 +54,21 @@ static void update_sharpness(loop_filter_info_n *const lfi, int sharpness_lvl) {
if (block_inside_limit < 1)
block_inside_limit = 1;
vpx_memset(lfi->lim[lvl], block_inside_limit, SIMD_WIDTH);
vpx_memset(lfi->mblim[lvl], (2 * (lvl + 2) + block_inside_limit),
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
SIMD_WIDTH);
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
SIMD_WIDTH);
}
}
void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) {
void vp9_loop_filter_init(VP9_COMMON *cm) {
loop_filter_info_n *lfi = &cm->lf_info;
int i;
// init limits for given sharpness
update_sharpness(lfi, lf->sharpness_level);
lf->last_sharpness_level = lf->sharpness_level;
vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
// init LUT for lvl and hev thr picking
lf_init_lut(lfi);
@@ -77,69 +78,98 @@ void vp9_loop_filter_init(VP9_COMMON *cm, struct loopfilter *lf) {
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}
void vp9_loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
void vp9_loop_filter_frame_init(VP9_COMMON *cm,
MACROBLOCKD *xd,
int default_filt_lvl) {
int seg_id;
int seg, // segment number
ref, // index in ref_lf_deltas
mode; // index in mode_lf_deltas
// n_shift selects the scaling of lf_deltas: deltas are multiplied
// by 1 (n_shift == 0) when filter_lvl is between 0 and 31, and
// by 2 (n_shift == 1) when filter_lvl is between 32 and 63
const int n_shift = default_filt_lvl >> 5;
loop_filter_info_n *const lfi = &cm->lf_info;
struct loopfilter *const lf = &xd->lf;
struct segmentation *const seg = &xd->seg;
int n_shift = default_filt_lvl >> 5;
// update limits if sharpness has changed
if (lf->last_sharpness_level != lf->sharpness_level) {
update_sharpness(lfi, lf->sharpness_level);
lf->last_sharpness_level = lf->sharpness_level;
loop_filter_info_n *lfi = &cm->lf_info;
/* update limits if sharpness has changed */
// printf("vp9_loop_filter_frame_init %d\n", default_filt_lvl);
// printf("sharpness level: %d [%d]\n",
// cm->sharpness_level, cm->last_sharpness_level);
if (cm->last_sharpness_level != cm->sharpness_level) {
vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
}
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;
for (seg = 0; seg < MAX_MB_SEGMENTS; seg++) {
int lvl_seg = default_filt_lvl;
int lvl_ref, lvl_mode;
// Set the baseline filter values for each segment
if (vp9_segfeature_active(&xd->seg, seg_id, SEG_LVL_ALT_LF)) {
const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
? data
: clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER);
if (vp9_segfeature_active(xd, seg, SEG_LVL_ALT_LF)) {
/* Abs value */
if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
lvl_seg = vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
} else { /* Delta Value */
lvl_seg += vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
lvl_seg = clamp(lvl_seg, 0, 63);
}
}
if (!lf->mode_ref_delta_enabled) {
// we could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
vpx_memset(lfi->lvl[seg_id][0], lvl_seg, 4 * 4);
if (!xd->mode_ref_lf_delta_enabled) {
/* we could get rid of this if we assume that deltas are set to
* zero when not in use; encoder always uses deltas
*/
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4);
continue;
}
intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
lvl_ref = lvl_seg;
for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
+ (lf->mode_deltas[mode] << n_shift);
lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
/* INTRA_FRAME */
ref = INTRA_FRAME;
/* Apply delta for reference frame */
lvl_ref += xd->ref_lf_deltas[ref] << n_shift;
mode = 0; /* all the rest of Intra modes */
lvl_mode = lvl_ref;
lfi->lvl[seg][ref][mode] = clamp(lvl_mode, 0, 63);
/* LAST, GOLDEN, ALT */
for (ref = 1; ref < MAX_REF_FRAMES; ref++) {
int lvl_ref = lvl_seg;
/* Apply delta for reference frame */
lvl_ref += xd->ref_lf_deltas[ref] << n_shift;
/* Apply delta for Inter modes */
for (mode = 0; mode < MAX_MODE_LF_DELTAS; mode++) {
lvl_mode = lvl_ref + (xd->mode_lf_deltas[mode] << n_shift);
lfi->lvl[seg][ref][mode] = clamp(lvl_mode, 0, 63);
}
}
}
}
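As a worked example of the delta arithmetic above (values hypothetical): with default_filt_lvl = 36, n_shift = 36 >> 5 = 1; a reference delta of +2 and a mode delta of -1 then give 36 + 2*2 - 1*2 = 38, which clamp(..., 0, 63) leaves unchanged.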
static int build_lfi(const loop_filter_info_n *const lfi_n,
const MB_MODE_INFO *const mbmi,
struct loop_filter_info *const lfi) {
const int seg = mbmi->segment_id;
const int ref = mbmi->ref_frame[0];
const int mode = lfi_n->mode_lf_lut[mbmi->mode];
const int filter_level = lfi_n->lvl[seg][ref][mode];
static int build_lfi(const VP9_COMMON *cm, const MB_MODE_INFO *mbmi,
struct loop_filter_info *lfi) {
const loop_filter_info_n *lfi_n = &cm->lf_info;
int mode = mbmi->mode;
int mode_index = lfi_n->mode_lf_lut[mode];
int seg = mbmi->segment_id;
int ref_frame = mbmi->ref_frame[0];
int filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level > 0) {
if (filter_level) {
const int hev_index = filter_level >> 4;
lfi->mblim = lfi_n->mblim[filter_level];
lfi->blim = lfi_n->blim[filter_level];
lfi->lim = lfi_n->lim[filter_level];
lfi->hev_thr = lfi_n->hev_thr[filter_level >> 4];
lfi->hev_thr = lfi_n->hev_thr[hev_index];
return 1;
} else {
return 0;
}
return 0;
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -191,22 +221,14 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
int only_4x4_1,
const struct loop_filter_info *lfi) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
count = 1;
mask; mask >>= 1) {
if (mask & 1) {
if (!only_4x4_1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
assert(!(mask_8x8 & 1));
assert(!(mask_4x4 & 1));
assert(!(mask_4x4_int & 1));
@@ -227,43 +249,42 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
}
s += 8 * count;
lfi += count;
mask_16x16 >>= count;
mask_8x8 >>= count;
mask_4x4 >>= count;
mask_4x4_int >>= count;
s += 8;
lfi++;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
}
}
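The count-based variant shown here lets two adjacent 8-pixel groups be filtered in one call: when (mask_16x16 & 3) == 3, vp9_mb_lpf_horizontal_edge_w runs with count = 2 and s, lfi and all four masks advance by two positions at once.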
static void filter_block_plane(VP9_COMMON *const cm,
struct macroblockd_plane *const plane,
const MODE_INFO *mi,
int mi_row, int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_x;
const int col_step = 1 << ss_y;
const int row_step_stride = cm->mode_info_stride * row_step;
struct buf_2d *const dst = &plane->dst;
static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
int plane, int mi_row, int mi_col) {
const int ss_x = xd->plane[plane].subsampling_x;
const int ss_y = xd->plane[plane].subsampling_y;
const int row_step = 1 << xd->plane[plane].subsampling_y;
const int col_step = 1 << xd->plane[plane].subsampling_x;
struct buf_2d * const dst = &xd->plane[plane].dst;
uint8_t* const dst0 = dst->buf;
unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
MODE_INFO* const mi0 = xd->mode_info_context;
unsigned int mask_16x16[64 / MI_SIZE] = {0};
unsigned int mask_8x8[64 / MI_SIZE] = {0};
unsigned int mask_4x4[64 / MI_SIZE] = {0};
unsigned int mask_4x4_int[64 / MI_SIZE] = {0};
struct loop_filter_info lfi[64 / MI_SIZE][64 / MI_SIZE];
int r, c;
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
for (r = 0; r < 64 / MI_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
unsigned int mask_8x8_c = 0;
unsigned int mask_4x4_c = 0;
unsigned int border_mask;
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
for (c = 0; c < 64 / MI_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO * const mi = xd->mode_info_context;
const int skip_this = mi[c].mbmi.mb_skip_coeff
&& is_inter_block(&mi[c].mbmi);
&& mi[c].mbmi.ref_frame[0] != INTRA_FRAME;
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = b_width_log2(mi[c].mbmi.sb_type) ?
!(c & ((1 << (b_width_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
@@ -272,14 +293,14 @@ static void filter_block_plane(VP9_COMMON *const cm,
const int block_edge_above = b_height_log2(mi[c].mbmi.sb_type) ?
!(r & ((1 << (b_height_log2(mi[c].mbmi.sb_type)-1)) - 1)) : 1;
const int skip_this_r = skip_this && !block_edge_above;
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
? get_uv_tx_size(&mi[c].mbmi)
: mi[c].mbmi.txfm_size;
const TX_SIZE tx_size = plane ? get_uv_tx_size(&mi[c].mbmi)
: mi[c].mbmi.txfm_size;
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
// Filter level can vary per MI
if (!build_lfi(&cm->lf_info, &mi[c].mbmi, lfi[r] + (c >> ss_x)))
if (!build_lfi(cm, &mi[c].mbmi,
lfi[r] + (c >> xd->plane[plane].subsampling_x)))
continue;
// Build masks based on the transform size of each block
@@ -338,12 +359,13 @@ static void filter_block_plane(VP9_COMMON *const cm,
mask_4x4_c & border_mask,
mask_4x4_int[r], lfi[r]);
dst->buf += 8 * dst->stride;
mi += row_step_stride;
xd->mode_info_context += cm->mode_info_stride * row_step;
}
// Now do horizontal pass
dst->buf = dst0;
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
xd->mode_info_context = mi0;
for (r = 0; r < 64 / MI_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
@@ -353,41 +375,30 @@ static void filter_block_plane(VP9_COMMON *const cm,
mask_4x4[r],
mask_4x4_int_r, mi_row + r == 0, lfi[r]);
dst->buf += 8 * dst->stride;
xd->mode_info_context += cm->mode_info_stride * row_step;
}
}
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
VP9_COMMON *cm, MACROBLOCKD *xd,
int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
void vp9_loop_filter_frame(VP9_COMMON *cm,
MACROBLOCKD *xd,
int frame_filter_level,
int y_only) {
int mi_row, mi_col;
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
// Initialize the loop filter for this frame.
vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 64 / MI_SIZE) {
MODE_INFO* const mi = cm->mi + mi_row * cm->mode_info_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 64 / MI_SIZE) {
int plane;
setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
for (plane = 0; plane < num_planes; ++plane) {
filter_block_plane(cm, &xd->plane[plane], mi + mi_col, mi_row, mi_col);
setup_dst_planes(xd, cm->frame_to_show, mi_row, mi_col);
for (plane = 0; plane < (y_only ? 1 : MAX_MB_PLANE); plane++) {
xd->mode_info_context = mi + mi_col;
filter_block_plane(cm, xd, plane, mi_row, mi_col);
}
}
}
}
void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
int frame_filter_level, int y_only) {
if (!frame_filter_level) return;
vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
0, cm->mi_rows, y_only);
}
int vp9_loop_filter_worker(void *arg1, void *arg2) {
LFWorkerData *const lf_data = (LFWorkerData*)arg1;
(void)arg2;
vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
lf_data->start, lf_data->stop, lf_data->y_only);
return 1;
}


@@ -13,39 +13,61 @@
#include "vpx_ports/mem.h"
#include "vpx_config.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_seg_common.h"
#define MAX_LOOP_FILTER 63
#define MAX_SHARPNESS 7
#define SIMD_WIDTH 16
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
/* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct {
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
hev_thr[4][SIMD_WIDTH]);
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
uint8_t mode_lf_lut[MB_MODE_COUNT];
unsigned char lvl[MAX_MB_SEGMENTS][4][4];
unsigned char mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
struct loop_filter_info {
const unsigned char *mblim;
const unsigned char *blim;
const unsigned char *lim;
const unsigned char *hev_thr;
};
#define prototype_loopfilter(sym) \
void sym(uint8_t *src, int pitch, const unsigned char *blimit, \
const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_block(sym) \
void sym(uint8_t *y, uint8_t *u, uint8_t *v, \
int ystride, int uv_stride, struct loop_filter_info *lfi)
#if ARCH_X86 || ARCH_X86_64
#include "x86/vp9_loopfilter_x86.h"
#endif
typedef void loop_filter_uvfunction(uint8_t *u, /* source pointer */
int p, /* pitch */
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
uint8_t *v);
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
void vp9_loop_filter_init(struct VP9Common *cm, struct loopfilter *lf);
void vp9_loop_filter_init(struct VP9Common *cm);
// Update the loop filter for the current frame.
// This should be called before vp9_loop_filter_rows(), vp9_loop_filter_frame()
// calls this function directly.
void vp9_loop_filter_frame_init(struct VP9Common *const cm,
struct macroblockd *const xd,
void vp9_loop_filter_frame_init(struct VP9Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp9_loop_filter_frame(struct VP9Common *cm,
@@ -53,22 +75,11 @@ void vp9_loop_filter_frame(struct VP9Common *cm,
int filter_level,
int y_only);
// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
struct VP9Common *cm, struct macroblockd *xd,
int start, int stop, int y_only);
void vp9_loop_filter_partial_frame(struct VP9Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
typedef struct LoopFilterWorkerData {
const YV12_BUFFER_CONFIG *frame_buffer;
struct VP9Common *cm;
struct macroblockd xd; // TODO(jzern): most of this is unnecessary to the
// loopfilter. the planes are necessary as their state
// is changed during decode.
int start;
int stop;
int y_only;
} LFWorkerData;
void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
// Operates on the rows described by LFWorkerData passed as 'arg1'.
int vp9_loop_filter_worker(void *arg1, void *arg2);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_


@@ -34,44 +34,17 @@ static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit,
return ~mask;
}
static INLINE int8_t flat_mask4(uint8_t thresh,
uint8_t p3, uint8_t p2,
uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1,
uint8_t q2, uint8_t q3) {
int8_t mask = 0;
mask |= (abs(p1 - p0) > thresh) * -1;
mask |= (abs(q1 - q0) > thresh) * -1;
mask |= (abs(p2 - p0) > thresh) * -1;
mask |= (abs(q2 - q0) > thresh) * -1;
mask |= (abs(p3 - p0) > thresh) * -1;
mask |= (abs(q3 - q0) > thresh) * -1;
return ~mask;
}
static INLINE int8_t flat_mask5(uint8_t thresh,
uint8_t p4, uint8_t p3,
uint8_t p2, uint8_t p1,
uint8_t p0, uint8_t q0,
uint8_t q1, uint8_t q2,
uint8_t q3, uint8_t q4) {
int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3);
mask |= (abs(p4 - p0) > thresh) * -1;
mask |= (abs(q4 - q0) > thresh) * -1;
return ~mask;
}
// is there high edge variance internal edge: 11111111 yes, 00000000 no
static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1) {
static INLINE int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1) {
int8_t hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
hev |= (abs(q1 - q0) > thresh) * -1;
return hev;
}
static INLINE void filter4(int8_t mask, uint8_t hev, uint8_t *op1,
uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
static INLINE void filter(int8_t mask, uint8_t hev, uint8_t *op1,
uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
int8_t filter1, filter2;
const int8_t ps1 = (int8_t) *op1 ^ 0x80;
@@ -95,7 +68,7 @@ static INLINE void filter4(int8_t mask, uint8_t hev, uint8_t *op1,
*op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
// outer tap adjustments
filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
filter = ((filter1 + 1) >> 1) & ~hev;
*oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
@@ -115,8 +88,8 @@ void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
filter4(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
}
@@ -135,30 +108,57 @@ void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
filter4(mask, hev, s - 2, s - 1, s, s + 1);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
filter(mask, hev, s - 2, s - 1, s, s + 1);
s += pitch;
}
}
static INLINE void filter8(int8_t mask, uint8_t hev, uint8_t flat,
uint8_t *op3, uint8_t *op2,
uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1,
uint8_t *oq2, uint8_t *oq3) {
static INLINE int8_t flatmask4(uint8_t thresh,
uint8_t p3, uint8_t p2,
uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1,
uint8_t q2, uint8_t q3) {
int8_t flat = 0;
flat |= (abs(p1 - p0) > thresh) * -1;
flat |= (abs(q1 - q0) > thresh) * -1;
flat |= (abs(p0 - p2) > thresh) * -1;
flat |= (abs(q0 - q2) > thresh) * -1;
flat |= (abs(p3 - p0) > thresh) * -1;
flat |= (abs(q3 - q0) > thresh) * -1;
return ~flat;
}
static INLINE signed char flatmask5(uint8_t thresh,
uint8_t p4, uint8_t p3, uint8_t p2,
uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1, uint8_t q2,
uint8_t q3, uint8_t q4) {
int8_t flat = 0;
flat |= (abs(p4 - p0) > thresh) * -1;
flat |= (abs(q4 - q0) > thresh) * -1;
flat = ~flat;
return flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3);
}
static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat,
uint8_t *op3, uint8_t *op2,
uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1,
uint8_t *oq2, uint8_t *oq3) {
// use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line
if (flat && mask) {
const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
// 7-tap filter [1, 1, 1, 2, 1, 1, 1]
*op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3);
*op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
*op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
*oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
*oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3);
*oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3);
*op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3);
*op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3);
*op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3);
*oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3);
*oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3);
*oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3);
} else {
filter4(mask, hev, op1, op0, oq0, oq1);
filter(mask, hev, op1, op0, oq0, oq1);
}
}
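Note on the 7-tap filter in both spellings: edge replication makes the per-output weights sum to 8 (e.g. *op2 weighs p3, p2, p1, p0, q0 as 3, 2, 1, 1, 1), so ROUND_POWER_OF_TWO(sum, 3) is a rounded weighted average; a flat input of all 100s stays at (800 + 4) >> 3 = 100.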
@@ -177,10 +177,11 @@ void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p,
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
filter8(mask, hev, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
s, s + 1 * p, s + 2 * p, s + 3 * p);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
mbfilter(mask, hev, flat,
s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
s, s + 1 * p, s + 2 * p, s + 3 * p);
++s;
}
}
@@ -197,24 +198,23 @@ void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(thresh[0], p1, p0, q0, q1);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
filter8(mask, hev, flat, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3);
const int8_t hev = hevmask(thresh[0], p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
mbfilter(mask, hev, flat, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3);
s += pitch;
}
}
static INLINE void filter16(int8_t mask, uint8_t hev,
uint8_t flat, uint8_t flat2,
uint8_t *op7, uint8_t *op6,
uint8_t *op5, uint8_t *op4,
uint8_t *op3, uint8_t *op2,
uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1,
uint8_t *oq2, uint8_t *oq3,
uint8_t *oq4, uint8_t *oq5,
uint8_t *oq6, uint8_t *oq7) {
static INLINE void wide_mbfilter(int8_t mask, uint8_t hev,
uint8_t flat, uint8_t flat2,
uint8_t *op7, uint8_t *op6, uint8_t *op5,
uint8_t *op4, uint8_t *op3, uint8_t *op2,
uint8_t *op1, uint8_t *op0, uint8_t *oq0,
uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
uint8_t *oq4, uint8_t *oq5, uint8_t *oq6,
uint8_t *oq7) {
// use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line
if (flat2 && flat && mask) {
const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4,
p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
@@ -222,7 +222,6 @@ static INLINE void filter16(int8_t mask, uint8_t hev,
const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3,
q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
// 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1]
*op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 +
q0, 4);
*op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 +
@@ -252,35 +251,35 @@ static INLINE void filter16(int8_t mask, uint8_t hev,
*oq6 = ROUND_POWER_OF_TWO(p0 +
q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4);
} else {
filter8(mask, hev, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
mbfilter(mask, hev, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
}
}
void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
const uint8_t *thresh) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < 8 * count; ++i) {
for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flat_mask5(1,
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flatmask5(1,
s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0,
q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]);
filter16(mask, hev, flat, flat2,
s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
s, s + 1 * p, s + 2 * p, s + 3 * p,
s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p);
wide_mbfilter(mask, hev, flat, flat2,
s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
s, s + 1 * p, s + 2 * p, s + 3 * p,
s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p);
++s;
}
}
@@ -296,14 +295,14 @@ void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0,
q0, s[4], s[5], s[6], s[7]);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flatmask5(1, s[-8], s[-7], s[-6], s[-5], p0,
q0, s[4], s[5], s[6], s[7]);
filter16(mask, hev, flat, flat2,
s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7);
wide_mbfilter(mask, hev, flat, flat2,
s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7);
s += p;
}
}

vp9/common/vp9_maskingmv.c

@@ -0,0 +1,803 @@
/*
============================================================================
Name : vp9_maskingmv.c
Author : jimbankoski
Version :
Copyright : Your copyright notice
Description : Hello World in C, Ansi-style
============================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
unsigned int vp9_sad16x16_sse3(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride,
int max_err);
int vp8_growmaskmb_sse3(
unsigned char *om,
unsigned char *nm);
void vp8_makemask_sse3(
unsigned char *y,
unsigned char *u,
unsigned char *v,
unsigned char *ym,
int yp,
int uvp,
int ys,
int us,
int vs,
int yt,
int ut,
int vt);
unsigned int vp9_sad16x16_unmasked_wmt(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride,
unsigned char *mask);
unsigned int vp9_sad16x16_masked_wmt(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
int ref_stride,
unsigned char *mask);
unsigned int vp8_masked_predictor_wmt(
unsigned char *masked,
unsigned char *unmasked,
int src_stride,
unsigned char *dst_ptr,
int dst_stride,
unsigned char *mask);
unsigned int vp8_masked_predictor_uv_wmt(
unsigned char *masked,
unsigned char *unmasked,
int src_stride,
unsigned char *dst_ptr,
int dst_stride,
unsigned char *mask);
unsigned int vp8_uv_from_y_mask(
unsigned char *ymask,
unsigned char *uvmask);
int yp = 16;
unsigned char sxy[] = {
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120, 90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90
};
unsigned char sts[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
unsigned char str[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
unsigned char y[] = {
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40,
40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40
};
int uvp = 8;
unsigned char u[] = {
90, 80, 70, 70, 90, 90, 90, 17,
90, 80, 70, 70, 90, 90, 90, 17,
84, 70, 70, 90, 90, 90, 17, 17,
84, 70, 70, 90, 90, 90, 17, 17,
80, 70, 70, 90, 90, 90, 17, 17,
90, 80, 70, 70, 90, 90, 90, 17,
90, 80, 70, 70, 90, 90, 90, 17,
90, 80, 70, 70, 90, 90, 90, 17
};
unsigned char v[] = {
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80
};
unsigned char ym[256];
unsigned char uvm[64];
typedef struct {
unsigned char y;
unsigned char yt;
unsigned char u;
unsigned char ut;
unsigned char v;
unsigned char vt;
unsigned char use;
} COLOR_SEG_ELEMENT;
/*
COLOR_SEG_ELEMENT segmentation[]=
{
{ 60,4,80,17,80,10, 1},
{ 40,4,15,10,80,10, 1},
};
*/
COLOR_SEG_ELEMENT segmentation[] = {
{ 79, 44, 92, 44, 237, 60, 1},
};
unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v,
COLOR_SEG_ELEMENT sgm[],
int c) {
COLOR_SEG_ELEMENT *s = sgm;
unsigned char m = 0;
int i;
for (i = 0; i < c; i++, s++)
m |= (abs(y - s->y) < s->yt &&
abs(u - s->u) < s->ut &&
abs(v - s->v) < s->vt ? 255 : 0);
return m;
}
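A usage sketch of pixel_mask() against the segmentation[] entry above (field order y, yt, u, ut, v, vt, use):
// With segmentation[0] = {79, 44, 92, 44, 237, 60, 1}:
//   pixel_mask(60, 80, 237, segmentation, 1) == 255
//     since |60-79| = 19 < 44, |80-92| = 12 < 44, |237-237| = 0 < 60;
//   pixel_mask(60, 80, 80, segmentation, 1) == 0
//     since |80-237| = 157 fails the v threshold of 60.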
int neighbors[256][8];
int makeneighbors(void) {
int i, j;
for (i = 0; i < 256; i++) {
int r = (i >> 4), c = (i & 15);
int ni = 0;
for (j = 0; j < 8; j++)
neighbors[i][j] = i;
for (j = 0; j < 256; j++) {
int nr = (j >> 4), nc = (j & 15);
// exclude self and cap ni: an interior cell has 9 qualifying
// positions including j == i, which would write past the 8-entry row
if (j != i && ni < 8 && abs(nr - r) < 2 && abs(nc - c) < 2)
neighbors[i][ni++] = j;
}
}
return 0;
}
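For example, corner cell i = 0 (r = 0, c = 0) gathers {1, 16, 17} on the 16x16 grid, and unused slots keep the preset value i (self), which grow_ymask() then ORs harmlessly; an interior cell such as i = 17 gathers exactly its eight surrounding cells, which is why the guard above caps ni at 8.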
void grow_ymask(unsigned char *ym) {
unsigned char nym[256];
int i, j;
for (i = 0; i < 256; i++) {
nym[i] = ym[i];
for (j = 0; j < 8; j++) {
nym[i] |= ym[neighbors[i][j]];
}
}
for (i = 0; i < 256; i++)
ym[i] = nym[i];
}
void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
unsigned char *ym, unsigned char *uvm,
int yp, int uvp,
COLOR_SEG_ELEMENT sgm[],
int count) {
int r, c;
unsigned char *oym = ym;
memset(ym, 20, 256);
for (r = 0; r < 8; r++, uvm += 8, u += uvp, v += uvp, y += (yp + yp), ym += 32)
for (c = 0; c < 8; c++) {
int y1 = y[c << 1];
int u1 = u[c];
int v1 = v[c];
int m = pixel_mask(y1, u1, v1, sgm, count);
uvm[c] = m;
ym[c << 1] = uvm[c]; // = pixel_mask(y[c<<1],u[c],v[c],sgm,count);
ym[(c << 1) + 1] = pixel_mask(y[1 + (c << 1)], u[c], v[c], sgm, count);
ym[(c << 1) + 16] = pixel_mask(y[yp + (c << 1)], u[c], v[c], sgm, count);
ym[(c << 1) + 17] = pixel_mask(y[1 + yp + (c << 1)], u[c], v[c], sgm, count);
}
grow_ymask(oym);
}
int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
unsigned char *ym) {
int i, j;
unsigned sad = 0;
for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16)
for (j = 0; j < 16; j++)
if (ym[j])
sad += abs(src[j] - dst[j]);
return sad;
}
int compare_masks(unsigned char *sym, unsigned char *ym) {
int i, j;
unsigned sad = 0;
for (i = 0; i < 16; i++, sym += 16, ym += 16)
for (j = 0; j < 16; j++)
sad += (sym[j] != ym[j] ? 1 : 0);
return sad;
}
int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
unsigned char *ym) {
int i, j;
unsigned sad = 0;
for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16)
for (j = 0; j < 16; j++)
if (!ym[j])
sad += abs(src[j] - dst[j]);
return sad;
}
int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
int yp, int uvp,
unsigned char *dy, unsigned char *du, unsigned char *dv,
int dyp, int duvp,
COLOR_SEG_ELEMENT sgm[],
int count,
int *mi,
int *mj,
int *ui,
int *uj,
int *wm) {
int i, j;
unsigned char ym[256];
unsigned char uvm[64];
unsigned char dym[256];
unsigned char duvm[64];
unsigned int e = 0;
int beste = 256;
int bmi = -32, bmj = -32;
int bui = -32, buj = -32;
int beste1 = 256;
int bmi1 = -32, bmj1 = -32;
int bui1 = -32, buj1 = -32;
int obeste;
// strategy 1: find the best unmasked mv first, then the best masked mv
beste = 0xffffffff;
// find best unmasked mv
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
unsigned char *duz = i / 2 * duvp + du;
unsigned char *dvz = i / 2 * duvp + dv;
for (j = -32; j < 32; j++) {
// 0,0 masked destination
make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
e = unmasked_sad(y, yp, dyz + j, dyp, dym);
if (e < beste) {
bui = i;
buj = j;
beste = e;
}
}
}
// bui=0;buj=0;
// best mv masked destination
make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
dym, duvm, dyp, duvp, sgm, count);
obeste = beste;
beste = 0xffffffff;
// find best masked
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
for (j = -32; j < 32; j++) {
e = masked_sad(y, yp, dyz + j, dyp, dym);
if (e < beste) {
bmi = i;
bmj = j;
beste = e;
}
}
}
beste1 = beste + obeste;
bmi1 = bmi;
bmj1 = bmj;
bui1 = bui;
buj1 = buj;
beste = 0xffffffff;
// source mask
make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count);
// find best mask
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
unsigned char *duz = i / 2 * duvp + du;
unsigned char *dvz = i / 2 * duvp + dv;
for (j = -32; j < 32; j++) {
// 0,0 masked destination
make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm, count);
e = compare_masks(ym, dym);
if (e < beste) {
bmi = i;
bmj = j;
beste = e;
}
}
}
// best mv masked destination
make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
dym, duvm, dyp, duvp, sgm, count);
obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym);
beste = 0xffffffff;
// find best unmasked mv
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
for (j = -32; j < 32; j++) {
e = unmasked_sad(y, yp, dyz + j, dyp, dym);
if (e < beste) {
bui = i;
buj = j;
beste = e;
}
}
}
beste += obeste;
if (beste < beste1) {
*mi = bmi;
*mj = bmj;
*ui = bui;
*uj = buj;
*wm = 1;
} else {
*mi = bmi1;
*mj = bmj1;
*ui = bui1;
*uj = buj1;
*wm = 0;
}
return 0;
}
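In outline, the search evaluates two orderings and keeps the cheaper combined SAD: (1) find the best unmasked motion vector, build the color mask at that destination, then search again for the best masked vector; (2) build the mask from the source block, find the vector whose destination mask best matches it, then find the best unmasked vector for the remainder. *wm records which ordering won (1 means the mask-first ordering).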
int predict(unsigned char *src, int p, unsigned char *dst, int dp,
unsigned char *ym, unsigned char *prd) {
int i, j;
for (i = 0; i < 16; i++, src += p, dst += dp, ym += 16, prd += 16)
for (j = 0; j < 16; j++)
prd[j] = (ym[j] ? src[j] : dst[j]);
return 0;
}
int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
int yp, int uvp,
unsigned char *dy, unsigned char *du, unsigned char *dv,
int dyp, int duvp,
COLOR_SEG_ELEMENT sgm[],
int count,
int *mi,
int *mj,
int *ui,
int *uj,
int *wm) {
int i, j;
unsigned char ym[256];
unsigned char ym2[256];
unsigned char uvm[64];
unsigned char dym2[256];
unsigned char dym[256];
unsigned char duvm[64];
unsigned int e = 0;
int beste = 256;
int bmi = -32, bmj = -32;
int bui = -32, buj = -32;
int beste1 = 256;
int bmi1 = -32, bmj1 = -32;
int bui1 = -32, buj1 = -32;
int obeste;
// strategy 1: find the best unmasked mv first, then the best masked mv
beste = 0xffffffff;
#if 0
for (i = 0; i < 16; i++) {
unsigned char *dy = i * yp + y;
for (j = 0; j < 16; j++)
printf("%2x", dy[j]);
printf("\n");
}
printf("\n");
for (i = -32; i < 48; i++) {
unsigned char *dyz = i * dyp + dy;
for (j = -32; j < 48; j++)
printf("%2x", dyz[j]);
printf("\n");
}
#endif
// find best unmasked mv
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
unsigned char *duz = i / 2 * duvp + du;
unsigned char *dvz = i / 2 * duvp + dv;
for (j = -32; j < 32; j++) {
// 0,0 masked destination
vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(dym, dym2);
e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
if (e < beste) {
bui = i;
buj = j;
beste = e;
}
}
}
// bui=0;buj=0;
// best mv masked destination
vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2 * duvp + buj / 2,
dym, dyp, duvp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(dym, dym2);
obeste = beste;
beste = 0xffffffff;
// find best masked
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
for (j = -32; j < 32; j++) {
e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2);
if (e < beste) {
bmi = i;
bmj = j;
beste = e;
}
}
}
beste1 = beste + obeste;
bmi1 = bmi;
bmj1 = bmj;
bui1 = bui;
buj1 = buj;
// source mask
vp8_makemask_sse3(y, u, v,
ym, yp, uvp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(ym, ym2);
// find best mask
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
unsigned char *duz = i / 2 * duvp + du;
unsigned char *dvz = i / 2 * duvp + dv;
for (j = -32; j < 32; j++) {
// 0,0 masked destination
vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(dym, dym2);
e = compare_masks(ym2, dym2);
if (e < beste) {
bmi = i;
bmj = j;
beste = e;
}
}
}
vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2 * duvp + bmj / 2,
dym, dyp, duvp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(dym, dym2);
obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2);
beste = 0xffffffff;
// find best unmasked mv
for (i = -32; i < 32; i++) {
unsigned char *dyz = i * dyp + dy;
for (j = -32; j < 32; j++) {
e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2);
if (e < beste) {
bui = i;
buj = j;
beste = e;
}
}
}
beste += obeste;
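// Keep whichever hypothesis has the lower combined SAD; *wm = 1
// selects the mask-first result, *wm = 0 the unmasked-first one.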
if (beste < beste1) {
*mi = bmi;
*mj = bmj;
*ui = bui;
*uj = buj;
*wm = 1;
} else {
*mi = bmi1;
*mj = bmj1;
*ui = bui1;
*uj = buj1;
*wm = 0;
beste = beste1;
}
return beste;
}
int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm,
int ymp, int uvmp,
unsigned char *yp, unsigned char *up, unsigned char *vp,
int ypp, int uvpp,
COLOR_SEG_ELEMENT sgm[],
int count,
int mi,
int mj,
int ui,
int uj,
int wm) {
int i, j;
unsigned char dym[256];
unsigned char dym2[256];
unsigned char duvm[64];
unsigned char *yu = ym, *uu = um, *vu = vm;
unsigned char *dym3 = dym2;
ym += mi * ymp + mj;
um += mi / 2 * uvmp + mj / 2;
vm += mi / 2 * uvmp + mj / 2;
yu += ui * ymp + uj;
uu += ui / 2 * uvmp + uj / 2;
vu += ui / 2 * uvmp + uj / 2;
// best mv masked destination
if (wm)
vp8_makemask_sse3(ym, um, vm, dym, ymp, uvmp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
else
vp8_makemask_sse3(yu, uu, vu, dym, ymp, uvmp,
sgm[0].y, sgm[0].u, sgm[0].v,
sgm[0].yt, sgm[0].ut, sgm[0].vt);
vp8_growmaskmb_sse3(dym, dym2);
vp8_masked_predictor_wmt(ym, yu, ymp, yp, ypp, dym3);
vp8_uv_from_y_mask(dym3, duvm);
vp8_masked_predictor_uv_wmt(um, uu, uvmp, up, uvpp, duvm);
vp8_masked_predictor_uv_wmt(vm, vu, uvmp, vp, uvpp, duvm);
return 0;
}
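Taken together, the two entry points are used per macroblock roughly as follows; this is a sketch with invented buffer names (src_*, rec_*, pred_*) and an example segment literal borrowed from the disabled test code, while mainz() below does the real version with several candidate segments.
// Hedged sketch: search one 16x16 block against the previous
// reconstruction, then build its masked/unmasked prediction.
COLOR_SEG_ELEMENT seg = { 60, 4, 80, 17, 80, 10, 1 };  // example values
int mi, mj, ui, uj, wm;
fast_masked_motion_search(src_y, src_u, src_v, y_stride, uv_stride,
                          rec_y, rec_u, rec_v, y_stride, uv_stride,
                          &seg, 1, &mi, &mj, &ui, &uj, &wm);
predict_all(rec_y, rec_u, rec_v, y_stride, uv_stride,
            pred_y, pred_u, pred_v, y_stride, uv_stride,
            &seg, 1, mi, mj, ui, uj, wm);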
unsigned char f0p[1280 * 720 * 3 / 2];
unsigned char f1p[1280 * 720 * 3 / 2];
unsigned char prd[1280 * 720 * 3 / 2];
unsigned char msk[1280 * 720 * 3 / 2];
int mainz(int argc, char *argv[]) {
FILE *f, *g;
int w, h, y_stride, uv_stride;
int r, c;
unsigned char *f0 = f0p, *f1 = f1p, *t;
unsigned char ym[256], uvm[64];
unsigned char ym2[256], uvm2[64];
unsigned char ym3[256], uvm3[64];
int a, b;
COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1}, best;
if (argc < 5) {
fprintf(stderr, "usage: %s <in.yuv> <out.yuv> <width> <height>\n", argv[0]);
return 1;
}
f = fopen(argv[1], "rb");
g = fopen(argv[2], "wb");
if (!f || !g)
return 1;
w = atoi(argv[3]);
h = atoi(argv[4]);
y_stride = w;
uv_stride = w / 2;
#if 0
makeneighbors();
COLOR_SEG_ELEMENT segmentation[] = {
{ 60, 4, 80, 17, 80, 10, 1},
{ 40, 4, 15, 10, 80, 10, 1},
};
make_mb_mask(y, u, v, ym2, uvm2, 16, 8, segmentation, 1);
vp8_makemask_sse3(y, u, v, ym, (int) 16, (int) 8,
(int) segmentation[0].y, (int) segmentation[0].u, (int) segmentation[0].v,
segmentation[0].yt, segmentation[0].ut, segmentation[0].vt);
vp8_growmaskmb_sse3(ym, ym3);
a = vp9_sad16x16_masked_wmt(str, 16, sts, 16, ym3);
b = vp9_sad16x16_unmasked_wmt(str, 16, sts, 16, ym3);
vp8_masked_predictor_wmt(str, sts, 16, ym, 16, ym3);
vp8_uv_from_y_mask(ym3, uvm3);
return 4;
#endif
makeneighbors();
memset(prd, 128, w * h * 3 / 2);
if (fread(f0, w * h * 3 / 2, 1, f) != 1) {
fclose(f);
fclose(g);
return 1;
}
// Read the next frame up front so a short or failed read ends the
// loop instead of processing stale data.
while (fread(f1, w * h * 3 / 2, 1, f) == 1) {
unsigned char *ys = f1, *yd = f0, *yp = prd;
unsigned char *us = f1 + w * h, *ud = f0 + w * h, *up = prd + w * h;
unsigned char *vs = f1 + w * h * 5 / 4, *vd = f0 + w * h * 5 / 4, *vp = prd + w * h * 5 / 4;
ys += 32 * y_stride;
yd += 32 * y_stride;
yp += 32 * y_stride;
us += 16 * uv_stride;
ud += 16 * uv_stride;
up += 16 * uv_stride;
vs += 16 * uv_stride;
vd += 16 * uv_stride;
vp += 16 * uv_stride;
for (r = 32; r < h - 32; r += 16,
ys += 16 * w, yd += 16 * w, yp += 16 * w,
us += 8 * uv_stride, ud += 8 * uv_stride, up += 8 * uv_stride,
vs += 8 * uv_stride, vd += 8 * uv_stride, vp += 8 * uv_stride) {
for (c = 32; c < w - 32; c += 16) {
int mi, mj, ui, uj, wm;
int bmi, bmj, bui, buj, bwm;
unsigned char ym[256];
if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) == 0)
bmi = bmj = bui = buj = bwm = 0;
else {
COLOR_SEG_ELEMENT cs[5];
int j;
unsigned int beste = 0xffffffff;
unsigned int bestj = 0;
// try color from last mb segmentation
cs[0] = last;
// try color segs from 4 pixels in mb recon as segmentation
cs[1].y = yd[c + y_stride + 1];
cs[1].u = ud[c / 2 + uv_stride];
cs[1].v = vd[c / 2 + uv_stride];
cs[1].yt = cs[1].ut = cs[1].vt = 20;
cs[2].y = yd[c + w + 14];
cs[2].u = ud[c / 2 + uv_stride + 7];
cs[2].v = vd[c / 2 + uv_stride + 7];
cs[2].yt = cs[2].ut = cs[2].vt = 20;
cs[3].y = yd[c + w * 14 + 1];
cs[3].u = ud[c / 2 + uv_stride * 7];
cs[3].v = vd[c / 2 + uv_stride * 7];
cs[3].yt = cs[3].ut = cs[3].vt = 20;
cs[4].y = yd[c + w * 14 + 14];
cs[4].u = ud[c / 2 + uv_stride * 7 + 7];
cs[4].v = vd[c / 2 + uv_stride * 7 + 7];
cs[4].yt = cs[4].ut = cs[4].vt = 20;
for (j = 0; j < 5; j++) {
int e;
e = fast_masked_motion_search(
ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride,
yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride,
&cs[j], 1, &mi, &mj, &ui, &uj, &wm);
if (e < beste) {
bmi = mi;
bmj = mj;
bui = ui;
buj = uj;
bwm = wm;
bestj = j;
beste = e;
}
}
best = cs[bestj];
last = best;
}
predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride,
yp + c, up + c / 2, vp + c / 2, w, uv_stride,
&best, 1, bmi, bmj, bui, buj, bwm);
}
}
fwrite(prd, w * h * 3 / 2, 1, g);
t = f0;
f0 = f1;
f1 = t;
}
fclose(f);
fclose(g);
return 0;
}
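The pointer arithmetic above assumes planar I420 input: each frame is w * h * 3 / 2 bytes with the planes at fixed offsets. A sketch of the layout mainz() relies on (w and h as read from the command line):
// Hedged sketch of the I420 frame layout used by this tool.
unsigned char *frame = f0p;                // one whole frame buffer
unsigned char *y_plane = frame;            // w*h luma bytes
unsigned char *u_plane = frame + w * h;    // (w/2)*(h/2) chroma bytes
unsigned char *v_plane = frame + w * h * 5 / 4;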

28
vp9/common/vp9_mbpitch.c Normal file

@@ -0,0 +1,28 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_blockd.h"
void vp9_setup_block_dptrs(MACROBLOCKD *mb,
int subsampling_x, int subsampling_y) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++) {
mb->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
mb->plane[i].subsampling_x = i ? subsampling_x : 0;
mb->plane[i].subsampling_y = i ? subsampling_y : 0;
}
#if CONFIG_ALPHA
// TODO(jkoleszar): Using the Y w/h for now
mb->plane[3].subsampling_x = 0;
mb->plane[3].subsampling_y = 0;
#endif
}
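For reference, the subsampling arguments are log2 shift factors, assuming the usual VP9 convention (1 means the chroma plane is halved in that direction). A hedged usage sketch:
// Hedged sketch: chroma subsampling per format.
MACROBLOCKD mb;
vp9_setup_block_dptrs(&mb, 1, 1);  // 4:2:0, halved in both directions
vp9_setup_block_dptrs(&mb, 1, 0);  // 4:2:2, halved horizontally only
vp9_setup_block_dptrs(&mb, 0, 0);  // 4:4:4, full-resolution chroma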

23
vp9/common/vp9_modecont.c Normal file

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_modecont.h"
const vp9_prob vp9_default_inter_mode_probs[INTER_MODE_CONTEXTS]
[VP9_INTER_MODES - 1] = {
{2, 173, 34}, // 0 = both zero mv
{7, 145, 85}, // 1 = one zero mv + one predicted mv
{7, 166, 63}, // 2 = two predicted mvs
{7, 94, 66}, // 3 = one predicted/zero and one new mv
{8, 64, 46}, // 4 = two new mvs
{17, 81, 31}, // 5 = one intra neighbour + x
{25, 29, 30}, // 6 = two intra neighbours
};
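A note on reading these tables, assuming the usual VP9 convention that a vp9_prob p encodes a probability of p/256 for a tree node's 0-branch:
// Hedged sketch: convert the first node probability of the
// "both zero mv" context to a real probability.
double p = vp9_default_inter_mode_probs[0][0] / 256.0;  // 2/256, ~0.8%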

19
vp9/common/vp9_modecont.h Normal file

@@ -0,0 +1,19 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_VP9_MODECONT_H_
#define VP9_COMMON_VP9_MODECONT_H_
#include "vp9/common/vp9_entropy.h"
extern const vp9_prob vp9_default_inter_mode_probs[INTER_MODE_CONTEXTS]
[VP9_INTER_MODES - 1];
#endif // VP9_COMMON_VP9_MODECONT_H_


@@ -0,0 +1,128 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_entropymode.h"
const vp9_prob vp9_kf_default_bmode_probs[VP9_INTRA_MODES]
[VP9_INTRA_MODES]
[VP9_INTRA_MODES - 1] = {
{ /* above = dc */
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
{ 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */,
{ 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */,
{ 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */,
{ 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */,
{ 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */,
{ 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */,
{ 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */,
{ 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */
}, { /* above = v */
{ 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */,
{ 43, 46, 168, 134, 107, 128, 69, 142, 92 } /* left = v */,
{ 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */,
{ 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */,
{ 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */,
{ 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */,
{ 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */,
{ 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */,
{ 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */,
{ 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */
}, { /* above = h */
{ 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */,
{ 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */,
{ 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */,
{ 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */,
{ 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */,
{ 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */,
{ 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */,
{ 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */,
{ 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */,
{ 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */
}, { /* above = d45 */
{ 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */,
{ 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */,
{ 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */,
{ 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */,
{ 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */,
{ 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */,
{ 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */,
{ 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */,
{ 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */,
{ 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */
}, { /* above = d135 */
{ 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */,
{ 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */,
{ 40, 29, 19, 161, 180, 207, 43, 24, 91 } /* left = h */,
{ 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */,
{ 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */,
{ 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */,
{ 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */,
{ 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */,
{ 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */,
{ 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */
}, { /* above = d117 */
{ 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */,
{ 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */,
{ 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */,
{ 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */,
{ 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */,
{ 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */,
{ 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */,
{ 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */,
{ 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */,
{ 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */
}, { /* above = d153 */
{ 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */,
{ 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */,
{ 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */,
{ 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */,
{ 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */,
{ 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */,
{ 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */,
{ 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */,
{ 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */,
{ 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */
}, { /* above = d27 */
{ 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */,
{ 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */,
{ 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */,
{ 68, 36, 17, 106, 102, 206, 59, 74, 74 } /* left = d45 */,
{ 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */,
{ 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */,
{ 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */,
{ 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */,
{ 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */,
{ 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */
}, { /* above = d63 */
{ 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */,
{ 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */,
{ 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */,
{ 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */,
{ 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */,
{ 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */,
{ 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */,
{ 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */,
{ 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */,
{ 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */
}, { /* above = tm */
{ 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */,
{ 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */,
{ 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */,
{ 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */,
{ 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */,
{ 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */,
{ 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */,
{ 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */,
{ 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */,
{ 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */
}
};
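These key-frame probabilities are conditioned on the intra modes of the above and left neighbours; each [above][left] entry holds the VP9_INTRA_MODES - 1 = 9 node probabilities for the intra mode tree. A usage sketch (above_mode and left_mode are hypothetical variables holding the neighbours' modes):
// Hedged sketch: select the probability set for the current block.
const vp9_prob *probs = vp9_kf_default_bmode_probs[above_mode][left_mode];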


@@ -13,8 +13,6 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
typedef struct {
int16_t row;
int16_t col;
@@ -25,15 +23,14 @@ typedef union int_mv {
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
-typedef struct {
+struct mv32 {
   int32_t row;
   int32_t col;
-} MV32;
+};
-static void clamp_mv(MV *mv, int min_col, int max_col,
-                     int min_row, int max_row) {
-  mv->col = clamp(mv->col, min_col, max_col);
-  mv->row = clamp(mv->row, min_row, max_row);
-}
+typedef union int_mv32 {
+  uint64_t as_int;
+  struct mv32 as_mv;
+} int_mv32; /* facilitates faster equality tests and copies */
#endif // VP9_COMMON_VP9_MV_H_


@@ -11,285 +11,296 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
typedef enum {
BOTH_ZERO = 0,
ZERO_PLUS_PREDICTED = 1,
BOTH_PREDICTED = 2,
NEW_PLUS_NON_INTRA = 3,
BOTH_NEW = 4,
INTRA_PLUS_NON_INTRA = 5,
BOTH_INTRA = 6,
INVALID_CASE = 9
} motion_vector_context;
// This is used to figure out a context for the ref blocks. The code flattens
// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
// adding 9 for each intra block, 3 for each zero mv and 1 for each new
// motion vector. This single number is then converted into a context
// with a single lookup ( counter_to_context ).
static const int mode_2_counter[MB_MODE_COUNT] = {
9, // DC_PRED
9, // V_PRED
9, // H_PRED
9, // D45_PRED
9, // D135_PRED
9, // D117_PRED
9, // D153_PRED
9, // D27_PRED
9, // D63_PRED
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
// 2. However the actual count can never be greater than 2 so the highest
// counter we need is 18. 9 is an invalid counter that's never used.
static const int counter_to_context[19] = {
BOTH_PREDICTED, // 0
NEW_PLUS_NON_INTRA, // 1
BOTH_NEW, // 2
ZERO_PLUS_PREDICTED, // 3
NEW_PLUS_NON_INTRA, // 4
INVALID_CASE, // 5
BOTH_ZERO, // 6
INVALID_CASE, // 7
INVALID_CASE, // 8
INTRA_PLUS_NON_INTRA, // 9
INTRA_PLUS_NON_INTRA, // 10
INVALID_CASE, // 11
INTRA_PLUS_NON_INTRA, // 12
INVALID_CASE, // 13
INVALID_CASE, // 14
INVALID_CASE, // 15
INVALID_CASE, // 16
INVALID_CASE, // 17
BOTH_INTRA // 18
};
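A worked example of the flattening described above: with one ZEROMV neighbour and one NEWMV neighbour the counter is 3 + 1 = 4, which the lookup maps to NEW_PLUS_NON_INTRA. Values come straight from the two tables.
// Worked example of the counter-to-context flattening.
#include <assert.h>
int counter = mode_2_counter[ZEROMV] + mode_2_counter[NEWMV];  // 3 + 1
assert(counter_to_context[counter] == NEW_PLUS_NON_INTRA);     // index 4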
static const int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = {
// 4X4
static int mv_ref_blocks[BLOCK_SIZE_TYPES][MVREF_NEIGHBOURS][2] = {
// SB4X4
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
// 4X8
// SB4X8
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
// 8X4
// SB8X4
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
// 8X8
// SB8X8
{{0, -1}, {-1, 0}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}},
// 8X16
// SB8X16
{{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
// 16X8
// SB16X8
{{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
// 16X16
// SB16X16
{{0, -1}, {-1, 0}, {1, -1}, {-1, 1}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}},
// 16X32
// SB16X32
{{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
// 32X16
// SB32X16
{{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X32
// SB32X32
{{1, -1}, {-1, 1}, {2, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {-3, -3}},
// 32X64
// SB32X64
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X32
// SB64X32
{{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
// 64X64
// SB64X64
{{3, -1}, {-1, 3}, {4, -1}, {-1, 4}, {-1, -1}, {0, -1}, {-1, 0}, {6, -1}}
};
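Each entry above is a {col, row} offset in mode-info units from the current block, so {0, -1} is the block directly above and {-1, 0} the block to the left. A sketch of the indexing as used inside vp9_find_mv_refs_idx() below (here, xd, i and mv_ref_search as in that function):
// Hedged sketch: locate the i-th candidate's mode info.
const MODE_INFO *cand = here + mv_ref_search[i][0] +
                        mv_ref_search[i][1] * xd->mode_info_stride;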
static const int idx_n_column_to_subblock[4][2] = {
{1, 2},
{1, 3},
{3, 2},
{3, 3}
};
// clamp_mv_ref
#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER,
xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) {
mv->as_mv.col = clamp(mv->as_mv.col, xd->mb_to_left_edge - MV_BORDER,
xd->mb_to_right_edge + MV_BORDER);
mv->as_mv.row = clamp(mv->as_mv.row, xd->mb_to_top_edge - MV_BORDER,
xd->mb_to_bottom_edge + MV_BORDER);
}
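MV_BORDER is 16 << 3 = 128 in 1/8-pel units, i.e. a candidate may point at most 16 whole pels beyond the block edges before clamping. A small worked example (edge values invented):
// Hedged example: with mb_to_top_edge == -256 (1/8 pel), a candidate
// row of -500 is clamped to -256 - 128 = -384.
MV mv = { -500, 0 };
mv.row = clamp(mv.row, -256 - MV_BORDER, 0 + MV_BORDER);  // -> -384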
// This function returns either the appropriate sub-block mv or the
// block's mv, depending on whether block_size < 8x8 and
// check_sub_blocks is set.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate,
int check_sub_blocks, int which_mv,
int search_col, int block_idx) {
return (check_sub_blocks && candidate->mbmi.sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
: candidate->mbmi.mv[which_mv]);
// Gets a candidate reference motion vector from the given mode info
// structure if one exists that matches the given reference frame.
static int get_matching_candidate(const MODE_INFO *candidate_mi,
MV_REFERENCE_FRAME ref_frame,
int_mv *c_mv, int block_idx) {
if (ref_frame == candidate_mi->mbmi.ref_frame[0]) {
if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8)
c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[0].as_int;
else
c_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
} else if (ref_frame == candidate_mi->mbmi.ref_frame[1]) {
if (block_idx >= 0 && candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8)
c_mv->as_int = candidate_mi->bmi[block_idx].as_mv[1].as_int;
else
c_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
} else {
return 0;
}
return 1;
}
// Gets candidate reference motion vector(s) from the given mode info
// structure if they exist and do NOT match the given reference frame.
static void get_non_matching_candidates(const MODE_INFO *candidate_mi,
MV_REFERENCE_FRAME ref_frame,
MV_REFERENCE_FRAME *c_ref_frame,
int_mv *c_mv,
MV_REFERENCE_FRAME *c2_ref_frame,
int_mv *c2_mv) {
c_mv->as_int = 0;
c2_mv->as_int = 0;
*c_ref_frame = INTRA_FRAME;
*c2_ref_frame = INTRA_FRAME;
// If the first candidate is not valid, neither will the second be.
if (candidate_mi->mbmi.ref_frame[0] > INTRA_FRAME) {
// First candidate
if (candidate_mi->mbmi.ref_frame[0] != ref_frame) {
*c_ref_frame = candidate_mi->mbmi.ref_frame[0];
c_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
}
// Second candidate
if ((candidate_mi->mbmi.ref_frame[1] > INTRA_FRAME) &&
(candidate_mi->mbmi.ref_frame[1] != ref_frame) &&
(candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) {
*c2_ref_frame = candidate_mi->mbmi.ref_frame[1];
c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
}
}
}
// Performs mv sign inversion if indicated by the reference frame combination.
static INLINE int_mv scale_mv(const MODE_INFO *candidate, const int which_mv,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
int_mv return_mv = candidate->mbmi.mv[which_mv];
static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame,
MV_REFERENCE_FRAME candidate_ref_frame,
int_mv *candidate_mv, int *ref_sign_bias) {
// Sign inversion where appropriate.
if (ref_sign_bias[candidate->mbmi.ref_frame[which_mv]] !=
ref_sign_bias[this_ref_frame]) {
return_mv.as_mv.row *= -1;
return_mv.as_mv.col *= -1;
if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) {
candidate_mv->as_mv.row = -candidate_mv->as_mv.row;
candidate_mv->as_mv.col = -candidate_mv->as_mv.col;
}
return return_mv;
}
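A worked example of the sign inversion (reference frame variables cand_ref and this_ref are hypothetical): if the two reference frames sit on opposite sides of the current frame, their sign biases differ and the candidate is negated so it predicts in a consistent direction.
// Hedged example: negating a candidate mv when sign biases differ.
int_mv cand;
cand.as_mv.row = 4;
cand.as_mv.col = -6;
if (ref_sign_bias[cand_ref] != ref_sign_bias[this_ref]) {
  cand.as_mv.row = -cand.as_mv.row;  // -> -4
  cand.as_mv.col = -cand.as_mv.col;  // ->  6
}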
// This macro is used to add a motion vector to the mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
// skip all additional processing and jump to done!
#define ADD_MV_REF_LIST(MV) \
if (refmv_count) { \
if ((MV).as_int != mv_ref_list[0].as_int) { \
mv_ref_list[refmv_count] = (MV); \
goto Done; \
} \
} else { \
mv_ref_list[refmv_count++] = (MV); \
// Add a candidate mv.
// Discard if it has already been seen.
static void add_candidate_mv(int_mv *mv_list, int *mv_scores,
int *candidate_count, int_mv candidate_mv,
int weight) {
if (*candidate_count == 0) {
mv_list[0].as_int = candidate_mv.as_int;
mv_scores[0] = weight;
*candidate_count += 1;
} else if ((*candidate_count == 1) &&
(candidate_mv.as_int != mv_list[0].as_int)) {
mv_list[1].as_int = candidate_mv.as_int;
mv_scores[1] = weight;
*candidate_count += 1;
}
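A usage sketch of the deduplication (mv_a and mv_b are hypothetical, distinct motion vectors): at most MAX_MV_REF_CANDIDATES = 2 distinct candidates are kept, and a repeat of the first is silently dropped.
// Hedged sketch of add_candidate_mv() behaviour.
int_mv list[MAX_MV_REF_CANDIDATES];
int scores[MAX_MV_REF_CANDIDATES];
int count = 0;
add_candidate_mv(list, scores, &count, mv_a, 16);  // count == 1
add_candidate_mv(list, scores, &count, mv_a, 16);  // duplicate, ignored
add_candidate_mv(list, scores, &count, mv_b, 1);   // count == 2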
// If either reference frame is different, not INTRA, and they
// are different from each other, scale and add the mv to our list.
#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \
if ((CANDIDATE)->mbmi.ref_frame[0] != ref_frame) { \
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \
} \
if ((CANDIDATE)->mbmi.ref_frame[1] != ref_frame && \
(CANDIDATE)->mbmi.ref_frame[1] > INTRA_FRAME && \
(CANDIDATE)->mbmi.mv[1].as_int != (CANDIDATE)->mbmi.mv[0].as_int) { \
ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \
}
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(int mi_col, int mi_row, int cur_tile_mi_col_start,
const int mv_ref[2]) {
// Check that the candidate is within the border. We only need to check
// the left side because all the positive right side ones are for blocks that
// are large enough to support the + value they have within their border.
return !(mi_row + mv_ref[1] < 0 ||
mi_col + mv_ref[0] < cur_tile_mi_col_start);
}
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
//
void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
const MODE_INFO *lf_here,
const MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, const int *ref_sign_bias,
const int block_idx,
const int mi_row, const int mi_col) {
int idx;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int *ref_sign_bias,
int block_idx) {
int i;
MODE_INFO *candidate_mi;
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
int_mv c_refmv;
int_mv c2_refmv;
MV_REFERENCE_FRAME c_ref_frame;
MV_REFERENCE_FRAME c2_ref_frame;
int candidate_scores[MAX_MV_REF_CANDIDATES];
int refmv_count = 0;
const int (*mv_ref_search)[2] = mv_ref_blocks[mbmi->sb_type];
const MODE_INFO *candidate;
const int check_sub_blocks = block_idx >= 0;
int different_ref_found = 0;
int context_counter = 0;
int split_count = 0;
int (*mv_ref_search)[2];
const int mi_col = get_mi_col(xd);
const int mi_row = get_mi_row(xd);
int intra_count = 0;
int zero_count = 0;
int newmv_count = 0;
int x_idx = 0, y_idx = 0;
// Blank the reference vector list
vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
// Blank the reference vector lists and other local structures.
vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
// The nearest 2 blocks are treated differently:
// if the size < 8x8 we get the mv from the bmi substructure,
// and we also need to keep a mode count.
for (idx = 0; idx < 2; ++idx) {
const int *mv_ref = mv_ref_search[idx];
mv_ref_search = mv_ref_blocks[mbmi->sb_type];
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
x_idx = block_idx & 1;
y_idx = block_idx >> 1;
}
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
continue;
// We first scan for candidate vectors that match the current reference frame
// Look at the nearest neighbours
for (i = 0; i < 2; ++i) {
const int mi_search_col = mi_col + mv_ref_search[i][0];
const int mi_search_row = mi_row + mv_ref_search[i][1];
if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
(mi_search_col < cm->cur_tile_mi_col_end) &&
(mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
int b;
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
candidate_mi = here + mv_ref_search[i][0] +
(mv_ref_search[i][1] * xd->mode_info_stride);
// Keep counts for entropy encoding.
context_counter += mode_2_counter[candidate->mbmi.mode];
if (block_idx >= 0) {
if (mv_ref_search[i][0])
b = 1 + y_idx * 2;
else
b = 2 + x_idx;
} else {
b = -1;
}
if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, b)) {
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c_refmv, 16);
}
split_count += (candidate_mi->mbmi.sb_type < BLOCK_SIZE_SB8X8 &&
candidate_mi->mbmi.ref_frame[0] != INTRA_FRAME);
// Check if the candidate comes from the same reference frame.
if (candidate->mbmi.ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 0,
mv_ref[0], block_idx));
different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
} else {
different_ref_found = 1;
if (candidate->mbmi.ref_frame[1] == ref_frame) {
// Add second motion vector if it has the same ref_frame.
ADD_MV_REF_LIST(get_sub_block_mv(candidate, check_sub_blocks, 1,
mv_ref[0], block_idx));
// Count the number of neighbours coded intra, zeromv and newmv
intra_count += (candidate_mi->mbmi.mode < NEARESTMV);
zero_count += (candidate_mi->mbmi.mode == ZEROMV);
newmv_count += (candidate_mi->mbmi.mode >= NEWMV);
}
}
// More distant neighbours
for (i = 2; (i < MVREF_NEIGHBOURS) &&
(refmv_count < MAX_MV_REF_CANDIDATES); ++i) {
const int mi_search_col = mi_col + mv_ref_search[i][0];
const int mi_search_row = mi_row + mv_ref_search[i][1];
if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
(mi_search_col < cm->cur_tile_mi_col_end) &&
(mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
candidate_mi = here + mv_ref_search[i][0] +
(mv_ref_search[i][1] * xd->mode_info_stride);
if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) {
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c_refmv, 16);
}
}
}
// Check the rest of the neighbors in much the same way
// as before except we don't need to keep track of sub blocks or
// mode counts.
for (; idx < MVREF_NEIGHBOURS; ++idx) {
const int *mv_ref = mv_ref_search[idx];
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
continue;
// Look in the last frame if it exists
if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) {
candidate_mi = lf_here;
if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv, -1)) {
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c_refmv, 16);
}
}
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
// If we have not found enough candidates consider ones where the
// reference frame does not match. Break out when we have
// MAX_MV_REF_CANDIDATES candidates.
// Look first at spatial neighbours
for (i = 0; (i < MVREF_NEIGHBOURS) &&
(refmv_count < MAX_MV_REF_CANDIDATES); ++i) {
const int mi_search_col = mi_col + mv_ref_search[i][0];
const int mi_search_row = mi_row + mv_ref_search[i][1];
if ((mi_search_col >= cm->cur_tile_mi_col_start) &&
(mi_search_col < cm->cur_tile_mi_col_end) &&
(mi_search_row >= 0) && (mi_search_row < cm->mi_rows)) {
candidate_mi = here + mv_ref_search[i][0] +
(mv_ref_search[i][1] * xd->mode_info_stride);
if (candidate->mbmi.ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(candidate->mbmi.mv[0]);
different_ref_found = candidate->mbmi.ref_frame[1] != ref_frame;
} else {
different_ref_found = 1;
if (candidate->mbmi.ref_frame[1] == ref_frame) {
ADD_MV_REF_LIST(candidate->mbmi.mv[1]);
get_non_matching_candidates(candidate_mi, ref_frame,
&c_ref_frame, &c_refmv,
&c2_ref_frame, &c2_refmv);
if (c_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c_refmv, 1);
}
if (c2_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c2_refmv, 1);
}
}
}
// Check the last frame's mode and mv info.
if (lf_here != NULL) {
if (lf_here->mbmi.ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(lf_here->mbmi.mv[0]);
} else if (lf_here->mbmi.ref_frame[1] == ref_frame) {
ADD_MV_REF_LIST(lf_here->mbmi.mv[1]);
// Look at the last frame if it exists
if (lf_here && (refmv_count < MAX_MV_REF_CANDIDATES)) {
candidate_mi = lf_here;
get_non_matching_candidates(candidate_mi, ref_frame,
&c_ref_frame, &c_refmv,
&c2_ref_frame, &c2_refmv);
if (c_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c_refmv, 1);
}
if (c2_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
add_candidate_mv(mv_ref_list, candidate_scores,
&refmv_count, c2_refmv, 1);
}
}
// Since we couldn't find 2 mvs from the same reference frame,
// go back through the neighbors and find motion vectors from
// different reference frames.
if (different_ref_found) {
for (idx = 0; idx < MVREF_NEIGHBOURS; ++idx) {
const int *mv_ref = mv_ref_search[idx];
if (!is_inside(mi_col, mi_row, cm->cur_tile_mi_col_start, mv_ref))
continue;
candidate = here + mv_ref[0] + mv_ref[1] * xd->mode_info_stride;
// If the candidate is INTRA we don't want to consider its mv.
if (!is_inter_block(&candidate->mbmi))
continue;
IF_DIFF_REF_FRAME_ADD_MV(candidate);
if (!intra_count) {
if (!newmv_count) {
// 0 = both zero mv
// 1 = one zero mv + one predicted mv
// 2 = two predicted mvs
mbmi->mb_mode_context[ref_frame] = 2 - zero_count;
} else {
// 3 = one predicted/zero and one new mv
// 4 = two new mvs
mbmi->mb_mode_context[ref_frame] = 2 + newmv_count;
}
} else {
// 5 = one intra neighbour + x
// 6 = two intra neighbours
mbmi->mb_mode_context[ref_frame] = 4 + intra_count;
}
// Since we still don't have a candidate, we'll try the last frame.
if (lf_here != NULL && is_inter_block(&lf_here->mbmi)) {
IF_DIFF_REF_FRAME_ADD_MV(lf_here);
}
Done:
mbmi->mb_mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
clamp_mv_ref(&mv_ref_list[idx].as_mv, xd);
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
clamp_mv_ref(xd, &mv_ref_list[i]);
}
}
#undef ADD_MV_REF_LIST
#undef IF_DIFF_REF_FRAME_ADD_MV


@@ -17,13 +17,11 @@
void vp9_find_mv_refs_idx(VP9_COMMON *cm,
MACROBLOCKD *xd,
MODE_INFO *here,
const MODE_INFO *lf_here,
const MV_REFERENCE_FRAME ref_frame,
MODE_INFO *lf_here,
MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
const int *ref_sign_bias,
const int block_idx,
const int mi_row,
const int mi_col);
int *ref_sign_bias,
int block_idx);
static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
MACROBLOCKD *xd,
@@ -31,10 +29,9 @@ static INLINE void vp9_find_mv_refs(VP9_COMMON *cm,
MODE_INFO *lf_here,
MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int *ref_sign_bias,
int mi_row, int mi_col) {
int *ref_sign_bias) {
vp9_find_mv_refs_idx(cm, xd, here, lf_here, ref_frame,
mv_ref_list, ref_sign_bias, -1, mi_row, mi_col);
mv_ref_list, ref_sign_bias, -1);
}
#endif // VP9_COMMON_VP9_MVREF_COMMON_H_

Some files were not shown because too many files have changed in this diff.